From 5eac4489551117652f524485c3ea5c35e065672c Mon Sep 17 00:00:00 2001 From: "miloyip@gmail.com" Date: Tue, 29 Nov 2011 18:39:03 +0000 Subject: [PATCH] Added EncodedInputStream, AutoUTFInputStream, AutoUTF git-svn-id: https://rapidjson.googlecode.com/svn/trunk@40 c5894555-1306-4e8d-425f-1f6f381ee07c --- bin/encodings/utf16bebom.json | Bin 0 -> 370 bytes bin/encodings/utf16lebom.json | Bin 0 -> 370 bytes bin/encodings/utf8.json | Bin 0 -> 322 bytes bin/encodings/utf8bom.json | Bin 0 -> 325 bytes example/pretty/pretty.cpp | 2 +- include/rapidjson/encodedstream.h | 98 +++++++ include/rapidjson/encodings.h | 427 ++++++++++++++++++++++++++++ include/rapidjson/rapidjson.h | 451 +----------------------------- include/rapidjson/reader.h | 1 + test/perftest/perftest.h | 2 +- test/unittest/filestreamtest.cpp | 34 +-- 11 files changed, 549 insertions(+), 466 deletions(-) create mode 100644 bin/encodings/utf16bebom.json create mode 100644 bin/encodings/utf16lebom.json create mode 100644 bin/encodings/utf8.json create mode 100644 bin/encodings/utf8bom.json create mode 100644 include/rapidjson/encodedstream.h create mode 100644 include/rapidjson/encodings.h diff --git a/bin/encodings/utf16bebom.json b/bin/encodings/utf16bebom.json new file mode 100644 index 0000000000000000000000000000000000000000..0a23ae205cb3354e828cd3c0f3fce322b9618815 GIT binary patch literal 370 zcmezOpP`z8mw}6clR=3gl_3vES^=>qg91Y`Ln4r`02EDRC;`&x3^_nnF%T+%Wm152 zCRiqgAs?u!7^qqu%+FvbWher(a)IJ{K=X9qc2qHBFz7OPfK4cY*p?*N@Ha%xkGptx zNpk~_A8)|+zSXm?7%<_~SBzbsze?X~|6nwISwNe>eyTiZkZ-WY;HAM%gGn>)8&n!} z8uS>{8AKW|8N>ov`9S$e0ox5y3<80o(FT*z%*h7&5p2$e8JngsC>&SVxn^e#gTgM$ v<1P#eha`X^9Y?nun*#K$!rC9(XKX&gpm5aXD$B_OK-%l53s6Q090s)jyQo_l literal 0 HcmV?d00001 diff --git a/bin/encodings/utf16lebom.json b/bin/encodings/utf16lebom.json new file mode 100644 index 0000000000000000000000000000000000000000..eaba00132cdfbc222ddce1333d0268821d68db0d GIT binary patch literal 370 zcmezWubP3Efs28YL5U%iArDAe0kJ280z)!GB9N~D6is9(0n+IVIY3r15GsIWQh;Grs9GJ&&tNEJC<3!`f#P~V^K{^LR54^Q=rVYKO(=odCYbcMK`w;bZ+CHX z2~PvB-}ZpjeOG2N8Q{`aj6a&*cuJz%K(SUY3;kVs+`hz1fSCKLrX z^A}sZ*FE^dKpidJS8K5%@%3I7dff*%Ox1dzjO=Ho5%I=N@ z+j>!3nh|@I!U{7i!alr4=8Pw2Q4$}eCvU0mWOhA?k0)Y~y>_SF0B!M?G5=wbQHz*l zeE!31Fsi2=j=Ybeht4yKAqs!`aEF}4A&NTkcjQ+@AaBlXf7Ta19YEef-k29|Q2efL zR%um=P}TxaxA*d~C)*>CPeavoL3tsl$Wxau>N+UG=OC6*C{HQ4aK8dqb!<{KG8Ak1 G+`%^sldGBl literal 0 HcmV?d00001 diff --git a/bin/encodings/utf8bom.json b/bin/encodings/utf8bom.json new file mode 100644 index 0000000000000000000000000000000000000000..b9839fe2fa7d1b34749a999f8228f2a508a4bf81 GIT binary patch literal 325 zcmaKmOHRU27=_1@u!ldLI8heh3UnDa(ul8;*gDae^xk$*5g*iQ>Vs+`iUtxUCKLf( z%wKHrUgH-0VW5tjos)CElk?So+s)@@G~Lh^v?W+(2I$O!l?`T^z>FfST2L(OrZHkk_ZSGwBPT4j^wJuT2YA zD1KMh%d{#*C@TS|>s$HImiGgYk3Cg)L3tsl$W!Of>M|(6M>m#HD32*Pb3X%DHEmKA KG8Ak1+}<}B*RBHq literal 0 HcmV?d00001 diff --git a/example/pretty/pretty.cpp b/example/pretty/pretty.cpp index dae41b4..a48f24f 100644 --- a/example/pretty/pretty.cpp +++ b/example/pretty/pretty.cpp @@ -19,7 +19,7 @@ int main(int argc, char* argv[]) { PrettyWriter writer(os); // JSON reader parse from the input stream and let writer generate the output. - if (!reader.Parse<0>(is, writer)) { + if (!reader.Parse(is, writer)) { fprintf(stderr, "\nError(%u): %s\n", (unsigned)reader.GetErrorOffset(), reader.GetParseError()); return 1; } diff --git a/include/rapidjson/encodedstream.h b/include/rapidjson/encodedstream.h new file mode 100644 index 0000000..9ad960b --- /dev/null +++ b/include/rapidjson/encodedstream.h @@ -0,0 +1,98 @@ +#ifndef RAPIDJSON_ENCODEDSTREAM_H_ +#define RAPIDJSON_ENCODEDSTREAM_H_ + +#include "rapidjson.h" + +namespace rapidjson { + +//! Adapts an input byte stream with an specified encoding. +template +class EncodedInputStream { +public: + typedef typename Encoding::Ch Ch; + + EncodedInputStream(InputStream& is) : is_(is) { + Encoding::TakeBOM(is_); + Read(); + } + + Ch Peek() const { return current_; } + Ch Take() { Ch c = current_; Read(); return c; } + size_t Tell() const { is_.Tell(); } + + // Not implemented + void Put(Ch c) { RAPIDJSON_ASSERT(false); } + void Flush() { RAPIDJSON_ASSERT(false); } + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + void Read() { + current_ = Encoding::Take(is_); + } + InputStream& is_; + Ch current_; +}; + +template +class AutoUTFInputStream { +public: + typedef CharType Ch; + + AutoUTFInputStream(InputStream& is, UTFType type = kUTF8) : is_(is), type_(type) { + TakeBOM(is); + Read(); + } + + Ch Peek() const { return current_; } + Ch Take() { Ch c = current_; Read(); return c; } + size_t Tell() const { is_.Tell(); } + + // Not implemented + void Put(Ch c) { RAPIDJSON_ASSERT(false); } + void Flush() { RAPIDJSON_ASSERT(false); } + Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; } + size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; } + +private: + friend struct AutoUTF; + + void TakeBOM(InputStream& is) { +#define TAKE() is.Take() +#define PEEK(x) if ((unsigned char)is.Peek() != x) break + switch ((unsigned char)is.Peek()) { + case 0x00: TAKE(); PEEK(0x00); TAKE(); PEEK(0xFE); TAKE(); PEEK(0xFF); type_ = kUTF32BE; return; + case 0xEF: TAKE(); PEEK(0xBB); TAKE(); PEEK(0xBF); TAKE(); type_ = kUTF8; return; + case 0xFE: TAKE(); PEEK(0xFF); TAKE(); type_ = kUTF16BE; return; + case 0xFF: TAKE(); PEEK(0xFE); TAKE(); + if (is.Peek() == 0x00) { + TAKE(); PEEK(0x00); TAKE(); type_ = kUTF32LE; return; + } + type_ = kUTF16LE; + return; + } +#undef TAKE +#undef PEEK + } + + void Read() { + typedef Ch (*TakeFunc)(InputStream& is); + static const TakeFunc f[] = { + UTF8::Take, + UTF16LE::Take, + UTF16BE::Take, + UTF32LE::Take, + UTF32BE::Take, + }; + + current_ = f[type_](is_); + } + + InputStream& is_; + UTFType type_; + Ch current_; +}; + +} // namespace rapidjson + +#endif // RAPIDJSON_FILESTREAM_H_ diff --git a/include/rapidjson/encodings.h b/include/rapidjson/encodings.h new file mode 100644 index 0000000..19ec77c --- /dev/null +++ b/include/rapidjson/encodings.h @@ -0,0 +1,427 @@ +#ifndef RAPIDJSON_ENCODINGS_H_ +#define RAPIDJSON_ENCODINGS_H_ + +#include "rapidjson.h" + +namespace rapidjson { + +/////////////////////////////////////////////////////////////////////////////// +// Encoding + +/*! \class rapidjson::Encoding + \brief Concept for encoding of Unicode characters. + +\code +concept Encoding { + typename Ch; //! Type of character. + + //! \brief Encode a Unicode codepoint to a stream. + //! \param os Output stream. + //! \param codepoint An unicode codepoint, ranging from 0x0 to 0x10FFFF inclusively. + template + static void Encode(OutputStream& os, unsigned codepoint) { + + //! \brief Validate one Unicode codepoint from an encoded stream. + //! \param is Input stream to obtain codepoint. + //! \param os Output for copying one codepoint. + //! \return true if it is valid. + //! \note This function just validating and copying the codepoint without actually decode it. + template + RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { +}; +\endcode +*/ + +/////////////////////////////////////////////////////////////////////////////// +// UTF8 + +//! UTF-8 encoding. +/*! http://en.wikipedia.org/wiki/UTF-8 + http://tools.ietf.org/html/rfc3629 + \tparam CharType Type for storing 8-bit UTF-8 data. Default is char. + \implements Encoding +*/ +template +struct UTF8 { + typedef CharType Ch; + + template + static void Encode(OutputStream& os, unsigned codepoint) { + if (codepoint <= 0x7F) + os.Put(codepoint & 0xFF); + else if (codepoint <= 0x7FF) { + os.Put(0xC0 | ((codepoint >> 6) & 0xFF)); + os.Put(0x80 | ((codepoint & 0x3F))); + } + else if (codepoint <= 0xFFFF) { + os.Put(0xE0 | ((codepoint >> 12) & 0xFF)); + os.Put(0x80 | ((codepoint >> 6) & 0x3F)); + os.Put(0x80 | (codepoint & 0x3F)); + } + else { + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + os.Put(0xF0 | ((codepoint >> 18) & 0xFF)); + os.Put(0x80 | ((codepoint >> 12) & 0x3F)); + os.Put(0x80 | ((codepoint >> 6) & 0x3F)); + os.Put(0x80 | (codepoint & 0x3F)); + } + } + + template + RAPIDJSON_FORCEINLINE static bool Decode(InputStream& is, unsigned* codepoint) { +#define COPY() c = is.Take(); *codepoint = (*codepoint << 6) | ((unsigned char)c & 0x3Fu) +#define TRANS(mask) result &= ((GetType((unsigned char)c) & mask) != 0) +#define TAIL() COPY(); TRANS(0x70) + Ch c = is.Take(); + if (!(c & 0x80)) { + *codepoint = (unsigned char)c; + return true; + } + + unsigned char type = GetType((unsigned char)c); + *codepoint = (0xFF >> type) & (unsigned char)c; + bool result = true; + switch (type) { + case 2: TAIL(); return result; + case 3: TAIL(); TAIL(); return result; + case 4: COPY(); TRANS(0x50); TAIL(); return result; + case 5: COPY(); TRANS(0x10); COPY(); TAIL(); return result; + case 6: TAIL(); TAIL(); TAIL(); return result; + case 10: COPY(); TRANS(0x20); TAIL(); return result; + case 11: COPY(); TRANS(0x60); TAIL(); return result; + default: return false; + } +#undef COPY +#undef TRANS +#undef TAIL + } + + template + RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { +#define COPY() os.Put(c = is.Take()) +#define TRANS(mask) result &= ((GetType(c) & mask) != 0) +#define TAIL() COPY(); TRANS(0x70) + Ch c; + COPY(); + if (!(c & 0x80)) + return true; + + bool result = true; + switch (GetType(c)) { + case 2: TAIL(); return result; + case 3: TAIL(); TAIL(); return result; + case 4: COPY(); TRANS(0x50); TAIL(); return result; + case 5: COPY(); TRANS(0x10); COPY(); TAIL(); return result; + case 6: TAIL(); TAIL(); TAIL(); return result; + case 10: COPY(); TRANS(0x20); TAIL(); return result; + case 11: COPY(); TRANS(0x60); TAIL(); return result; + default: return false; + } +#undef COPY +#undef TRANS +#undef TAIL + } + + RAPIDJSON_FORCEINLINE static unsigned char GetType(unsigned char c) { + // Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ + // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types. + static const unsigned char type[] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10, + 0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, + 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20, + 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20, + 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, + }; + return type[c]; + } + + template + static void TakeBOM(InputStream& is) { + if ((unsigned char)is.Peek() != 0xEF) return; + is.Take(); + if ((unsigned char)is.Peek() != 0xBB) return; + is.Take(); + if ((unsigned char)is.Peek() != 0xBF) return; + is.Take(); + } + + template + RAPIDJSON_FORCEINLINE static Ch Take(InputStream& is) { + return is.Take(); + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// UTF16 + +//! UTF-16 encoding. +/*! http://en.wikipedia.org/wiki/UTF-16 + http://tools.ietf.org/html/rfc2781 + \tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead. + \implements Encoding +*/ +template +struct UTF16 { + typedef CharType Ch; + + template + static void Encode(OutputStream& os, unsigned codepoint) { + if (codepoint <= 0xFFFF) { + RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair + os.Put(codepoint); + } + else { + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + unsigned v = codepoint - 0x10000; + os.Put((v >> 10) | 0xD800); + os.Put((v & 0x3FF) | 0xDC00); + } + } + + template + RAPIDJSON_FORCEINLINE static bool Decode(InputStream& is, unsigned* codepoint) { + Ch c = is.Take(); + if (c < 0xD800 || c > 0xDFFF) { + *codepoint = c; + return true; + } + else if (c < 0xDBFF) { + *codepoint = (c & 0x3FF) << 10; + c = is.Take(); + *codepoint |= (c & 0x3FF); + *codepoint += 0x10000; + return c >= 0xDC00 && c <= 0xDFFF; + } + return false; + } + + template + RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { + Ch c; + os.Put(c = is.Take()); + if (c < 0xD800 || c > 0xDFFF) + return true; + else if (c < 0xDBFF) { + os.Put(c = is.Take()); + return c >= 0xDC00 && c <= 0xDFFF; + } + return false; + } +}; + +template +struct UTF16LE : UTF16 { + template + static void TakeBOM(InputStream& is) { + if ((unsigned char)is.Peek() != 0xFF) return; + is.Take(); + if ((unsigned char)is.Peek() != 0xFE) return; + is.Take(); + } + + template + RAPIDJSON_FORCEINLINE static CharType Take(InputStream& is) { + CharType c = (unsigned char)is.Take(); + c |= (unsigned char)is.Take() << 8; + return c; + } +}; + +template +struct UTF16BE : UTF16 { + template + static void TakeBOM(InputStream& is) { + if ((unsigned char)is.Peek() != 0xFE) return; + is.Take(); + if ((unsigned char)is.Peek() != 0xFF) return; + is.Take(); + } + + template + RAPIDJSON_FORCEINLINE static CharType Take(InputStream& is) { + CharType c = (unsigned char)is.Take() << 8; + c |= (unsigned char)is.Take(); + return c; + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// UTF32 + +//! UTF-32 encoding. +/*! http://en.wikipedia.org/wiki/UTF-32 + \tparam Ch Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead. + \implements Encoding +*/ +template +struct UTF32 { + typedef CharType Ch; + + template + static void Encode(OutputStream& os, unsigned codepoint) { + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + os.Put(codepoint); + } + + template + RAPIDJSON_FORCEINLINE static bool Decode(InputStream& is, unsigned* codepoint) { + Ch c = is.Take(); + *codepoint = c; + return c <= 0x10FFFF; + } + + template + RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { + Ch c; + os.Put(c = is.Take()); + return c <= 0x10FFFF; + } +}; + +template +struct UTF32LE : UTF32 { + template + static void TakeBOM(InputStream& is) { + if ((unsigned char)is.Peek() != 0xFF) return; + is.Take(); + if ((unsigned char)is.Peek() != 0xFE) return; + is.Take(); + if ((unsigned char)is.Peek() != 0x00) return; + is.Take(); + if ((unsigned char)is.Peek() != 0x00) return; + is.Take(); + } + + template + RAPIDJSON_FORCEINLINE static CharType Take(InputStream& is) { + CharType c = (unsigned char)is.Take(); + c |= (unsigned char)is.Take() << 8; + c |= (unsigned char)is.Take() << 16; + c |= (unsigned char)is.Take() << 24; + return c; + } +}; + +template +struct UTF32BE : UTF32 { + template + static void TakeBOM(InputStream& is) { + if ((unsigned char)is.Peek() != 0x00) return; + is.Take(); + if ((unsigned char)is.Peek() != 0x00) return; + is.Take(); + if ((unsigned char)is.Peek() != 0xFE) return; + is.Take(); + if ((unsigned char)is.Peek() != 0xFF) return; + is.Take(); + } + + template + RAPIDJSON_FORCEINLINE static CharType Take(InputStream& is) { + CharType c = (unsigned char)is.Take() << 24; + c |= (unsigned char)is.Take() << 16; + c |= (unsigned char)is.Take() << 8; + c |= (unsigned char)is.Take(); + return c; + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// AutoUTF + +enum UTFType { + kUTF8 = 0, + kUTF16LE = 1, + kUTF16BE = 2, + kUTF32LE = 3, + kUTF32BE = 4, +}; + +// Dynamically select encoding according to BOM or user setting. +template +struct AutoUTF { + typedef CharType Ch; + + template + RAPIDJSON_FORCEINLINE static void Encode(OutputStream& os, unsigned codepoint) { + typedef void (*EncodeFunc)(OutputStream&, unsigned); + static const EncodeFunc f[] = { + UTF8::Encode, + UTF16::Encode, + UTF16::Encode, + UTF32::Encode, + UTF32::Encode, + }; + (*f[os.type_])(os, codepoint); + } + + template + RAPIDJSON_FORCEINLINE static bool Decode(InputStream& is, unsigned* codepoint) { + typedef bool (*DecodeFunc)(InputStream&, unsigned*); + static const DecodeFunc f[] = { + UTF8::Decode, + UTF16::Decode, + UTF16::Decode, + UTF32::Decode, + UTF32::Decode, + }; + return (*f[is.type_])(is, codepoint); + } + + template + RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { + typedef bool (*ValidateFunc)(InputStream&, unsigned*); + static const ValidateFunc f[] = { + UTF8::Decode, + UTF16::Decode, + UTF16::Decode, + UTF32::Decode, + UTF32::Decode, + }; + return (*f[is.type_])(is, os); + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// Transcoder + +template +struct Transcoder { + template + static bool Transcode(InputStream& is, OutputStream& os) { + unsigned codepoint; + if (!SourceEncoding::Decode(is, &codepoint)) + return false; + TargetEncoding::Encode(os, codepoint); + return true; + } + + template + static bool Validate(InputStream& is, OutputStream& os) { + return Transcode(is, os); + } +}; + +//! Specialization of Transcoder with same source and target encoding. +template +struct Transcoder { + template + static bool Transcode(InputStream& is, OutputStream& os) { + os.Put(is.Take()); + return true; + } + + template + static bool Validate(InputStream& is, OutputStream& os) { + return Encoding::Validate(is, os); + } +}; + +} // namespace rapidjson + +#endif // RAPIDJSON_ENCODINGS_H_ diff --git a/include/rapidjson/rapidjson.h b/include/rapidjson/rapidjson.h index c646dce..5ea713a 100644 --- a/include/rapidjson/rapidjson.h +++ b/include/rapidjson/rapidjson.h @@ -82,456 +82,11 @@ typedef unsigned SizeType; #define RAPIDJSON_ASSERT(x) assert(x) #endif // RAPIDJSON_ASSERT +#include "allocators.h" +#include "encodings.h" + namespace rapidjson { -/////////////////////////////////////////////////////////////////////////////// -// Allocator - -/*! \class rapidjson::Allocator - \brief Concept for allocating, resizing and freeing memory block. - - Note that Malloc() and Realloc() are non-static but Free() is static. - - So if an allocator need to support Free(), it needs to put its pointer in - the header of memory block. - -\code -concept Allocator { - static const bool kNeedFree; //!< Whether this allocator needs to call Free(). - - // Allocate a memory block. - // \param size of the memory block in bytes. - // \returns pointer to the memory block. - void* Malloc(size_t size); - - // Resize a memory block. - // \param originalPtr The pointer to current memory block. Null pointer is permitted. - // \param originalSize The current size in bytes. (Design issue: since some allocator may not book-keep this, explicitly pass to it can save memory.) - // \param newSize the new size in bytes. - void* Realloc(void* originalPtr, size_t originalSize, size_t newSize); - - // Free a memory block. - // \param pointer to the memory block. Null pointer is permitted. - static void Free(void *ptr); -}; -\endcode -*/ - -/////////////////////////////////////////////////////////////////////////////// -// CrtAllocator - -//! C-runtime library allocator. -/*! This class is just wrapper for standard C library memory routines. - \implements Allocator -*/ -class CrtAllocator { -public: - static const bool kNeedFree = true; - void* Malloc(size_t size) { return malloc(size); } - void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) { return realloc(originalPtr, newSize); } - static void Free(void *ptr) { free(ptr); } -}; - -/////////////////////////////////////////////////////////////////////////////// -// MemoryPoolAllocator - -//! Default memory allocator used by the parser and DOM. -/*! This allocator allocate memory blocks from pre-allocated memory chunks. - - It does not free memory blocks. And Realloc() only allocate new memory. - - The memory chunks are allocated by BaseAllocator, which is CrtAllocator by default. - - User may also supply a buffer as the first chunk. - - If the user-buffer is full then additional chunks are allocated by BaseAllocator. - - The user-buffer is not deallocated by this allocator. - - \tparam BaseAllocator the allocator type for allocating memory chunks. Default is CrtAllocator. - \implements Allocator -*/ -template -class MemoryPoolAllocator { -public: - static const bool kNeedFree = false; //!< Tell users that no need to call Free() with this allocator. (concept Allocator) - - //! Constructor with chunkSize. - /*! \param chunkSize The size of memory chunk. The default is kDefaultChunkSize. - \param baseAllocator The allocator for allocating memory chunks. - */ - MemoryPoolAllocator(size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) : - chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(0), baseAllocator_(baseAllocator), ownBaseAllocator_(0) - { - if (!baseAllocator_) - ownBaseAllocator_ = baseAllocator_ = new BaseAllocator(); - AddChunk(chunk_capacity_); - } - - //! Constructor with user-supplied buffer. - /*! The user buffer will be used firstly. When it is full, memory pool allocates new chunk with chunk size. - - The user buffer will not be deallocated when this allocator is destructed. - - \param buffer User supplied buffer. - \param size Size of the buffer in bytes. It must at least larger than sizeof(ChunkHeader). - \param chunkSize The size of memory chunk. The default is kDefaultChunkSize. - \param baseAllocator The allocator for allocating memory chunks. - */ - MemoryPoolAllocator(char *buffer, size_t size, size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) : - chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(buffer), baseAllocator_(baseAllocator), ownBaseAllocator_(0) - { - RAPIDJSON_ASSERT(buffer != 0); - RAPIDJSON_ASSERT(size > sizeof(ChunkHeader)); - chunkHead_ = (ChunkHeader*)buffer; - chunkHead_->capacity = size - sizeof(ChunkHeader); - chunkHead_->size = 0; - chunkHead_->next = 0; - } - - //! Destructor. - /*! This deallocates all memory chunks, excluding the user-supplied buffer. - */ - ~MemoryPoolAllocator() { - Clear(); - delete ownBaseAllocator_; - } - - //! Deallocates all memory chunks, excluding the user-supplied buffer. - void Clear() { - while(chunkHead_ != 0 && chunkHead_ != (ChunkHeader *)userBuffer_) { - ChunkHeader* next = chunkHead_->next; - baseAllocator_->Free(chunkHead_); - chunkHead_ = next; - } - } - - //! Computes the total capacity of allocated memory chunks. - /*! \return total capacity in bytes. - */ - size_t Capacity() { - size_t capacity = 0; - for (ChunkHeader* c = chunkHead_; c != 0; c = c->next) - capacity += c->capacity; - return capacity; - } - - //! Computes the memory blocks allocated. - /*! \return total used bytes. - */ - size_t Size() { - size_t size = 0; - for (ChunkHeader* c = chunkHead_; c != 0; c = c->next) - size += c->size; - return size; - } - - //! Allocates a memory block. (concept Allocator) - void* Malloc(size_t size) { - if (chunkHead_->size + size > chunkHead_->capacity) - AddChunk(chunk_capacity_ > size ? chunk_capacity_ : size); - - char *buffer = (char *)(chunkHead_ + 1) + chunkHead_->size; - chunkHead_->size += size; - return buffer; - } - - //! Resizes a memory block (concept Allocator) - void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) { - if (originalPtr == 0) - return Malloc(newSize); - - // Do not shrink if new size is smaller than original - if (originalSize >= newSize) - return originalPtr; - - // Simply expand it if it is the last allocation and there is sufficient space - if (originalPtr == (char *)(chunkHead_ + 1) + chunkHead_->size - originalSize) { - size_t increment = newSize - originalSize; - if (chunkHead_->size + increment <= chunkHead_->capacity) { - chunkHead_->size += increment; - return originalPtr; - } - } - - // Realloc process: allocate and copy memory, do not free original buffer. - void* newBuffer = Malloc(newSize); - RAPIDJSON_ASSERT(newBuffer != 0); // Do not handle out-of-memory explicitly. - return memcpy(newBuffer, originalPtr, originalSize); - } - - //! Frees a memory block (concept Allocator) - static void Free(void *ptr) {} // Do nothing - -private: - //! Creates a new chunk. - /*! \param capacity Capacity of the chunk in bytes. - */ - void AddChunk(size_t capacity) { - ChunkHeader* chunk = (ChunkHeader*)baseAllocator_->Malloc(sizeof(ChunkHeader) + capacity); - chunk->capacity = capacity; - chunk->size = 0; - chunk->next = chunkHead_; - chunkHead_ = chunk; - } - - static const int kDefaultChunkCapacity = 64 * 1024; //!< Default chunk capacity. - - //! Chunk header for perpending to each chunk. - /*! Chunks are stored as a singly linked list. - */ - struct ChunkHeader { - size_t capacity; //!< Capacity of the chunk in bytes (excluding the header itself). - size_t size; //!< Current size of allocated memory in bytes. - ChunkHeader *next; //!< Next chunk in the linked list. - }; - - ChunkHeader *chunkHead_; //!< Head of the chunk linked-list. Only the head chunk serves allocation. - size_t chunk_capacity_; //!< The minimum capacity of chunk when they are allocated. - char *userBuffer_; //!< User supplied buffer. - BaseAllocator* baseAllocator_; //!< base allocator for allocating memory chunks. - BaseAllocator* ownBaseAllocator_; //!< base allocator created by this object. -}; - -/////////////////////////////////////////////////////////////////////////////// -// Encoding - -/*! \class rapidjson::Encoding - \brief Concept for encoding of Unicode characters. - -\code -concept Encoding { - typename Ch; //! Type of character. - - //! \brief Encode a Unicode codepoint to a stream. - //! \param os Output stream. - //! \param codepoint An unicode codepoint, ranging from 0x0 to 0x10FFFF inclusively. - template - static void Encode(OutputStream& os, unsigned codepoint) { - - //! \brief Validate one Unicode codepoint from an encoded stream. - //! \param is Input stream to obtain codepoint. - //! \param os Output for copying one codepoint. - //! \return true if it is valid. - //! \note This function just validating and copying the codepoint without actually decode it. - template - RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { -}; -\endcode -*/ - -/////////////////////////////////////////////////////////////////////////////// -// UTF8 - -//! UTF-8 encoding. -/*! http://en.wikipedia.org/wiki/UTF-8 - http://tools.ietf.org/html/rfc3629 - \tparam CharType Type for storing 8-bit UTF-8 data. Default is char. - \implements Encoding -*/ -template -struct UTF8 { - typedef CharType Ch; - - template - static void Encode(OutputStream& os, unsigned codepoint) { - if (codepoint <= 0x7F) - os.Put(codepoint & 0xFF); - else if (codepoint <= 0x7FF) { - os.Put(0xC0 | ((codepoint >> 6) & 0xFF)); - os.Put(0x80 | ((codepoint & 0x3F))); - } - else if (codepoint <= 0xFFFF) { - os.Put(0xE0 | ((codepoint >> 12) & 0xFF)); - os.Put(0x80 | ((codepoint >> 6) & 0x3F)); - os.Put(0x80 | (codepoint & 0x3F)); - } - else { - RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); - os.Put(0xF0 | ((codepoint >> 18) & 0xFF)); - os.Put(0x80 | ((codepoint >> 12) & 0x3F)); - os.Put(0x80 | ((codepoint >> 6) & 0x3F)); - os.Put(0x80 | (codepoint & 0x3F)); - } - } - - template - static bool Decode(InputStream& is, unsigned* codepoint) { -#define COPY() c = is.Take(); *codepoint = (*codepoint << 6) | ((unsigned char)c & 0x3Fu) -#define TRANS(mask) result &= ((GetType(c) & mask) != 0) -#define TAIL() COPY(); TRANS(0x70) - Ch c = is.Take(); - if (!(c & 0x80)) { - *codepoint = (unsigned char)c; - return true; - } - - unsigned char type = GetType(c); - *codepoint = (0xFF >> type) & (unsigned char)c; - bool result = true; - switch (type) { - case 2: TAIL(); return result; - case 3: TAIL(); TAIL(); return result; - case 4: COPY(); TRANS(0x50); TAIL(); return result; - case 5: COPY(); TRANS(0x10); COPY(); TAIL(); return result; - case 6: TAIL(); TAIL(); TAIL(); return result; - case 10: COPY(); TRANS(0x20); TAIL(); return result; - case 11: COPY(); TRANS(0x60); TAIL(); return result; - default: return false; - } -#undef COPY -#undef TRANS -#undef TAIL - } - - template - RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { -#define COPY() os.Put(c = is.Take()) -#define TRANS(mask) result &= ((GetType(c) & mask) != 0) -#define TAIL() COPY(); TRANS(0x70) - Ch c; - COPY(); - if (!(c & 0x80)) - return true; - - bool result = true; - switch (GetType(c)) { - case 2: TAIL(); return result; - case 3: TAIL(); TAIL(); return result; - case 4: COPY(); TRANS(0x50); TAIL(); return result; - case 5: COPY(); TRANS(0x10); COPY(); TAIL(); return result; - case 6: TAIL(); TAIL(); TAIL(); return result; - case 10: COPY(); TRANS(0x20); TAIL(); return result; - case 11: COPY(); TRANS(0x60); TAIL(); return result; - default: return false; - } -#undef COPY -#undef TRANS -#undef TAIL - } - - RAPIDJSON_FORCEINLINE static unsigned char GetType(unsigned char c) { - // Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ - // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types. - static const unsigned char type[] = { - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10, - 0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, - 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20, - 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20, - 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, - }; - return type[c]; - } -}; - -/////////////////////////////////////////////////////////////////////////////// -// UTF16 - -//! UTF-16 encoding. -/*! http://en.wikipedia.org/wiki/UTF-16 - http://tools.ietf.org/html/rfc2781 - \tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead. - \implements Encoding -*/ -template -struct UTF16 { - typedef CharType Ch; - - template - static void Encode(OutputStream& os, unsigned codepoint) { - if (codepoint <= 0xFFFF) { - RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair - os.Put(codepoint); - } - else { - RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); - unsigned v = codepoint - 0x10000; - os.Put((v >> 10) + 0xD800); - os.Put((v & 0x3FF) + 0xDC00); - } - } - - template - RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { - Ch c; - os.Put(c = is.Take()); - if (c < 0xD800 || c > 0xDFFF) - return true; - else if (c < 0xDBFF) { - os.Put(c = is.Take()); - return c >= 0xDC00 && c <= 0xDFFF; - } - else - return false; - } -}; - -/////////////////////////////////////////////////////////////////////////////// -// UTF32 - -//! UTF-32 encoding. -/*! http://en.wikipedia.org/wiki/UTF-32 - \tparam Ch Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead. - \implements Encoding -*/ -template -struct UTF32 { - typedef CharType Ch; - - template - static void Encode(OutputStream& os, unsigned codepoint) { - RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); - os.Put(codepoint); - } - - template - RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { - Ch c; - os.Put(c = is.Take()); - return c <= 0x10FFFF; - } -}; - -/////////////////////////////////////////////////////////////////////////////// -// Transcoder - -template -struct Transcoder { - template - static bool Transcode(InputStream& is, OutputStream& os) { - unsigned codepoint; - if (!SourceEncoding::Decode(is, &codepoint)) - return false; - TargetEncoding::Encode(os, codepoint); - return true; - } - - template - static bool Validate(InputStream& is, OutputStream& os) { - return Transcode(is, os); - } -}; - -//! Specialization of Transcoder with same source and target encoding. -template -struct Transcoder { - template - static bool Transcode(InputStream& is, OutputStream& os) { - os.Put(is.Take()); - return true; - } - - template - static bool Validate(InputStream& is, OutputStream& os) { - return Encoding::Validate(is, os); - } -}; - /////////////////////////////////////////////////////////////////////////////// // Stream diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index 62ce232..ab2ceac 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -5,6 +5,7 @@ // Version 0.1 #include "rapidjson.h" +#include "encodings.h" #include "internal/pow10.h" #include "internal/stack.h" #include diff --git a/test/perftest/perftest.h b/test/perftest/perftest.h index c206c8c..57640d2 100644 --- a/test/perftest/perftest.h +++ b/test/perftest/perftest.h @@ -6,7 +6,7 @@ #define TEST_YAJL 0 #define TEST_ULTRAJSON 0 #define TEST_PLATFORM 0 -#define TEST_MISC 1 +#define TEST_MISC 0 #if TEST_RAPIDJSON //#define RAPIDJSON_SSE2 diff --git a/test/unittest/filestreamtest.cpp b/test/unittest/filestreamtest.cpp index a154769..7473ff4 100644 --- a/test/unittest/filestreamtest.cpp +++ b/test/unittest/filestreamtest.cpp @@ -2,6 +2,7 @@ #include "rapidjson/filestream.h" #include "rapidjson/filereadstream.h" #include "rapidjson/filewritestream.h" +#include "rapidjson/encodedstream.h" using namespace rapidjson; @@ -31,22 +32,23 @@ protected: size_t length_; }; -TEST_F(FileStreamTest, FileStream_Read) { - FILE *fp = fopen(filename_, "rb"); - ASSERT_TRUE(fp != 0); - FileStream s(fp); - - for (size_t i = 0; i < length_; i++) { - EXPECT_EQ(json_[i], s.Peek()); - EXPECT_EQ(json_[i], s.Peek()); // 2nd time should be the same - EXPECT_EQ(json_[i], s.Take()); - } - - EXPECT_EQ(length_, s.Tell()); - EXPECT_EQ('\0', s.Peek()); - - fclose(fp); -} +// Depreciated +//TEST_F(FileStreamTest, FileStream_Read) { +// FILE *fp = fopen(filename_, "rb"); +// ASSERT_TRUE(fp != 0); +// FileStream s(fp); +// +// for (size_t i = 0; i < length_; i++) { +// EXPECT_EQ(json_[i], s.Peek()); +// EXPECT_EQ(json_[i], s.Peek()); // 2nd time should be the same +// EXPECT_EQ(json_[i], s.Take()); +// } +// +// EXPECT_EQ(length_, s.Tell()); +// EXPECT_EQ('\0', s.Peek()); +// +// fclose(fp); +//} TEST_F(FileStreamTest, FileReadStream) { FILE *fp = fopen(filename_, "rb");