diff --git a/.gitignore b/.gitignore index d97a316..2c412c2 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ !/bin/data !/bin/encodings !/bin/jsonchecker +!/bin/types /build /doc/html /doc/doxygen_*.db diff --git a/bin/types/booleans.json b/bin/types/booleans.json new file mode 100755 index 0000000..2dcbb5f Binary files /dev/null and b/bin/types/booleans.json differ diff --git a/bin/types/floats.json b/bin/types/floats.json new file mode 100755 index 0000000..12b94a1 Binary files /dev/null and b/bin/types/floats.json differ diff --git a/bin/types/guids.json b/bin/types/guids.json new file mode 100755 index 0000000..9d7f5db Binary files /dev/null and b/bin/types/guids.json differ diff --git a/bin/types/integers.json b/bin/types/integers.json new file mode 100755 index 0000000..5dd05e0 Binary files /dev/null and b/bin/types/integers.json differ diff --git a/bin/types/mixed.json b/bin/types/mixed.json new file mode 100755 index 0000000..43e9a1d Binary files /dev/null and b/bin/types/mixed.json differ diff --git a/bin/types/nulls.json b/bin/types/nulls.json new file mode 100755 index 0000000..7a636ec Binary files /dev/null and b/bin/types/nulls.json differ diff --git a/bin/types/paragraphs.json b/bin/types/paragraphs.json new file mode 100755 index 0000000..8ab3e1c Binary files /dev/null and b/bin/types/paragraphs.json differ diff --git a/bin/types/readme.txt b/bin/types/readme.txt new file mode 100644 index 0000000..da1dae6 --- /dev/null +++ b/bin/types/readme.txt @@ -0,0 +1 @@ +Test data obtained from https://github.com/xpol/lua-rapidjson/tree/master/performance diff --git a/include/rapidjson/encodings.h b/include/rapidjson/encodings.h index f37f9e1..cc676d8 100644 --- a/include/rapidjson/encodings.h +++ b/include/rapidjson/encodings.h @@ -120,6 +120,28 @@ struct UTF8 { } } + template + static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { + if (codepoint <= 0x7F) + PutUnsafe(os, static_cast(codepoint & 0xFF)); + else if (codepoint <= 0x7FF) { + PutUnsafe(os, static_cast(0xC0 | ((codepoint >> 6) & 0xFF))); + PutUnsafe(os, static_cast(0x80 | ((codepoint & 0x3F)))); + } + else if (codepoint <= 0xFFFF) { + PutUnsafe(os, static_cast(0xE0 | ((codepoint >> 12) & 0xFF))); + PutUnsafe(os, static_cast(0x80 | ((codepoint >> 6) & 0x3F))); + PutUnsafe(os, static_cast(0x80 | (codepoint & 0x3F))); + } + else { + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + PutUnsafe(os, static_cast(0xF0 | ((codepoint >> 18) & 0xFF))); + PutUnsafe(os, static_cast(0x80 | ((codepoint >> 12) & 0x3F))); + PutUnsafe(os, static_cast(0x80 | ((codepoint >> 6) & 0x3F))); + PutUnsafe(os, static_cast(0x80 | (codepoint & 0x3F))); + } + } + template static bool Decode(InputStream& is, unsigned* codepoint) { #define COPY() c = is.Take(); *codepoint = (*codepoint << 6) | (static_cast(c) & 0x3Fu) @@ -261,6 +283,22 @@ struct UTF16 { } } + + template + static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); + if (codepoint <= 0xFFFF) { + RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair + PutUnsafe(os, static_cast(codepoint)); + } + else { + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + unsigned v = codepoint - 0x10000; + PutUnsafe(os, static_cast((v >> 10) | 0xD800)); + PutUnsafe(os, (v & 0x3FF) | 0xDC00); + } + } + template static bool Decode(InputStream& is, unsigned* codepoint) { RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2); @@ -386,6 +424,13 @@ struct UTF32 { os.Put(codepoint); } + template + static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { + RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4); + RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); + PutUnsafe(os, codepoint); + } + template static bool Decode(InputStream& is, unsigned* codepoint) { RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4); @@ -501,6 +546,12 @@ struct ASCII { os.Put(static_cast(codepoint & 0xFF)); } + template + static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { + RAPIDJSON_ASSERT(codepoint <= 0x7F); + PutUnsafe(os, static_cast(codepoint & 0xFF)); + } + template static bool Decode(InputStream& is, unsigned* codepoint) { uint8_t c = static_cast(is.Take()); @@ -571,6 +622,13 @@ struct AutoUTF { (*f[os.GetType()])(os, codepoint); } + template + RAPIDJSON_FORCEINLINE static void EncodeUnsafe(OutputStream& os, unsigned codepoint) { + typedef void (*EncodeFunc)(OutputStream&, unsigned); + static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(EncodeUnsafe) }; + (*f[os.GetType()])(os, codepoint); + } + template RAPIDJSON_FORCEINLINE static bool Decode(InputStream& is, unsigned* codepoint) { typedef bool (*DecodeFunc)(InputStream&, unsigned*); @@ -604,6 +662,15 @@ struct Transcoder { return true; } + template + RAPIDJSON_FORCEINLINE static bool TranscodeUnsafe(InputStream& is, OutputStream& os) { + unsigned codepoint; + if (!SourceEncoding::Decode(is, &codepoint)) + return false; + TargetEncoding::EncodeUnsafe(os, codepoint); + return true; + } + //! Validate one Unicode codepoint from an encoded stream. template RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { @@ -611,6 +678,10 @@ struct Transcoder { } }; +// Forward declaration. +template +inline void PutUnsafe(Stream& stream, typename Stream::Ch c); + //! Specialization of Transcoder with same source and target encoding. template struct Transcoder { @@ -620,6 +691,12 @@ struct Transcoder { return true; } + template + RAPIDJSON_FORCEINLINE static bool TranscodeUnsafe(InputStream& is, OutputStream& os) { + PutUnsafe(os, is.Take()); // Just copy one code unit. This semantic is different from primary template class. + return true; + } + template RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { return Encoding::Validate(is, os); // source/target encoding are the same diff --git a/include/rapidjson/internal/stack.h b/include/rapidjson/internal/stack.h index 5e5cda1..7c8294b 100644 --- a/include/rapidjson/internal/stack.h +++ b/include/rapidjson/internal/stack.h @@ -108,11 +108,21 @@ public: // Optimization note: try to minimize the size of this function for force inline. // Expansion is run very infrequently, so it is moved to another (probably non-inline) function. template - RAPIDJSON_FORCEINLINE T* Push(size_t count = 1) { + RAPIDJSON_FORCEINLINE void Reserve(size_t count = 1) { // Expand the stack if needed if (RAPIDJSON_UNLIKELY(stackTop_ + sizeof(T) * count >= stackEnd_)) Expand(count); + } + template + RAPIDJSON_FORCEINLINE T* Push(size_t count = 1) { + Reserve(count); + return PushUnsafe(count); + } + + template + RAPIDJSON_FORCEINLINE T* PushUnsafe(size_t count = 1) { + RAPIDJSON_ASSERT(stackTop_ + sizeof(T) * count < stackEnd_); T* ret = reinterpret_cast(stackTop_); stackTop_ += sizeof(T) * count; return ret; diff --git a/include/rapidjson/rapidjson.h b/include/rapidjson/rapidjson.h index 0023abe..a90a4a0 100644 --- a/include/rapidjson/rapidjson.h +++ b/include/rapidjson/rapidjson.h @@ -614,11 +614,25 @@ struct StreamTraits { enum { copyOptimization = 0 }; }; +//! Reserve n characters for writing to a stream. +template +inline void PutReserve(Stream& stream, size_t count) { + (void)stream; + (void)count; +} + +//! Write character to a stream, presuming buffer is reserved. +template +inline void PutUnsafe(Stream& stream, typename Stream::Ch c) { + stream.Put(c); +} + //! Put N copies of a character to a stream. template inline void PutN(Stream& stream, Ch c, size_t n) { + PutReserve(stream, n); for (size_t i = 0; i < n; i++) - stream.Put(c); + PutUnsafe(stream, c); } /////////////////////////////////////////////////////////////////////////////// diff --git a/include/rapidjson/stringbuffer.h b/include/rapidjson/stringbuffer.h index e9be849..40b51cd 100644 --- a/include/rapidjson/stringbuffer.h +++ b/include/rapidjson/stringbuffer.h @@ -48,6 +48,7 @@ public: #endif void Put(Ch c) { *stack_.template Push() = c; } + void PutUnsafe(Ch c) { *stack_.template PushUnsafe() = c; } void Flush() {} void Clear() { stack_.Clear(); } @@ -57,6 +58,8 @@ public: stack_.ShrinkToFit(); stack_.template Pop(1); } + + void Reserve(size_t count) { stack_.template Reserve(count); } Ch* Push(size_t count) { return stack_.template Push(count); } void Pop(size_t count) { stack_.template Pop(count); } @@ -82,6 +85,16 @@ private: //! String buffer with UTF8 encoding typedef GenericStringBuffer > StringBuffer; +template +inline void PutReserve(GenericStringBuffer& stream, size_t count) { + stream.Reserve(count); +} + +template +inline void PutUnsafe(GenericStringBuffer& stream, typename Encoding::Ch c) { + stream.PutUnsafe(c); +} + //! Implement specialized version of PutN() with memset() for better performance. template<> inline void PutN(GenericStringBuffer >& stream, char c, size_t n) { diff --git a/include/rapidjson/writer.h b/include/rapidjson/writer.h index a450456..13db449 100644 --- a/include/rapidjson/writer.h +++ b/include/rapidjson/writer.h @@ -189,15 +189,18 @@ protected: static const size_t kDefaultLevelDepth = 32; bool WriteNull() { - os_->Put('n'); os_->Put('u'); os_->Put('l'); os_->Put('l'); return true; + PutReserve(*os_, 4); + PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'u'); PutUnsafe(*os_, 'l'); PutUnsafe(*os_, 'l'); return true; } bool WriteBool(bool b) { if (b) { - os_->Put('t'); os_->Put('r'); os_->Put('u'); os_->Put('e'); + PutReserve(*os_, 4); + PutUnsafe(*os_, 't'); PutUnsafe(*os_, 'r'); PutUnsafe(*os_, 'u'); PutUnsafe(*os_, 'e'); } else { - os_->Put('f'); os_->Put('a'); os_->Put('l'); os_->Put('s'); os_->Put('e'); + PutReserve(*os_, 5); + PutUnsafe(*os_, 'f'); PutUnsafe(*os_, 'a'); PutUnsafe(*os_, 'l'); PutUnsafe(*os_, 's'); PutUnsafe(*os_, 'e'); } return true; } @@ -205,40 +208,45 @@ protected: bool WriteInt(int i) { char buffer[11]; const char* end = internal::i32toa(i, buffer); + PutReserve(*os_, static_cast(end - buffer)); for (const char* p = buffer; p != end; ++p) - os_->Put(static_cast(*p)); + PutUnsafe(*os_, static_cast(*p)); return true; } bool WriteUint(unsigned u) { char buffer[10]; const char* end = internal::u32toa(u, buffer); + PutReserve(*os_, static_cast(end - buffer)); for (const char* p = buffer; p != end; ++p) - os_->Put(static_cast(*p)); + PutUnsafe(*os_, static_cast(*p)); return true; } bool WriteInt64(int64_t i64) { char buffer[21]; const char* end = internal::i64toa(i64, buffer); + PutReserve(*os_, static_cast(end - buffer)); for (const char* p = buffer; p != end; ++p) - os_->Put(static_cast(*p)); + PutUnsafe(*os_, static_cast(*p)); return true; } bool WriteUint64(uint64_t u64) { char buffer[20]; char* end = internal::u64toa(u64, buffer); + PutReserve(*os_, static_cast(end - buffer)); for (char* p = buffer; p != end; ++p) - os_->Put(static_cast(*p)); + PutUnsafe(*os_, static_cast(*p)); return true; } bool WriteDouble(double d) { char buffer[25]; char* end = internal::dtoa(d, buffer); + PutReserve(*os_, static_cast(end - buffer)); for (char* p = buffer; p != end; ++p) - os_->Put(static_cast(*p)); + PutUnsafe(*os_, static_cast(*p)); return true; } @@ -256,7 +264,12 @@ protected: #undef Z16 }; - os_->Put('\"'); + if (TargetEncoding::supportUnicode) + PutReserve(*os_, 2 + length * 6); // "\uxxxx..." + else + PutReserve(*os_, 2 + length * 12); // "\uxxxx\uyyyy..." + + PutUnsafe(*os_, '\"'); GenericStringStream is(str); while (is.Tell() < length) { const Ch c = is.Peek(); @@ -265,13 +278,13 @@ protected: unsigned codepoint; if (!SourceEncoding::Decode(is, &codepoint)) return false; - os_->Put('\\'); - os_->Put('u'); + PutUnsafe(*os_, '\\'); + PutUnsafe(*os_, 'u'); if (codepoint <= 0xD7FF || (codepoint >= 0xE000 && codepoint <= 0xFFFF)) { - os_->Put(hexDigits[(codepoint >> 12) & 15]); - os_->Put(hexDigits[(codepoint >> 8) & 15]); - os_->Put(hexDigits[(codepoint >> 4) & 15]); - os_->Put(hexDigits[(codepoint ) & 15]); + PutUnsafe(*os_, hexDigits[(codepoint >> 12) & 15]); + PutUnsafe(*os_, hexDigits[(codepoint >> 8) & 15]); + PutUnsafe(*os_, hexDigits[(codepoint >> 4) & 15]); + PutUnsafe(*os_, hexDigits[(codepoint ) & 15]); } else { RAPIDJSON_ASSERT(codepoint >= 0x010000 && codepoint <= 0x10FFFF); @@ -279,34 +292,34 @@ protected: unsigned s = codepoint - 0x010000; unsigned lead = (s >> 10) + 0xD800; unsigned trail = (s & 0x3FF) + 0xDC00; - os_->Put(hexDigits[(lead >> 12) & 15]); - os_->Put(hexDigits[(lead >> 8) & 15]); - os_->Put(hexDigits[(lead >> 4) & 15]); - os_->Put(hexDigits[(lead ) & 15]); - os_->Put('\\'); - os_->Put('u'); - os_->Put(hexDigits[(trail >> 12) & 15]); - os_->Put(hexDigits[(trail >> 8) & 15]); - os_->Put(hexDigits[(trail >> 4) & 15]); - os_->Put(hexDigits[(trail ) & 15]); + PutUnsafe(*os_, hexDigits[(lead >> 12) & 15]); + PutUnsafe(*os_, hexDigits[(lead >> 8) & 15]); + PutUnsafe(*os_, hexDigits[(lead >> 4) & 15]); + PutUnsafe(*os_, hexDigits[(lead ) & 15]); + PutUnsafe(*os_, '\\'); + PutUnsafe(*os_, 'u'); + PutUnsafe(*os_, hexDigits[(trail >> 12) & 15]); + PutUnsafe(*os_, hexDigits[(trail >> 8) & 15]); + PutUnsafe(*os_, hexDigits[(trail >> 4) & 15]); + PutUnsafe(*os_, hexDigits[(trail ) & 15]); } } else if ((sizeof(Ch) == 1 || static_cast(c) < 256) && escape[static_cast(c)]) { is.Take(); - os_->Put('\\'); - os_->Put(static_cast(escape[static_cast(c)])); + PutUnsafe(*os_, '\\'); + PutUnsafe(*os_, static_cast(escape[static_cast(c)])); if (escape[static_cast(c)] == 'u') { - os_->Put('0'); - os_->Put('0'); - os_->Put(hexDigits[static_cast(c) >> 4]); - os_->Put(hexDigits[static_cast(c) & 0xF]); + PutUnsafe(*os_, '0'); + PutUnsafe(*os_, '0'); + PutUnsafe(*os_, hexDigits[static_cast(c) >> 4]); + PutUnsafe(*os_, hexDigits[static_cast(c) & 0xF]); } } else - if (!Transcoder::Transcode(is, *os_)) + if (!Transcoder::TranscodeUnsafe(is, *os_)) return false; } - os_->Put('\"'); + PutUnsafe(*os_, '\"'); return true; } diff --git a/test/perftest/perftest.h b/test/perftest/perftest.h index 2b0984c..2afe641 100644 --- a/test/perftest/perftest.h +++ b/test/perftest/perftest.h @@ -65,44 +65,87 @@ public: PerfTest() : filename_(), json_(), length_(), whitespace_(), whitespace_length_() {} virtual void SetUp() { + { + const char *paths[] = { + "data/sample.json", + "bin/data/sample.json", + "../bin/data/sample.json", + "../../bin/data/sample.json", + "../../../bin/data/sample.json" + }; - const char *paths[] = { - "data/sample.json", - "bin/data/sample.json", - "../bin/data/sample.json", - "../../bin/data/sample.json", - "../../../bin/data/sample.json" - }; - FILE *fp = 0; - for (size_t i = 0; i < sizeof(paths) / sizeof(paths[0]); i++) { - fp = fopen(filename_ = paths[i], "rb"); - if (fp) - break; + FILE *fp = 0; + for (size_t i = 0; i < sizeof(paths) / sizeof(paths[0]); i++) { + fp = fopen(filename_ = paths[i], "rb"); + if (fp) + break; + } + ASSERT_TRUE(fp != 0); + + fseek(fp, 0, SEEK_END); + length_ = (size_t)ftell(fp); + fseek(fp, 0, SEEK_SET); + json_ = (char*)malloc(length_ + 1); + ASSERT_EQ(length_, fread(json_, 1, length_, fp)); + json_[length_] = '\0'; + fclose(fp); } - ASSERT_TRUE(fp != 0); - - fseek(fp, 0, SEEK_END); - length_ = (size_t)ftell(fp); - fseek(fp, 0, SEEK_SET); - json_ = (char*)malloc(length_ + 1); - ASSERT_EQ(length_, fread(json_, 1, length_, fp)); - json_[length_] = '\0'; - fclose(fp); // whitespace test - whitespace_length_ = 1024 * 1024; - whitespace_ = (char *)malloc(whitespace_length_ + 4); - char *p = whitespace_; - for (size_t i = 0; i < whitespace_length_; i += 4) { - *p++ = ' '; - *p++ = '\n'; - *p++ = '\r'; - *p++ = '\t'; + { + whitespace_length_ = 1024 * 1024; + whitespace_ = (char *)malloc(whitespace_length_ + 4); + char *p = whitespace_; + for (size_t i = 0; i < whitespace_length_; i += 4) { + *p++ = ' '; + *p++ = '\n'; + *p++ = '\r'; + *p++ = '\t'; + } + *p++ = '['; + *p++ = '0'; + *p++ = ']'; + *p++ = '\0'; + } + + // types test + { + const char *typespaths[] = { + "data/types", + "bin/types", + "../bin/types", + "../../bin/types/", + "../../../bin/types" + }; + + const char* typesfilenames[] = { + "booleans.json", + "floats.json", + "guids.json", + "integers.json", + "mixed.json", + "nulls.json", + "paragraphs.json" + }; + + for (size_t j = 0; j < sizeof(typesfilenames) / sizeof(typesfilenames[0]); j++) { + types_[j] = 0; + for (size_t i = 0; i < sizeof(typespaths) / sizeof(typespaths[0]); i++) { + char filename[256]; + sprintf(filename, "%s/%s", typespaths[i], typesfilenames[j]); + if (FILE* fp = fopen(filename, "rb")) { + fseek(fp, 0, SEEK_END); + size_t length = (size_t)ftell(fp); + fseek(fp, 0, SEEK_SET); + types_[j] = (char*)malloc(length + 1); + ASSERT_EQ(length, fread(types_[j], 1, length, fp)); + types_[j][length] = '\0'; + fclose(fp); + break; + } + } + } } - *p++ = '['; - *p++ = '0'; - *p++ = ']'; - *p++ = '\0'; } virtual void TearDown() { @@ -110,6 +153,10 @@ public: free(whitespace_); json_ = 0; whitespace_ = 0; + for (size_t i = 0; i < 7; i++) { + free(types_[i]); + types_[i] = 0; + } } private: @@ -122,6 +169,7 @@ protected: size_t length_; char *whitespace_; size_t whitespace_length_; + char *types_[7]; static const size_t kTrialCount = 1000; }; diff --git a/test/perftest/rapidjsontest.cpp b/test/perftest/rapidjsontest.cpp index 0594171..b9ac395 100644 --- a/test/perftest/rapidjsontest.cpp +++ b/test/perftest/rapidjsontest.cpp @@ -45,7 +45,10 @@ public: temp_ = (char *)malloc(length_ + 1); // Parse as a document - EXPECT_FALSE(doc_.Parse(json_).IsNull()); + EXPECT_FALSE(doc_.Parse(json_).HasParseError()); + + for (size_t i = 0; i < 7; i++) + EXPECT_FALSE(typesDoc_[i].Parse(types_[i]).HasParseError()); } virtual void TearDown() { @@ -60,6 +63,7 @@ private: protected: char *temp_; Document doc_; + Document typesDoc_[7]; }; TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseInsitu_DummyHandler)) { @@ -250,8 +254,10 @@ TEST_F(RapidJson, DocumentAccept) { } struct NullStream { + typedef char Ch; + NullStream() /*: length_(0)*/ {} - void Put(char) { /*++length_;*/ } + void Put(Ch) { /*++length_;*/ } void Flush() {} //size_t length_; }; @@ -278,6 +284,25 @@ TEST_F(RapidJson, Writer_StringBuffer) { } } +#define TEST_TYPED(index, Name)\ +TEST_F(RapidJson, Writer_StringBuffer_##Name) {\ + for (size_t i = 0; i < kTrialCount * 10; i++) {\ + StringBuffer s(0, 1024 * 1024);\ + Writer writer(s);\ + typesDoc_[index].Accept(writer);\ + const char* str = s.GetString();\ + (void)str;\ + }\ +}\ + +TEST_TYPED(0, Booleans) +TEST_TYPED(1, Floats) +TEST_TYPED(2, Guids) +TEST_TYPED(3, Integers) +TEST_TYPED(4, Mixed) +TEST_TYPED(5, Nulls) +TEST_TYPED(6, Paragraphs) + TEST_F(RapidJson, PrettyWriter_StringBuffer) { for (size_t i = 0; i < kTrialCount; i++) { StringBuffer s(0, 2048 * 1024);