From e9597255b4f2035e1a586933a2f66056c38ed1eb Mon Sep 17 00:00:00 2001 From: Milo Yip Date: Sat, 26 Jul 2014 19:14:07 +0800 Subject: [PATCH 01/10] Make variables in Transit() more localized --- include/rapidjson/reader.h | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index 69d8cd8..09a929b 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -1026,9 +1026,9 @@ private: // May return a new state on state pop. template IterativeParsingState Transit(IterativeParsingState src, IterativeParsingToken token, IterativeParsingState dst, InputStream& is, Handler& handler) { - int c = 0; - IterativeParsingState n; - bool hr; + //int c = 0; + //IterativeParsingState n; + //bool hr; switch (dst) { case IterativeParsingStartState: @@ -1043,9 +1043,10 @@ private: case IterativeParsingObjectInitialState: case IterativeParsingArrayInitialState: + { // Push the state(Element or MemeberValue) if we are nested in another array or value of member. // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop. - n = src; + IterativeParsingState n = src; if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState) n = IterativeParsingElementState; else if (src == IterativeParsingKeyValueDelimiterState) @@ -1060,10 +1061,7 @@ private: // Initialize and push the member/element count. *stack_.template Push(1) = 0; // Call handler - if (dst == IterativeParsingObjectInitialState) - hr = handler.StartObject(); - else - hr = handler.StartArray(); + bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray(); // On handler short circuits the parsing. if (!hr) { RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell()); @@ -1073,6 +1071,7 @@ private: is.Take(); return dst; } + } case IterativeParsingMemberKeyState: ParseString(is, handler); @@ -1113,18 +1112,19 @@ private: return dst; case IterativeParsingObjectFinishState: + { // Get member count. - c = *stack_.template Pop(1); + int c = *stack_.template Pop(1); // If the object is not empty, count the last member. if (src == IterativeParsingMemberValueState) ++c; // Restore the state. - n = *stack_.template Pop(1); + IterativeParsingState n = *stack_.template Pop(1); // Transit to Finish state if this is the topmost scope. if (n == IterativeParsingStartState) n = IterativeParsingFinishState; // Call handler - hr = handler.EndObject(c); + bool hr = handler.EndObject(c); // On handler short circuits the parsing. if (!hr) { RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell()); @@ -1134,20 +1134,22 @@ private: is.Take(); return n; } + } case IterativeParsingArrayFinishState: + { // Get element count. - c = *stack_.template Pop(1); + int c = *stack_.template Pop(1); // If the array is not empty, count the last element. if (src == IterativeParsingElementState) ++c; // Restore the state. - n = *stack_.template Pop(1); + IterativeParsingState n = *stack_.template Pop(1); // Transit to Finish state if this is the topmost scope. if (n == IterativeParsingStartState) n = IterativeParsingFinishState; // Call handler - hr = handler.EndArray(c); + bool hr = handler.EndArray(c); // On handler short circuits the parsing. if (!hr) { RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell()); @@ -1157,6 +1159,7 @@ private: is.Take(); return n; } + } default: RAPIDJSON_ASSERT(false); From 01126defd2edd6cf2491ddecfe06e441b1a98a3d Mon Sep 17 00:00:00 2001 From: Milo Yip Date: Sat, 26 Jul 2014 20:28:35 +0800 Subject: [PATCH 02/10] Make Stack::Push() force inline in normal path --- include/rapidjson/internal/stack.h | 31 ++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/include/rapidjson/internal/stack.h b/include/rapidjson/internal/stack.h index ff5ff82..8548465 100644 --- a/include/rapidjson/internal/stack.h +++ b/include/rapidjson/internal/stack.h @@ -28,20 +28,14 @@ public: void Clear() { /*stack_top_ = 0;*/ stack_top_ = stack_; } + // Optimization note: try to minimize the size of this function for force inline. + // Expansion is run very infrequently, so it is moved to another (probably non-inline) function. template - T* Push(size_t count = 1) { + RAPIDJSON_FORCEINLINE T* Push(size_t count = 1) { // Expand the stack if needed - if (stack_top_ + sizeof(T) * count >= stack_end_) { - size_t new_capacity = stack_capacity_ * 2; - size_t size = GetSize(); - size_t new_size = GetSize() + sizeof(T) * count; - if (new_capacity < new_size) - new_capacity = new_size; - stack_ = (char*)allocator_->Realloc(stack_, stack_capacity_, new_capacity); - stack_capacity_ = new_capacity; - stack_top_ = stack_ + size; - stack_end_ = stack_ + stack_capacity_; - } + if (stack_top_ + sizeof(T) * count >= stack_end_) + Expand(count); + T* ret = reinterpret_cast(stack_top_); stack_top_ += sizeof(T) * count; return ret; @@ -69,6 +63,19 @@ public: size_t GetCapacity() const { return stack_capacity_; } private: + template + void Expand(size_t count) { + size_t new_capacity = stack_capacity_ * 2; + size_t size = GetSize(); + size_t new_size = GetSize() + sizeof(T) * count; + if (new_capacity < new_size) + new_capacity = new_size; + stack_ = (char*)allocator_->Realloc(stack_, stack_capacity_, new_capacity); + stack_capacity_ = new_capacity; + stack_top_ = stack_ + size; + stack_end_ = stack_ + stack_capacity_; + } + // Prohibit copy constructor & assignment operator. Stack(const Stack&); Stack& operator=(const Stack&); From 89865cb919b9b89682a79f7087dddc3b8308e75f Mon Sep 17 00:00:00 2001 From: Milo Yip Date: Sat, 26 Jul 2014 21:23:37 +0800 Subject: [PATCH 03/10] Use lookup table for Tokenize() --- include/rapidjson/reader.h | 72 +++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 32 deletions(-) diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index 09a929b..d00c92b 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -829,44 +829,52 @@ private: }; // Tokens - enum IterativeParsingToken { - IterativeParsingLeftBracketToken = 0, - IterativeParsingRightBracketToken, + enum Token { + LeftBracketToken = 0, + RightBracketToken, - IterativeParsingLeftCurlyBracketToken, - IterativeParsingRightCurlyBracketToken, + LeftCurlyBracketToken, + RightCurlyBracketToken, - IterativeParsingCommaToken, - IterativeParsingColonToken, + CommaToken, + ColonToken, - IterativeParsingStringToken, - IterativeParsingFalseToken, - IterativeParsingTrueToken, - IterativeParsingNullToken, - IterativeParsingNumberToken, + StringToken, + FalseToken, + TrueToken, + NullToken, + NumberToken, - cIterativeParsingTokenCount + kTokenCount }; - IterativeParsingToken Tokenize(Ch c) { - switch (c) { - case '[': return IterativeParsingLeftBracketToken; - case ']': return IterativeParsingRightBracketToken; - case '{': return IterativeParsingLeftCurlyBracketToken; - case '}': return IterativeParsingRightCurlyBracketToken; - case ',': return IterativeParsingCommaToken; - case ':': return IterativeParsingColonToken; - case '"': return IterativeParsingStringToken; - case 'f': return IterativeParsingFalseToken; - case 't': return IterativeParsingTrueToken; - case 'n': return IterativeParsingNullToken; - default: return IterativeParsingNumberToken; - } + RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) { +#define N NumberToken +#define N16 N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N + // Maps from ASCII to Token + static const unsigned char tokenMap[256] = { + N16, // 00~0F + N16, // 10~1F + N, N, StringToken, N, N, N, N, N, N, N, N, N, CommaToken, N, N, N, // 20~2F + N, N, N, N, N, N, N, N, N, N, ColonToken, N, N, N, N, N, // 30~3F + N16, // 40~4F + N, N, N, N, N, N, N, N, N, N, N, LeftBracketToken, N, RightBracketToken, N, N, // 50~5F + N, N, N, N, N, N, FalseToken, N, N, N, N, N, N, N, NullToken, N, // 60~6F + N, N, N, N, TrueToken, N, N, N, N, N, N, LeftCurlyBracketToken, N, RightCurlyBracketToken, N, N, // 70~7F + N16, N16, N16, N16, N16, N16, N16, N16 // 80~FF + }; +#undef N +#undef N16 + + if (sizeof(Ch) == 1 || unsigned(c) < 256) + return (Token)tokenMap[(unsigned char)c]; + else + return NumberToken; } - IterativeParsingState Predict(IterativeParsingState state, IterativeParsingToken token) { + IterativeParsingState Predict(IterativeParsingState state, Token token) { // current state x one lookahead token -> new state - static const char G[cIterativeParsingStateCount][cIterativeParsingTokenCount] = { + static const char G[cIterativeParsingStateCount][kTokenCount] = { // Start { IterativeParsingArrayInitialState, // Left bracket @@ -1025,7 +1033,7 @@ private: // Make an advance in the token stream and state based on the candidate destination state which was returned by Transit(). // May return a new state on state pop. template - IterativeParsingState Transit(IterativeParsingState src, IterativeParsingToken token, IterativeParsingState dst, InputStream& is, Handler& handler) { + IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) { //int c = 0; //IterativeParsingState n; //bool hr; @@ -1081,7 +1089,7 @@ private: return dst; case IterativeParsingKeyValueDelimiterState: - if (token == IterativeParsingColonToken) { + if (token == ColonToken) { is.Take(); return dst; } @@ -1207,7 +1215,7 @@ private: SkipWhitespace(is); while (is.Peek() != '\0') { - IterativeParsingToken t = Tokenize(is.Peek()); + Token t = Tokenize(is.Peek()); IterativeParsingState n = Predict(state, t); IterativeParsingState d = Transit(state, t, n, is, handler); From e3e8fea0f311f646768438168a9bcfdd812bfff3 Mon Sep 17 00:00:00 2001 From: Milo Yip Date: Sat, 26 Jul 2014 21:40:11 +0800 Subject: [PATCH 04/10] Remove stack size limit feature It is not very useful for iterative parsing as the worst case of heap size is O(n) where n is number of character in JSON, for the worst synthetic cases. This is reasonable and should not create stack overflow security problem as in recursive parsing. --- include/rapidjson/document.h | 45 +++++++++++++-------------------- include/rapidjson/error/error.h | 1 - include/rapidjson/reader.h | 25 ++---------------- test/unittest/readertest.cpp | 12 --------- 4 files changed, 20 insertions(+), 63 deletions(-) diff --git a/include/rapidjson/document.h b/include/rapidjson/document.h index d94cd62..4448600 100644 --- a/include/rapidjson/document.h +++ b/include/rapidjson/document.h @@ -1221,13 +1221,12 @@ public: \tparam SourceEncoding Encoding of input stream \tparam InputStream Type of input stream, implementing Stream concept \param is Input stream to be parsed. - \param limit Parsing stack size limit(in bytes). Pass 0 means no limit. \return The document itself for fluent API. */ template - GenericDocument& ParseStream(InputStream& is, size_t limit = 0) { + GenericDocument& ParseStream(InputStream& is) { ValueType::SetNull(); // Remove existing root if exist - GenericReader reader(limit, &GetAllocator()); + GenericReader reader(&GetAllocator()); ClearStackOnExit scope(*this); parseResult_ = reader.template Parse(is, *this); if (parseResult_) { @@ -1241,23 +1240,21 @@ public: /*! \tparam parseFlags Combination of \ref ParseFlag. \tparam InputStream Type of input stream, implementing Stream concept \param is Input stream to be parsed. - \param limit Parsing stack size limit(in bytes). Pass 0 means no limit. \return The document itself for fluent API. */ template - GenericDocument& ParseStream(InputStream& is, size_t limit = 0) { - return ParseStream(is, limit); + GenericDocument& ParseStream(InputStream& is) { + return ParseStream(is); } //! Parse JSON text from an input stream (with \ref kParseDefaultFlags) /*! \tparam InputStream Type of input stream, implementing Stream concept \param is Input stream to be parsed. - \param limit Parsing stack size limit(in bytes). Pass 0 means no limit. \return The document itself for fluent API. */ template - GenericDocument& ParseStream(InputStream& is, size_t limit = 0) { - return ParseStream(is, limit); + GenericDocument& ParseStream(InputStream& is) { + return ParseStream(is); } //!@} @@ -1268,33 +1265,30 @@ public: /*! \tparam parseFlags Combination of \ref ParseFlag. \tparam SourceEncoding Transcoding from input Encoding \param str Mutable zero-terminated string to be parsed. - \param limit Parsing stack size limit(in bytes). Pass 0 means no limit. \return The document itself for fluent API. */ template - GenericDocument& ParseInsitu(Ch* str, size_t limit = 0) { + GenericDocument& ParseInsitu(Ch* str) { GenericInsituStringStream s(str); - return ParseStream(s, limit); + return ParseStream(s); } //! Parse JSON text from a mutable string /*! \tparam parseFlags Combination of \ref ParseFlag. \param str Mutable zero-terminated string to be parsed. - \param limit Parsing stack size limit(in bytes). Pass 0 means no limit. \return The document itself for fluent API. */ template - GenericDocument& ParseInsitu(Ch* str, size_t limit = 0) { - return ParseInsitu(str, limit); + GenericDocument& ParseInsitu(Ch* str) { + return ParseInsitu(str); } //! Parse JSON text from a mutable string (with \ref kParseDefaultFlags) /*! \param str Mutable zero-terminated string to be parsed. - \param limit Parsing stack size limit(in bytes). Pass 0 means no limit. \return The document itself for fluent API. */ - GenericDocument& ParseInsitu(Ch* str, size_t limit = 0) { - return ParseInsitu(str, limit); + GenericDocument& ParseInsitu(Ch* str) { + return ParseInsitu(str); } //!@} @@ -1305,31 +1299,28 @@ public: /*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag). \tparam SourceEncoding Transcoding from input Encoding \param str Read-only zero-terminated string to be parsed. - \param limit Parsing stack size limit(in bytes). Pass 0 means no limit. */ template - GenericDocument& Parse(const Ch* str, size_t limit = 0) { + GenericDocument& Parse(const Ch* str) { RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag)); GenericStringStream s(str); - return ParseStream(s, limit); + return ParseStream(s); } //! Parse JSON text from a read-only string /*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag). \param str Read-only zero-terminated string to be parsed. - \param limit Parsing stack size limit(in bytes). Pass 0 means no limit. */ template - GenericDocument& Parse(const Ch* str, size_t limit = 0) { - return Parse(str, limit); + GenericDocument& Parse(const Ch* str) { + return Parse(str); } //! Parse JSON text from a read-only string (with \ref kParseDefaultFlags) /*! \param str Read-only zero-terminated string to be parsed. - \param limit Parsing stack size limit(in bytes). Pass 0 means no limit. */ - GenericDocument& Parse(const Ch* str, size_t limit = 0) { - return Parse(str, limit); + GenericDocument& Parse(const Ch* str) { + return Parse(str); } //!@} diff --git a/include/rapidjson/error/error.h b/include/rapidjson/error/error.h index a47dfaa..981cbd9 100644 --- a/include/rapidjson/error/error.h +++ b/include/rapidjson/error/error.h @@ -60,7 +60,6 @@ enum ParseErrorCode { kParseErrorTermination, //!< Parsing was terminated. kParseErrorUnspecificSyntaxError, //!< Unspecific syntax error. - kParseErrorStackSizeLimitExceeded //!< Parsing stack size limit is exceeded. }; //! Result of parsing (wraps ParseErrorCode) diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index d00c92b..64094f6 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -273,11 +273,10 @@ public: typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type //! Constructor. - /*! \param limit Parsing stack size limit(in bytes). Pass 0 means no limit. - \param allocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing) + /*! \param allocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing) \param stackCapacity stack capacity in bytes for storing a single decoded string. (Only use for non-destructive parsing) */ - GenericReader(size_t limit = 0, Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(allocator, stackCapacity), kStackSizeLimit_(limit), parseResult_() {} + GenericReader(Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(allocator, stackCapacity), parseResult_() {} //! Parse JSON text. /*! \tparam parseFlags Combination of \ref ParseFlag. @@ -572,11 +571,6 @@ private: is.Take(); Ch e = is.Take(); if ((sizeof(Ch) == 1 || unsigned(e) < 256) && escape[(unsigned char)e]) { - if (!(parseFlags & kParseInsituFlag)) { - if (!CheckStackSpaceQuota(sizeof(Ch))) { - RAPIDJSON_PARSE_ERROR(kParseErrorStackSizeLimitExceeded, is.Tell() - 1); - } - } os.Put(escape[(unsigned char)e]); } else if (e == 'u') { // Unicode @@ -597,11 +591,6 @@ private: } else if (c == '"') { // Closing double quote is.Take(); - if (!(parseFlags & kParseInsituFlag)) { - if (!CheckStackSpaceQuota(sizeof(Ch))) { - RAPIDJSON_PARSE_ERROR(kParseErrorStackSizeLimitExceeded, is.Tell() - 1); - } - } os.Put('\0'); // null-terminate the string return; } @@ -1059,11 +1048,6 @@ private: n = IterativeParsingElementState; else if (src == IterativeParsingKeyValueDelimiterState) n = IterativeParsingMemberValueState; - // Check stack space limit. - if (!CheckStackSpaceQuota(sizeof(IterativeParsingState) + sizeof(int))) { - RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStackSizeLimitExceeded, is.Tell()); - return IterativeParsingErrorState; - } // Push current state. *stack_.template Push(1) = n; // Initialize and push the member/element count. @@ -1235,13 +1219,8 @@ private: return parseResult_; } - bool CheckStackSpaceQuota(size_t size) const { - return kStackSizeLimit_ == 0 || (stack_.GetSize() + size <= kStackSizeLimit_); - } - static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string. internal::Stack stack_; //!< A stack for storing decoded string temporarily during non-destructive parsing. - const size_t kStackSizeLimit_; //!< Stack size limit(in bytes). A value of 0 means no limit. ParseResult parseResult_; }; // class GenericReader diff --git a/test/unittest/readertest.cpp b/test/unittest/readertest.cpp index 1919e94..d6dd4c4 100644 --- a/test/unittest/readertest.cpp +++ b/test/unittest/readertest.cpp @@ -932,18 +932,6 @@ TEST(Reader, IterativeParsing_ShortCircuit) { } } -TEST(Reader, IterativeParsing_LimitStackSize) { - BaseReaderHandler<> handler; - Reader reader(20); - StringStream is("[[[]]]"); - - ParseResult r = reader.Parse(is, handler); - - EXPECT_TRUE(reader.HasParseError()); - EXPECT_EQ(kParseErrorStackSizeLimitExceeded, r.Code()); - EXPECT_EQ(2u, r.Offset()); -} - #ifdef __GNUC__ RAPIDJSON_DIAG_POP #endif From 6f382fd9076e817eddfb8851ea0f6f549459ea4e Mon Sep 17 00:00:00 2001 From: Milo Yip Date: Sat, 26 Jul 2014 21:51:38 +0800 Subject: [PATCH 05/10] Cleanup code --- include/rapidjson/reader.h | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index 64094f6..42384d8 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -1023,10 +1023,6 @@ private: // May return a new state on state pop. template IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) { - //int c = 0; - //IterativeParsingState n; - //bool hr; - switch (dst) { case IterativeParsingStartState: RAPIDJSON_ASSERT(false); @@ -1166,29 +1162,16 @@ private: return; } - if (src == IterativeParsingStartState && is.Peek() == '\0') - RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); - - else if (src == IterativeParsingStartState) - RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotObjectOrArray, is.Tell()); - - else if (src == IterativeParsingFinishState) - RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); - - else if (src == IterativeParsingObjectInitialState || src == IterativeParsingMemberDelimiterState) - RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); - - else if (src == IterativeParsingMemberKeyState) - RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); - - else if (src == IterativeParsingMemberValueState) - RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); - - else if (src == IterativeParsingElementState) - RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); - - else - RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell()); + switch (src) { + case IterativeParsingStartState: RAPIDJSON_PARSE_ERROR(is.Peek() == '\0' ? kParseErrorDocumentEmpty : kParseErrorDocumentRootNotObjectOrArray, is.Tell()); + case IterativeParsingFinishState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); + case IterativeParsingObjectInitialState: + case IterativeParsingMemberDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); + case IterativeParsingMemberKeyState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); + case IterativeParsingMemberValueState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); + case IterativeParsingElementState: RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); + default: RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell()); + } } template From 7fa194d16568b3c6939c79374f73a91c8d470ef3 Mon Sep 17 00:00:00 2001 From: Milo Yip Date: Sat, 26 Jul 2014 21:56:11 +0800 Subject: [PATCH 06/10] Force inline some suitable functions. --- include/rapidjson/reader.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index 42384d8..5676018 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -512,7 +512,7 @@ private: typedef typename TargetEncoding::Ch Ch; StackStream(internal::Stack& stack) : stack_(stack), length_(0) {} - void Put(Ch c) { + RAPIDJSON_FORCEINLINE void Put(Ch c) { *stack_.template Push() = c; ++length_; } @@ -861,7 +861,7 @@ private: return NumberToken; } - IterativeParsingState Predict(IterativeParsingState state, Token token) { + RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) { // current state x one lookahead token -> new state static const char G[cIterativeParsingStateCount][kTokenCount] = { // Start @@ -1022,7 +1022,7 @@ private: // Make an advance in the token stream and state based on the candidate destination state which was returned by Transit(). // May return a new state on state pop. template - IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) { + RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) { switch (dst) { case IterativeParsingStartState: RAPIDJSON_ASSERT(false); From 58e0fb89b218657ce994b4213f5c8956b7a5ecc5 Mon Sep 17 00:00:00 2001 From: Milo Yip Date: Sat, 26 Jul 2014 22:21:06 +0800 Subject: [PATCH 07/10] In iterative parsing, always use SizeType to prevent potential alignment problem on some platforms. --- include/rapidjson/reader.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index 5676018..aa8ca01 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -1045,9 +1045,9 @@ private: else if (src == IterativeParsingKeyValueDelimiterState) n = IterativeParsingMemberValueState; // Push current state. - *stack_.template Push(1) = n; + *stack_.template Push(1) = n; // Initialize and push the member/element count. - *stack_.template Push(1) = 0; + *stack_.template Push(1) = 0; // Call handler bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray(); // On handler short circuits the parsing. @@ -1096,18 +1096,18 @@ private: case IterativeParsingElementDelimiterState: is.Take(); // Update member/element count. - *stack_.template Top() = *stack_.template Top() + 1; + *stack_.template Top() = *stack_.template Top() + 1; return dst; case IterativeParsingObjectFinishState: { // Get member count. - int c = *stack_.template Pop(1); + SizeType c = *stack_.template Pop(1); // If the object is not empty, count the last member. if (src == IterativeParsingMemberValueState) ++c; // Restore the state. - IterativeParsingState n = *stack_.template Pop(1); + IterativeParsingState n = static_cast(*stack_.template Pop(1)); // Transit to Finish state if this is the topmost scope. if (n == IterativeParsingStartState) n = IterativeParsingFinishState; @@ -1127,12 +1127,12 @@ private: case IterativeParsingArrayFinishState: { // Get element count. - int c = *stack_.template Pop(1); + SizeType c = *stack_.template Pop(1); // If the array is not empty, count the last element. if (src == IterativeParsingElementState) ++c; // Restore the state. - IterativeParsingState n = *stack_.template Pop(1); + IterativeParsingState n = static_cast(*stack_.template Pop(1)); // Transit to Finish state if this is the topmost scope. if (n == IterativeParsingStartState) n = IterativeParsingFinishState; From b99a515ef2f9fc9f9df55a1a051e00db29365129 Mon Sep 17 00:00:00 2001 From: Milo Yip Date: Sat, 26 Jul 2014 23:23:52 +0800 Subject: [PATCH 08/10] Define RAPIDJSON_FORCEINLINE for gcc Performance boost a lot! --- include/rapidjson/rapidjson.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/rapidjson/rapidjson.h b/include/rapidjson/rapidjson.h index 1608ce8..4aef17d 100644 --- a/include/rapidjson/rapidjson.h +++ b/include/rapidjson/rapidjson.h @@ -41,6 +41,8 @@ #ifndef RAPIDJSON_FORCEINLINE #ifdef _MSC_VER #define RAPIDJSON_FORCEINLINE __forceinline +#elif defined(__GNUC__) && __GNUC__ >= 4 +#define RAPIDJSON_FORCEINLINE __attribute__((always_inline)) #else #define RAPIDJSON_FORCEINLINE #endif From 8081e2a2c85eb78a8af52dd34c3a46c4cac51ad8 Mon Sep 17 00:00:00 2001 From: Milo Yip Date: Sun, 27 Jul 2014 00:03:37 +0800 Subject: [PATCH 09/10] Add perf test cases for document using iterative parsing --- test/perftest/rapidjsontest.cpp | 41 ++++++++++++--------------------- 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/test/perftest/rapidjsontest.cpp b/test/perftest/rapidjsontest.cpp index 0b52e8a..b242717 100644 --- a/test/perftest/rapidjsontest.cpp +++ b/test/perftest/rapidjsontest.cpp @@ -104,48 +104,37 @@ TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_ValidateEncoding)) { } TEST_F(RapidJson, SIMD_SUFFIX(DocumentParseInsitu_MemoryPoolAllocator)) { - //const size_t userBufferSize = 128 * 1024; - //char* userBuffer = (char*)malloc(userBufferSize); - for (size_t i = 0; i < kTrialCount; i++) { memcpy(temp_, json_, length_ + 1); - //MemoryPoolAllocator<> allocator(userBuffer, userBufferSize); - //Document doc(&allocator); Document doc; doc.ParseInsitu(temp_); ASSERT_TRUE(doc.IsObject()); - //if (i == 0) { - // size_t size = doc.GetAllocator().Size(); - // size_t capacity = doc.GetAllocator().Capacity(); - // size_t stack_capacity = doc.GetStackCapacity(); - // size_t actual = size - stack_capacity; - // std::cout << "Size:" << size << " Capacity:" << capacity << " Stack:" << stack_capacity << " Actual:" << actual << std::endl; - //} } +} - //free(userBuffer); +TEST_F(RapidJson, SIMD_SUFFIX(DocumentParseIterativeInsitu_MemoryPoolAllocator)) { + for (size_t i = 0; i < kTrialCount; i++) { + memcpy(temp_, json_, length_ + 1); + Document doc; + doc.ParseInsitu(temp_); + ASSERT_TRUE(doc.IsObject()); + } } TEST_F(RapidJson, SIMD_SUFFIX(DocumentParse_MemoryPoolAllocator)) { - //const size_t userBufferSize = 128 * 1024; - //char* userBuffer = (char*)malloc(userBufferSize); - for (size_t i = 0; i < kTrialCount; i++) { - //MemoryPoolAllocator<> allocator(userBuffer, userBufferSize); - //Document doc(&allocator); Document doc; doc.Parse(json_); ASSERT_TRUE(doc.IsObject()); - //if (i == 0) { - // size_t size = doc.GetAllocator().Size(); - // size_t capacity = doc.GetAllocator().Capacity(); - // size_t stack_capacity = doc.GetStackCapacity(); - // size_t actual = size - stack_capacity; - // std::cout << "Size:" << size << " Capacity:" << capacity << " Stack:" << stack_capacity << " Actual:" << actual << std::endl; - //} } +} - //free(userBuffer); +TEST_F(RapidJson, SIMD_SUFFIX(DocumentParseIterative_MemoryPoolAllocator)) { + for (size_t i = 0; i < kTrialCount; i++) { + Document doc; + doc.Parse(json_); + ASSERT_TRUE(doc.IsObject()); + } } TEST_F(RapidJson, SIMD_SUFFIX(DocumentParse_CrtAllocator)) { From 857865a166282a92bd18789413bc5802a1361cb0 Mon Sep 17 00:00:00 2001 From: Milo Yip Date: Sun, 27 Jul 2014 00:36:23 +0800 Subject: [PATCH 10/10] Fix parse error message --- include/rapidjson/error/en.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/rapidjson/error/en.h b/include/rapidjson/error/en.h index e9a5d1d..e9120c5 100644 --- a/include/rapidjson/error/en.h +++ b/include/rapidjson/error/en.h @@ -40,7 +40,6 @@ inline const RAPIDJSON_ERROR_CHARTYPE* GetParseError_En(ParseErrorCode parseErro case kParseErrorTermination: return RAPIDJSON_ERROR_STRING("Terminate parsing due to Handler error."); case kParseErrorUnspecificSyntaxError: return RAPIDJSON_ERROR_STRING("Unspecific syntax error."); - case kParseErrorStackSizeLimitExceeded: return RAPIDJSON_ERROR_STRING("Parsing stack size limit is exceeded."); default: return RAPIDJSON_ERROR_STRING("Unknown error.");