From 3006fa7d8c2c5619af52fcca2d195bd53f6fdee5 Mon Sep 17 00:00:00 2001 From: thebusytypist Date: Sun, 6 Jul 2014 21:15:38 +0800 Subject: [PATCH 01/20] Try to resolve issue #35: implement iterative parsing. --- include/rapidjson/reader.h | 224 ++++++++++++++++++++++++++++++++++- test/unittest/readertest.cpp | 196 +++++++++++++++++++++++++++++- 2 files changed, 416 insertions(+), 4 deletions(-) diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index dac1537..d0f05e4 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -46,7 +46,8 @@ namespace rapidjson { enum ParseFlag { kParseDefaultFlags = 0, //!< Default parse flags. Non-destructive parsing. Text strings are decoded into allocated buffer. kParseInsituFlag = 1, //!< In-situ(destructive) parsing. - kParseValidateEncodingFlag = 2 //!< Validate encoding of JSON strings. + kParseValidateEncodingFlag = 2, //!< Validate encoding of JSON strings. + kParseNonRecursiveFlag = 4 //!< Non-recursive(constant complexity in terms of function call stack size) parsing. }; //! Error code of parsing. @@ -69,7 +70,7 @@ enum ParseErrorCode { kParseErrorStringUnicodeSurrogateInvalid, //!< The surrogate pair in string is invalid. kParseErrorStringEscapeInvalid, //!< Invalid escape character in string. kParseErrorStringMissQuotationMark, //!< Missing a closing quotation mark in string. - kParseErrorStringInvalidEncoding, //!< Invalid encoidng in string. + kParseErrorStringInvalidEncoding, //!< Invalid encoding in string. kParseErrorNumberTooBig, //!< Number too big to be stored in double. kParseErrorNumberMissFraction, //!< Miss fraction part in number. @@ -134,7 +135,7 @@ namespace internal { template::copyOptimization> class StreamLocalCopy; -//! Do copy optimziation. +//! Do copy optimization. 
template class StreamLocalCopy { public: @@ -297,6 +298,9 @@ public: parseErrorCode_ = kParseErrorNone; errorOffset_ = 0; + if (parseFlags & kParseNonRecursiveFlag) + return NonRecursiveParse(is, handler); + SkipWhitespace(is); if (is.Peek() == '\0') @@ -748,6 +752,220 @@ private: } } + // Non-recursive parsing + enum NonRecursiveParsingState { + NonRecursiveParsingStartState, + NonRecursiveParsingFinishState, + NonRecursiveParsingErrorState, + // Object states + NonRecursiveParsingObjectInitialState, + NonRecursiveParsingObjectContentState, + // Array states + NonRecursiveParsingArrayInitialState, + NonRecursiveParsingArrayContentState + }; + + template + NonRecursiveParsingState TransitToCompoundValueTypeState(NonRecursiveParsingState state, InputStream& is, Handler& handler) { + // For compound value type(object and array), we should push the current state and start a new stack frame for this type. + NonRecursiveParsingState r = NonRecursiveParsingErrorState; + + switch (is.Take()) { + case '{': + handler.StartObject(); + r = NonRecursiveParsingObjectInitialState; + // Push current state. + *stack_.template Push(1) = state; + // Initialize and push member count. + *stack_.template Push(1) = 0; + break; + case '[': + handler.StartArray(); + r = NonRecursiveParsingArrayInitialState; + // Push current state. + *stack_.template Push(1) = state; + // Initialize and push element count. + *stack_.template Push(1) = 0; + break; + } + return r; + } + + // Inner transition of object or array states(ObjectInitial->ObjectContent, ArrayInitial->ArrayContent). 
+ template + NonRecursiveParsingState TransitByValue(NonRecursiveParsingState state, InputStream& is, Handler& handler) { + RAPIDJSON_ASSERT( + state == NonRecursiveParsingObjectInitialState || + state == NonRecursiveParsingArrayInitialState || + state == NonRecursiveParsingObjectContentState || + state == NonRecursiveParsingArrayContentState); + + NonRecursiveParsingState t; + if (state == NonRecursiveParsingObjectInitialState) + t = NonRecursiveParsingObjectContentState; + else if (state == NonRecursiveParsingArrayInitialState) + t = NonRecursiveParsingArrayContentState; + else + t = state; + + NonRecursiveParsingState r = NonRecursiveParsingErrorState; + + switch (is.Peek()) { + // For plain value state is not changed. + case 'n': ParseNull (is, handler); r = t; break; + case 't': ParseTrue (is, handler); r = t; break; + case 'f': ParseFalse (is, handler); r = t; break; + case '"': ParseString(is, handler); r = t; break; + // Transit when value is object or array. + case '{': + case '[': + r = TransitToCompoundValueTypeState(state, is, handler); break; + default: ParseNumber(is, handler); r = t; break; + } + + if (HasParseError()) + r = NonRecursiveParsingErrorState; + + return r; + } + + // Transit from object related states(ObjectInitial, ObjectContent). + template + NonRecursiveParsingState TransitFromObjectStates(NonRecursiveParsingState state, InputStream& is, Handler& handler) { + NonRecursiveParsingState r = NonRecursiveParsingErrorState; + + switch (is.Peek()) { + case '}': { + is.Take(); + // Get member count(include an extra one for non-empty object). + int memberCount = *stack_.template Pop(1); + if (state == NonRecursiveParsingObjectContentState) + ++memberCount; + // Restore the parent stack frame. + r = *stack_.template Pop(1); + // Transit to ContentState since a member/an element was just parsed. 
+ if (r == NonRecursiveParsingArrayInitialState) + r = NonRecursiveParsingArrayContentState; + else if (r == NonRecursiveParsingObjectInitialState) + r = NonRecursiveParsingObjectContentState; + // If we return to the topmost frame mark it finished. + if (r == NonRecursiveParsingStartState) + r = NonRecursiveParsingFinishState; + handler.EndObject(memberCount); + break; + } + case ',': + is.Take(); + r = NonRecursiveParsingObjectContentState; + // Update member count. + *stack_.template Top() = *stack_.template Top() + 1; + break; + case '"': + // Should be a key-value pair. + ParseString(is, handler); + if (HasParseError()) { + r = NonRecursiveParsingErrorState; + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissName, is.Tell()); + break; + } + + SkipWhitespace(is); + + if (is.Take() != ':') { + r = NonRecursiveParsingErrorState; + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissColon, is.Tell()); + break; + } + + SkipWhitespace(is); + + r = TransitByValue(state, is, handler); + + break; + default: + r = NonRecursiveParsingErrorState; + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); + break; + } + + return r; + } + + // Transit from array related states(ArrayInitial, ArrayContent). + template + NonRecursiveParsingState TransitFromArrayStates(NonRecursiveParsingState state, InputStream& is, Handler& handler) { + NonRecursiveParsingState r = NonRecursiveParsingErrorState; + + switch (is.Peek()) { + case ']': { + is.Take(); + // Get element count(include an extra one for non-empty array). + int elementCount = *stack_.template Pop(1); + if (state == NonRecursiveParsingArrayContentState) + ++elementCount; + // Restore the parent stack frame. + r = *stack_.template Pop(1); + // Transit to ContentState since a member/an element was just parsed. 
+ if (r == NonRecursiveParsingArrayInitialState) + r = NonRecursiveParsingArrayContentState; + else if (r == NonRecursiveParsingObjectInitialState) + r = NonRecursiveParsingObjectContentState; + // If we return to the topmost frame mark it finished. + if (r == NonRecursiveParsingStartState) + r = NonRecursiveParsingFinishState; + handler.EndArray(elementCount); + break; + } + case ',': + is.Take(); + r = NonRecursiveParsingArrayContentState; + // Update element count. + *stack_.template Top() = *stack_.template Top() + 1; + break; + default: + // Should be a single value. + r = TransitByValue(state, is, handler); + break; + } + + return r; + } + + template + NonRecursiveParsingState Transit(NonRecursiveParsingState state, InputStream& is, Handler& handler) { + NonRecursiveParsingState r = NonRecursiveParsingErrorState; + + switch (state) { + case NonRecursiveParsingStartState: + r = TransitToCompoundValueTypeState(state, is, handler); + break; + case NonRecursiveParsingObjectInitialState: + case NonRecursiveParsingObjectContentState: + r = TransitFromObjectStates(state, is, handler); + break; + case NonRecursiveParsingArrayInitialState: + case NonRecursiveParsingArrayContentState: + r = TransitFromArrayStates(state, is, handler); + break; + } + + return r; + } + + template + bool NonRecursiveParse(InputStream& is, Handler& handler) { + NonRecursiveParsingState state = NonRecursiveParsingStartState; + + SkipWhitespace(is); + while (is.Peek() != '\0' && state != NonRecursiveParsingErrorState) { + state = Transit(state, is, handler); + SkipWhitespace(is); + } + + stack_.Clear(); + return state == NonRecursiveParsingFinishState && !HasParseError(); + } + static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string. internal::Stack stack_; //!< A stack for storing decoded string temporarily during non-destructive parsing. 
ParseErrorCode parseErrorCode_; diff --git a/test/unittest/readertest.cpp b/test/unittest/readertest.cpp index 62a0b42..028f934 100644 --- a/test/unittest/readertest.cpp +++ b/test/unittest/readertest.cpp @@ -650,7 +650,7 @@ struct StreamTraits > { enum { copyOptimization = 1 }; }; -} // namespace rapdijson +} // namespace rapidjson #endif TEST(Reader, CustomStringStream) { @@ -706,6 +706,200 @@ TEST(Reader, Parse_IStreamWrapper_StringStream) { EXPECT_FALSE(reader.HasParseError()); } +TEST(Reader, NonRecursiveParsing) { + StringStream json("[1,true,false,null,\"string\",{\"array\":[1]}]"); + Reader reader; + BaseReaderHandler<> handler; + + Reader::NonRecursiveParsingState r; + + // [ + r = reader.Transit( + Reader::NonRecursiveParsingStartState, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayInitialState, r); + + // 1 + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(0, *reader.stack_.template Top()); // element count + + // , + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(1, *reader.stack_.template Top()); // element count + + // true + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(1, *reader.stack_.template Top()); // element count + + // , + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(2, *reader.stack_.template Top()); // element count + + // false + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(2, *reader.stack_.template Top()); // element count + + 
// , + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(3, *reader.stack_.template Top()); // element count + + // null + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(3, *reader.stack_.template Top()); // element count + + // , + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(4, *reader.stack_.template Top()); // element count + + // "string" + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(4, *reader.stack_.template Top()); // element count + + // , + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(5, *reader.stack_.template Top()); // element count + + // { + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingObjectInitialState, r); + EXPECT_EQ(0, *reader.stack_.template Top()); // member count + + // "array":[ + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayInitialState, r); + EXPECT_EQ(0, *reader.stack_.template Top()); // element count + + // 1 + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(0, *reader.stack_.template Top()); // element count + + // ] + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingObjectContentState, r); + EXPECT_EQ(0, *reader.stack_.template Top()); // 
member count + + // } + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(5, *reader.stack_.template Top()); // element count + + // ] + r = reader.Transit( + r, + json, + handler); + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::NonRecursiveParsingFinishState, r); +} + +struct CountHandler : BaseReaderHandler<> { + void EndObject(SizeType memberCount) { + MemberCount = memberCount; + } + + void EndArray(SizeType elementCount) { + ElementCount = elementCount; + } + + SizeType MemberCount; + SizeType ElementCount; +}; + +TEST(Reader, NonRecursiveParsing_MemberCounting) { + StringStream json("{\"array\": []}"); + Reader reader; + CountHandler handler; + + reader.NonRecursiveParse(json, handler); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(1, handler.MemberCount); +} + +TEST(Reader, NonRecursiveParsing_ElementCounting) { + StringStream json("[{}]"); + Reader reader; + CountHandler handler; + + reader.NonRecursiveParse(json, handler); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(1, handler.ElementCount); +} + #ifdef __GNUC__ #pragma GCC diagnostic pop #endif From 7acb0c181ef15880bb7efbd35b2c2df362ccd12d Mon Sep 17 00:00:00 2001 From: thebusytypist Date: Mon, 7 Jul 2014 21:46:57 +0800 Subject: [PATCH 02/20] Rename flags/state names/functions/test cases from 'NonRecursive' to 'Iterative'. --- include/rapidjson/reader.h | 130 +++++++++++++++++------------------ test/unittest/readertest.cpp | 82 +++++++++++----------- 2 files changed, 106 insertions(+), 106 deletions(-) diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index d0f05e4..cdc577a 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -47,7 +47,7 @@ enum ParseFlag { kParseDefaultFlags = 0, //!< Default parse flags. Non-destructive parsing. Text strings are decoded into allocated buffer. 
kParseInsituFlag = 1, //!< In-situ(destructive) parsing. kParseValidateEncodingFlag = 2, //!< Validate encoding of JSON strings. - kParseNonRecursiveFlag = 4 //!< Non-recursive(constant complexity in terms of function call stack size) parsing. + kParseIterativeFlag = 4 //!< Iterative(constant complexity in terms of function call stack size) parsing. }; //! Error code of parsing. @@ -298,8 +298,8 @@ public: parseErrorCode_ = kParseErrorNone; errorOffset_ = 0; - if (parseFlags & kParseNonRecursiveFlag) - return NonRecursiveParse(is, handler); + if (parseFlags & kParseIterativeFlag) + return IterativeParse(is, handler); SkipWhitespace(is); @@ -753,37 +753,37 @@ private: } // Non-recursive parsing - enum NonRecursiveParsingState { - NonRecursiveParsingStartState, - NonRecursiveParsingFinishState, - NonRecursiveParsingErrorState, + enum IterativeParsingState { + IterativeParsingStartState, + IterativeParsingFinishState, + IterativeParsingErrorState, // Object states - NonRecursiveParsingObjectInitialState, - NonRecursiveParsingObjectContentState, + IterativeParsingObjectInitialState, + IterativeParsingObjectContentState, // Array states - NonRecursiveParsingArrayInitialState, - NonRecursiveParsingArrayContentState + IterativeParsingArrayInitialState, + IterativeParsingArrayContentState }; template - NonRecursiveParsingState TransitToCompoundValueTypeState(NonRecursiveParsingState state, InputStream& is, Handler& handler) { + IterativeParsingState TransitToCompoundValueTypeState(IterativeParsingState state, InputStream& is, Handler& handler) { // For compound value type(object and array), we should push the current state and start a new stack frame for this type. - NonRecursiveParsingState r = NonRecursiveParsingErrorState; + IterativeParsingState r = IterativeParsingErrorState; switch (is.Take()) { case '{': handler.StartObject(); - r = NonRecursiveParsingObjectInitialState; + r = IterativeParsingObjectInitialState; // Push current state. 
- *stack_.template Push(1) = state; + *stack_.template Push(1) = state; // Initialize and push member count. *stack_.template Push(1) = 0; break; case '[': handler.StartArray(); - r = NonRecursiveParsingArrayInitialState; + r = IterativeParsingArrayInitialState; // Push current state. - *stack_.template Push(1) = state; + *stack_.template Push(1) = state; // Initialize and push element count. *stack_.template Push(1) = 0; break; @@ -793,22 +793,22 @@ private: // Inner transition of object or array states(ObjectInitial->ObjectContent, ArrayInitial->ArrayContent). template - NonRecursiveParsingState TransitByValue(NonRecursiveParsingState state, InputStream& is, Handler& handler) { + IterativeParsingState TransitByValue(IterativeParsingState state, InputStream& is, Handler& handler) { RAPIDJSON_ASSERT( - state == NonRecursiveParsingObjectInitialState || - state == NonRecursiveParsingArrayInitialState || - state == NonRecursiveParsingObjectContentState || - state == NonRecursiveParsingArrayContentState); + state == IterativeParsingObjectInitialState || + state == IterativeParsingArrayInitialState || + state == IterativeParsingObjectContentState || + state == IterativeParsingArrayContentState); - NonRecursiveParsingState t; - if (state == NonRecursiveParsingObjectInitialState) - t = NonRecursiveParsingObjectContentState; - else if (state == NonRecursiveParsingArrayInitialState) - t = NonRecursiveParsingArrayContentState; + IterativeParsingState t; + if (state == IterativeParsingObjectInitialState) + t = IterativeParsingObjectContentState; + else if (state == IterativeParsingArrayInitialState) + t = IterativeParsingArrayContentState; else t = state; - NonRecursiveParsingState r = NonRecursiveParsingErrorState; + IterativeParsingState r = IterativeParsingErrorState; switch (is.Peek()) { // For plain value state is not changed. 
@@ -824,39 +824,39 @@ private: } if (HasParseError()) - r = NonRecursiveParsingErrorState; + r = IterativeParsingErrorState; return r; } // Transit from object related states(ObjectInitial, ObjectContent). template - NonRecursiveParsingState TransitFromObjectStates(NonRecursiveParsingState state, InputStream& is, Handler& handler) { - NonRecursiveParsingState r = NonRecursiveParsingErrorState; + IterativeParsingState TransitFromObjectStates(IterativeParsingState state, InputStream& is, Handler& handler) { + IterativeParsingState r = IterativeParsingErrorState; switch (is.Peek()) { case '}': { is.Take(); // Get member count(include an extra one for non-empty object). int memberCount = *stack_.template Pop(1); - if (state == NonRecursiveParsingObjectContentState) + if (state == IterativeParsingObjectContentState) ++memberCount; // Restore the parent stack frame. - r = *stack_.template Pop(1); + r = *stack_.template Pop(1); // Transit to ContentState since a member/an element was just parsed. - if (r == NonRecursiveParsingArrayInitialState) - r = NonRecursiveParsingArrayContentState; - else if (r == NonRecursiveParsingObjectInitialState) - r = NonRecursiveParsingObjectContentState; + if (r == IterativeParsingArrayInitialState) + r = IterativeParsingArrayContentState; + else if (r == IterativeParsingObjectInitialState) + r = IterativeParsingObjectContentState; // If we return to the topmost frame mark it finished. - if (r == NonRecursiveParsingStartState) - r = NonRecursiveParsingFinishState; + if (r == IterativeParsingStartState) + r = IterativeParsingFinishState; handler.EndObject(memberCount); break; } case ',': is.Take(); - r = NonRecursiveParsingObjectContentState; + r = IterativeParsingObjectContentState; // Update member count. *stack_.template Top() = *stack_.template Top() + 1; break; @@ -864,7 +864,7 @@ private: // Should be a key-value pair. 
ParseString(is, handler); if (HasParseError()) { - r = NonRecursiveParsingErrorState; + r = IterativeParsingErrorState; RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissName, is.Tell()); break; } @@ -872,7 +872,7 @@ private: SkipWhitespace(is); if (is.Take() != ':') { - r = NonRecursiveParsingErrorState; + r = IterativeParsingErrorState; RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissColon, is.Tell()); break; } @@ -883,7 +883,7 @@ private: break; default: - r = NonRecursiveParsingErrorState; + r = IterativeParsingErrorState; RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); break; } @@ -893,32 +893,32 @@ private: // Transit from array related states(ArrayInitial, ArrayContent). template - NonRecursiveParsingState TransitFromArrayStates(NonRecursiveParsingState state, InputStream& is, Handler& handler) { - NonRecursiveParsingState r = NonRecursiveParsingErrorState; + IterativeParsingState TransitFromArrayStates(IterativeParsingState state, InputStream& is, Handler& handler) { + IterativeParsingState r = IterativeParsingErrorState; switch (is.Peek()) { case ']': { is.Take(); // Get element count(include an extra one for non-empty array). int elementCount = *stack_.template Pop(1); - if (state == NonRecursiveParsingArrayContentState) + if (state == IterativeParsingArrayContentState) ++elementCount; // Restore the parent stack frame. - r = *stack_.template Pop(1); + r = *stack_.template Pop(1); // Transit to ContentState since a member/an element was just parsed. - if (r == NonRecursiveParsingArrayInitialState) - r = NonRecursiveParsingArrayContentState; - else if (r == NonRecursiveParsingObjectInitialState) - r = NonRecursiveParsingObjectContentState; + if (r == IterativeParsingArrayInitialState) + r = IterativeParsingArrayContentState; + else if (r == IterativeParsingObjectInitialState) + r = IterativeParsingObjectContentState; // If we return to the topmost frame mark it finished. 
- if (r == NonRecursiveParsingStartState) - r = NonRecursiveParsingFinishState; + if (r == IterativeParsingStartState) + r = IterativeParsingFinishState; handler.EndArray(elementCount); break; } case ',': is.Take(); - r = NonRecursiveParsingArrayContentState; + r = IterativeParsingArrayContentState; // Update element count. *stack_.template Top() = *stack_.template Top() + 1; break; @@ -932,19 +932,19 @@ private: } template - NonRecursiveParsingState Transit(NonRecursiveParsingState state, InputStream& is, Handler& handler) { - NonRecursiveParsingState r = NonRecursiveParsingErrorState; + IterativeParsingState Transit(IterativeParsingState state, InputStream& is, Handler& handler) { + IterativeParsingState r = IterativeParsingErrorState; switch (state) { - case NonRecursiveParsingStartState: + case IterativeParsingStartState: r = TransitToCompoundValueTypeState(state, is, handler); break; - case NonRecursiveParsingObjectInitialState: - case NonRecursiveParsingObjectContentState: + case IterativeParsingObjectInitialState: + case IterativeParsingObjectContentState: r = TransitFromObjectStates(state, is, handler); break; - case NonRecursiveParsingArrayInitialState: - case NonRecursiveParsingArrayContentState: + case IterativeParsingArrayInitialState: + case IterativeParsingArrayContentState: r = TransitFromArrayStates(state, is, handler); break; } @@ -953,17 +953,17 @@ private: } template - bool NonRecursiveParse(InputStream& is, Handler& handler) { - NonRecursiveParsingState state = NonRecursiveParsingStartState; + bool IterativeParse(InputStream& is, Handler& handler) { + IterativeParsingState state = IterativeParsingStartState; SkipWhitespace(is); - while (is.Peek() != '\0' && state != NonRecursiveParsingErrorState) { + while (is.Peek() != '\0' && state != IterativeParsingErrorState) { state = Transit(state, is, handler); SkipWhitespace(is); } stack_.Clear(); - return state == NonRecursiveParsingFinishState && !HasParseError(); + return state == 
IterativeParsingFinishState && !HasParseError(); } static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string. diff --git a/test/unittest/readertest.cpp b/test/unittest/readertest.cpp index 028f934..eb63dec 100644 --- a/test/unittest/readertest.cpp +++ b/test/unittest/readertest.cpp @@ -706,163 +706,163 @@ TEST(Reader, Parse_IStreamWrapper_StringStream) { EXPECT_FALSE(reader.HasParseError()); } -TEST(Reader, NonRecursiveParsing) { +TEST(Reader, IterativeParsing) { StringStream json("[1,true,false,null,\"string\",{\"array\":[1]}]"); Reader reader; BaseReaderHandler<> handler; - Reader::NonRecursiveParsingState r; + Reader::IterativeParsingState r; // [ - r = reader.Transit( - Reader::NonRecursiveParsingStartState, + r = reader.Transit( + Reader::IterativeParsingStartState, json, handler); EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::NonRecursiveParsingArrayInitialState, r); + EXPECT_EQ(Reader::IterativeParsingArrayInitialState, r); // 1 - r = reader.Transit( + r = reader.Transit( r, json, handler); EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(Reader::IterativeParsingArrayContentState, r); EXPECT_EQ(0, *reader.stack_.template Top()); // element count // , - r = reader.Transit( + r = reader.Transit( r, json, handler); EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(Reader::IterativeParsingArrayContentState, r); EXPECT_EQ(1, *reader.stack_.template Top()); // element count // true - r = reader.Transit( + r = reader.Transit( r, json, handler); EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(Reader::IterativeParsingArrayContentState, r); EXPECT_EQ(1, *reader.stack_.template Top()); // element count // , - r = reader.Transit( + r = reader.Transit( r, json, handler); EXPECT_FALSE(reader.HasParseError()); - 
EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(Reader::IterativeParsingArrayContentState, r); EXPECT_EQ(2, *reader.stack_.template Top()); // element count // false - r = reader.Transit( + r = reader.Transit( r, json, handler); EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(Reader::IterativeParsingArrayContentState, r); EXPECT_EQ(2, *reader.stack_.template Top()); // element count // , - r = reader.Transit( + r = reader.Transit( r, json, handler); EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(Reader::IterativeParsingArrayContentState, r); EXPECT_EQ(3, *reader.stack_.template Top()); // element count // null - r = reader.Transit( + r = reader.Transit( r, json, handler); EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(Reader::IterativeParsingArrayContentState, r); EXPECT_EQ(3, *reader.stack_.template Top()); // element count // , - r = reader.Transit( + r = reader.Transit( r, json, handler); EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(Reader::IterativeParsingArrayContentState, r); EXPECT_EQ(4, *reader.stack_.template Top()); // element count // "string" - r = reader.Transit( + r = reader.Transit( r, json, handler); EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(Reader::IterativeParsingArrayContentState, r); EXPECT_EQ(4, *reader.stack_.template Top()); // element count // , - r = reader.Transit( + r = reader.Transit( r, json, handler); EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(Reader::IterativeParsingArrayContentState, r); EXPECT_EQ(5, *reader.stack_.template Top()); // element count // { - r = reader.Transit( + r = reader.Transit( r, json, handler); 
EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::NonRecursiveParsingObjectInitialState, r); + EXPECT_EQ(Reader::IterativeParsingObjectInitialState, r); EXPECT_EQ(0, *reader.stack_.template Top()); // member count // "array":[ - r = reader.Transit( + r = reader.Transit( r, json, handler); EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::NonRecursiveParsingArrayInitialState, r); + EXPECT_EQ(Reader::IterativeParsingArrayInitialState, r); EXPECT_EQ(0, *reader.stack_.template Top()); // element count // 1 - r = reader.Transit( + r = reader.Transit( r, json, handler); EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(Reader::IterativeParsingArrayContentState, r); EXPECT_EQ(0, *reader.stack_.template Top()); // element count // ] - r = reader.Transit( + r = reader.Transit( r, json, handler); EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::NonRecursiveParsingObjectContentState, r); + EXPECT_EQ(Reader::IterativeParsingObjectContentState, r); EXPECT_EQ(0, *reader.stack_.template Top()); // member count // } - r = reader.Transit( + r = reader.Transit( r, json, handler); EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::NonRecursiveParsingArrayContentState, r); + EXPECT_EQ(Reader::IterativeParsingArrayContentState, r); EXPECT_EQ(5, *reader.stack_.template Top()); // element count // ] - r = reader.Transit( + r = reader.Transit( r, json, handler); EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::NonRecursiveParsingFinishState, r); + EXPECT_EQ(Reader::IterativeParsingFinishState, r); } struct CountHandler : BaseReaderHandler<> { @@ -878,23 +878,23 @@ struct CountHandler : BaseReaderHandler<> { SizeType ElementCount; }; -TEST(Reader, NonRecursiveParsing_MemberCounting) { +TEST(Reader, IterativeParsing_MemberCounting) { StringStream json("{\"array\": []}"); Reader reader; CountHandler handler; - reader.NonRecursiveParse(json, handler); + reader.IterativeParse(json, handler); 
EXPECT_FALSE(reader.HasParseError()); EXPECT_EQ(1, handler.MemberCount); } -TEST(Reader, NonRecursiveParsing_ElementCounting) { +TEST(Reader, IterativeParsing_ElementCounting) { StringStream json("[{}]"); Reader reader; CountHandler handler; - reader.NonRecursiveParse(json, handler); + reader.IterativeParse(json, handler); EXPECT_FALSE(reader.HasParseError()); EXPECT_EQ(1, handler.ElementCount); From 7d33b0151547b5b95f4d016a35746c9860844ea9 Mon Sep 17 00:00:00 2001 From: thebusytypist Date: Mon, 7 Jul 2014 21:57:23 +0800 Subject: [PATCH 03/20] Bugfix: add missing transition from finish state. --- include/rapidjson/reader.h | 5 +++++ test/unittest/readertest.cpp | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index cdc577a..b4d2d8b 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -947,6 +947,11 @@ private: case IterativeParsingArrayContentState: r = TransitFromArrayStates(state, is, handler); break; + case IterativeParsingFinishState: + // Any token appears after finish state leads to error state. 
+ r = IterativeParsingErrorState; + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell()); + break; } return r; diff --git a/test/unittest/readertest.cpp b/test/unittest/readertest.cpp index eb63dec..cf4ca67 100644 --- a/test/unittest/readertest.cpp +++ b/test/unittest/readertest.cpp @@ -900,6 +900,16 @@ TEST(Reader, IterativeParsing_ElementCounting) { EXPECT_EQ(1, handler.ElementCount); } +TEST(Reader, IterativeParsing_AfterFinishState) { + StringStream json("{}, {}"); + Reader reader; + BaseReaderHandler<> handler; + + reader.IterativeParse(json, handler); + + EXPECT_TRUE(reader.HasParseError()); +} + #ifdef __GNUC__ #pragma GCC diagnostic pop #endif From ebb9a250d888a90522ba57937cd27489a8a0f1d4 Mon Sep 17 00:00:00 2001 From: thebusytypist Date: Mon, 7 Jul 2014 22:06:14 +0800 Subject: [PATCH 04/20] It is sufficient to check finish state in iterative parsing. --- include/rapidjson/reader.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index b4d2d8b..0097325 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -968,7 +968,7 @@ private: } stack_.Clear(); - return state == IterativeParsingFinishState && !HasParseError(); + return state == IterativeParsingFinishState; } static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string. From 6b0df217a80d8271ff05fd78b9f558fcd9eeba12 Mon Sep 17 00:00:00 2001 From: thebusytypist Date: Tue, 8 Jul 2014 02:19:35 +0800 Subject: [PATCH 05/20] WIP: refactor iterative parsing. 
--- include/rapidjson/reader.h | 427 ++++++++++++++++++++--------------- test/unittest/readertest.cpp | 204 ----------------- 2 files changed, 244 insertions(+), 387 deletions(-) diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index 0097325..3de6f9f 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -752,209 +752,263 @@ private: } } - // Non-recursive parsing + // Iterative Parsing + + // States enum IterativeParsingState { - IterativeParsingStartState, + IterativeParsingStartState = 0, IterativeParsingFinishState, IterativeParsingErrorState, + // Object states IterativeParsingObjectInitialState, - IterativeParsingObjectContentState, + IterativeParsingMemberState, + IterativeParsingObjectDelimiterState, + IterativeParsingObjectFinishState, + // Array states IterativeParsingArrayInitialState, - IterativeParsingArrayContentState + IterativeParsingElementState, + IterativeParsingArrayDelimiterState, + IterativeParsingArrayFinishState, + + cIterativeParsingStateCount }; - template - IterativeParsingState TransitToCompoundValueTypeState(IterativeParsingState state, InputStream& is, Handler& handler) { - // For compound value type(object and array), we should push the current state and start a new stack frame for this type. - IterativeParsingState r = IterativeParsingErrorState; + // Tokens + enum IterativeParsingToken { + IterativeParsingLeftBracketToken = 0, + IterativeParsingRightBracketToken, - switch (is.Take()) { - case '{': - handler.StartObject(); - r = IterativeParsingObjectInitialState; - // Push current state. - *stack_.template Push(1) = state; - // Initialize and push member count. - *stack_.template Push(1) = 0; - break; - case '[': - handler.StartArray(); - r = IterativeParsingArrayInitialState; - // Push current state. - *stack_.template Push(1) = state; - // Initialize and push element count. 
- *stack_.template Push(1) = 0; - break; + IterativeParsingLeftCurlyBracketToken, + IterativeParsingRightCurlyBracketToken, + + IterativeParsingCommaToken, + + IterativeParsingQuotesToken, + + IterativeParsingFalseToken, + IterativeParsingTrueToken, + IterativeParsingNullToken, + IterativeParsingNumberToken, + + cIterativeParsingTokenCount + }; + + IterativeParsingToken GuessToken(Ch c) { + switch (c) { + case '[': return IterativeParsingLeftBracketToken; + case ']': return IterativeParsingRightBracketToken; + case '{': return IterativeParsingLeftCurlyBracketToken; + case '}': return IterativeParsingRightCurlyBracketToken; + case ',': return IterativeParsingCommaToken; + case '"': return IterativeParsingQuotesToken; + case 'f': return IterativeParsingFalseToken; + case 't': return IterativeParsingTrueToken; + case 'n': return IterativeParsingNullToken; + default: return IterativeParsingNumberToken; } - return r; } - // Inner transition of object or array states(ObjectInitial->ObjectContent, ArrayInitial->ArrayContent). - template - IterativeParsingState TransitByValue(IterativeParsingState state, InputStream& is, Handler& handler) { - RAPIDJSON_ASSERT( - state == IterativeParsingObjectInitialState || - state == IterativeParsingArrayInitialState || - state == IterativeParsingObjectContentState || - state == IterativeParsingArrayContentState); - - IterativeParsingState t; - if (state == IterativeParsingObjectInitialState) - t = IterativeParsingObjectContentState; - else if (state == IterativeParsingArrayInitialState) - t = IterativeParsingArrayContentState; - else - t = state; - - IterativeParsingState r = IterativeParsingErrorState; - - switch (is.Peek()) { - // For plain value state is not changed. - case 'n': ParseNull (is, handler); r = t; break; - case 't': ParseTrue (is, handler); r = t; break; - case 'f': ParseFalse (is, handler); r = t; break; - case '"': ParseString(is, handler); r = t; break; - // Transit when value is object or array. 
- case '{': - case '[': - r = TransitToCompoundValueTypeState(state, is, handler); break; - default: ParseNumber(is, handler); r = t; break; - } - - if (HasParseError()) - r = IterativeParsingErrorState; - - return r; - } - - // Transit from object related states(ObjectInitial, ObjectContent). - template - IterativeParsingState TransitFromObjectStates(IterativeParsingState state, InputStream& is, Handler& handler) { - IterativeParsingState r = IterativeParsingErrorState; - - switch (is.Peek()) { - case '}': { - is.Take(); - // Get member count(include an extra one for non-empty object). - int memberCount = *stack_.template Pop(1); - if (state == IterativeParsingObjectContentState) - ++memberCount; - // Restore the parent stack frame. - r = *stack_.template Pop(1); - // Transit to ContentState since a member/an element was just parsed. - if (r == IterativeParsingArrayInitialState) - r = IterativeParsingArrayContentState; - else if (r == IterativeParsingObjectInitialState) - r = IterativeParsingObjectContentState; - // If we return to the topmost frame mark it finished. - if (r == IterativeParsingStartState) - r = IterativeParsingFinishState; - handler.EndObject(memberCount); - break; - } - case ',': - is.Take(); - r = IterativeParsingObjectContentState; - // Update member count. - *stack_.template Top() = *stack_.template Top() + 1; - break; - case '"': - // Should be a key-value pair. 
- ParseString(is, handler); - if (HasParseError()) { - r = IterativeParsingErrorState; - RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissName, is.Tell()); - break; + IterativeParsingState Transit(IterativeParsingState state, IterativeParsingToken token) { + // current state x one lookahead token -> new state + static const IterativeParsingState G[cIterativeParsingStateCount][cIterativeParsingTokenCount] = { + // Start + { + IterativeParsingObjectInitialState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingArrayInitialState, // Left curly bracket + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Quotes + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number + }, + // Finish(sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState + }, + // Error(sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState + }, + // ObjectInitial + { + IterativeParsingErrorState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingErrorState, // Left curly bracket + IterativeParsingObjectFinishState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingMemberState, // Quotes + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number + }, + // Member + { + 
IterativeParsingErrorState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingErrorState, // Left curly bracket + IterativeParsingObjectFinishState, // Right curly bracket + IterativeParsingObjectDelimiterState, // Comma + IterativeParsingErrorState, // Quotes + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number + }, + // ObjectDelimiter + { + IterativeParsingErrorState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingErrorState, // Left curly bracket + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingMemberState, // Quotes + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number + }, + // ObjectFinish(sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState + }, + // ArrayInitial + { + IterativeParsingElementState, // Left bracket + IterativeParsingArrayFinishState, // Right bracket + IterativeParsingElementState, // Left curly bracket + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingElementState, // Quotes + IterativeParsingElementState, // False + IterativeParsingElementState, // True + IterativeParsingElementState, // Null + IterativeParsingElementState // Number + }, + // Element + { + IterativeParsingErrorState, // Left bracket + IterativeParsingArrayFinishState, // Right bracket + IterativeParsingErrorState, // Left curly bracket + IterativeParsingErrorState, // Right curly bracket + IterativeParsingArrayDelimiterState, // Comma + 
IterativeParsingErrorState, // Quotes + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number + }, + // ArrayDelimiter + { + IterativeParsingElementState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingElementState, // Left curly bracket + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingElementState, // Quotes + IterativeParsingElementState, // False + IterativeParsingElementState, // True + IterativeParsingElementState, // Null + IterativeParsingElementState // Number + }, + // ArrayFinish(sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState } + }; // End of G - SkipWhitespace(is); - - if (is.Take() != ':') { - r = IterativeParsingErrorState; - RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissColon, is.Tell()); - break; - } - - SkipWhitespace(is); - - r = TransitByValue(state, is, handler); - - break; - default: - r = IterativeParsingErrorState; - RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); - break; - } - - return r; + return G[state][token]; } - // Transit from array related states(ArrayInitial, ArrayContent). + // Store or process informations during state transition. + // May return a new state. 
template - IterativeParsingState TransitFromArrayStates(IterativeParsingState state, InputStream& is, Handler& handler) { - IterativeParsingState r = IterativeParsingErrorState; + IterativeParsingState Advance(IterativeParsingState src, IterativeParsingToken token, IterativeParsingState dst, InputStream& is, Handler& handler) { + int c = 0; + IterativeParsingState n; - switch (is.Peek()) { - case ']': { - is.Take(); - // Get element count(include an extra one for non-empty array). - int elementCount = *stack_.template Pop(1); - if (state == IterativeParsingArrayContentState) - ++elementCount; - // Restore the parent stack frame. - r = *stack_.template Pop(1); - // Transit to ContentState since a member/an element was just parsed. - if (r == IterativeParsingArrayInitialState) - r = IterativeParsingArrayContentState; - else if (r == IterativeParsingObjectInitialState) - r = IterativeParsingObjectContentState; - // If we return to the topmost frame mark it finished. - if (r == IterativeParsingStartState) - r = IterativeParsingFinishState; - handler.EndArray(elementCount); - break; - } - case ',': - is.Take(); - r = IterativeParsingArrayContentState; - // Update element count. - *stack_.template Top() = *stack_.template Top() + 1; - break; - default: - // Should be a single value. 
- r = TransitByValue(state, is, handler); - break; - } - - return r; - } - - template - IterativeParsingState Transit(IterativeParsingState state, InputStream& is, Handler& handler) { - IterativeParsingState r = IterativeParsingErrorState; - - switch (state) { + switch (dst) { case IterativeParsingStartState: - r = TransitToCompoundValueTypeState(state, is, handler); - break; - case IterativeParsingObjectInitialState: - case IterativeParsingObjectContentState: - r = TransitFromObjectStates(state, is, handler); - break; - case IterativeParsingArrayInitialState: - case IterativeParsingArrayContentState: - r = TransitFromArrayStates(state, is, handler); - break; - case IterativeParsingFinishState: - // Any token appears after finish state leads to error state. - r = IterativeParsingErrorState; - RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell()); - break; - } + RAPIDJSON_ASSERT(false); + return IterativeParsingErrorState; - return r; + case IterativeParsingFinishState: + is.Take(); + return dst; + + case IterativeParsingErrorState: + return dst; + + case IterativeParsingObjectInitialState: + case IterativeParsingArrayInitialState: + is.Take(); + // Push current state. + *stack_.template Push(1) = src; + // Initialize and push the member/element count. + *stack_.template Push(1) = 0; + return dst; + + case IterativeParsingMemberState: + return dst; + + case IterativeParsingElementState: + if (token == IterativeParsingLeftBracketToken || token == IterativeParsingLeftCurlyBracketToken) { + return dst; + } + else { + + } + + case IterativeParsingObjectDelimiterState: + case IterativeParsingArrayDelimiterState: + is.Take(); + // Update member/element count. + *stack_.template Top() = *stack_.template Top() + 1; + return dst; + + case IterativeParsingObjectFinishState: + is.Take(); + // Get member count. + c = *stack_.template Pop(1); + if (src == IterativeParsingMemberState) + ++c; + // Restore the state. 
+ n = *stack_.template Pop(1); + // Transit to Finish state if this is the topmost scope. + if (n == IterativeParsingStartState) + n = IterativeParsingFinishState; + // Call handler + handler.EndObject(c); + return n; + + case IterativeParsingArrayFinishState: + is.Take(); + // Get element count. + c = *stack_.template Pop(1); + if (src == IterativeParsingElementState) + ++c; + // Restore the state. + n = *stack_.template Pop(1); + // Transit to Finish state if this is the topmost scope. + if (n == IterativeParsingStartState) + n = IterativeParsingFinishState; + // Call handler + handler.EndArray(c); + return n; + + default: + RAPIDJSON_ASSERT(false); + return IterativeParsingErrorState; + } } template @@ -963,7 +1017,14 @@ private: SkipWhitespace(is); while (is.Peek() != '\0' && state != IterativeParsingErrorState) { - state = Transit(state, is, handler); + IterativeParsingToken t = GuessToken(is.Peek()); + IterativeParsingState n = Transit(state, t); + + if (Advance(state, t, n, is, handler)) + state = n; + else + break; + SkipWhitespace(is); } diff --git a/test/unittest/readertest.cpp b/test/unittest/readertest.cpp index cf4ca67..4dd9815 100644 --- a/test/unittest/readertest.cpp +++ b/test/unittest/readertest.cpp @@ -706,210 +706,6 @@ TEST(Reader, Parse_IStreamWrapper_StringStream) { EXPECT_FALSE(reader.HasParseError()); } -TEST(Reader, IterativeParsing) { - StringStream json("[1,true,false,null,\"string\",{\"array\":[1]}]"); - Reader reader; - BaseReaderHandler<> handler; - - Reader::IterativeParsingState r; - - // [ - r = reader.Transit( - Reader::IterativeParsingStartState, - json, - handler); - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingArrayInitialState, r); - - // 1 - r = reader.Transit( - r, - json, - handler); - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingArrayContentState, r); - EXPECT_EQ(0, *reader.stack_.template Top()); // element count - - // , - r = reader.Transit( - r, - json, - 
handler); - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingArrayContentState, r); - EXPECT_EQ(1, *reader.stack_.template Top()); // element count - - // true - r = reader.Transit( - r, - json, - handler); - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingArrayContentState, r); - EXPECT_EQ(1, *reader.stack_.template Top()); // element count - - // , - r = reader.Transit( - r, - json, - handler); - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingArrayContentState, r); - EXPECT_EQ(2, *reader.stack_.template Top()); // element count - - // false - r = reader.Transit( - r, - json, - handler); - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingArrayContentState, r); - EXPECT_EQ(2, *reader.stack_.template Top()); // element count - - // , - r = reader.Transit( - r, - json, - handler); - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingArrayContentState, r); - EXPECT_EQ(3, *reader.stack_.template Top()); // element count - - // null - r = reader.Transit( - r, - json, - handler); - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingArrayContentState, r); - EXPECT_EQ(3, *reader.stack_.template Top()); // element count - - // , - r = reader.Transit( - r, - json, - handler); - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingArrayContentState, r); - EXPECT_EQ(4, *reader.stack_.template Top()); // element count - - // "string" - r = reader.Transit( - r, - json, - handler); - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingArrayContentState, r); - EXPECT_EQ(4, *reader.stack_.template Top()); // element count - - // , - r = reader.Transit( - r, - json, - handler); - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingArrayContentState, r); - EXPECT_EQ(5, *reader.stack_.template Top()); // element count - - // { - r = reader.Transit( - r, - json, - handler); - 
EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingObjectInitialState, r); - EXPECT_EQ(0, *reader.stack_.template Top()); // member count - - // "array":[ - r = reader.Transit( - r, - json, - handler); - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingArrayInitialState, r); - EXPECT_EQ(0, *reader.stack_.template Top()); // element count - - // 1 - r = reader.Transit( - r, - json, - handler); - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingArrayContentState, r); - EXPECT_EQ(0, *reader.stack_.template Top()); // element count - - // ] - r = reader.Transit( - r, - json, - handler); - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingObjectContentState, r); - EXPECT_EQ(0, *reader.stack_.template Top()); // member count - - // } - r = reader.Transit( - r, - json, - handler); - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingArrayContentState, r); - EXPECT_EQ(5, *reader.stack_.template Top()); // element count - - // ] - r = reader.Transit( - r, - json, - handler); - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingFinishState, r); -} - -struct CountHandler : BaseReaderHandler<> { - void EndObject(SizeType memberCount) { - MemberCount = memberCount; - } - - void EndArray(SizeType elementCount) { - ElementCount = elementCount; - } - - SizeType MemberCount; - SizeType ElementCount; -}; - -TEST(Reader, IterativeParsing_MemberCounting) { - StringStream json("{\"array\": []}"); - Reader reader; - CountHandler handler; - - reader.IterativeParse(json, handler); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(1, handler.MemberCount); -} - -TEST(Reader, IterativeParsing_ElementCounting) { - StringStream json("[{}]"); - Reader reader; - CountHandler handler; - - reader.IterativeParse(json, handler); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(1, handler.ElementCount); -} - -TEST(Reader, 
IterativeParsing_AfterFinishState) { - StringStream json("{}, {}"); - Reader reader; - BaseReaderHandler<> handler; - - reader.IterativeParse(json, handler); - - EXPECT_TRUE(reader.HasParseError()); -} - #ifdef __GNUC__ #pragma GCC diagnostic pop #endif From 91aaa346e4e4cf23c3b6c4b36475b5467a54a7c7 Mon Sep 17 00:00:00 2001 From: thebusytypist Date: Tue, 8 Jul 2014 22:54:22 +0800 Subject: [PATCH 06/20] Finish the new implementation of the state machine. Not yet unit-tested. --- include/rapidjson/reader.h | 162 +++++++++++++++++++++++++++---------- 1 file changed, 119 insertions(+), 43 deletions(-) diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index 3de6f9f..0f4ae3e 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -762,14 +762,16 @@ private: // Object states IterativeParsingObjectInitialState, - IterativeParsingMemberState, - IterativeParsingObjectDelimiterState, + IterativeParsingMemberKeyState, + IterativeParsingKeyValueDelimiterState, + IterativeParsingMemberValueState, + IterativeParsingMemberDelimiterState, IterativeParsingObjectFinishState, // Array states IterativeParsingArrayInitialState, IterativeParsingElementState, - IterativeParsingArrayDelimiterState, + IterativeParsingElementDelimiterState, IterativeParsingArrayFinishState, cIterativeParsingStateCount @@ -784,9 +786,9 @@ private: IterativeParsingRightCurlyBracketToken, IterativeParsingCommaToken, + IterativeParsingColonToken, - IterativeParsingQuotesToken, - + IterativeParsingStringToken, IterativeParsingFalseToken, IterativeParsingTrueToken, IterativeParsingNullToken, @@ -802,7 +804,8 @@ private: case '{': return IterativeParsingLeftCurlyBracketToken; case '}': return IterativeParsingRightCurlyBracketToken; case ',': return IterativeParsingCommaToken; - case '"': return IterativeParsingQuotesToken; + case ':': return IterativeParsingColonToken; + case '"': return IterativeParsingStringToken; case 'f': return IterativeParsingFalseToken; case 't': return
IterativeParsingTrueToken; case 'n': return IterativeParsingNullToken; @@ -815,12 +818,13 @@ private: static const IterativeParsingState G[cIterativeParsingStateCount][cIterativeParsingTokenCount] = { // Start { - IterativeParsingObjectInitialState, // Left bracket + IterativeParsingArrayInitialState, // Left bracket IterativeParsingErrorState, // Right bracket - IterativeParsingArrayInitialState, // Left curly bracket + IterativeParsingObjectInitialState, // Left curly bracket IterativeParsingErrorState, // Right curly bracket IterativeParsingErrorState, // Comma - IterativeParsingErrorState, // Quotes + IterativeParsingErrorState, // Colon + IterativeParsingErrorState, // String IterativeParsingErrorState, // False IterativeParsingErrorState, // True IterativeParsingErrorState, // Null @@ -829,12 +833,14 @@ private: // Finish(sink state) { IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, - IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState }, // Error(sink state) { IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, - IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState }, // ObjectInitial { @@ -843,33 +849,64 @@ private: IterativeParsingErrorState, // Left curly bracket IterativeParsingObjectFinishState, // Right curly bracket IterativeParsingErrorState, // Comma - 
IterativeParsingMemberState, // Quotes + IterativeParsingErrorState, // Colon + IterativeParsingMemberKeyState, // String IterativeParsingErrorState, // False IterativeParsingErrorState, // True IterativeParsingErrorState, // Null IterativeParsingErrorState // Number }, - // Member + // MemberKey { IterativeParsingErrorState, // Left bracket IterativeParsingErrorState, // Right bracket IterativeParsingErrorState, // Left curly bracket - IterativeParsingObjectFinishState, // Right curly bracket - IterativeParsingObjectDelimiterState, // Comma - IterativeParsingErrorState, // Quotes + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingKeyValueDelimiterState, // Colon + IterativeParsingErrorState, // String IterativeParsingErrorState, // False IterativeParsingErrorState, // True IterativeParsingErrorState, // Null IterativeParsingErrorState // Number }, - // ObjectDelimiter + // KeyValueDelimiter + { + IterativeParsingArrayInitialState, // Left bracket(push MemberValue state) + IterativeParsingErrorState, // Right bracket + IterativeParsingObjectInitialState, // Left curly bracket(push MemberValue state) + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingMemberValueState, // String + IterativeParsingMemberValueState, // False + IterativeParsingMemberValueState, // True + IterativeParsingMemberValueState, // Null + IterativeParsingMemberValueState // Number + }, + // MemberValue + { + IterativeParsingErrorState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingErrorState, // Left curly bracket + IterativeParsingObjectFinishState, // Right curly bracket + IterativeParsingMemberDelimiterState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingErrorState, // String + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // 
Null + IterativeParsingErrorState // Number + }, + // MemberDelimiter { IterativeParsingErrorState, // Left bracket IterativeParsingErrorState, // Right bracket IterativeParsingErrorState, // Left curly bracket IterativeParsingErrorState, // Right curly bracket IterativeParsingErrorState, // Comma - IterativeParsingMemberState, // Quotes + IterativeParsingErrorState, // Colon + IterativeParsingMemberKeyState, // String IterativeParsingErrorState, // False IterativeParsingErrorState, // True IterativeParsingErrorState, // Null @@ -878,16 +915,18 @@ private: // ObjectFinish(sink state) { IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, - IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState }, // ArrayInitial { - IterativeParsingElementState, // Left bracket + IterativeParsingArrayInitialState, // Left bracket(push Element state) IterativeParsingArrayFinishState, // Right bracket - IterativeParsingElementState, // Left curly bracket + IterativeParsingObjectInitialState, // Left curly bracket(push Element state) IterativeParsingErrorState, // Right curly bracket IterativeParsingErrorState, // Comma - IterativeParsingElementState, // Quotes + IterativeParsingErrorState, // Colon + IterativeParsingElementState, // String IterativeParsingElementState, // False IterativeParsingElementState, // True IterativeParsingElementState, // Null @@ -899,21 +938,23 @@ private: IterativeParsingArrayFinishState, // Right bracket IterativeParsingErrorState, // Left curly bracket IterativeParsingErrorState, // Right curly bracket - IterativeParsingArrayDelimiterState, // Comma - IterativeParsingErrorState, // Quotes + 
IterativeParsingElementDelimiterState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingErrorState, // String IterativeParsingErrorState, // False IterativeParsingErrorState, // True IterativeParsingErrorState, // Null IterativeParsingErrorState // Number }, - // ArrayDelimiter + // ElementDelimiter { - IterativeParsingElementState, // Left bracket + IterativeParsingArrayInitialState, // Left bracket(push Element state) IterativeParsingErrorState, // Right bracket - IterativeParsingElementState, // Left curly bracket + IterativeParsingObjectInitialState, // Left curly bracket(push Element state) IterativeParsingErrorState, // Right curly bracket IterativeParsingErrorState, // Comma - IterativeParsingElementState, // Quotes + IterativeParsingErrorState, // Colon + IterativeParsingElementState, // String IterativeParsingElementState, // False IterativeParsingElementState, // True IterativeParsingElementState, // Null @@ -922,15 +963,16 @@ private: // ArrayFinish(sink state) { IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, - IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState } }; // End of G return G[state][token]; } - // Store or process informations during state transition. - // May return a new state. + // Make an advance in the token stream and state based on the candidate destination state which was returned by Transit(). + // May return a new state on state pop. 
template IterativeParsingState Advance(IterativeParsingState src, IterativeParsingToken token, IterativeParsingState dst, InputStream& is, Handler& handler) { int c = 0; @@ -951,25 +993,57 @@ private: case IterativeParsingObjectInitialState: case IterativeParsingArrayInitialState: is.Take(); + // Push the state(Element or MemeberValue) if we are nested in another array or value of member. + // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop. + n = src; + if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState) + n = IterativeParsingElementState; + else if (src == IterativeParsingKeyValueDelimiterState) + n = IterativeParsingMemberValueState; // Push current state. - *stack_.template Push(1) = src; + *stack_.template Push(1) = n; // Initialize and push the member/element count. *stack_.template Push(1) = 0; + // Call handler + if (dst == IterativeParsingObjectInitialState) + handler.StartObject(); + else + handler.StartArray(); return dst; - case IterativeParsingMemberState: + case IterativeParsingMemberKeyState: + ParseString(is, handler); + if (HasParseError()) + return IterativeParsingErrorState; + else + return dst; + + case IterativeParsingKeyValueDelimiterState: + if (token == IterativeParsingColonToken) { + is.Take(); + return dst; + } + else + return IterativeParsingErrorState; + + case IterativeParsingMemberValueState: + // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state. + ParseValue(is, handler); + if (HasParseError()) { + return IterativeParsingErrorState; + } return dst; case IterativeParsingElementState: - if (token == IterativeParsingLeftBracketToken || token == IterativeParsingLeftCurlyBracketToken) { - return dst; + // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state. 
+ ParseValue(is, handler); + if (HasParseError()) { + return IterativeParsingErrorState; } - else { + return dst; - } - - case IterativeParsingObjectDelimiterState: - case IterativeParsingArrayDelimiterState: + case IterativeParsingMemberDelimiterState: + case IterativeParsingElementDelimiterState: is.Take(); // Update member/element count. *stack_.template Top() = *stack_.template Top() + 1; @@ -979,7 +1053,8 @@ private: is.Take(); // Get member count. c = *stack_.template Pop(1); - if (src == IterativeParsingMemberState) + // If the object is not empty, count the last member. + if (src == IterativeParsingMemberValueState) ++c; // Restore the state. n = *stack_.template Pop(1); @@ -994,6 +1069,7 @@ private: is.Take(); // Get element count. c = *stack_.template Pop(1); + // If the array is not empty, count the last element. if (src == IterativeParsingElementState) ++c; // Restore the state. @@ -1016,11 +1092,11 @@ private: IterativeParsingState state = IterativeParsingStartState; SkipWhitespace(is); - while (is.Peek() != '\0' && state != IterativeParsingErrorState) { + while (is.Peek() != '\0') { IterativeParsingToken t = GuessToken(is.Peek()); IterativeParsingState n = Transit(state, t); - if (Advance(state, t, n, is, handler)) + if ((n = Advance(state, t, n, is, handler)) != IterativeParsingErrorState) state = n; else break; From f6235b216079cf1c6b7aebf45afbf73281d96284 Mon Sep 17 00:00:00 2001 From: thebusytypist Date: Thu, 10 Jul 2014 00:00:56 +0800 Subject: [PATCH 07/20] Add basic error handling. 
--- include/rapidjson/reader.h | 47 +++++++++++++++++++++++++++++++----- test/unittest/readertest.cpp | 24 ++++++++++++++++++ 2 files changed, 65 insertions(+), 6 deletions(-) diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index 0f4ae3e..815fbba 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -813,7 +813,7 @@ private: } } - IterativeParsingState Transit(IterativeParsingState state, IterativeParsingToken token) { + IterativeParsingState Deduce(IterativeParsingState state, IterativeParsingToken token) { // current state x one lookahead token -> new state static const IterativeParsingState G[cIterativeParsingStateCount][cIterativeParsingTokenCount] = { // Start @@ -974,7 +974,7 @@ private: // Make an advance in the token stream and state based on the candidate destination state which was returned by Transit(). // May return a new state on state pop. template - IterativeParsingState Advance(IterativeParsingState src, IterativeParsingToken token, IterativeParsingState dst, InputStream& is, Handler& handler) { + IterativeParsingState Transit(IterativeParsingState src, IterativeParsingToken token, IterativeParsingState dst, InputStream& is, Handler& handler) { int c = 0; IterativeParsingState n; @@ -1087,6 +1087,35 @@ private: } } + template + void HandleError(IterativeParsingState src, InputStream& is) { + if (HasParseError()) { + // Error flag has been set. 
+ return; + } + + if (src == IterativeParsingStartState && is.Peek() == '\0') + RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); + + else if (src == IterativeParsingStartState) + RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotObjectOrArray, is.Tell()); + + else if (src == IterativeParsingFinishState) + RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); + + else if (src == IterativeParsingObjectInitialState || src == IterativeParsingMemberDelimiterState) + RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); + + else if (src == IterativeParsingMemberKeyState) + RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); + + else if (src == IterativeParsingMemberValueState) + RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); + + else if (src == IterativeParsingElementState) + RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); + } + template bool IterativeParse(InputStream& is, Handler& handler) { IterativeParsingState state = IterativeParsingStartState; @@ -1094,16 +1123,22 @@ private: SkipWhitespace(is); while (is.Peek() != '\0') { IterativeParsingToken t = GuessToken(is.Peek()); - IterativeParsingState n = Transit(state, t); + IterativeParsingState n = Deduce(state, t); + IterativeParsingState d = Transit(state, t, n, is, handler); - if ((n = Advance(state, t, n, is, handler)) != IterativeParsingErrorState) - state = n; - else + if (d == IterativeParsingErrorState) { + HandleError(state, is); break; + } + state = d; SkipWhitespace(is); } + // Handle the end of file. 
+ if (state != IterativeParsingFinishState) + HandleError(state, is); + stack_.Clear(); return state == IterativeParsingFinishState; } diff --git a/test/unittest/readertest.cpp b/test/unittest/readertest.cpp index 4dd9815..05c434f 100644 --- a/test/unittest/readertest.cpp +++ b/test/unittest/readertest.cpp @@ -706,6 +706,30 @@ TEST(Reader, Parse_IStreamWrapper_StringStream) { EXPECT_FALSE(reader.HasParseError()); } +#define TESTERRORHANDLING(text, errorCode)\ + {\ + StringStream json(text);\ + BaseReaderHandler<> handler;\ + Reader reader;\ + reader.IterativeParse(json, handler);\ + EXPECT_TRUE(reader.HasParseError());\ + EXPECT_EQ(errorCode, reader.GetParseErrorCode());\ + } + +TEST(Reader, IterativeParsing_ErrorHandling) { + TESTERRORHANDLING("{\"a\": a}", kParseErrorValueInvalid); + + TESTERRORHANDLING("", kParseErrorDocumentEmpty); + TESTERRORHANDLING("1", kParseErrorDocumentRootNotObjectOrArray); + TESTERRORHANDLING("{}{}", kParseErrorDocumentRootNotSingular); + + TESTERRORHANDLING("{1}", kParseErrorObjectMissName); + TESTERRORHANDLING("{\"a\", 1}", kParseErrorObjectMissColon); + TESTERRORHANDLING("{\"a\"}", kParseErrorObjectMissColon); + TESTERRORHANDLING("{\"a\": 1", kParseErrorObjectMissCommaOrCurlyBracket); + TESTERRORHANDLING("[1 2 3]", kParseErrorArrayMissCommaOrSquareBracket); +} + #ifdef __GNUC__ #pragma GCC diagnostic pop #endif From 692904b77b94c41ffc65406d42822a642cf20ef7 Mon Sep 17 00:00:00 2001 From: thebusytypist Date: Thu, 10 Jul 2014 19:49:43 +0800 Subject: [PATCH 08/20] Handle all unspecific parsing errors. 
--- include/rapidjson/error/en.h | 2 ++ include/rapidjson/reader.h | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/include/rapidjson/error/en.h b/include/rapidjson/error/en.h index 81637ee..c96fa53 100644 --- a/include/rapidjson/error/en.h +++ b/include/rapidjson/error/en.h @@ -38,6 +38,8 @@ inline const RAPIDJSON_ERROR_CHARTYPE* GetParseError_En(ParseErrorCode parseErro case kParseErrorNumberMissFraction: return RAPIDJSON_ERROR_STRING("Miss fraction part in number."); case kParseErrorNumberMissExponent: return RAPIDJSON_ERROR_STRING("Miss exponent in number."); + case kParseErrorUnspecificSyntaxError: return RAPIDJSON_ERROR_STRING("Unspecific syntax error."); + default: return RAPIDJSON_ERROR_STRING("Unknown error."); } diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index 815fbba..372016f 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -74,7 +74,9 @@ enum ParseErrorCode { kParseErrorNumberTooBig, //!< Number too big to be stored in double. kParseErrorNumberMissFraction, //!< Miss fraction part in number. - kParseErrorNumberMissExponent //!< Miss exponent in number. + kParseErrorNumberMissExponent, //!< Miss exponent in number. + + kParseErrorUnspecificSyntaxError //!< General syntax error. }; /////////////////////////////////////////////////////////////////////////////// @@ -1114,6 +1116,9 @@ private: else if (src == IterativeParsingElementState) RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); + + else + RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell()); } template From e3c4b3391515e89b98eb8e950b0420c17ad45d80 Mon Sep 17 00:00:00 2001 From: thebusytypist Date: Thu, 10 Jul 2014 22:27:25 +0800 Subject: [PATCH 09/20] Add unittests for state transition. 
--- include/rapidjson/reader.h | 8 +- test/unittest/readertest.cpp | 141 +++++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+), 4 deletions(-) diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index 372016f..9c83775 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -799,7 +799,7 @@ private: cIterativeParsingTokenCount }; - IterativeParsingToken GuessToken(Ch c) { + IterativeParsingToken Tokenize(Ch c) { switch (c) { case '[': return IterativeParsingLeftBracketToken; case ']': return IterativeParsingRightBracketToken; @@ -815,7 +815,7 @@ private: } } - IterativeParsingState Deduce(IterativeParsingState state, IterativeParsingToken token) { + IterativeParsingState Predict(IterativeParsingState state, IterativeParsingToken token) { // current state x one lookahead token -> new state static const IterativeParsingState G[cIterativeParsingStateCount][cIterativeParsingTokenCount] = { // Start @@ -1127,8 +1127,8 @@ private: SkipWhitespace(is); while (is.Peek() != '\0') { - IterativeParsingToken t = GuessToken(is.Peek()); - IterativeParsingState n = Deduce(state, t); + IterativeParsingToken t = Tokenize(is.Peek()); + IterativeParsingState n = Predict(state, t); IterativeParsingState d = Transit(state, t, n, is, handler); if (d == IterativeParsingErrorState) { diff --git a/test/unittest/readertest.cpp b/test/unittest/readertest.cpp index 05c434f..b4107d1 100644 --- a/test/unittest/readertest.cpp +++ b/test/unittest/readertest.cpp @@ -730,6 +730,147 @@ TEST(Reader, IterativeParsing_ErrorHandling) { TESTERRORHANDLING("[1 2 3]", kParseErrorArrayMissCommaOrSquareBracket); } +// Test iterative parsing. 
+template > +struct IterativeParsingReaderHandler { + typedef typename Encoding::Ch Ch; + + IterativeParsingReaderHandler() : + IsNullTriggered(false), + IsBoolTriggered(false), + IsIntTriggered(false), + IsUintTriggered(false), + IsInt64Triggered(false), + IsUint64Triggered(false), + IsDoubleTriggered(false), + IsStringTriggered(false), + IsStartObjectTriggered(false), + IsEndObjectTriggered(false), + MemberCount(0), + IsStartArrayTriggered(false), + ElementCount(0) { + } + + bool IsNullTriggered; + void Null() { IsNullTriggered = true; } + + bool IsBoolTriggered; + void Bool(bool) { IsBoolTriggered = true; } + + bool IsIntTriggered; + void Int(int) { IsIntTriggered = true; } + + bool IsUintTriggered; + void Uint(unsigned) { IsUintTriggered = true; } + + bool IsInt64Triggered; + void Int64(int64_t) { IsInt64Triggered = true; } + + bool IsUint64Triggered; + void Uint64(uint64_t) { IsUint64Triggered = true; } + + bool IsDoubleTriggered; + void Double(double) { IsDoubleTriggered = true; } + + bool IsStringTriggered; + void String(const Ch*, SizeType, bool) { IsStringTriggered = true; } + + bool IsStartObjectTriggered; + void StartObject() { IsStartObjectTriggered = true; } + + bool IsEndObjectTriggered; + SizeType MemberCount; + void EndObject(SizeType c) { IsEndObjectTriggered = true; MemberCount = c; } + + bool IsStartArrayTriggered; + void StartArray() { IsStartArrayTriggered = true; } + + bool IsEndArrayTriggered; + SizeType ElementCount; + void EndArray(SizeType c) { IsEndArrayTriggered = true; ElementCount = c; } +}; + +TEST(Reader, IterativeParsing_StateTransition_Start) { + // Start->ArrayInitial + { + IterativeParsingReaderHandler<> handler; + Reader reader; + StringStream is("["); + + Reader::IterativeParsingState n = reader.Predict(Reader::IterativeParsingStartState, Reader::IterativeParsingLeftBracketToken); + Reader::IterativeParsingState d = reader.Transit(Reader::IterativeParsingStartState, Reader::IterativeParsingLeftBracketToken, n, is, handler); + + 
EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingArrayInitialState, d); + EXPECT_TRUE(handler.IsStartArrayTriggered); + } + + // Start->ObjectInitial + { + IterativeParsingReaderHandler<> handler; + Reader reader; + StringStream is("{"); + + Reader::IterativeParsingState n = reader.Predict(Reader::IterativeParsingStartState, Reader::IterativeParsingLeftCurlyBracketToken); + Reader::IterativeParsingState d = reader.Transit(Reader::IterativeParsingStartState, Reader::IterativeParsingLeftCurlyBracketToken, n, is, handler); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingObjectInitialState, d); + EXPECT_TRUE(handler.IsStartObjectTriggered); + } +} + +TEST(Reader, IterativeParsing_StateTransition_ObjectInitial) { + // ObjectInitial -> ObjectFinish -> Finish + { + IterativeParsingReaderHandler<> handler; + Reader reader; + StringStream is("{}"); + + Reader::IterativeParsingState s = reader.Transit( + Reader::IterativeParsingStartState, + Reader::IterativeParsingLeftCurlyBracketToken, + Reader::IterativeParsingObjectInitialState, + is, handler); + + EXPECT_EQ(Reader::IterativeParsingObjectInitialState, s); + Reader::IterativeParsingState d = reader.Transit( + s, + Reader::IterativeParsingRightCurlyBracketToken, + Reader::IterativeParsingObjectFinishState, + is, handler); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingFinishState, d); + EXPECT_TRUE(handler.IsEndObjectTriggered); + EXPECT_EQ(0, handler.MemberCount); + } + + // ObjectInitial -> MemberKey + { + IterativeParsingReaderHandler<> handler; + Reader reader; + StringStream is("{\"key\""); + + Reader::IterativeParsingState s = reader.Transit( + Reader::IterativeParsingStartState, + Reader::IterativeParsingLeftCurlyBracketToken, + Reader::IterativeParsingObjectInitialState, + is, handler); + + EXPECT_EQ(Reader::IterativeParsingObjectInitialState, s); + Reader::IterativeParsingState d = reader.Transit( + s, + 
Reader::IterativeParsingStringToken, + Reader::IterativeParsingMemberKeyState, + is, handler); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingMemberKeyState, d); + EXPECT_TRUE(handler.IsStringTriggered); + } +} + #ifdef __GNUC__ #pragma GCC diagnostic pop #endif From 55e97eada0ee32dce748b4ab0f57f63e6f88cd46 Mon Sep 17 00:00:00 2001 From: thebusytypist Date: Thu, 10 Jul 2014 22:31:12 +0800 Subject: [PATCH 10/20] Add two basic performance tests. --- test/perftest/rapidjsontest.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/test/perftest/rapidjsontest.cpp b/test/perftest/rapidjsontest.cpp index ea9d144..5cd452e 100644 --- a/test/perftest/rapidjsontest.cpp +++ b/test/perftest/rapidjsontest.cpp @@ -76,6 +76,24 @@ TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler)) { } } +TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseIterative_DummyHandler)) { + for (size_t i = 0; i < kTrialCount; i++) { + StringStream s(json_); + BaseReaderHandler<> h; + Reader reader; + EXPECT_TRUE(reader.Parse(s, h)); + } +} + +TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseIterativeInsitu_DummyHandler)) { + for (size_t i = 0; i < kTrialCount; i++) { + StringStream s(json_); + BaseReaderHandler<> h; + Reader reader; + EXPECT_TRUE(reader.Parse(s, h)); + } +} + TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_ValidateEncoding)) { for (size_t i = 0; i < kTrialCount; i++) { StringStream s(json_); From 70d01cc5f35c0f633a3835eb8737a377c672bc26 Mon Sep 17 00:00:00 2001 From: thebusytypist Date: Fri, 11 Jul 2014 11:43:09 +0800 Subject: [PATCH 11/20] Complete unittests for state transition. 
--- test/unittest/readertest.cpp | 352 ++++++++++++++++++++++++++++++++--- 1 file changed, 328 insertions(+), 24 deletions(-) diff --git a/test/unittest/readertest.cpp b/test/unittest/readertest.cpp index b4107d1..119cea3 100644 --- a/test/unittest/readertest.cpp +++ b/test/unittest/readertest.cpp @@ -790,12 +790,26 @@ struct IterativeParsingReaderHandler { void EndArray(SizeType c) { IsEndArrayTriggered = true; ElementCount = c; } }; +#define ITERATIVE_PARSING_PREPARE_STATE_UNTIL(text, pos)\ + IterativeParsingReaderHandler<> handler;\ + Reader reader;\ + StringStream is(text);\ + \ + Reader::IterativeParsingState state = Reader::IterativeParsingStartState;\ + SkipWhitespace(is);\ + while (is.Tell() != pos) {\ + Reader::IterativeParsingToken token = reader.Tokenize(is.Peek());\ + Reader::IterativeParsingState n = reader.Predict(state, token);\ + state = reader.Transit(state, token, n, is, handler);\ + SkipWhitespace(is);\ + } + TEST(Reader, IterativeParsing_StateTransition_Start) { // Start->ArrayInitial { IterativeParsingReaderHandler<> handler; Reader reader; - StringStream is("["); + StringStream is("[]"); Reader::IterativeParsingState n = reader.Predict(Reader::IterativeParsingStartState, Reader::IterativeParsingLeftBracketToken); Reader::IterativeParsingState d = reader.Transit(Reader::IterativeParsingStartState, Reader::IterativeParsingLeftBracketToken, n, is, handler); @@ -809,7 +823,7 @@ TEST(Reader, IterativeParsing_StateTransition_Start) { { IterativeParsingReaderHandler<> handler; Reader reader; - StringStream is("{"); + StringStream is("{}"); Reader::IterativeParsingState n = reader.Predict(Reader::IterativeParsingStartState, Reader::IterativeParsingLeftCurlyBracketToken); Reader::IterativeParsingState d = reader.Transit(Reader::IterativeParsingStartState, Reader::IterativeParsingLeftCurlyBracketToken, n, is, handler); @@ -823,19 +837,11 @@ TEST(Reader, IterativeParsing_StateTransition_Start) { TEST(Reader, 
IterativeParsing_StateTransition_ObjectInitial) { // ObjectInitial -> ObjectFinish -> Finish { - IterativeParsingReaderHandler<> handler; - Reader reader; - StringStream is("{}"); + ITERATIVE_PARSING_PREPARE_STATE_UNTIL("{}", 1); - Reader::IterativeParsingState s = reader.Transit( - Reader::IterativeParsingStartState, - Reader::IterativeParsingLeftCurlyBracketToken, - Reader::IterativeParsingObjectInitialState, - is, handler); - - EXPECT_EQ(Reader::IterativeParsingObjectInitialState, s); + EXPECT_EQ(Reader::IterativeParsingObjectInitialState, state); Reader::IterativeParsingState d = reader.Transit( - s, + state, Reader::IterativeParsingRightCurlyBracketToken, Reader::IterativeParsingObjectFinishState, is, handler); @@ -848,19 +854,11 @@ TEST(Reader, IterativeParsing_StateTransition_ObjectInitial) { // ObjectInitial -> MemberKey { - IterativeParsingReaderHandler<> handler; - Reader reader; - StringStream is("{\"key\""); + ITERATIVE_PARSING_PREPARE_STATE_UNTIL("{\"key\": 1}", 1); - Reader::IterativeParsingState s = reader.Transit( - Reader::IterativeParsingStartState, - Reader::IterativeParsingLeftCurlyBracketToken, - Reader::IterativeParsingObjectInitialState, - is, handler); - - EXPECT_EQ(Reader::IterativeParsingObjectInitialState, s); + EXPECT_EQ(Reader::IterativeParsingObjectInitialState, state); Reader::IterativeParsingState d = reader.Transit( - s, + state, Reader::IterativeParsingStringToken, Reader::IterativeParsingMemberKeyState, is, handler); @@ -871,6 +869,312 @@ TEST(Reader, IterativeParsing_StateTransition_ObjectInitial) { } } +TEST(Reader, IterativeParsing_StateTransition_MemberKey) { + // MemberKey -> KeyValueDelimiter + { + IterativeParsingReaderHandler<> handler; + Reader reader; + StringStream is(":"); + + Reader::IterativeParsingState d = reader.Transit( + Reader::IterativeParsingMemberKeyState, + Reader::IterativeParsingColonToken, + Reader::IterativeParsingKeyValueDelimiterState, + is, handler); + + EXPECT_FALSE(reader.HasParseError()); + 
EXPECT_EQ(Reader::IterativeParsingKeyValueDelimiterState, d); + } +} + +#define TEST_COMPOUNDTYPE_INITIAL_STATE_AUX(type, text, src, token, popstate, statesuffix, eventsuffix)\ + {\ + IterativeParsingReaderHandler<> handler;\ + Reader reader;\ + StringStream is(text);\ + \ + Reader::IterativeParsingState d = reader.Transit(\ + src,\ + token,\ + Reader::IterativeParsing ## type ## statesuffix,\ + is, handler);\ + \ + EXPECT_FALSE(reader.HasParseError());\ + EXPECT_EQ(Reader::IterativeParsing ## type ## statesuffix, d);\ + EXPECT_TRUE(handler.IsStart ## type ## eventsuffix);\ + \ + int c = *reader.stack_.template Pop(1);\ + EXPECT_EQ(0, c);\ + Reader::IterativeParsingState s = *reader.stack_.template Pop(1);\ + EXPECT_EQ(popstate, s);\ + } + +#define TEST_COMPOUNDTYPE_INITIAL_STATE(type, text, src, token, popstate)\ + TEST_COMPOUNDTYPE_INITIAL_STATE_AUX(type, text, src, token, popstate, InitialState, Triggered) + +#define TEST_PLAIN_VALUE_STATE_AUX(text, src, token, dst, event, eventsuffix)\ + {\ + IterativeParsingReaderHandler<> handler;\ + Reader reader;\ + StringStream is(text);\ + \ + Reader::IterativeParsingState d = reader.Transit(\ + src,\ + token,\ + dst,\ + is, handler);\ + \ + EXPECT_FALSE(reader.HasParseError());\ + EXPECT_EQ(dst, d);\ + EXPECT_TRUE(handler. 
Is ## event ## eventsuffix);\ + } + +#define TEST_PLAIN_VALUE_STATE(text, src, token, dst, event)\ + TEST_PLAIN_VALUE_STATE_AUX(text, src, token, dst, event, Triggered) + +TEST(Reader, IterativeParsing_StateTransition_KeyValueDelimiter) { + // KeyValueDelimiter -> ArrayInitial + TEST_COMPOUNDTYPE_INITIAL_STATE( + Array, + "[", + Reader::IterativeParsingKeyValueDelimiterState, + Reader::IterativeParsingLeftBracketToken, + Reader::IterativeParsingMemberValueState); + + // KeyValueDelimiter -> ObjectInitial + TEST_COMPOUNDTYPE_INITIAL_STATE( + Object, + "{", + Reader::IterativeParsingKeyValueDelimiterState, + Reader::IterativeParsingLeftCurlyBracketToken, + Reader::IterativeParsingMemberValueState); + + // KeyValueDelimiter -> MemberValue + TEST_PLAIN_VALUE_STATE( + "123,", + Reader::IterativeParsingKeyValueDelimiterState, + Reader::IterativeParsingNumberToken, + Reader::IterativeParsingMemberValueState, + Uint); +} + +TEST(Reader, IterativeParsing_StateTransition_MemberValue) { + // MemberValue -> ObjectFinish + { + ITERATIVE_PARSING_PREPARE_STATE_UNTIL("{\"k\": 123}", 9); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingMemberValueState, state); + + Reader::IterativeParsingState d = reader.Transit( + state, + Reader::IterativeParsingRightCurlyBracketToken, + Reader::IterativeParsingObjectFinishState, + is, handler); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingFinishState, d); + EXPECT_TRUE(handler.IsEndObjectTriggered); + EXPECT_EQ(1, handler.MemberCount); + } + + // MemberValue -> MemberDelimiter + { + ITERATIVE_PARSING_PREPARE_STATE_UNTIL("{\"k\": 1, \"e\": 2}", 7); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingMemberValueState, state); + + Reader::IterativeParsingState d = reader.Transit( + state, + Reader::IterativeParsingCommaToken, + Reader::IterativeParsingMemberDelimiterState, + is, handler); + + EXPECT_FALSE(reader.HasParseError()); + 
EXPECT_EQ(Reader::IterativeParsingMemberDelimiterState, d); + } +} + +TEST(Reader, IterativeParsing_StateTransition_MemberDelimiter) { + // MemberDelimiter -> MemberKey + ITERATIVE_PARSING_PREPARE_STATE_UNTIL("{\"k\": 1, \"e\": 2}", 9); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingMemberDelimiterState, state); + + Reader::IterativeParsingState d = reader.Transit( + state, + Reader::IterativeParsingStringToken, + Reader::IterativeParsingMemberKeyState, + is, handler); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingMemberKeyState, d); +} + +TEST(Reader, IterativeParsing_StateTransition_ArrayInitial) { + // ArrayInitial -> ArrayInitial + { + TEST_COMPOUNDTYPE_INITIAL_STATE( + Array, + "[]", + Reader::IterativeParsingArrayInitialState, + Reader::IterativeParsingLeftBracketToken, + Reader::IterativeParsingElementState); + } + + // ArrayInitial -> ArrayFinish -> Finish + { + ITERATIVE_PARSING_PREPARE_STATE_UNTIL("[]", 1); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingArrayInitialState, state); + + Reader::IterativeParsingState d = reader.Transit( + state, + Reader::IterativeParsingRightBracketToken, + Reader::IterativeParsingArrayFinishState, + is, handler); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingFinishState, d); + EXPECT_TRUE(handler.IsEndArrayTriggered); + EXPECT_EQ(0, handler.ElementCount); + } + + // ArrayInitial -> ObjectInitial + { + TEST_COMPOUNDTYPE_INITIAL_STATE( + Object, + "{}", + Reader::IterativeParsingStartState, + Reader::IterativeParsingLeftCurlyBracketToken, + Reader::IterativeParsingStartState); + } + + // ArrayInitial -> Element + { + ITERATIVE_PARSING_PREPARE_STATE_UNTIL("[1]", 1); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingArrayInitialState, state); + + Reader::IterativeParsingState d = reader.Transit( + state, + Reader::IterativeParsingNumberToken, + 
Reader::IterativeParsingElementState, + is, handler); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingElementState, d); + } +} + +TEST(Reader, IterativeParsing_StateTransition_Element) { + // Element -> ArrayFinish -> Finish + { + ITERATIVE_PARSING_PREPARE_STATE_UNTIL("[1]", 2); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingElementState, state); + + Reader::IterativeParsingState d = reader.Transit( + state, + Reader::IterativeParsingRightBracketToken, + Reader::IterativeParsingArrayFinishState, + is, handler); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingFinishState, d); + EXPECT_TRUE(handler.IsEndArrayTriggered); + EXPECT_EQ(1, handler.ElementCount); + } + + // Element -> ElementDelimiter + { + ITERATIVE_PARSING_PREPARE_STATE_UNTIL("[1, 2]", 2); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingElementState, state); + + Reader::IterativeParsingState d = reader.Transit( + state, + Reader::IterativeParsingCommaToken, + Reader::IterativeParsingElementDelimiterState, + is, handler); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingElementDelimiterState, d); + } +} + +TEST(Reader, IterativeParsing_StateTransition_ElementDelimiter) { + // ElementDelimiter -> ArrayInitial + { + ITERATIVE_PARSING_PREPARE_STATE_UNTIL("[1, [1]]", 4); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingElementDelimiterState, state); + + Reader::IterativeParsingState d = reader.Transit( + state, + Reader::IterativeParsingLeftBracketToken, + Reader::IterativeParsingArrayInitialState, + is, handler); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingArrayInitialState, d); + + int c = *reader.stack_.template Pop(1); + EXPECT_EQ(0, c); + Reader::IterativeParsingState s = *reader.stack_.template Pop(1); + EXPECT_EQ(Reader::IterativeParsingElementState, s); + } + + // ElementDelimiter -> 
ObjectInitial + { + ITERATIVE_PARSING_PREPARE_STATE_UNTIL("[1, [1]]", 4); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingElementDelimiterState, state); + + Reader::IterativeParsingState d = reader.Transit( + state, + Reader::IterativeParsingLeftBracketToken, + Reader::IterativeParsingArrayInitialState, + is, handler); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingArrayInitialState, d); + + int c = *reader.stack_.template Pop(1); + EXPECT_EQ(0, c); + Reader::IterativeParsingState s = *reader.stack_.template Pop(1); + EXPECT_EQ(Reader::IterativeParsingElementState, s); + } + + // ElementDelimiter -> Element + { + ITERATIVE_PARSING_PREPARE_STATE_UNTIL("[1, 2]", 4); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingElementDelimiterState, state); + + Reader::IterativeParsingState d = reader.Transit( + state, + Reader::IterativeParsingNumberToken, + Reader::IterativeParsingElementState, + is, handler); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingElementState, d); + + int c = *reader.stack_.template Pop(1); + EXPECT_EQ(1, c); + } +} + #ifdef __GNUC__ #pragma GCC diagnostic pop #endif From 3038a7855e5a61b28133e2d9b72cecde95f2fb5b Mon Sep 17 00:00:00 2001 From: thebusytypist Date: Fri, 11 Jul 2014 16:03:38 +0800 Subject: [PATCH 12/20] Revise unittests: reset the handler before the transition which we are going to test. 
--- include/rapidjson/error/en.h | 2 +- include/rapidjson/reader.h | 2 +- test/unittest/readertest.cpp | 44 ++++++++++++++++++++++++------------ 3 files changed, 32 insertions(+), 16 deletions(-) diff --git a/include/rapidjson/error/en.h b/include/rapidjson/error/en.h index c96fa53..a40c5c7 100644 --- a/include/rapidjson/error/en.h +++ b/include/rapidjson/error/en.h @@ -38,7 +38,7 @@ inline const RAPIDJSON_ERROR_CHARTYPE* GetParseError_En(ParseErrorCode parseErro case kParseErrorNumberMissFraction: return RAPIDJSON_ERROR_STRING("Miss fraction part in number."); case kParseErrorNumberMissExponent: return RAPIDJSON_ERROR_STRING("Miss exponent in number."); - case kParseErrorUnspecificSyntaxError: return RAPIDJSON_ERROR_STRING("Unspecific syntax error."); + case kParseErrorUnspecificSyntaxError: return RAPIDJSON_ERROR_STRING("Unspecific syntax error."); default: return RAPIDJSON_ERROR_STRING("Unknown error."); diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index 11141e9..fe7bedb 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -82,7 +82,7 @@ enum ParseErrorCode { kParseErrorNumberMissFraction, //!< Miss fraction part in number. kParseErrorNumberMissExponent, //!< Miss exponent in number. - kParseErrorUnspecificSyntaxError //!< General syntax error. + kParseErrorUnspecificSyntaxError //!< Unspecific syntax error. 
}; /////////////////////////////////////////////////////////////////////////////// diff --git a/test/unittest/readertest.cpp b/test/unittest/readertest.cpp index 705e2c2..98747a0 100644 --- a/test/unittest/readertest.cpp +++ b/test/unittest/readertest.cpp @@ -735,20 +735,24 @@ template > struct IterativeParsingReaderHandler { typedef typename Encoding::Ch Ch; - IterativeParsingReaderHandler() : - IsNullTriggered(false), - IsBoolTriggered(false), - IsIntTriggered(false), - IsUintTriggered(false), - IsInt64Triggered(false), - IsUint64Triggered(false), - IsDoubleTriggered(false), - IsStringTriggered(false), - IsStartObjectTriggered(false), - IsEndObjectTriggered(false), - MemberCount(0), - IsStartArrayTriggered(false), - ElementCount(0) { + IterativeParsingReaderHandler() { + Reset(); + } + + void Reset() { + IsNullTriggered = false; + IsBoolTriggered = false; + IsIntTriggered = false; + IsUintTriggered = false; + IsInt64Triggered = false; + IsUint64Triggered = false; + IsDoubleTriggered = false; + IsStringTriggered = false; + IsStartObjectTriggered = false; + IsEndObjectTriggered = false; + MemberCount = 0; + IsStartArrayTriggered = false; + ElementCount = 0; } bool IsNullTriggered; @@ -838,6 +842,7 @@ TEST(Reader, IterativeParsing_StateTransition_ObjectInitial) { // ObjectInitial -> ObjectFinish -> Finish { ITERATIVE_PARSING_PREPARE_STATE_UNTIL("{}", 1); + handler.Reset(); EXPECT_EQ(Reader::IterativeParsingObjectInitialState, state); Reader::IterativeParsingState d = reader.Transit( @@ -855,6 +860,7 @@ TEST(Reader, IterativeParsing_StateTransition_ObjectInitial) { // ObjectInitial -> MemberKey { ITERATIVE_PARSING_PREPARE_STATE_UNTIL("{\"key\": 1}", 1); + handler.Reset(); EXPECT_EQ(Reader::IterativeParsingObjectInitialState, state); Reader::IterativeParsingState d = reader.Transit( @@ -962,6 +968,7 @@ TEST(Reader, IterativeParsing_StateTransition_MemberValue) { // MemberValue -> ObjectFinish { ITERATIVE_PARSING_PREPARE_STATE_UNTIL("{\"k\": 123}", 9); + 
handler.Reset(); EXPECT_FALSE(reader.HasParseError()); EXPECT_EQ(Reader::IterativeParsingMemberValueState, state); @@ -981,6 +988,7 @@ TEST(Reader, IterativeParsing_StateTransition_MemberValue) { // MemberValue -> MemberDelimiter { ITERATIVE_PARSING_PREPARE_STATE_UNTIL("{\"k\": 1, \"e\": 2}", 7); + handler.Reset(); EXPECT_FALSE(reader.HasParseError()); EXPECT_EQ(Reader::IterativeParsingMemberValueState, state); @@ -999,6 +1007,7 @@ TEST(Reader, IterativeParsing_StateTransition_MemberValue) { TEST(Reader, IterativeParsing_StateTransition_MemberDelimiter) { // MemberDelimiter -> MemberKey ITERATIVE_PARSING_PREPARE_STATE_UNTIL("{\"k\": 1, \"e\": 2}", 9); + handler.Reset(); EXPECT_FALSE(reader.HasParseError()); EXPECT_EQ(Reader::IterativeParsingMemberDelimiterState, state); @@ -1027,6 +1036,7 @@ TEST(Reader, IterativeParsing_StateTransition_ArrayInitial) { // ArrayInitial -> ArrayFinish -> Finish { ITERATIVE_PARSING_PREPARE_STATE_UNTIL("[]", 1); + handler.Reset(); EXPECT_FALSE(reader.HasParseError()); EXPECT_EQ(Reader::IterativeParsingArrayInitialState, state); @@ -1056,6 +1066,7 @@ TEST(Reader, IterativeParsing_StateTransition_ArrayInitial) { // ArrayInitial -> Element { ITERATIVE_PARSING_PREPARE_STATE_UNTIL("[1]", 1); + handler.Reset(); EXPECT_FALSE(reader.HasParseError()); EXPECT_EQ(Reader::IterativeParsingArrayInitialState, state); @@ -1075,6 +1086,7 @@ TEST(Reader, IterativeParsing_StateTransition_Element) { // Element -> ArrayFinish -> Finish { ITERATIVE_PARSING_PREPARE_STATE_UNTIL("[1]", 2); + handler.Reset(); EXPECT_FALSE(reader.HasParseError()); EXPECT_EQ(Reader::IterativeParsingElementState, state); @@ -1094,6 +1106,7 @@ TEST(Reader, IterativeParsing_StateTransition_Element) { // Element -> ElementDelimiter { ITERATIVE_PARSING_PREPARE_STATE_UNTIL("[1, 2]", 2); + handler.Reset(); EXPECT_FALSE(reader.HasParseError()); EXPECT_EQ(Reader::IterativeParsingElementState, state); @@ -1113,6 +1126,7 @@ TEST(Reader, IterativeParsing_StateTransition_ElementDelimiter) { 
// ElementDelimiter -> ArrayInitial { ITERATIVE_PARSING_PREPARE_STATE_UNTIL("[1, [1]]", 4); + handler.Reset(); EXPECT_FALSE(reader.HasParseError()); EXPECT_EQ(Reader::IterativeParsingElementDelimiterState, state); @@ -1135,6 +1149,7 @@ TEST(Reader, IterativeParsing_StateTransition_ElementDelimiter) { // ElementDelimiter -> ObjectInitial { ITERATIVE_PARSING_PREPARE_STATE_UNTIL("[1, [1]]", 4); + handler.Reset(); EXPECT_FALSE(reader.HasParseError()); EXPECT_EQ(Reader::IterativeParsingElementDelimiterState, state); @@ -1157,6 +1172,7 @@ TEST(Reader, IterativeParsing_StateTransition_ElementDelimiter) { // ElementDelimiter -> Element { ITERATIVE_PARSING_PREPARE_STATE_UNTIL("[1, 2]", 4); + handler.Reset(); EXPECT_FALSE(reader.HasParseError()); EXPECT_EQ(Reader::IterativeParsingElementDelimiterState, state); From face7240fe4994269ad5558fa8b78db8577a8daf Mon Sep 17 00:00:00 2001 From: thebusytypist Date: Fri, 11 Jul 2014 16:33:32 +0800 Subject: [PATCH 13/20] Revise unittests of compound value(array or object)'s initial state transition. 
--- test/unittest/readertest.cpp | 54 +++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/test/unittest/readertest.cpp b/test/unittest/readertest.cpp index 98747a0..72dfece 100644 --- a/test/unittest/readertest.cpp +++ b/test/unittest/readertest.cpp @@ -809,7 +809,7 @@ struct IterativeParsingReaderHandler { } TEST(Reader, IterativeParsing_StateTransition_Start) { - // Start->ArrayInitial + // Start -> ArrayInitial { IterativeParsingReaderHandler<> handler; Reader reader; @@ -823,7 +823,7 @@ TEST(Reader, IterativeParsing_StateTransition_Start) { EXPECT_TRUE(handler.IsStartArrayTriggered); } - // Start->ObjectInitial + // Start -> ObjectInitial { IterativeParsingReaderHandler<> handler; Reader reader; @@ -965,7 +965,7 @@ TEST(Reader, IterativeParsing_StateTransition_KeyValueDelimiter) { } TEST(Reader, IterativeParsing_StateTransition_MemberValue) { - // MemberValue -> ObjectFinish + // MemberValue -> ObjectFinish -> Finish { ITERATIVE_PARSING_PREPARE_STATE_UNTIL("{\"k\": 123}", 9); handler.Reset(); @@ -1025,12 +1025,26 @@ TEST(Reader, IterativeParsing_StateTransition_MemberDelimiter) { TEST(Reader, IterativeParsing_StateTransition_ArrayInitial) { // ArrayInitial -> ArrayInitial { - TEST_COMPOUNDTYPE_INITIAL_STATE( - Array, - "[]", - Reader::IterativeParsingArrayInitialState, + ITERATIVE_PARSING_PREPARE_STATE_UNTIL("[[1]]", 1); + handler.Reset(); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingArrayInitialState, state); + + Reader::IterativeParsingState d = reader.Transit( + state, Reader::IterativeParsingLeftBracketToken, - Reader::IterativeParsingElementState); + Reader::IterativeParsingArrayInitialState, + is, handler); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingArrayInitialState, d); + // Check initialized element count. + int c = *reader.stack_.template Pop(1); + EXPECT_EQ(0, c); + // Check pushed state. 
+ Reader::IterativeParsingState s = *reader.stack_.template Pop(1); + EXPECT_EQ(Reader::IterativeParsingElementState, s); } // ArrayInitial -> ArrayFinish -> Finish @@ -1055,12 +1069,26 @@ TEST(Reader, IterativeParsing_StateTransition_ArrayInitial) { // ArrayInitial -> ObjectInitial { - TEST_COMPOUNDTYPE_INITIAL_STATE( - Object, - "{}", - Reader::IterativeParsingStartState, + ITERATIVE_PARSING_PREPARE_STATE_UNTIL("[{\"k\": 1}]", 1); + handler.Reset(); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingArrayInitialState, state); + + Reader::IterativeParsingState d = reader.Transit( + state, Reader::IterativeParsingLeftCurlyBracketToken, - Reader::IterativeParsingStartState); + Reader::IterativeParsingObjectInitialState, + is, handler); + + EXPECT_FALSE(reader.HasParseError()); + EXPECT_EQ(Reader::IterativeParsingObjectInitialState, d); + // Check initialized element count. + int c = *reader.stack_.template Pop(1); + EXPECT_EQ(0, c); + // Check pushed state. + Reader::IterativeParsingState s = *reader.stack_.template Pop(1); + EXPECT_EQ(Reader::IterativeParsingElementState, s); } // ArrayInitial -> Element From 46e89dad0d973e6dbda335b7f18b599042f6ba5b Mon Sep 17 00:00:00 2001 From: thebusytypist Date: Tue, 15 Jul 2014 00:51:34 +0800 Subject: [PATCH 14/20] Add unittests for kParseErrorTermination; Fix bugs in last merge.
--- include/rapidjson/reader.h | 16 ++++--- test/unittest/readertest.cpp | 88 ++++++++++++++++++++++++++++++++---- 2 files changed, 87 insertions(+), 17 deletions(-) diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index 454cb6e..bdfd826 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -1023,7 +1023,6 @@ private: return IterativeParsingErrorState; case IterativeParsingFinishState: - is.Take(); return dst; case IterativeParsingErrorState: @@ -1031,7 +1030,6 @@ private: case IterativeParsingObjectInitialState: case IterativeParsingArrayInitialState: - is.Take(); // Push the state(Element or MemeberValue) if we are nested in another array or value of member. // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop. n = src; @@ -1053,8 +1051,10 @@ private: RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell()); return IterativeParsingErrorState; } - else + else { + is.Take(); return dst; + } case IterativeParsingMemberKeyState: ParseString(is, handler); @@ -1095,7 +1095,6 @@ private: return dst; case IterativeParsingObjectFinishState: - is.Take(); // Get member count. c = *stack_.template Pop(1); // If the object is not empty, count the last member. @@ -1113,11 +1112,12 @@ private: RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell()); return IterativeParsingErrorState; } - else + else { + is.Take(); return n; + } case IterativeParsingArrayFinishState: - is.Take(); // Get element count. c = *stack_.template Pop(1); // If the array is not empty, count the last element. 
@@ -1135,8 +1135,10 @@ private: RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell()); return IterativeParsingErrorState; } - else + else { + is.Take(); return n; + } default: RAPIDJSON_ASSERT(false); diff --git a/test/unittest/readertest.cpp b/test/unittest/readertest.cpp index 45cbeaa..0bd1b13 100644 --- a/test/unittest/readertest.cpp +++ b/test/unittest/readertest.cpp @@ -707,7 +707,7 @@ TEST(Reader, Parse_IStreamWrapper_StringStream) { EXPECT_FALSE(reader.HasParseError()); } -#define TESTERRORHANDLING(text, errorCode)\ +#define TESTERRORHANDLING(text, errorCode, offset)\ {\ StringStream json(text);\ BaseReaderHandler<> handler;\ @@ -715,20 +715,21 @@ TEST(Reader, Parse_IStreamWrapper_StringStream) { reader.IterativeParse(json, handler);\ EXPECT_TRUE(reader.HasParseError());\ EXPECT_EQ(errorCode, reader.GetParseErrorCode());\ + EXPECT_EQ(offset, reader.GetErrorOffset());\ } TEST(Reader, IterativeParsing_ErrorHandling) { - TESTERRORHANDLING("{\"a\": a}", kParseErrorValueInvalid); + TESTERRORHANDLING("{\"a\": a}", kParseErrorValueInvalid, 6); - TESTERRORHANDLING("", kParseErrorDocumentEmpty); - TESTERRORHANDLING("1", kParseErrorDocumentRootNotObjectOrArray); - TESTERRORHANDLING("{}{}", kParseErrorDocumentRootNotSingular); + TESTERRORHANDLING("", kParseErrorDocumentEmpty, 0); + TESTERRORHANDLING("1", kParseErrorDocumentRootNotObjectOrArray, 0); + TESTERRORHANDLING("{}{}", kParseErrorDocumentRootNotSingular, 2); - TESTERRORHANDLING("{1}", kParseErrorObjectMissName); - TESTERRORHANDLING("{\"a\", 1}", kParseErrorObjectMissColon); - TESTERRORHANDLING("{\"a\"}", kParseErrorObjectMissColon); - TESTERRORHANDLING("{\"a\": 1", kParseErrorObjectMissCommaOrCurlyBracket); - TESTERRORHANDLING("[1 2 3]", kParseErrorArrayMissCommaOrSquareBracket); + TESTERRORHANDLING("{1}", kParseErrorObjectMissName, 1); + TESTERRORHANDLING("{\"a\", 1}", kParseErrorObjectMissColon, 4); + TESTERRORHANDLING("{\"a\"}", kParseErrorObjectMissColon, 4); + TESTERRORHANDLING("{\"a\": 1", 
kParseErrorObjectMissCommaOrCurlyBracket, 7); + TESTERRORHANDLING("[1 2 3]", kParseErrorArrayMissCommaOrSquareBracket, 3); } // Test iterative parsing. @@ -1220,6 +1221,73 @@ TEST(Reader, IterativeParsing_StateTransition_ElementDelimiter) { } } +// Test iterative parsing on kParseErrorTermination. +struct HandlerTerminateAtStartObject : public IterativeParsingReaderHandler<> { + bool StartObject() { return false; } +}; + +struct HandlerTerminateAtStartArray : public IterativeParsingReaderHandler<> { + bool StartArray() { return false; } +}; + +struct HandlerTerminateAtEndObject : public IterativeParsingReaderHandler<> { + bool EndObject(SizeType) { return false; } +}; + +struct HandlerTerminateAtEndArray : public IterativeParsingReaderHandler<> { + bool EndArray(SizeType) { return false; } +}; + +TEST(Reader, IterativeParsing_ShortCircuit) { + { + HandlerTerminateAtStartObject handler; + Reader reader; + StringStream is("[1, {}]"); + + ParseResult r = reader.Parse(is, handler); + + EXPECT_TRUE(reader.HasParseError()); + EXPECT_EQ(kParseErrorTermination, r.Code()); + EXPECT_EQ(4, r.Offset()); + } + + { + HandlerTerminateAtStartArray handler; + Reader reader; + StringStream is("{\"a\": []}"); + + ParseResult r = reader.Parse(is, handler); + + EXPECT_TRUE(reader.HasParseError()); + EXPECT_EQ(kParseErrorTermination, r.Code()); + EXPECT_EQ(6, r.Offset()); + } + + { + HandlerTerminateAtEndObject handler; + Reader reader; + StringStream is("[1, {}]"); + + ParseResult r = reader.Parse(is, handler); + + EXPECT_TRUE(reader.HasParseError()); + EXPECT_EQ(kParseErrorTermination, r.Code()); + EXPECT_EQ(5, r.Offset()); + } + + { + HandlerTerminateAtEndArray handler; + Reader reader; + StringStream is("{\"a\": []}"); + + ParseResult r = reader.Parse(is, handler); + + EXPECT_TRUE(reader.HasParseError()); + EXPECT_EQ(kParseErrorTermination, r.Code()); + EXPECT_EQ(7, r.Offset()); + } +} + #ifdef __GNUC__ RAPIDJSON_DIAG_POP #endif From 1f53c6c041eee6321eb18b5dc7f685f80cd62599 Mon Sep 
17 00:00:00 2001 From: thebusytypist Date: Tue, 15 Jul 2014 14:16:06 +0800 Subject: [PATCH 15/20] Implement stack size limitation for iterative parsing. --- include/rapidjson/document.h | 45 ++++++++++++++++++++------------- include/rapidjson/error/en.h | 1 + include/rapidjson/error/error.h | 3 ++- include/rapidjson/reader.h | 34 ++++++++++++++++++++++--- test/unittest/readertest.cpp | 12 +++++++++ 5 files changed, 72 insertions(+), 23 deletions(-) diff --git a/include/rapidjson/document.h b/include/rapidjson/document.h index 4448600..d94cd62 100644 --- a/include/rapidjson/document.h +++ b/include/rapidjson/document.h @@ -1221,12 +1221,13 @@ public: \tparam SourceEncoding Encoding of input stream \tparam InputStream Type of input stream, implementing Stream concept \param is Input stream to be parsed. + \param limit Parsing stack size limit(in bytes). Pass 0 means no limit. \return The document itself for fluent API. */ template - GenericDocument& ParseStream(InputStream& is) { + GenericDocument& ParseStream(InputStream& is, size_t limit = 0) { ValueType::SetNull(); // Remove existing root if exist - GenericReader reader(&GetAllocator()); + GenericReader reader(limit, &GetAllocator()); ClearStackOnExit scope(*this); parseResult_ = reader.template Parse(is, *this); if (parseResult_) { @@ -1240,21 +1241,23 @@ public: /*! \tparam parseFlags Combination of \ref ParseFlag. \tparam InputStream Type of input stream, implementing Stream concept \param is Input stream to be parsed. + \param limit Parsing stack size limit(in bytes). Pass 0 means no limit. \return The document itself for fluent API. */ template - GenericDocument& ParseStream(InputStream& is) { - return ParseStream(is); + GenericDocument& ParseStream(InputStream& is, size_t limit = 0) { + return ParseStream(is, limit); } //! Parse JSON text from an input stream (with \ref kParseDefaultFlags) /*! \tparam InputStream Type of input stream, implementing Stream concept \param is Input stream to be parsed. 
+ \param limit Parsing stack size limit(in bytes). Pass 0 means no limit. \return The document itself for fluent API. */ template - GenericDocument& ParseStream(InputStream& is) { - return ParseStream(is); + GenericDocument& ParseStream(InputStream& is, size_t limit = 0) { + return ParseStream(is, limit); } //!@} @@ -1265,30 +1268,33 @@ public: /*! \tparam parseFlags Combination of \ref ParseFlag. \tparam SourceEncoding Transcoding from input Encoding \param str Mutable zero-terminated string to be parsed. + \param limit Parsing stack size limit(in bytes). Pass 0 means no limit. \return The document itself for fluent API. */ template - GenericDocument& ParseInsitu(Ch* str) { + GenericDocument& ParseInsitu(Ch* str, size_t limit = 0) { GenericInsituStringStream s(str); - return ParseStream(s); + return ParseStream(s, limit); } //! Parse JSON text from a mutable string /*! \tparam parseFlags Combination of \ref ParseFlag. \param str Mutable zero-terminated string to be parsed. + \param limit Parsing stack size limit(in bytes). Pass 0 means no limit. \return The document itself for fluent API. */ template - GenericDocument& ParseInsitu(Ch* str) { - return ParseInsitu(str); + GenericDocument& ParseInsitu(Ch* str, size_t limit = 0) { + return ParseInsitu(str, limit); } //! Parse JSON text from a mutable string (with \ref kParseDefaultFlags) /*! \param str Mutable zero-terminated string to be parsed. + \param limit Parsing stack size limit(in bytes). Pass 0 means no limit. \return The document itself for fluent API. */ - GenericDocument& ParseInsitu(Ch* str) { - return ParseInsitu(str); + GenericDocument& ParseInsitu(Ch* str, size_t limit = 0) { + return ParseInsitu(str, limit); } //!@} @@ -1299,28 +1305,31 @@ public: /*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag). \tparam SourceEncoding Transcoding from input Encoding \param str Read-only zero-terminated string to be parsed. + \param limit Parsing stack size limit(in bytes). 
Pass 0 means no limit. */ template - GenericDocument& Parse(const Ch* str) { + GenericDocument& Parse(const Ch* str, size_t limit = 0) { RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag)); GenericStringStream s(str); - return ParseStream(s); + return ParseStream(s, limit); } //! Parse JSON text from a read-only string /*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag). \param str Read-only zero-terminated string to be parsed. + \param limit Parsing stack size limit(in bytes). Pass 0 means no limit. */ template - GenericDocument& Parse(const Ch* str) { - return Parse(str); + GenericDocument& Parse(const Ch* str, size_t limit = 0) { + return Parse(str, limit); } //! Parse JSON text from a read-only string (with \ref kParseDefaultFlags) /*! \param str Read-only zero-terminated string to be parsed. + \param limit Parsing stack size limit(in bytes). Pass 0 means no limit. */ - GenericDocument& Parse(const Ch* str) { - return Parse(str); + GenericDocument& Parse(const Ch* str, size_t limit = 0) { + return Parse(str, limit); } //!@} diff --git a/include/rapidjson/error/en.h b/include/rapidjson/error/en.h index e9120c5..e9a5d1d 100644 --- a/include/rapidjson/error/en.h +++ b/include/rapidjson/error/en.h @@ -40,6 +40,7 @@ inline const RAPIDJSON_ERROR_CHARTYPE* GetParseError_En(ParseErrorCode parseErro case kParseErrorTermination: return RAPIDJSON_ERROR_STRING("Terminate parsing due to Handler error."); case kParseErrorUnspecificSyntaxError: return RAPIDJSON_ERROR_STRING("Unspecific syntax error."); + case kParseErrorStackSizeLimitExceeded: return RAPIDJSON_ERROR_STRING("Parsing stack size limit is exceeded."); default: return RAPIDJSON_ERROR_STRING("Unknown error."); diff --git a/include/rapidjson/error/error.h b/include/rapidjson/error/error.h index e5c2b1b..a47dfaa 100644 --- a/include/rapidjson/error/error.h +++ b/include/rapidjson/error/error.h @@ -59,7 +59,8 @@ enum ParseErrorCode { kParseErrorNumberMissExponent, //!< Miss 
exponent in number. kParseErrorTermination, //!< Parsing was terminated. - kParseErrorUnspecificSyntaxError //!< Unspecific syntax error. + kParseErrorUnspecificSyntaxError, //!< Unspecific syntax error. + kParseErrorStackSizeLimitExceeded //!< Parsing stack size limit is exceeded. }; //! Result of parsing (wraps ParseErrorCode) diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index bdfd826..95a2996 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -272,10 +272,11 @@ public: typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type //! Constructor. - /*! \param allocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing) + /*! \param limit Parsing stack size limit(in bytes). Pass 0 means no limit. + \param allocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing) \param stackCapacity stack capacity in bytes for storing a single decoded string. (Only use for non-destructive parsing) */ - GenericReader(Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(allocator, stackCapacity), parseResult_() {} + GenericReader(size_t limit = 0, Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(allocator, stackCapacity), kStackSizeLimit(limit), parseResult_() {} //! Parse JSON text. /*! \tparam parseFlags Combination of \ref ParseFlag. 
@@ -569,8 +570,14 @@ private: if (c == '\\') { // Escape is.Take(); Ch e = is.Take(); - if ((sizeof(Ch) == 1 || unsigned(e) < 256) && escape[(unsigned char)e]) + if ((sizeof(Ch) == 1 || unsigned(e) < 256) && escape[(unsigned char)e]) { + if (!(parseFlags & kParseInsituFlag)) { + if (!IsStackSpaceSufficient(1)) { + RAPIDJSON_PARSE_ERROR(kParseErrorStackSizeLimitExceeded, is.Tell() - 1); + } + } os.Put(escape[(unsigned char)e]); + } else if (e == 'u') { // Unicode unsigned codepoint = ParseHex4(is); if (codepoint >= 0xD800 && codepoint <= 0xDBFF) { @@ -589,6 +596,11 @@ private: } else if (c == '"') { // Closing double quote is.Take(); + if (!(parseFlags & kParseInsituFlag)) { + if (!IsStackSpaceSufficient(1)) { + RAPIDJSON_PARSE_ERROR(kParseErrorStackSizeLimitExceeded, is.Tell() - 1); + } + } os.Put('\0'); // null-terminate the string return; } @@ -1038,8 +1050,16 @@ private: else if (src == IterativeParsingKeyValueDelimiterState) n = IterativeParsingMemberValueState; // Push current state. + if (!IsStackSpaceSufficient(1)) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStackSizeLimitExceeded, is.Tell()); + return IterativeParsingErrorState; + } *stack_.template Push(1) = n; // Initialize and push the member/element count. + if (!IsStackSpaceSufficient(1)) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStackSizeLimitExceeded, is.Tell()); + return IterativeParsingErrorState; + } *stack_.template Push(1) = 0; // Call handler if (dst == IterativeParsingObjectInitialState) @@ -1206,7 +1226,13 @@ private: return parseResult_; } - static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string. + template + bool IsStackSpaceSufficient(size_t count) const { + return kStackSizeLimit == 0 || (stack_.GetSize() + sizeof(T) * count <= kStackSizeLimit); + } + + static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string. 
+ const size_t kStackSizeLimit; //!< Stack size limit(in bytes). A value of 0 means no limit. internal::Stack stack_; //!< A stack for storing decoded string temporarily during non-destructive parsing. ParseResult parseResult_; }; // class GenericReader diff --git a/test/unittest/readertest.cpp b/test/unittest/readertest.cpp index 0bd1b13..9c668cd 100644 --- a/test/unittest/readertest.cpp +++ b/test/unittest/readertest.cpp @@ -1288,6 +1288,18 @@ TEST(Reader, IterativeParsing_ShortCircuit) { } } +TEST(Reader, IterativeParsing_LimitStackSize) { + BaseReaderHandler<> handler; + Reader reader(20); + StringStream is("[[[]]]"); + + ParseResult r = reader.Parse(is, handler); + + EXPECT_TRUE(reader.HasParseError()); + EXPECT_EQ(kParseErrorStackSizeLimitExceeded, r.Code()); + EXPECT_EQ(2, r.Offset()); +} + #ifdef __GNUC__ RAPIDJSON_DIAG_POP #endif From b22a89bf3f5786733f757de354f11d8aeee23788 Mon Sep 17 00:00:00 2001 From: thebusytypist Date: Wed, 16 Jul 2014 22:09:50 +0800 Subject: [PATCH 16/20] Reduce times of stack size check; reduce transition table size. 
--- include/rapidjson/reader.h | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index 95a2996..154aad4 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -572,7 +572,7 @@ private: Ch e = is.Take(); if ((sizeof(Ch) == 1 || unsigned(e) < 256) && escape[(unsigned char)e]) { if (!(parseFlags & kParseInsituFlag)) { - if (!IsStackSpaceSufficient(1)) { + if (!CheckStackSpaceQuota(sizeof(Ch))) { RAPIDJSON_PARSE_ERROR(kParseErrorStackSizeLimitExceeded, is.Tell() - 1); } } @@ -597,7 +597,7 @@ private: else if (c == '"') { // Closing double quote is.Take(); if (!(parseFlags & kParseInsituFlag)) { - if (!IsStackSpaceSufficient(1)) { + if (!CheckStackSpaceQuota(sizeof(Ch))) { RAPIDJSON_PARSE_ERROR(kParseErrorStackSizeLimitExceeded, is.Tell() - 1); } } @@ -865,7 +865,7 @@ private: IterativeParsingState Predict(IterativeParsingState state, IterativeParsingToken token) { // current state x one lookahead token -> new state - static const IterativeParsingState G[cIterativeParsingStateCount][cIterativeParsingTokenCount] = { + static const char G[cIterativeParsingStateCount][cIterativeParsingTokenCount] = { // Start { IterativeParsingArrayInitialState, // Left bracket @@ -1018,7 +1018,7 @@ private: } }; // End of G - return G[state][token]; + return (IterativeParsingState)G[state][token]; } // Make an advance in the token stream and state based on the candidate destination state which was returned by Transit(). @@ -1049,17 +1049,14 @@ private: n = IterativeParsingElementState; else if (src == IterativeParsingKeyValueDelimiterState) n = IterativeParsingMemberValueState; - // Push current state. - if (!IsStackSpaceSufficient(1)) { + // Check stack space limit. 
+ if (!CheckStackSpaceQuota(sizeof(IterativeParsingState) + sizeof(int))) { RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStackSizeLimitExceeded, is.Tell()); return IterativeParsingErrorState; } + // Push current state. *stack_.template Push(1) = n; // Initialize and push the member/element count. - if (!IsStackSpaceSufficient(1)) { - RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStackSizeLimitExceeded, is.Tell()); - return IterativeParsingErrorState; - } *stack_.template Push(1) = 0; // Call handler if (dst == IterativeParsingObjectInitialState) @@ -1226,9 +1223,8 @@ private: return parseResult_; } - template - bool IsStackSpaceSufficient(size_t count) const { - return kStackSizeLimit == 0 || (stack_.GetSize() + sizeof(T) * count <= kStackSizeLimit); + bool CheckStackSpaceQuota(size_t size) const { + return kStackSizeLimit == 0 || (stack_.GetSize() + size <= kStackSizeLimit); } static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string. From c3d7d8b38af34d136356858edf9452abf262cd43 Mon Sep 17 00:00:00 2001 From: thebusytypist Date: Wed, 16 Jul 2014 23:10:16 +0800 Subject: [PATCH 17/20] Revise unittests: should not expose implementation details. --- test/unittest/readertest.cpp | 588 +++++++---------------------------- 1 file changed, 115 insertions(+), 473 deletions(-) diff --git a/test/unittest/readertest.cpp b/test/unittest/readertest.cpp index 9c668cd..508fed4 100644 --- a/test/unittest/readertest.cpp +++ b/test/unittest/readertest.cpp @@ -707,16 +707,18 @@ TEST(Reader, Parse_IStreamWrapper_StringStream) { EXPECT_FALSE(reader.HasParseError()); } +// Test iterative parsing. 
+ #define TESTERRORHANDLING(text, errorCode, offset)\ - {\ - StringStream json(text);\ - BaseReaderHandler<> handler;\ - Reader reader;\ - reader.IterativeParse(json, handler);\ - EXPECT_TRUE(reader.HasParseError());\ - EXPECT_EQ(errorCode, reader.GetParseErrorCode());\ - EXPECT_EQ(offset, reader.GetErrorOffset());\ - } +{\ + StringStream json(text); \ + BaseReaderHandler<> handler; \ + Reader reader; \ + reader.IterativeParse(json, handler); \ + EXPECT_TRUE(reader.HasParseError()); \ + EXPECT_EQ(errorCode, reader.GetParseErrorCode()); \ + EXPECT_EQ(offset, reader.GetErrorOffset()); \ +} TEST(Reader, IterativeParsing_ErrorHandling) { TESTERRORHANDLING("{\"a\": a}", kParseErrorValueInvalid, 6); @@ -732,492 +734,132 @@ TEST(Reader, IterativeParsing_ErrorHandling) { TESTERRORHANDLING("[1 2 3]", kParseErrorArrayMissCommaOrSquareBracket, 3); } -// Test iterative parsing. template > struct IterativeParsingReaderHandler { typedef typename Encoding::Ch Ch; - IterativeParsingReaderHandler() { - Reset(); + const static int LOG_NULL = -1; + const static int LOG_BOOL = -2; + const static int LOG_INT = -3; + const static int LOG_UINT = -4; + const static int LOG_INT64 = -5; + const static int LOG_UINT64 = -6; + const static int LOG_DOUBLE = -7; + const static int LOG_STRING = -8; + const static int LOG_STARTOBJECT = -9; + const static int LOG_ENDOBJECT = -10; + const static int LOG_STARTARRAY = -11; + const static int LOG_ENDARRAY = -12; + + const static size_t LogCapacity = 256; + int Logs[LogCapacity]; + size_t LogCount; + + IterativeParsingReaderHandler() : LogCount(0) { } - void Reset() { - IsNullTriggered = false; - IsBoolTriggered = false; - IsIntTriggered = false; - IsUintTriggered = false; - IsInt64Triggered = false; - IsUint64Triggered = false; - IsDoubleTriggered = false; - IsStringTriggered = false; - IsStartObjectTriggered = false; - IsEndObjectTriggered = false; - MemberCount = 0; - IsStartArrayTriggered = false; - ElementCount = 0; + bool Null() { 
RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_NULL; return true; } + + bool Bool(bool) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_BOOL; return true; } + + bool Int(int) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_INT; return true; } + + bool Uint(unsigned) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_INT; return true; } + + bool Int64(int64_t) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_INT64; return true; } + + bool Uint64(uint64_t) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_UINT64; return true; } + + bool Double(double) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_DOUBLE; return true; } + + bool String(const Ch*, SizeType, bool) { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_STRING; return true; } + + bool StartObject() { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_STARTOBJECT; return true; } + + bool EndObject(SizeType c) { + RAPIDJSON_ASSERT(LogCount < LogCapacity); + Logs[LogCount++] = LOG_ENDOBJECT; + Logs[LogCount++] = (int)c; + return true; } - bool IsNullTriggered; - bool Null() { IsNullTriggered = true; return true; } + bool StartArray() { RAPIDJSON_ASSERT(LogCount < LogCapacity); Logs[LogCount++] = LOG_STARTARRAY; return true; } - bool IsBoolTriggered; - bool Bool(bool) { IsBoolTriggered = true; return true; } - - bool IsIntTriggered; - bool Int(int) { IsIntTriggered = true; return true; } - - bool IsUintTriggered; - bool Uint(unsigned) { IsUintTriggered = true; return true; } - - bool IsInt64Triggered; - bool Int64(int64_t) { IsInt64Triggered = true; return true; } - - bool IsUint64Triggered; - bool Uint64(uint64_t) { IsUint64Triggered = true; return true; } - - bool IsDoubleTriggered; - bool Double(double) { IsDoubleTriggered = true; return true; } - - bool IsStringTriggered; - bool String(const Ch*, SizeType, bool) { IsStringTriggered = true; return true; } 
- - bool IsStartObjectTriggered; - bool StartObject() { IsStartObjectTriggered = true; return true; } - - bool IsEndObjectTriggered; - SizeType MemberCount; - bool EndObject(SizeType c) { IsEndObjectTriggered = true; MemberCount = c; return true; } - - bool IsStartArrayTriggered; - bool StartArray() { IsStartArrayTriggered = true; return true; } - - bool IsEndArrayTriggered; - SizeType ElementCount; - bool EndArray(SizeType c) { IsEndArrayTriggered = true; ElementCount = c; return true; } + bool EndArray(SizeType c) { + RAPIDJSON_ASSERT(LogCount < LogCapacity); + Logs[LogCount++] = LOG_ENDARRAY; + Logs[LogCount++] = (int)c; + return true; + } }; -#define ITERATIVE_PARSING_PREPARE_STATE_UNTIL(text, pos)\ - IterativeParsingReaderHandler<> handler;\ - Reader reader;\ - StringStream is(text);\ - \ - Reader::IterativeParsingState state = Reader::IterativeParsingStartState;\ - SkipWhitespace(is);\ - while (is.Tell() != pos) {\ - Reader::IterativeParsingToken token = reader.Tokenize(is.Peek());\ - Reader::IterativeParsingState n = reader.Predict(state, token);\ - state = reader.Transit(state, token, n, is, handler);\ - SkipWhitespace(is);\ - } - -TEST(Reader, IterativeParsing_StateTransition_Start) { - // Start -> ArrayInitial +TEST(Reader, IterativeParsing_General) { { - IterativeParsingReaderHandler<> handler; + StringStream is("[1, {\"k\": [1, 2]}, null, false, true, \"string\", 1.2]"); Reader reader; - StringStream is("[]"); - - Reader::IterativeParsingState n = reader.Predict(Reader::IterativeParsingStartState, Reader::IterativeParsingLeftBracketToken); - Reader::IterativeParsingState d = reader.Transit(Reader::IterativeParsingStartState, Reader::IterativeParsingLeftBracketToken, n, is, handler); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingArrayInitialState, d); - EXPECT_TRUE(handler.IsStartArrayTriggered); - } - - // Start -> ObjectInitial - { IterativeParsingReaderHandler<> handler; + + ParseResult r = reader.IterativeParse(is, 
handler); + + EXPECT_FALSE(reader.HasParseError()); + + int e[] = { + handler.LOG_STARTARRAY, + handler.LOG_INT, + handler.LOG_STARTOBJECT, + handler.LOG_STRING, + handler.LOG_STARTARRAY, + handler.LOG_INT, + handler.LOG_INT, + handler.LOG_ENDARRAY, 2, + handler.LOG_ENDOBJECT, 1, + handler.LOG_NULL, + handler.LOG_BOOL, + handler.LOG_BOOL, + handler.LOG_STRING, + handler.LOG_DOUBLE, + handler.LOG_ENDARRAY, 7 + }; + + EXPECT_EQ(sizeof(e) / sizeof(int), handler.LogCount); + + for (size_t i = 0; i < handler.LogCount; ++i) { + EXPECT_EQ(e[i], handler.Logs[i]) << "i = " << i; + } + } +} + +TEST(Reader, IterativeParsing_Count) { + { + StringStream is("[{}, {\"k\": 1}, [1], []]"); Reader reader; - StringStream is("{}"); - - Reader::IterativeParsingState n = reader.Predict(Reader::IterativeParsingStartState, Reader::IterativeParsingLeftCurlyBracketToken); - Reader::IterativeParsingState d = reader.Transit(Reader::IterativeParsingStartState, Reader::IterativeParsingLeftCurlyBracketToken, n, is, handler); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingObjectInitialState, d); - EXPECT_TRUE(handler.IsStartObjectTriggered); - } -} - -TEST(Reader, IterativeParsing_StateTransition_ObjectInitial) { - // ObjectInitial -> ObjectFinish -> Finish - { - ITERATIVE_PARSING_PREPARE_STATE_UNTIL("{}", 1); - handler.Reset(); - - EXPECT_EQ(Reader::IterativeParsingObjectInitialState, state); - Reader::IterativeParsingState d = reader.Transit( - state, - Reader::IterativeParsingRightCurlyBracketToken, - Reader::IterativeParsingObjectFinishState, - is, handler); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingFinishState, d); - EXPECT_TRUE(handler.IsEndObjectTriggered); - EXPECT_EQ(0, handler.MemberCount); - } - - // ObjectInitial -> MemberKey - { - ITERATIVE_PARSING_PREPARE_STATE_UNTIL("{\"key\": 1}", 1); - handler.Reset(); - - EXPECT_EQ(Reader::IterativeParsingObjectInitialState, state); - Reader::IterativeParsingState d = 
reader.Transit( - state, - Reader::IterativeParsingStringToken, - Reader::IterativeParsingMemberKeyState, - is, handler); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingMemberKeyState, d); - EXPECT_TRUE(handler.IsStringTriggered); - } -} - -TEST(Reader, IterativeParsing_StateTransition_MemberKey) { - // MemberKey -> KeyValueDelimiter - { IterativeParsingReaderHandler<> handler; - Reader reader; - StringStream is(":"); - Reader::IterativeParsingState d = reader.Transit( - Reader::IterativeParsingMemberKeyState, - Reader::IterativeParsingColonToken, - Reader::IterativeParsingKeyValueDelimiterState, - is, handler); + ParseResult r = reader.IterativeParse(is, handler); EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingKeyValueDelimiterState, d); - } -} -#define TEST_COMPOUNDTYPE_INITIAL_STATE_AUX(type, text, src, token, popstate, statesuffix, eventsuffix)\ - {\ - IterativeParsingReaderHandler<> handler;\ - Reader reader;\ - StringStream is(text);\ - \ - Reader::IterativeParsingState d = reader.Transit(\ - src,\ - token,\ - Reader::IterativeParsing ## type ## statesuffix,\ - is, handler);\ - \ - EXPECT_FALSE(reader.HasParseError());\ - EXPECT_EQ(Reader::IterativeParsing ## type ## statesuffix, d);\ - EXPECT_TRUE(handler.IsStart ## type ## eventsuffix);\ - \ - int c = *reader.stack_.template Pop(1);\ - EXPECT_EQ(0, c);\ - Reader::IterativeParsingState s = *reader.stack_.template Pop(1);\ - EXPECT_EQ(popstate, s);\ - } + int e[] = { + handler.LOG_STARTARRAY, + handler.LOG_STARTOBJECT, + handler.LOG_ENDOBJECT, 0, + handler.LOG_STARTOBJECT, + handler.LOG_STRING, + handler.LOG_INT, + handler.LOG_ENDOBJECT, 1, + handler.LOG_STARTARRAY, + handler.LOG_INT, + handler.LOG_ENDARRAY, 1, + handler.LOG_STARTARRAY, + handler.LOG_ENDARRAY, 0, + handler.LOG_ENDARRAY, 4 + }; -#define TEST_COMPOUNDTYPE_INITIAL_STATE(type, text, src, token, popstate)\ - TEST_COMPOUNDTYPE_INITIAL_STATE_AUX(type, text, src, token, popstate, InitialState, 
Triggered) + EXPECT_EQ(sizeof(e) / sizeof(int), handler.LogCount); -#define TEST_PLAIN_VALUE_STATE_AUX(text, src, token, dst, event, eventsuffix)\ - {\ - IterativeParsingReaderHandler<> handler;\ - Reader reader;\ - StringStream is(text);\ - \ - Reader::IterativeParsingState d = reader.Transit(\ - src,\ - token,\ - dst,\ - is, handler);\ - \ - EXPECT_FALSE(reader.HasParseError());\ - EXPECT_EQ(dst, d);\ - EXPECT_TRUE(handler. Is ## event ## eventsuffix);\ - } - -#define TEST_PLAIN_VALUE_STATE(text, src, token, dst, event)\ - TEST_PLAIN_VALUE_STATE_AUX(text, src, token, dst, event, Triggered) - -TEST(Reader, IterativeParsing_StateTransition_KeyValueDelimiter) { - // KeyValueDelimiter -> ArrayInitial - TEST_COMPOUNDTYPE_INITIAL_STATE( - Array, - "[", - Reader::IterativeParsingKeyValueDelimiterState, - Reader::IterativeParsingLeftBracketToken, - Reader::IterativeParsingMemberValueState); - - // KeyValueDelimiter -> ObjectInitial - TEST_COMPOUNDTYPE_INITIAL_STATE( - Object, - "{", - Reader::IterativeParsingKeyValueDelimiterState, - Reader::IterativeParsingLeftCurlyBracketToken, - Reader::IterativeParsingMemberValueState); - - // KeyValueDelimiter -> MemberValue - TEST_PLAIN_VALUE_STATE( - "123,", - Reader::IterativeParsingKeyValueDelimiterState, - Reader::IterativeParsingNumberToken, - Reader::IterativeParsingMemberValueState, - Uint); -} - -TEST(Reader, IterativeParsing_StateTransition_MemberValue) { - // MemberValue -> ObjectFinish -> Finish - { - ITERATIVE_PARSING_PREPARE_STATE_UNTIL("{\"k\": 123}", 9); - handler.Reset(); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingMemberValueState, state); - - Reader::IterativeParsingState d = reader.Transit( - state, - Reader::IterativeParsingRightCurlyBracketToken, - Reader::IterativeParsingObjectFinishState, - is, handler); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingFinishState, d); - EXPECT_TRUE(handler.IsEndObjectTriggered); - EXPECT_EQ(1, 
handler.MemberCount); - } - - // MemberValue -> MemberDelimiter - { - ITERATIVE_PARSING_PREPARE_STATE_UNTIL("{\"k\": 1, \"e\": 2}", 7); - handler.Reset(); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingMemberValueState, state); - - Reader::IterativeParsingState d = reader.Transit( - state, - Reader::IterativeParsingCommaToken, - Reader::IterativeParsingMemberDelimiterState, - is, handler); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingMemberDelimiterState, d); - } -} - -TEST(Reader, IterativeParsing_StateTransition_MemberDelimiter) { - // MemberDelimiter -> MemberKey - ITERATIVE_PARSING_PREPARE_STATE_UNTIL("{\"k\": 1, \"e\": 2}", 9); - handler.Reset(); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingMemberDelimiterState, state); - - Reader::IterativeParsingState d = reader.Transit( - state, - Reader::IterativeParsingStringToken, - Reader::IterativeParsingMemberKeyState, - is, handler); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingMemberKeyState, d); -} - -TEST(Reader, IterativeParsing_StateTransition_ArrayInitial) { - // ArrayInitial -> ArrayInitial - { - ITERATIVE_PARSING_PREPARE_STATE_UNTIL("[[1]]", 1); - handler.Reset(); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingArrayInitialState, state); - - Reader::IterativeParsingState d = reader.Transit( - state, - Reader::IterativeParsingLeftBracketToken, - Reader::IterativeParsingArrayInitialState, - is, handler); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingArrayInitialState, d); - // Check initialized element count. - int c = *reader.stack_.template Pop(1); - EXPECT_EQ(0, c); - // Check pushed state. 
- Reader::IterativeParsingState s = *reader.stack_.template Pop(1); - EXPECT_EQ(Reader::IterativeParsingElementState, s); - } - - // ArrayInitial -> ArrayFinish -> Finish - { - ITERATIVE_PARSING_PREPARE_STATE_UNTIL("[]", 1); - handler.Reset(); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingArrayInitialState, state); - - Reader::IterativeParsingState d = reader.Transit( - state, - Reader::IterativeParsingRightBracketToken, - Reader::IterativeParsingArrayFinishState, - is, handler); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingFinishState, d); - EXPECT_TRUE(handler.IsEndArrayTriggered); - EXPECT_EQ(0, handler.ElementCount); - } - - // ArrayInitial -> ObjectInitial - { - ITERATIVE_PARSING_PREPARE_STATE_UNTIL("[{\"k\": 1}]", 1); - handler.Reset(); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingArrayInitialState, state); - - Reader::IterativeParsingState d = reader.Transit( - state, - Reader::IterativeParsingLeftCurlyBracketToken, - Reader::IterativeParsingObjectInitialState, - is, handler); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingObjectInitialState, d); - // Check initialized element count. - int c = *reader.stack_.template Pop(1); - EXPECT_EQ(0, c); - // Check pushed state. 
- Reader::IterativeParsingState s = *reader.stack_.template Pop(1); - EXPECT_EQ(Reader::IterativeParsingElementState, s); - } - - // ArrayInitial -> Element - { - ITERATIVE_PARSING_PREPARE_STATE_UNTIL("[1]", 1); - handler.Reset(); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingArrayInitialState, state); - - Reader::IterativeParsingState d = reader.Transit( - state, - Reader::IterativeParsingNumberToken, - Reader::IterativeParsingElementState, - is, handler); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingElementState, d); - } -} - -TEST(Reader, IterativeParsing_StateTransition_Element) { - // Element -> ArrayFinish -> Finish - { - ITERATIVE_PARSING_PREPARE_STATE_UNTIL("[1]", 2); - handler.Reset(); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingElementState, state); - - Reader::IterativeParsingState d = reader.Transit( - state, - Reader::IterativeParsingRightBracketToken, - Reader::IterativeParsingArrayFinishState, - is, handler); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingFinishState, d); - EXPECT_TRUE(handler.IsEndArrayTriggered); - EXPECT_EQ(1, handler.ElementCount); - } - - // Element -> ElementDelimiter - { - ITERATIVE_PARSING_PREPARE_STATE_UNTIL("[1, 2]", 2); - handler.Reset(); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingElementState, state); - - Reader::IterativeParsingState d = reader.Transit( - state, - Reader::IterativeParsingCommaToken, - Reader::IterativeParsingElementDelimiterState, - is, handler); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingElementDelimiterState, d); - } -} - -TEST(Reader, IterativeParsing_StateTransition_ElementDelimiter) { - // ElementDelimiter -> ArrayInitial - { - ITERATIVE_PARSING_PREPARE_STATE_UNTIL("[1, [1]]", 4); - handler.Reset(); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingElementDelimiterState, 
state); - - Reader::IterativeParsingState d = reader.Transit( - state, - Reader::IterativeParsingLeftBracketToken, - Reader::IterativeParsingArrayInitialState, - is, handler); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingArrayInitialState, d); - - int c = *reader.stack_.template Pop(1); - EXPECT_EQ(0, c); - Reader::IterativeParsingState s = *reader.stack_.template Pop(1); - EXPECT_EQ(Reader::IterativeParsingElementState, s); - } - - // ElementDelimiter -> ObjectInitial - { - ITERATIVE_PARSING_PREPARE_STATE_UNTIL("[1, [1]]", 4); - handler.Reset(); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingElementDelimiterState, state); - - Reader::IterativeParsingState d = reader.Transit( - state, - Reader::IterativeParsingLeftBracketToken, - Reader::IterativeParsingArrayInitialState, - is, handler); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingArrayInitialState, d); - - int c = *reader.stack_.template Pop(1); - EXPECT_EQ(0, c); - Reader::IterativeParsingState s = *reader.stack_.template Pop(1); - EXPECT_EQ(Reader::IterativeParsingElementState, s); - } - - // ElementDelimiter -> Element - { - ITERATIVE_PARSING_PREPARE_STATE_UNTIL("[1, 2]", 4); - handler.Reset(); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingElementDelimiterState, state); - - Reader::IterativeParsingState d = reader.Transit( - state, - Reader::IterativeParsingNumberToken, - Reader::IterativeParsingElementState, - is, handler); - - EXPECT_FALSE(reader.HasParseError()); - EXPECT_EQ(Reader::IterativeParsingElementState, d); - - int c = *reader.stack_.template Pop(1); - EXPECT_EQ(1, c); + for (size_t i = 0; i < handler.LogCount; ++i) { + EXPECT_EQ(e[i], handler.Logs[i]) << "i = " << i; + } } } From 4a152646d9a5affe6018630afa5385ab460f8ccc Mon Sep 17 00:00:00 2001 From: thebusytypist Date: Fri, 18 Jul 2014 00:12:04 +0800 Subject: [PATCH 18/20] Fix compilation error for gcc/clang. 
--- include/rapidjson/reader.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index 99fb1a3..69d8cd8 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -277,7 +277,7 @@ public: \param allocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing) \param stackCapacity stack capacity in bytes for storing a single decoded string. (Only use for non-destructive parsing) */ - GenericReader(size_t limit = 0, Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(allocator, stackCapacity), kStackSizeLimit(limit), parseResult_() {} + GenericReader(size_t limit = 0, Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(allocator, stackCapacity), kStackSizeLimit_(limit), parseResult_() {} //! Parse JSON text. /*! \tparam parseFlags Combination of \ref ParseFlag. @@ -1225,12 +1225,12 @@ private: } bool CheckStackSpaceQuota(size_t size) const { - return kStackSizeLimit == 0 || (stack_.GetSize() + size <= kStackSizeLimit); + return kStackSizeLimit_ == 0 || (stack_.GetSize() + size <= kStackSizeLimit_); } static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string. - const size_t kStackSizeLimit; //!< Stack size limit(in bytes). A value of 0 means no limit. internal::Stack stack_; //!< A stack for storing decoded string temporarily during non-destructive parsing. + const size_t kStackSizeLimit_; //!< Stack size limit(in bytes). A value of 0 means no limit. ParseResult parseResult_; }; // class GenericReader From afa59455a5aae405949c0a516d65d1ca01e2057a Mon Sep 17 00:00:00 2001 From: thebusytypist Date: Fri, 18 Jul 2014 00:36:31 +0800 Subject: [PATCH 19/20] Fix compilation error(unused variable, signed/unsigned comparison). 
--- test/unittest/readertest.cpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/test/unittest/readertest.cpp b/test/unittest/readertest.cpp index 508fed4..0edcaca 100644 --- a/test/unittest/readertest.cpp +++ b/test/unittest/readertest.cpp @@ -721,17 +721,17 @@ TEST(Reader, Parse_IStreamWrapper_StringStream) { } TEST(Reader, IterativeParsing_ErrorHandling) { - TESTERRORHANDLING("{\"a\": a}", kParseErrorValueInvalid, 6); + TESTERRORHANDLING("{\"a\": a}", kParseErrorValueInvalid, 6u); - TESTERRORHANDLING("", kParseErrorDocumentEmpty, 0); - TESTERRORHANDLING("1", kParseErrorDocumentRootNotObjectOrArray, 0); - TESTERRORHANDLING("{}{}", kParseErrorDocumentRootNotSingular, 2); + TESTERRORHANDLING("", kParseErrorDocumentEmpty, 0u); + TESTERRORHANDLING("1", kParseErrorDocumentRootNotObjectOrArray, 0u); + TESTERRORHANDLING("{}{}", kParseErrorDocumentRootNotSingular, 2u); - TESTERRORHANDLING("{1}", kParseErrorObjectMissName, 1); - TESTERRORHANDLING("{\"a\", 1}", kParseErrorObjectMissColon, 4); - TESTERRORHANDLING("{\"a\"}", kParseErrorObjectMissColon, 4); - TESTERRORHANDLING("{\"a\": 1", kParseErrorObjectMissCommaOrCurlyBracket, 7); - TESTERRORHANDLING("[1 2 3]", kParseErrorArrayMissCommaOrSquareBracket, 3); + TESTERRORHANDLING("{1}", kParseErrorObjectMissName, 1u); + TESTERRORHANDLING("{\"a\", 1}", kParseErrorObjectMissColon, 4u); + TESTERRORHANDLING("{\"a\"}", kParseErrorObjectMissColon, 4u); + TESTERRORHANDLING("{\"a\": 1", kParseErrorObjectMissCommaOrCurlyBracket, 7u); + TESTERRORHANDLING("[1 2 3]", kParseErrorArrayMissCommaOrSquareBracket, 3u); } template > @@ -801,6 +801,7 @@ TEST(Reader, IterativeParsing_General) { ParseResult r = reader.IterativeParse(is, handler); + EXPECT_FALSE(r.IsError()); EXPECT_FALSE(reader.HasParseError()); int e[] = { @@ -837,6 +838,7 @@ TEST(Reader, IterativeParsing_Count) { ParseResult r = reader.IterativeParse(is, handler); + EXPECT_FALSE(r.IsError()); EXPECT_FALSE(reader.HasParseError()); int e[] = 
{ From 140dc0664ed406c8502589e41f675217e70a7342 Mon Sep 17 00:00:00 2001 From: thebusytypist Date: Fri, 18 Jul 2014 01:08:07 +0800 Subject: [PATCH 20/20] Fix compilation error(signed/unsigned comparison). --- test/unittest/readertest.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/unittest/readertest.cpp b/test/unittest/readertest.cpp index 0edcaca..1919e94 100644 --- a/test/unittest/readertest.cpp +++ b/test/unittest/readertest.cpp @@ -892,7 +892,7 @@ TEST(Reader, IterativeParsing_ShortCircuit) { EXPECT_TRUE(reader.HasParseError()); EXPECT_EQ(kParseErrorTermination, r.Code()); - EXPECT_EQ(4, r.Offset()); + EXPECT_EQ(4u, r.Offset()); } { @@ -904,7 +904,7 @@ TEST(Reader, IterativeParsing_ShortCircuit) { EXPECT_TRUE(reader.HasParseError()); EXPECT_EQ(kParseErrorTermination, r.Code()); - EXPECT_EQ(6, r.Offset()); + EXPECT_EQ(6u, r.Offset()); } { @@ -916,7 +916,7 @@ TEST(Reader, IterativeParsing_ShortCircuit) { EXPECT_TRUE(reader.HasParseError()); EXPECT_EQ(kParseErrorTermination, r.Code()); - EXPECT_EQ(5, r.Offset()); + EXPECT_EQ(5u, r.Offset()); } { @@ -928,7 +928,7 @@ TEST(Reader, IterativeParsing_ShortCircuit) { EXPECT_TRUE(reader.HasParseError()); EXPECT_EQ(kParseErrorTermination, r.Code()); - EXPECT_EQ(7, r.Offset()); + EXPECT_EQ(7u, r.Offset()); } } @@ -941,7 +941,7 @@ TEST(Reader, IterativeParsing_LimitStackSize) { EXPECT_TRUE(reader.HasParseError()); EXPECT_EQ(kParseErrorStackSizeLimitExceeded, r.Code()); - EXPECT_EQ(2, r.Offset()); + EXPECT_EQ(2u, r.Offset()); } #ifdef __GNUC__