WIP: refactor iterative parsing.

This commit is contained in:
thebusytypist 2014-07-08 02:19:35 +08:00
parent ebb9a250d8
commit 6b0df217a8
2 changed files with 244 additions and 387 deletions

View File

@ -752,209 +752,263 @@ private:
} }
} }
// Non-recursive parsing // Iterative Parsing
// States
enum IterativeParsingState { enum IterativeParsingState {
IterativeParsingStartState, IterativeParsingStartState = 0,
IterativeParsingFinishState, IterativeParsingFinishState,
IterativeParsingErrorState, IterativeParsingErrorState,
// Object states // Object states
IterativeParsingObjectInitialState, IterativeParsingObjectInitialState,
IterativeParsingObjectContentState, IterativeParsingMemberState,
IterativeParsingObjectDelimiterState,
IterativeParsingObjectFinishState,
// Array states // Array states
IterativeParsingArrayInitialState, IterativeParsingArrayInitialState,
IterativeParsingArrayContentState IterativeParsingElementState,
IterativeParsingArrayDelimiterState,
IterativeParsingArrayFinishState,
cIterativeParsingStateCount
}; };
template <typename InputStream, typename Handler> // Tokens
IterativeParsingState TransitToCompoundValueTypeState(IterativeParsingState state, InputStream& is, Handler& handler) { enum IterativeParsingToken {
// For compound value type(object and array), we should push the current state and start a new stack frame for this type. IterativeParsingLeftBracketToken = 0,
IterativeParsingState r = IterativeParsingErrorState; IterativeParsingRightBracketToken,
switch (is.Take()) { IterativeParsingLeftCurlyBracketToken,
case '{': IterativeParsingRightCurlyBracketToken,
handler.StartObject();
r = IterativeParsingObjectInitialState; IterativeParsingCommaToken,
// Push current state.
*stack_.template Push<IterativeParsingState>(1) = state; IterativeParsingQuotesToken,
// Initialize and push member count.
*stack_.template Push<int>(1) = 0; IterativeParsingFalseToken,
break; IterativeParsingTrueToken,
case '[': IterativeParsingNullToken,
handler.StartArray(); IterativeParsingNumberToken,
r = IterativeParsingArrayInitialState;
// Push current state. cIterativeParsingTokenCount
*stack_.template Push<IterativeParsingState>(1) = state; };
// Initialize and push element count.
*stack_.template Push<int>(1) = 0; IterativeParsingToken GuessToken(Ch c) {
break; switch (c) {
case '[': return IterativeParsingLeftBracketToken;
case ']': return IterativeParsingRightBracketToken;
case '{': return IterativeParsingLeftCurlyBracketToken;
case '}': return IterativeParsingRightCurlyBracketToken;
case ',': return IterativeParsingCommaToken;
case '"': return IterativeParsingQuotesToken;
case 'f': return IterativeParsingFalseToken;
case 't': return IterativeParsingTrueToken;
case 'n': return IterativeParsingNullToken;
default: return IterativeParsingNumberToken;
} }
return r;
} }
// Inner transition of object or array states(ObjectInitial->ObjectContent, ArrayInitial->ArrayContent). IterativeParsingState Transit(IterativeParsingState state, IterativeParsingToken token) {
template <unsigned parseFlags, typename InputStream, typename Handler> // current state x one lookahead token -> new state
IterativeParsingState TransitByValue(IterativeParsingState state, InputStream& is, Handler& handler) { static const IterativeParsingState G[cIterativeParsingStateCount][cIterativeParsingTokenCount] = {
RAPIDJSON_ASSERT( // Start
state == IterativeParsingObjectInitialState || {
state == IterativeParsingArrayInitialState || IterativeParsingArrayInitialState, // Left bracket: '[' opens an array
state == IterativeParsingObjectContentState || IterativeParsingErrorState, // Right bracket
state == IterativeParsingArrayContentState); IterativeParsingObjectInitialState, // Left curly bracket: '{' opens an object
IterativeParsingErrorState, // Right curly bracket
IterativeParsingState t; IterativeParsingErrorState, // Comma
if (state == IterativeParsingObjectInitialState) IterativeParsingErrorState, // Quotes
t = IterativeParsingObjectContentState; IterativeParsingErrorState, // False
else if (state == IterativeParsingArrayInitialState) IterativeParsingErrorState, // True
t = IterativeParsingArrayContentState; IterativeParsingErrorState, // Null
else IterativeParsingErrorState // Number
t = state; },
// Finish(sink state)
IterativeParsingState r = IterativeParsingErrorState; {
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
switch (is.Peek()) { IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState
// For plain value state is not changed. },
case 'n': ParseNull <parseFlags>(is, handler); r = t; break; // Error(sink state)
case 't': ParseTrue <parseFlags>(is, handler); r = t; break; {
case 'f': ParseFalse <parseFlags>(is, handler); r = t; break; IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
case '"': ParseString<parseFlags>(is, handler); r = t; break; IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState
// Transit when value is object or array. },
case '{': // ObjectInitial
case '[': {
r = TransitToCompoundValueTypeState(state, is, handler); break; IterativeParsingErrorState, // Left bracket
default: ParseNumber<parseFlags>(is, handler); r = t; break; IterativeParsingErrorState, // Right bracket
} IterativeParsingErrorState, // Left curly bracket
IterativeParsingObjectFinishState, // Right curly bracket
if (HasParseError()) IterativeParsingErrorState, // Comma
r = IterativeParsingErrorState; IterativeParsingMemberState, // Quotes
IterativeParsingErrorState, // False
return r; IterativeParsingErrorState, // True
} IterativeParsingErrorState, // Null
IterativeParsingErrorState // Number
// Transit from object related states(ObjectInitial, ObjectContent). },
template <unsigned parseFlags, typename InputStream, typename Handler> // Member
IterativeParsingState TransitFromObjectStates(IterativeParsingState state, InputStream& is, Handler& handler) { {
IterativeParsingState r = IterativeParsingErrorState; IterativeParsingErrorState, // Left bracket
IterativeParsingErrorState, // Right bracket
switch (is.Peek()) { IterativeParsingErrorState, // Left curly bracket
case '}': { IterativeParsingObjectFinishState, // Right curly bracket
is.Take(); IterativeParsingObjectDelimiterState, // Comma
// Get member count(include an extra one for non-empty object). IterativeParsingErrorState, // Quotes
int memberCount = *stack_.template Pop<int>(1); IterativeParsingErrorState, // False
if (state == IterativeParsingObjectContentState) IterativeParsingErrorState, // True
++memberCount; IterativeParsingErrorState, // Null
// Restore the parent stack frame. IterativeParsingErrorState // Number
r = *stack_.template Pop<IterativeParsingState>(1); },
// Transit to ContentState since a member/an element was just parsed. // ObjectDelimiter
if (r == IterativeParsingArrayInitialState) {
r = IterativeParsingArrayContentState; IterativeParsingErrorState, // Left bracket
else if (r == IterativeParsingObjectInitialState) IterativeParsingErrorState, // Right bracket
r = IterativeParsingObjectContentState; IterativeParsingErrorState, // Left curly bracket
// If we return to the topmost frame mark it finished. IterativeParsingErrorState, // Right curly bracket
if (r == IterativeParsingStartState) IterativeParsingErrorState, // Comma
r = IterativeParsingFinishState; IterativeParsingMemberState, // Quotes
handler.EndObject(memberCount); IterativeParsingErrorState, // False
break; IterativeParsingErrorState, // True
} IterativeParsingErrorState, // Null
case ',': IterativeParsingErrorState // Number
is.Take(); },
r = IterativeParsingObjectContentState; // ObjectFinish(sink state)
// Update member count. {
*stack_.template Top<int>() = *stack_.template Top<int>() + 1; IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
break; IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState
case '"': },
// Should be a key-value pair. // ArrayInitial
ParseString<parseFlags>(is, handler); {
if (HasParseError()) { IterativeParsingElementState, // Left bracket
r = IterativeParsingErrorState; IterativeParsingArrayFinishState, // Right bracket
RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissName, is.Tell()); IterativeParsingElementState, // Left curly bracket
break; IterativeParsingErrorState, // Right curly bracket
IterativeParsingErrorState, // Comma
IterativeParsingElementState, // Quotes
IterativeParsingElementState, // False
IterativeParsingElementState, // True
IterativeParsingElementState, // Null
IterativeParsingElementState // Number
},
// Element
{
IterativeParsingErrorState, // Left bracket
IterativeParsingArrayFinishState, // Right bracket
IterativeParsingErrorState, // Left curly bracket
IterativeParsingErrorState, // Right curly bracket
IterativeParsingArrayDelimiterState, // Comma
IterativeParsingErrorState, // Quotes
IterativeParsingErrorState, // False
IterativeParsingErrorState, // True
IterativeParsingErrorState, // Null
IterativeParsingErrorState // Number
},
// ArrayDelimiter
{
IterativeParsingElementState, // Left bracket
IterativeParsingErrorState, // Right bracket
IterativeParsingElementState, // Left curly bracket
IterativeParsingErrorState, // Right curly bracket
IterativeParsingErrorState, // Comma
IterativeParsingElementState, // Quotes
IterativeParsingElementState, // False
IterativeParsingElementState, // True
IterativeParsingElementState, // Null
IterativeParsingElementState // Number
},
// ArrayFinish(sink state)
{
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState
} }
}; // End of G
SkipWhitespace(is); return G[state][token];
if (is.Take() != ':') {
r = IterativeParsingErrorState;
RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissColon, is.Tell());
break;
}
SkipWhitespace(is);
r = TransitByValue<parseFlags>(state, is, handler);
break;
default:
r = IterativeParsingErrorState;
RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell());
break;
}
return r;
} }
// Transit from array related states(ArrayInitial, ArrayContent). // Store or process information during state transition.
// May return a new state.
template <unsigned parseFlags, typename InputStream, typename Handler> template <unsigned parseFlags, typename InputStream, typename Handler>
IterativeParsingState TransitFromArrayStates(IterativeParsingState state, InputStream& is, Handler& handler) { IterativeParsingState Advance(IterativeParsingState src, IterativeParsingToken token, IterativeParsingState dst, InputStream& is, Handler& handler) {
IterativeParsingState r = IterativeParsingErrorState; int c = 0;
IterativeParsingState n;
switch (is.Peek()) { switch (dst) {
case ']': {
is.Take();
// Get element count(include an extra one for non-empty array).
int elementCount = *stack_.template Pop<int>(1);
if (state == IterativeParsingArrayContentState)
++elementCount;
// Restore the parent stack frame.
r = *stack_.template Pop<IterativeParsingState>(1);
// Transit to ContentState since a member/an element was just parsed.
if (r == IterativeParsingArrayInitialState)
r = IterativeParsingArrayContentState;
else if (r == IterativeParsingObjectInitialState)
r = IterativeParsingObjectContentState;
// If we return to the topmost frame mark it finished.
if (r == IterativeParsingStartState)
r = IterativeParsingFinishState;
handler.EndArray(elementCount);
break;
}
case ',':
is.Take();
r = IterativeParsingArrayContentState;
// Update element count.
*stack_.template Top<int>() = *stack_.template Top<int>() + 1;
break;
default:
// Should be a single value.
r = TransitByValue<parseFlags>(state, is, handler);
break;
}
return r;
}
template <unsigned parseFlags, typename InputStream, typename Handler>
IterativeParsingState Transit(IterativeParsingState state, InputStream& is, Handler& handler) {
IterativeParsingState r = IterativeParsingErrorState;
switch (state) {
case IterativeParsingStartState: case IterativeParsingStartState:
r = TransitToCompoundValueTypeState(state, is, handler); RAPIDJSON_ASSERT(false);
break; return IterativeParsingErrorState;
case IterativeParsingObjectInitialState:
case IterativeParsingObjectContentState:
r = TransitFromObjectStates<parseFlags>(state, is, handler);
break;
case IterativeParsingArrayInitialState:
case IterativeParsingArrayContentState:
r = TransitFromArrayStates<parseFlags>(state, is, handler);
break;
case IterativeParsingFinishState:
// Any token appears after finish state leads to error state.
r = IterativeParsingErrorState;
RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell());
break;
}
return r; case IterativeParsingFinishState:
is.Take();
return dst;
case IterativeParsingErrorState:
return dst;
case IterativeParsingObjectInitialState:
case IterativeParsingArrayInitialState:
is.Take();
// Push current state.
*stack_.template Push<IterativeParsingState>(1) = src;
// Initialize and push the member/element count.
*stack_.template Push<int>(1) = 0;
return dst;
case IterativeParsingMemberState:
return dst;
case IterativeParsingElementState:
if (token == IterativeParsingLeftBracketToken || token == IterativeParsingLeftCurlyBracketToken) {
return dst;
}
else {
}
case IterativeParsingObjectDelimiterState:
case IterativeParsingArrayDelimiterState:
is.Take();
// Update member/element count.
*stack_.template Top<int>() = *stack_.template Top<int>() + 1;
return dst;
case IterativeParsingObjectFinishState:
is.Take();
// Get member count.
c = *stack_.template Pop<int>(1);
if (src == IterativeParsingMemberState)
++c;
// Restore the state.
n = *stack_.template Pop<IterativeParsingState>(1);
// Transit to Finish state if this is the topmost scope.
if (n == IterativeParsingStartState)
n = IterativeParsingFinishState;
// Call handler
handler.EndObject(c);
return n;
case IterativeParsingArrayFinishState:
is.Take();
// Get element count.
c = *stack_.template Pop<int>(1);
if (src == IterativeParsingElementState)
++c;
// Restore the state.
n = *stack_.template Pop<IterativeParsingState>(1);
// Transit to Finish state if this is the topmost scope.
if (n == IterativeParsingStartState)
n = IterativeParsingFinishState;
// Call handler
handler.EndArray(c);
return n;
default:
RAPIDJSON_ASSERT(false);
return IterativeParsingErrorState;
}
} }
template <unsigned parseFlags, typename InputStream, typename Handler> template <unsigned parseFlags, typename InputStream, typename Handler>
@ -963,7 +1017,14 @@ private:
SkipWhitespace(is); SkipWhitespace(is);
while (is.Peek() != '\0' && state != IterativeParsingErrorState) { while (is.Peek() != '\0' && state != IterativeParsingErrorState) {
state = Transit<parseFlags>(state, is, handler); IterativeParsingToken t = GuessToken(is.Peek());
IterativeParsingState n = Transit(state, t);
if (Advance<parseFlags>(state, t, n, is, handler))
state = n;
else
break;
SkipWhitespace(is); SkipWhitespace(is);
} }

View File

@ -706,210 +706,6 @@ TEST(Reader, Parse_IStreamWrapper_StringStream) {
EXPECT_FALSE(reader.HasParseError()); EXPECT_FALSE(reader.HasParseError());
} }
TEST(Reader, IterativeParsing) {
    // Drives the parser by hand, one Transit() call per step, over a document
    // that contains every kind of value. After each step the test checks that
    // no parse error was raised, which state was returned and (except for the
    // first and last step) the int on top of the reader's stack, i.e. the
    // element/member count of the innermost open array/object. The expected
    // counts only change on a ',' or when a scope is opened or closed.
    StringStream json("[1,true,false,null,\"string\",{\"array\":[1]}]");
    Reader reader;
    BaseReaderHandler<> handler;
    Reader::IterativeParsingState r;

    // '[' : opens the outer array.
    r = reader.Transit<kParseIterativeFlag>(Reader::IterativeParsingStartState, json, handler);
    EXPECT_FALSE(reader.HasParseError());
    EXPECT_EQ(Reader::IterativeParsingArrayInitialState, r);

    // '1' : first element.
    r = reader.Transit<kParseIterativeFlag>(r, json, handler);
    EXPECT_FALSE(reader.HasParseError());
    EXPECT_EQ(Reader::IterativeParsingArrayContentState, r);
    EXPECT_EQ(0, *reader.stack_.template Top<int>()); // element count

    // ','
    r = reader.Transit<kParseIterativeFlag>(r, json, handler);
    EXPECT_FALSE(reader.HasParseError());
    EXPECT_EQ(Reader::IterativeParsingArrayContentState, r);
    EXPECT_EQ(1, *reader.stack_.template Top<int>()); // element count

    // 'true'
    r = reader.Transit<kParseIterativeFlag>(r, json, handler);
    EXPECT_FALSE(reader.HasParseError());
    EXPECT_EQ(Reader::IterativeParsingArrayContentState, r);
    EXPECT_EQ(1, *reader.stack_.template Top<int>()); // element count

    // ','
    r = reader.Transit<kParseIterativeFlag>(r, json, handler);
    EXPECT_FALSE(reader.HasParseError());
    EXPECT_EQ(Reader::IterativeParsingArrayContentState, r);
    EXPECT_EQ(2, *reader.stack_.template Top<int>()); // element count

    // 'false'
    r = reader.Transit<kParseIterativeFlag>(r, json, handler);
    EXPECT_FALSE(reader.HasParseError());
    EXPECT_EQ(Reader::IterativeParsingArrayContentState, r);
    EXPECT_EQ(2, *reader.stack_.template Top<int>()); // element count

    // ','
    r = reader.Transit<kParseIterativeFlag>(r, json, handler);
    EXPECT_FALSE(reader.HasParseError());
    EXPECT_EQ(Reader::IterativeParsingArrayContentState, r);
    EXPECT_EQ(3, *reader.stack_.template Top<int>()); // element count

    // 'null'
    r = reader.Transit<kParseIterativeFlag>(r, json, handler);
    EXPECT_FALSE(reader.HasParseError());
    EXPECT_EQ(Reader::IterativeParsingArrayContentState, r);
    EXPECT_EQ(3, *reader.stack_.template Top<int>()); // element count

    // ','
    r = reader.Transit<kParseIterativeFlag>(r, json, handler);
    EXPECT_FALSE(reader.HasParseError());
    EXPECT_EQ(Reader::IterativeParsingArrayContentState, r);
    EXPECT_EQ(4, *reader.stack_.template Top<int>()); // element count

    // '"string"'
    r = reader.Transit<kParseIterativeFlag>(r, json, handler);
    EXPECT_FALSE(reader.HasParseError());
    EXPECT_EQ(Reader::IterativeParsingArrayContentState, r);
    EXPECT_EQ(4, *reader.stack_.template Top<int>()); // element count

    // ','
    r = reader.Transit<kParseIterativeFlag>(r, json, handler);
    EXPECT_FALSE(reader.HasParseError());
    EXPECT_EQ(Reader::IterativeParsingArrayContentState, r);
    EXPECT_EQ(5, *reader.stack_.template Top<int>()); // element count

    // '{' : opens the nested object, so the stack top is now its member count.
    r = reader.Transit<kParseIterativeFlag>(r, json, handler);
    EXPECT_FALSE(reader.HasParseError());
    EXPECT_EQ(Reader::IterativeParsingObjectInitialState, r);
    EXPECT_EQ(0, *reader.stack_.template Top<int>()); // member count

    // '"array":[' : key, colon and the opening of the inner array in one step.
    r = reader.Transit<kParseIterativeFlag>(r, json, handler);
    EXPECT_FALSE(reader.HasParseError());
    EXPECT_EQ(Reader::IterativeParsingArrayInitialState, r);
    EXPECT_EQ(0, *reader.stack_.template Top<int>()); // element count

    // '1' : only element of the inner array.
    r = reader.Transit<kParseIterativeFlag>(r, json, handler);
    EXPECT_FALSE(reader.HasParseError());
    EXPECT_EQ(Reader::IterativeParsingArrayContentState, r);
    EXPECT_EQ(0, *reader.stack_.template Top<int>()); // element count

    // ']' : closes the inner array; back inside the object.
    r = reader.Transit<kParseIterativeFlag>(r, json, handler);
    EXPECT_FALSE(reader.HasParseError());
    EXPECT_EQ(Reader::IterativeParsingObjectContentState, r);
    EXPECT_EQ(0, *reader.stack_.template Top<int>()); // member count

    // '}' : closes the object; back inside the outer array.
    r = reader.Transit<kParseIterativeFlag>(r, json, handler);
    EXPECT_FALSE(reader.HasParseError());
    EXPECT_EQ(Reader::IterativeParsingArrayContentState, r);
    EXPECT_EQ(5, *reader.stack_.template Top<int>()); // element count

    // ']' : closes the outer array, which ends the document.
    r = reader.Transit<kParseIterativeFlag>(r, json, handler);
    EXPECT_FALSE(reader.HasParseError());
    EXPECT_EQ(Reader::IterativeParsingFinishState, r);
}
// Test handler that remembers the count passed to the most recent
// EndObject() / EndArray() callback; every other event uses the base handler.
struct CountHandler : BaseReaderHandler<> {
    // Start both counters at zero so that reading one before (or without) the
    // matching End* callback yields a defined value instead of an
    // indeterminate one.
    CountHandler() : MemberCount(0), ElementCount(0) {}

    // Records the member count reported when an object is closed.
    void EndObject(SizeType memberCount) {
        MemberCount = memberCount;
    }

    // Records the element count reported when an array is closed.
    void EndArray(SizeType elementCount) {
        ElementCount = elementCount;
    }

    SizeType MemberCount;   // count from the last EndObject() call (0 if none yet)
    SizeType ElementCount;  // count from the last EndArray() call (0 if none yet)
};
TEST(Reader, IterativeParsing_MemberCounting) {
    // An object whose only member is an empty array must be reported to
    // EndObject() with a member count of one.
    CountHandler handler;
    Reader reader;
    StringStream json("{\"array\": []}");

    reader.IterativeParse<kParseIterativeFlag>(json, handler);

    EXPECT_FALSE(reader.HasParseError());
    EXPECT_EQ(1, handler.MemberCount);
}
TEST(Reader, IterativeParsing_ElementCounting) {
    // An array whose only element is an empty object must be reported to
    // EndArray() with an element count of one.
    CountHandler handler;
    Reader reader;
    StringStream json("[{}]");

    reader.IterativeParse<kParseIterativeFlag>(json, handler);

    EXPECT_FALSE(reader.HasParseError());
    EXPECT_EQ(1, handler.ElementCount);
}
TEST(Reader, IterativeParsing_AfterFinishState) {
    // Input that continues after the root value has been closed must leave
    // the reader in an error state.
    BaseReaderHandler<> handler;
    Reader reader;
    StringStream json("{}, {}");

    reader.IterativeParse<kParseIterativeFlag>(json, handler);

    EXPECT_TRUE(reader.HasParseError());
}
#ifdef __GNUC__ #ifdef __GNUC__
#pragma GCC diagnostic pop #pragma GCC diagnostic pop
#endif #endif