From e7fd707698334b17d698be6c741990027e4f1378 Mon Sep 17 00:00:00 2001 From: John Stiles Date: Mon, 13 Mar 2017 00:33:10 -0700 Subject: [PATCH 1/4] Improve LookaheadParser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix clang -Wswitch-enum warnings. Made NextArrayValue() more robust—now handles error state correctly, will enter error state if an unexpected state is reached. Made separate states for each value type to simplify getters. Simplified implementation of skipping arrays and objects. Skipping an object now works whether you’ve retrieved the key or not. --- example/lookaheadparser/lookaheadparser.cpp | 188 ++++++++++++-------- 1 file changed, 111 insertions(+), 77 deletions(-) diff --git a/example/lookaheadparser/lookaheadparser.cpp b/example/lookaheadparser/lookaheadparser.cpp index f4759c4..4d8e13f 100644 --- a/example/lookaheadparser/lookaheadparser.cpp +++ b/example/lookaheadparser/lookaheadparser.cpp @@ -19,15 +19,15 @@ // // After calling EnterObject, you retrieve keys via NextObjectKey() and values via // the normal getters. When NextObjectKey() returns null, you have exited the -// object, or you can call ExitObject() to skip to the end of the object +// object, or you can call SkipObject() to skip to the end of the object // immediately. If you fetch the entire object (i.e. NextObjectKey() returned null), -// you should not call ExitObject(). +// you should not call SkipObject(). // // After calling EnterArray(), you must alternate between calling NextArrayValue() // to see if the array has more data, and then retrieving values via the normal -// getters. You can call ExitArray() to skip to the end of the array immediately. +// getters. You can call SkipArray() to skip to the end of the array immediately. // If you fetch the entire array (i.e. NextArrayValue() returned null), -// you should not call ExitArray(). +// you should not call SkipArray(). // // This parser uses in-situ strings, so the JSON buffer will be altered during the // parse. @@ -37,15 +37,15 @@ using namespace rapidjson; class LookaheadParserHandler { public: - bool Null() { st_ = kHasValue; v_.SetNull(); return true; } - bool Bool(bool b) { st_ = kHasValue; v_.SetBool(b); return true; } - bool Int(int i) { st_ = kHasValue; v_.SetInt(i); return true; } - bool Uint(unsigned u) { st_ = kHasValue; v_.SetUint(u); return true; } - bool Int64(int64_t i) { st_ = kHasValue; v_.SetInt64(i); return true; } - bool Uint64(uint64_t u) { st_ = kHasValue; v_.SetUint64(u); return true; } - bool Double(double d) { st_ = kHasValue; v_.SetDouble(d); return true; } + bool Null() { st_ = kHasNull; v_.SetNull(); return true; } + bool Bool(bool b) { st_ = kHasBool; v_.SetBool(b); return true; } + bool Int(int i) { st_ = kHasNumber; v_.SetInt(i); return true; } + bool Uint(unsigned u) { st_ = kHasNumber; v_.SetUint(u); return true; } + bool Int64(int64_t i) { st_ = kHasNumber; v_.SetInt64(i); return true; } + bool Uint64(uint64_t u) { st_ = kHasNumber; v_.SetUint64(u); return true; } + bool Double(double d) { st_ = kHasNumber; v_.SetDouble(d); return true; } bool RawNumber(const char*, SizeType, bool) { return false; } - bool String(const char* str, SizeType length, bool) { st_ = kHasValue; v_.SetString(str, length); return true; } + bool String(const char* str, SizeType length, bool) { st_ = kHasString; v_.SetString(str, length); return true; } bool StartObject() { st_ = kEnteringObject; return true; } bool Key(const char* str, SizeType length, bool) { st_ = kHasKey; v_.SetString(str, length); return true; } bool EndObject(SizeType) { st_ = kExitingObject; return true; } @@ -59,7 +59,10 @@ protected: protected: enum LookaheadParsingState { kError, - kHasValue, + kHasNull, + kHasBool, + kHasNumber, + kHasString, kHasKey, kEnteringObject, kExitingObject, @@ -93,10 +96,8 @@ class LookaheadParser : protected LookaheadParserHandler { public: LookaheadParser(char* str) : LookaheadParserHandler(str) {} - void EnterObject(); - void EnterArray(); - void ExitObject(); - void ExitArray(); + bool EnterObject(); + bool EnterArray(); const char* NextObjectKey(); bool NextArrayValue(); int GetInt(); @@ -105,70 +106,87 @@ public: bool GetBool(); void GetNull(); + void SkipObject(); + void SkipArray(); void SkipValue(); Value* PeekValue(); int PeekType(); // returns a rapidjson::Type, or -1 for no value (at end of object/array) bool IsValid() { return st_ != kError; } + +protected: + void SkipOut(int depth); }; -void LookaheadParser::EnterObject() { +bool LookaheadParser::EnterObject() { if (st_ != kEnteringObject) { st_ = kError; - return; - } - - ParseNext(); -} - -void LookaheadParser::EnterArray() { - if (st_ != kEnteringArray) { - st_ = kError; - return; - } - - ParseNext(); -} - -void LookaheadParser::ExitObject() { - while (NextObjectKey()) { - SkipValue(); - } -} - -void LookaheadParser::ExitArray() { - while (NextArrayValue()) { - SkipValue(); - } -} - -const char* LookaheadParser::NextObjectKey() { - if (st_ == kExitingObject) { - ParseNext(); - return 0; - } - - if (st_ != kHasKey || !v_.IsString()) { - st_ = kError; - return 0; - } - - const char* result = v_.GetString(); - ParseNext(); - return result; -} - -bool LookaheadParser::NextArrayValue() { - if (st_ == kExitingArray) { - ParseNext(); return false; } + ParseNext(); return true; } +bool LookaheadParser::EnterArray() { + if (st_ != kEnteringArray) { + st_ = kError; + return false; + } + + ParseNext(); + return true; +} + +const char* LookaheadParser::NextObjectKey() { + switch (st_) { + case kHasKey: { + const char* result = v_.GetString(); + ParseNext(); + return result; + } + + case kExitingObject: + ParseNext(); + return 0; + + case kError: + case kHasNull: + case kHasBool: + case kHasNumber: + case kHasString: + case kEnteringObject: + case kEnteringArray: + case kExitingArray: + st_ = kError; + return 0; + } +} + +bool LookaheadParser::NextArrayValue() { + switch (st_) { + case kExitingArray: + ParseNext(); + return false; + + case kError: + case kExitingObject: + case kHasKey: + st_ = kError; + return false; + + case kHasNull: + case kHasBool: + case kHasNumber: + case kHasString: + case kEnteringObject: + case kEnteringArray: + return true; + } +} + int LookaheadParser::GetInt() { - if (st_ != kHasValue || !v_.IsInt()) { + if (st_ != kHasNumber || !v_.IsInt()) { st_ = kError; return 0; } @@ -179,7 +197,7 @@ int LookaheadParser::GetInt() { } double LookaheadParser::GetDouble() { - if (st_ != kHasValue || !v_.IsNumber()) { + if (st_ != kHasNumber || !v_.IsNumber()) { st_ = kError; return 0.; } @@ -190,7 +208,7 @@ double LookaheadParser::GetDouble() { } bool LookaheadParser::GetBool() { - if (st_ != kHasValue || !v_.IsBool()) { + if (st_ != kHasBool) { st_ = kError; return false; } @@ -201,7 +219,7 @@ bool LookaheadParser::GetBool() { } void LookaheadParser::GetNull() { - if (st_ != kHasValue || !v_.IsNull()) { + if (st_ != kHasNull) { st_ = kError; return; } @@ -210,7 +228,7 @@ void LookaheadParser::GetNull() { } const char* LookaheadParser::GetString() { - if (st_ != kHasValue || !v_.IsString()) { + if (st_ != kHasString) { st_ = kError; return 0; } @@ -220,8 +238,7 @@ const char* LookaheadParser::GetString() { return result; } -void LookaheadParser::SkipValue() { - int depth = 0; +void LookaheadParser::SkipOut(int depth) { do { switch (st_) { case kEnteringArray: @@ -237,8 +254,11 @@ void LookaheadParser::SkipValue() { case kError: return; - case kHasKey: - case kHasValue: + case kHasNull: + case kHasBool: + case kHasNumber: + case kHasString: + case kHasKey: break; } ParseNext(); @@ -246,8 +266,20 @@ void LookaheadParser::SkipValue() { while (depth > 0); } +void LookaheadParser::SkipValue() { + SkipOut(0); +} + +void LookaheadParser::SkipArray() { + SkipOut(1); +} + +void LookaheadParser::SkipObject() { + SkipOut(1); +} + Value* LookaheadParser::PeekValue() { - if (st_ == kHasValue || st_ == kHasKey) { + if (st_ >= kHasNull && st_ <= kHasKey) { return &v_; } @@ -256,7 +288,10 @@ Value* LookaheadParser::PeekValue() { int LookaheadParser::PeekType() { switch (st_) { - case kHasValue: + case kHasNull: + case kHasBool: + case kHasNumber: + case kHasString: case kHasKey: return v_.GetType(); @@ -269,7 +304,6 @@ int LookaheadParser::PeekType() { case kExitingArray: case kExitingObject: case kError: - default: return -1; } } @@ -325,7 +359,7 @@ int main() { cout << r.GetString() << " "; } else { - r.ExitArray(); + r.SkipArray(); break; } } From bf19c1a0beafa6a118b1c242d16d0c0cfe0296e2 Mon Sep 17 00:00:00 2001 From: John Stiles Date: Mon, 13 Mar 2017 07:40:51 -0700 Subject: [PATCH 2/4] Remove switch GCC and Clang cannot agree on what constitutes a good switch statement. --- example/lookaheadparser/lookaheadparser.cpp | 61 ++++++++------------- 1 file changed, 22 insertions(+), 39 deletions(-) diff --git a/example/lookaheadparser/lookaheadparser.cpp b/example/lookaheadparser/lookaheadparser.cpp index 4d8e13f..29d9299 100644 --- a/example/lookaheadparser/lookaheadparser.cpp +++ b/example/lookaheadparser/lookaheadparser.cpp @@ -139,50 +139,33 @@ bool LookaheadParser::EnterArray() { } const char* LookaheadParser::NextObjectKey() { - switch (st_) { - case kHasKey: { - const char* result = v_.GetString(); - ParseNext(); - return result; - } - - case kExitingObject: - ParseNext(); - return 0; - - case kError: - case kHasNull: - case kHasBool: - case kHasNumber: - case kHasString: - case kEnteringObject: - case kEnteringArray: - case kExitingArray: - st_ = kError; - return 0; + if (st_ == kHasKey) { + const char* result = v_.GetString(); + ParseNext(); + return result; } + + if (st_ == kExitingObject) { + ParseNext(); + return 0; + } + + st_ = kError; + return 0; } bool LookaheadParser::NextArrayValue() { - switch (st_) { - case kExitingArray: - ParseNext(); - return false; - - case kError: - case kExitingObject: - case kHasKey: - st_ = kError; - return false; - - case kHasNull: - case kHasBool: - case kHasNumber: - case kHasString: - case kEnteringObject: - case kEnteringArray: - return true; + if (st_ == kExitingArray) { + ParseNext(); + return false; } + + if (st_ == kError || st_ == kExitingObject || st_ == kHasKey) { + st_ = kError; + return false; + } + + return true; } int LookaheadParser::GetInt() { From 6723e3296a9ae52aa249bd57395c955d05d81b45 Mon Sep 17 00:00:00 2001 From: John Stiles Date: Mon, 13 Mar 2017 07:43:26 -0700 Subject: [PATCH 3/4] Initialize v_ to placate GCC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v_ has a value assigned to it as part of ParseNext() which happens in the constructor, but that’s not soon enough for GCC --- example/lookaheadparser/lookaheadparser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/lookaheadparser/lookaheadparser.cpp b/example/lookaheadparser/lookaheadparser.cpp index 29d9299..9ce8432 100644 --- a/example/lookaheadparser/lookaheadparser.cpp +++ b/example/lookaheadparser/lookaheadparser.cpp @@ -78,7 +78,7 @@ protected: static const int parseFlags = kParseDefaultFlags | kParseInsituFlag; }; -LookaheadParserHandler::LookaheadParserHandler(char* str) : ss_(str) { +LookaheadParserHandler::LookaheadParserHandler(char* str) : v_(), ss_(str) { r_.IterativeParseInit(); ParseNext(); } From f0c108b5c9dc4b41a8ea39c8d418ff9a988edc86 Mon Sep 17 00:00:00 2001 From: John Stiles Date: Mon, 13 Mar 2017 07:53:37 -0700 Subject: [PATCH 4/4] Remove all switch --- example/lookaheadparser/lookaheadparser.cpp | 70 ++++++++------------- 1 file changed, 27 insertions(+), 43 deletions(-) diff --git a/example/lookaheadparser/lookaheadparser.cpp b/example/lookaheadparser/lookaheadparser.cpp index 9ce8432..29469ed 100644 --- a/example/lookaheadparser/lookaheadparser.cpp +++ b/example/lookaheadparser/lookaheadparser.cpp @@ -58,6 +58,7 @@ protected: protected: enum LookaheadParsingState { + kInit, kError, kHasNull, kHasBool, @@ -78,7 +79,7 @@ protected: static const int parseFlags = kParseDefaultFlags | kParseInsituFlag; }; -LookaheadParserHandler::LookaheadParserHandler(char* str) : v_(), ss_(str) { +LookaheadParserHandler::LookaheadParserHandler(char* str) : v_(), st_(kInit), r_(), ss_(str) { r_.IterativeParseInit(); ParseNext(); } @@ -145,12 +146,12 @@ const char* LookaheadParser::NextObjectKey() { return result; } - if (st_ == kExitingObject) { - ParseNext(); + if (st_ != kExitingObject) { + st_ = kError; return 0; } - st_ = kError; + ParseNext(); return 0; } @@ -180,7 +181,7 @@ int LookaheadParser::GetInt() { } double LookaheadParser::GetDouble() { - if (st_ != kHasNumber || !v_.IsNumber()) { + if (st_ != kHasNumber) { st_ = kError; return 0.; } @@ -223,27 +224,16 @@ const char* LookaheadParser::GetString() { void LookaheadParser::SkipOut(int depth) { do { - switch (st_) { - case kEnteringArray: - case kEnteringObject: - ++depth; - break; - - case kExitingArray: - case kExitingObject: - --depth; - break; - - case kError: - return; - - case kHasNull: - case kHasBool: - case kHasNumber: - case kHasString: - case kHasKey: - break; + if (st_ == kEnteringArray || st_ == kEnteringObject) { + ++depth; } + else if (st_ == kExitingArray || st_ == kExitingObject) { + --depth; + } + else if (st_ == kError) { + return; + } + ParseNext(); } while (depth > 0); @@ -270,25 +260,19 @@ Value* LookaheadParser::PeekValue() { } int LookaheadParser::PeekType() { - switch (st_) { - case kHasNull: - case kHasBool: - case kHasNumber: - case kHasString: - case kHasKey: - return v_.GetType(); - - case kEnteringArray: - return kArrayType; - - case kEnteringObject: - return kObjectType; - - case kExitingArray: - case kExitingObject: - case kError: - return -1; + if (st_ >= kHasNull && st_ <= kHasKey) { + return v_.GetType(); } + + if (st_ == kEnteringArray) { + return kArrayType; + } + + if (st_ == kEnteringObject) { + return kObjectType; + } + + return -1; } //-------------------------------------------------------------------------