diff --git a/include/rapidjson/rapidjson.h b/include/rapidjson/rapidjson.h index 114519d..163be6c 100644 --- a/include/rapidjson/rapidjson.h +++ b/include/rapidjson/rapidjson.h @@ -16,8 +16,10 @@ #ifdef _MSC_VER typedef __int64 int64_t; typedef unsigned __int64 uint64_t; +#define RAPIDJSON_FORCEINLINE __forceinline #else #include +#define RAPIDJSON_FORCEINLINE #endif #endif // RAPIDJSON_NO_INT64TYPEDEF @@ -343,6 +345,47 @@ struct UTF8 { } return buffer; } + + template + RAPIDJSON_FORCEINLINE static Ch* Validate(Ch *buffer, Stream& s) { +#define X1 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 +#define X5 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5 + static const char utf8[256] = { + X1,X1,X1,X1,X1,X1,X1,X1, // 00-7F 1 byte + X5,X5,X5,X5, // 80-BF Continuation + 0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // C0-C1: invalid, C2-CF: 2 bytes + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // D0-DF: 2 bytes + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // E0-EF: 3 bytes + 4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // F0-F4: 4 bytes + }; +#undef X1 +#undef X5 + +#define TAIL() c = *buffer++ = s.Take(); if ((c & 0xC0) != 0x80) return NULL; + + Ch c = *buffer++ = s.Take(); + if ((unsigned char)c < 0x80u) + return buffer; + + switch(utf8[(unsigned char)c]) { + case 2: + TAIL(); + return buffer; + + case 3: + TAIL(); + TAIL(); + return buffer; + + case 4: + TAIL(); + TAIL(); + TAIL(); + return buffer; + } + return NULL; +#undef TAIL + } }; /////////////////////////////////////////////////////////////////////////////// @@ -370,6 +413,21 @@ struct UTF16 { } return buffer; } + + template + static Ch* Validate(Ch *buffer, Stream& s) { + Ch c = *buffer++ = s.Take(); + if (c < 0xD800 || c > 0xDFFF) + ; + else if (c < 0xDBFF) { + Ch c = *buffer++ = s.Take(); + if (c < 0xDC00 || c > 0xDFFF) + return NULL; + } + else + return NULL; + return buffer; + } }; /////////////////////////////////////////////////////////////////////////////// @@ -389,6 +447,12 @@ struct UTF32 { *buffer++ = codepoint; return buffer; } + + template + static Ch* Validate(Ch *buffer, Stream& s) { + Ch c = *buffer++ = s.Take(); + return c <= 0x10FFFF ? buffer : 0; + } }; /////////////////////////////////////////////////////////////////////////////// diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index a016497..efd092a 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -27,7 +27,8 @@ namespace rapidjson { //! Combination of parseFlags enum ParseFlag { kParseDefaultFlags = 0, //!< Default parse flags. Non-destructive parsing. Text strings are decoded into allocated buffer. - kParseInsituFlag = 1 //!< In-situ(destructive) parsing. + kParseInsituFlag = 1, //!< In-situ(destructive) parsing. + kParseValidateEncodingFlag = 2, //!< Validate encoding of JSON strings. }; /////////////////////////////////////////////////////////////////////////////// @@ -401,8 +402,9 @@ private: } while(false) for (;;) { - Ch c = s.Take(); + Ch c = s.Peek(); if (c == '\\') { // Escape + s.Take(); Ch e = s.Take(); if ((sizeof(Ch) == 1 || e < 256) && escape[(unsigned char)e]) RAPIDJSON_PUT(escape[(unsigned char)e]); @@ -438,6 +440,7 @@ private: } } else if (c == '"') { // Closing double quote + s.Take(); if (parseFlags & kParseInsituFlag) { size_t length = s.PutEnd(head); RAPIDJSON_ASSERT(length <= 0xFFFFFFFF); @@ -459,8 +462,29 @@ private: RAPIDJSON_PARSE_ERROR("Incorrect unescaped character in string", stream.Tell() - 1); return; } - else - RAPIDJSON_PUT(c); // Normal character, just copy + else if (parseFlags & kParseValidateEncodingFlag) { + Ch buffer[4]; + Ch* end = Encoding::Validate(&buffer[0], s); + if (end == NULL) { + RAPIDJSON_PARSE_ERROR("Invalid encoding", s.Tell()); + return; + } + + if (parseFlags & kParseInsituFlag) + for (Ch* p = &buffer[0]; p != end; ++p) + s.Put(*p); + else { + SizeType l = SizeType(end - &buffer[0]); + Ch* q = stack_.template Push(l); + for (Ch* p = &buffer[0]; p != end; ++p) + *q++ = *p; + len += l; + } + + } + else { + RAPIDJSON_PUT(s.Take()); // Normal character, just copy + } } #undef RAPIDJSON_PUT } diff --git a/test/perftest/perftest.h b/test/perftest/perftest.h index b9b8a05..1f762a1 100644 --- a/test/perftest/perftest.h +++ b/test/perftest/perftest.h @@ -2,14 +2,14 @@ #define PERFTEST_H_ #define TEST_RAPIDJSON 1 -#define TEST_JSONCPP 1 -#define TEST_YAJL 1 -#define TEST_ULTRAJSON 1 -#define TEST_PLATFORM 1 +#define TEST_JSONCPP 0 +#define TEST_YAJL 0 +#define TEST_ULTRAJSON 0 +#define TEST_PLATFORM 0 #if TEST_RAPIDJSON //#define RAPIDJSON_SSE2 -//#define RAPIDJSON_SSE42 +#define RAPIDJSON_SSE42 #endif #if TEST_YAJL diff --git a/test/perftest/rapidjsontest.cpp b/test/perftest/rapidjsontest.cpp index 2da4863..5042791 100644 --- a/test/perftest/rapidjsontest.cpp +++ b/test/perftest/rapidjsontest.cpp @@ -41,22 +41,41 @@ protected: Document doc_; }; -TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseInsitu_NullHandler)) { +TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseInsitu_DummyHandler)) { for (int i = 0; i < kTrialCount; i++) { memcpy(temp_, json_, length_ + 1); InsituStringStream s(temp_); BaseReaderHandler<> h; Reader reader; - reader.Parse(s, h); + EXPECT_TRUE(reader.Parse(s, h)); } } -TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_NullHandler)) { +TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseInsitu_DummyHandler_ValidateEncoding)) { + for (int i = 0; i < kTrialCount; i++) { + memcpy(temp_, json_, length_ + 1); + InsituStringStream s(temp_); + BaseReaderHandler<> h; + Reader reader; + EXPECT_TRUE(reader.Parse(s, h)); + } +} + +TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler)) { for (int i = 0; i < kTrialCount; i++) { StringStream s(json_); BaseReaderHandler<> h; Reader reader; - reader.Parse<0>(s, h); + EXPECT_TRUE(reader.Parse<0>(s, h)); + } +} + +TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_ValidateEncoding)) { + for (int i = 0; i < kTrialCount; i++) { + StringStream s(json_); + BaseReaderHandler<> h; + Reader reader; + EXPECT_TRUE(reader.Parse(s, h)); } } @@ -235,7 +254,7 @@ TEST_F(RapidJson, FileReadStream) { } } -TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_NullHandler_FileReadStream)) { +TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_FileReadStream)) { for (int i = 0; i < kTrialCount; i++) { FILE *fp = fopen(filename_, "rb"); char buffer[65536]; diff --git a/test/unittest/readertest.cpp b/test/unittest/readertest.cpp index 0c2f64c..fba29a7 100644 --- a/test/unittest/readertest.cpp +++ b/test/unittest/readertest.cpp @@ -204,7 +204,7 @@ TEST(Reader, ParseString) { GenericInsituStringStream is(buffer); \ ParseStringHandler h; \ GenericReader reader; \ - reader.ParseString(is, h); \ + reader.ParseString(is, h); \ EXPECT_EQ(0, StrCmp(e, h.str_)); \ EXPECT_EQ(StrLen(e), h.length_); \ free(buffer); \ @@ -286,7 +286,6 @@ TEST(Reader, ParseString_NonDestructive) { EXPECT_EQ(11, h.length_); } -#ifdef RAPIDJSON_USE_EXCEPTION TEST(Reader, ParseString_Error) { #define TEST_STRING_ERROR(str) \ { \ @@ -295,18 +294,28 @@ TEST(Reader, ParseString_Error) { InsituStringStream s(buffer); \ BaseReaderHandler<> h; \ Reader reader; \ - EXPECT_ERROR(reader.ParseString<0>(s, h), ParseException); \ + EXPECT_FALSE(reader.Parse(s, h)); \ } - TEST_STRING_ERROR("\"\\a\""); // Unknown escape character - TEST_STRING_ERROR("\"\\uABCG\""); // Incorrect hex digit after \\u escape - TEST_STRING_ERROR("\"\\uD800X\""); // Missing the second \\u in surrogate pair - TEST_STRING_ERROR("\"\\uD800\\uFFFF\""); // The second \\u in surrogate pair is invalid - TEST_STRING_ERROR("\"Test"); // lacks ending quotation before the end of string +#define ARRAY(...) { __VA_ARGS__ } +#define TEST_STRINGARRAY_ERROR(Encoding, array) \ + { \ + static const Encoding::Ch e[] = array; \ + TEST_STRING_ERROR(e); \ + } + TEST_STRING_ERROR("[\"\\a\"]"); // Unknown escape character + TEST_STRING_ERROR("[\"\\uABCG\"]"); // Incorrect hex digit after \\u escape + TEST_STRING_ERROR("[\"\\uD800X\"]"); // Missing the second \\u in surrogate pair + TEST_STRING_ERROR("[\"\\uD800\\uFFFF\"]"); // The second \\u in surrogate pair is invalid + TEST_STRING_ERROR("[\"Test]"); // lacks ending quotation before the end of string + TEST_STRINGARRAY_ERROR(UTF8<>, ARRAY('[', 0x80u, ']')); // Incorrect UTF8 sequence + TEST_STRINGARRAY_ERROR(UTF8<>, ARRAY('[', 0xC0u, 0x40, ']')); // Incorrect UTF8 sequence + +#undef ARRAY +#undef TEST_STRINGARRAY_ERROR #undef TEST_STRING_ERROR } -#endif // RAPIDJSON_USE_EXCEPTION template struct ParseArrayHandler : BaseReaderHandler<> { @@ -340,7 +349,6 @@ TEST(Reader, ParseArray) { free(json); } -#ifdef RAPIDJSON_USE_EXCEPTION TEST(Reader, ParseArray_Error) { #define TEST_ARRAY_ERROR(str) \ { \ @@ -348,8 +356,8 @@ TEST(Reader, ParseArray_Error) { strncpy(buffer, str, 1000); \ InsituStringStream s(buffer); \ BaseReaderHandler<> h; \ - Reader, CrtAllocator> reader; \ - EXPECT_ERROR(reader.ParseArray<0>(s, h), ParseException); \ + GenericReader, CrtAllocator> reader; \ + EXPECT_FALSE(reader.Parse<0>(s, h)); \ } // Must be a comma or ']' after an array element. @@ -359,7 +367,6 @@ TEST(Reader, ParseArray_Error) { #undef TEST_ARRAY_ERROR } -#endif // RAPIDJSON_USE_EXCEPTION struct ParseObjectHandler : BaseReaderHandler<> { ParseObjectHandler() : step_(0) {} @@ -446,7 +453,6 @@ TEST(Reader, Parse_EmptyObject) { EXPECT_EQ(2, h.step_); } -#ifdef RAPIDJSON_USE_EXCEPTION TEST(Reader, ParseObject_Error) { #define TEST_OBJECT_ERROR(str) \ { \ @@ -454,8 +460,8 @@ TEST(Reader, ParseObject_Error) { strncpy(buffer, str, 1000); \ InsituStringStream s(buffer); \ BaseReaderHandler<> h; \ - Reader, CrtAllocator> reader; \ - EXPECT_ERROR(reader.ParseObject<0>(s, h), ParseException); \ + GenericReader, CrtAllocator> reader; \ + EXPECT_FALSE(reader.Parse<0>(s, h)); \ } // Name of an object member must be a string @@ -477,9 +483,7 @@ TEST(Reader, ParseObject_Error) { #undef TEST_OBJECT_ERROR } -#endif // RAPIDJSON_USE_EXCEPTION -#ifdef RAPIDJSON_USE_EXCEPTION TEST(Reader, Parse_Error) { #define TEST_ERROR(str) \ { \ @@ -488,7 +492,7 @@ TEST(Reader, Parse_Error) { InsituStringStream s(buffer); \ BaseReaderHandler<> h; \ Reader reader; \ - EXPECT_ERROR(reader.Parse<0>(s, h), ParseException); \ + EXPECT_FALSE(reader.Parse<0>(s, h)); \ } // Text only contains white space(s) @@ -514,4 +518,3 @@ TEST(Reader, Parse_Error) { #undef TEST_ERROR } -#endif // RAPIDJSON_USE_EXCEPTION