Added encoding validation feature
git-svn-id: https://rapidjson.googlecode.com/svn/trunk@30 c5894555-1306-4e8d-425f-1f6f381ee07c
This commit is contained in:
parent
78492f9962
commit
f198c486ee
@ -16,8 +16,10 @@
|
|||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
typedef __int64 int64_t;
|
typedef __int64 int64_t;
|
||||||
typedef unsigned __int64 uint64_t;
|
typedef unsigned __int64 uint64_t;
|
||||||
|
#define RAPIDJSON_FORCEINLINE __forceinline
|
||||||
#else
|
#else
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
|
#define RAPIDJSON_FORCEINLINE
|
||||||
#endif
|
#endif
|
||||||
#endif // RAPIDJSON_NO_INT64TYPEDEF
|
#endif // RAPIDJSON_NO_INT64TYPEDEF
|
||||||
|
|
||||||
@ -343,6 +345,47 @@ struct UTF8 {
|
|||||||
}
|
}
|
||||||
return buffer;
|
return buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename Stream>
|
||||||
|
RAPIDJSON_FORCEINLINE static Ch* Validate(Ch *buffer, Stream& s) {
|
||||||
|
#define X1 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
|
||||||
|
#define X5 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
|
||||||
|
static const char utf8[256] = {
|
||||||
|
X1,X1,X1,X1,X1,X1,X1,X1, // 00-7F 1 byte
|
||||||
|
X5,X5,X5,X5, // 80-BF Continuation
|
||||||
|
0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // C0-C1: invalid, C2-CF: 2 bytes
|
||||||
|
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // D0-DF: 2 bytes
|
||||||
|
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // E0-EF: 3 bytes
|
||||||
|
4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // F0-F4: 4 bytes
|
||||||
|
};
|
||||||
|
#undef X1
|
||||||
|
#undef X5
|
||||||
|
|
||||||
|
#define TAIL() c = *buffer++ = s.Take(); if ((c & 0xC0) != 0x80) return NULL;
|
||||||
|
|
||||||
|
Ch c = *buffer++ = s.Take();
|
||||||
|
if ((unsigned char)c < 0x80u)
|
||||||
|
return buffer;
|
||||||
|
|
||||||
|
switch(utf8[(unsigned char)c]) {
|
||||||
|
case 2:
|
||||||
|
TAIL();
|
||||||
|
return buffer;
|
||||||
|
|
||||||
|
case 3:
|
||||||
|
TAIL();
|
||||||
|
TAIL();
|
||||||
|
return buffer;
|
||||||
|
|
||||||
|
case 4:
|
||||||
|
TAIL();
|
||||||
|
TAIL();
|
||||||
|
TAIL();
|
||||||
|
return buffer;
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
#undef TAIL
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
@ -370,6 +413,21 @@ struct UTF16 {
|
|||||||
}
|
}
|
||||||
return buffer;
|
return buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename Stream>
|
||||||
|
static Ch* Validate(Ch *buffer, Stream& s) {
|
||||||
|
Ch c = *buffer++ = s.Take();
|
||||||
|
if (c < 0xD800 || c > 0xDFFF)
|
||||||
|
;
|
||||||
|
else if (c < 0xDBFF) {
|
||||||
|
Ch c = *buffer++ = s.Take();
|
||||||
|
if (c < 0xDC00 || c > 0xDFFF)
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return NULL;
|
||||||
|
return buffer;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
@ -389,6 +447,12 @@ struct UTF32 {
|
|||||||
*buffer++ = codepoint;
|
*buffer++ = codepoint;
|
||||||
return buffer;
|
return buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename Stream>
|
||||||
|
static Ch* Validate(Ch *buffer, Stream& s) {
|
||||||
|
Ch c = *buffer++ = s.Take();
|
||||||
|
return c <= 0x10FFFF ? buffer : 0;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -27,7 +27,8 @@ namespace rapidjson {
|
|||||||
//! Combination of parseFlags
|
//! Combination of parseFlags
|
||||||
enum ParseFlag {
|
enum ParseFlag {
|
||||||
kParseDefaultFlags = 0, //!< Default parse flags. Non-destructive parsing. Text strings are decoded into allocated buffer.
|
kParseDefaultFlags = 0, //!< Default parse flags. Non-destructive parsing. Text strings are decoded into allocated buffer.
|
||||||
kParseInsituFlag = 1 //!< In-situ(destructive) parsing.
|
kParseInsituFlag = 1, //!< In-situ(destructive) parsing.
|
||||||
|
kParseValidateEncodingFlag = 2, //!< Validate encoding of JSON strings.
|
||||||
};
|
};
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
@ -401,8 +402,9 @@ private:
|
|||||||
} while(false)
|
} while(false)
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
Ch c = s.Take();
|
Ch c = s.Peek();
|
||||||
if (c == '\\') { // Escape
|
if (c == '\\') { // Escape
|
||||||
|
s.Take();
|
||||||
Ch e = s.Take();
|
Ch e = s.Take();
|
||||||
if ((sizeof(Ch) == 1 || e < 256) && escape[(unsigned char)e])
|
if ((sizeof(Ch) == 1 || e < 256) && escape[(unsigned char)e])
|
||||||
RAPIDJSON_PUT(escape[(unsigned char)e]);
|
RAPIDJSON_PUT(escape[(unsigned char)e]);
|
||||||
@ -438,6 +440,7 @@ private:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (c == '"') { // Closing double quote
|
else if (c == '"') { // Closing double quote
|
||||||
|
s.Take();
|
||||||
if (parseFlags & kParseInsituFlag) {
|
if (parseFlags & kParseInsituFlag) {
|
||||||
size_t length = s.PutEnd(head);
|
size_t length = s.PutEnd(head);
|
||||||
RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
|
RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
|
||||||
@ -459,8 +462,29 @@ private:
|
|||||||
RAPIDJSON_PARSE_ERROR("Incorrect unescaped character in string", stream.Tell() - 1);
|
RAPIDJSON_PARSE_ERROR("Incorrect unescaped character in string", stream.Tell() - 1);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
else
|
else if (parseFlags & kParseValidateEncodingFlag) {
|
||||||
RAPIDJSON_PUT(c); // Normal character, just copy
|
Ch buffer[4];
|
||||||
|
Ch* end = Encoding::Validate(&buffer[0], s);
|
||||||
|
if (end == NULL) {
|
||||||
|
RAPIDJSON_PARSE_ERROR("Invalid encoding", s.Tell());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (parseFlags & kParseInsituFlag)
|
||||||
|
for (Ch* p = &buffer[0]; p != end; ++p)
|
||||||
|
s.Put(*p);
|
||||||
|
else {
|
||||||
|
SizeType l = SizeType(end - &buffer[0]);
|
||||||
|
Ch* q = stack_.template Push<Ch>(l);
|
||||||
|
for (Ch* p = &buffer[0]; p != end; ++p)
|
||||||
|
*q++ = *p;
|
||||||
|
len += l;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
RAPIDJSON_PUT(s.Take()); // Normal character, just copy
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#undef RAPIDJSON_PUT
|
#undef RAPIDJSON_PUT
|
||||||
}
|
}
|
||||||
|
@ -2,14 +2,14 @@
|
|||||||
#define PERFTEST_H_
|
#define PERFTEST_H_
|
||||||
|
|
||||||
#define TEST_RAPIDJSON 1
|
#define TEST_RAPIDJSON 1
|
||||||
#define TEST_JSONCPP 1
|
#define TEST_JSONCPP 0
|
||||||
#define TEST_YAJL 1
|
#define TEST_YAJL 0
|
||||||
#define TEST_ULTRAJSON 1
|
#define TEST_ULTRAJSON 0
|
||||||
#define TEST_PLATFORM 1
|
#define TEST_PLATFORM 0
|
||||||
|
|
||||||
#if TEST_RAPIDJSON
|
#if TEST_RAPIDJSON
|
||||||
//#define RAPIDJSON_SSE2
|
//#define RAPIDJSON_SSE2
|
||||||
//#define RAPIDJSON_SSE42
|
#define RAPIDJSON_SSE42
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if TEST_YAJL
|
#if TEST_YAJL
|
||||||
|
@ -41,22 +41,41 @@ protected:
|
|||||||
Document doc_;
|
Document doc_;
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseInsitu_NullHandler)) {
|
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseInsitu_DummyHandler)) {
|
||||||
for (int i = 0; i < kTrialCount; i++) {
|
for (int i = 0; i < kTrialCount; i++) {
|
||||||
memcpy(temp_, json_, length_ + 1);
|
memcpy(temp_, json_, length_ + 1);
|
||||||
InsituStringStream s(temp_);
|
InsituStringStream s(temp_);
|
||||||
BaseReaderHandler<> h;
|
BaseReaderHandler<> h;
|
||||||
Reader reader;
|
Reader reader;
|
||||||
reader.Parse<kParseInsituFlag>(s, h);
|
EXPECT_TRUE(reader.Parse<kParseInsituFlag>(s, h));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_NullHandler)) {
|
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseInsitu_DummyHandler_ValidateEncoding)) {
|
||||||
|
for (int i = 0; i < kTrialCount; i++) {
|
||||||
|
memcpy(temp_, json_, length_ + 1);
|
||||||
|
InsituStringStream s(temp_);
|
||||||
|
BaseReaderHandler<> h;
|
||||||
|
Reader reader;
|
||||||
|
EXPECT_TRUE(reader.Parse<kParseInsituFlag | kParseValidateEncodingFlag>(s, h));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler)) {
|
||||||
for (int i = 0; i < kTrialCount; i++) {
|
for (int i = 0; i < kTrialCount; i++) {
|
||||||
StringStream s(json_);
|
StringStream s(json_);
|
||||||
BaseReaderHandler<> h;
|
BaseReaderHandler<> h;
|
||||||
Reader reader;
|
Reader reader;
|
||||||
reader.Parse<0>(s, h);
|
EXPECT_TRUE(reader.Parse<0>(s, h));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_ValidateEncoding)) {
|
||||||
|
for (int i = 0; i < kTrialCount; i++) {
|
||||||
|
StringStream s(json_);
|
||||||
|
BaseReaderHandler<> h;
|
||||||
|
Reader reader;
|
||||||
|
EXPECT_TRUE(reader.Parse<kParseValidateEncodingFlag>(s, h));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -235,7 +254,7 @@ TEST_F(RapidJson, FileReadStream) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_NullHandler_FileReadStream)) {
|
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_FileReadStream)) {
|
||||||
for (int i = 0; i < kTrialCount; i++) {
|
for (int i = 0; i < kTrialCount; i++) {
|
||||||
FILE *fp = fopen(filename_, "rb");
|
FILE *fp = fopen(filename_, "rb");
|
||||||
char buffer[65536];
|
char buffer[65536];
|
||||||
|
@ -204,7 +204,7 @@ TEST(Reader, ParseString) {
|
|||||||
GenericInsituStringStream<Encoding> is(buffer); \
|
GenericInsituStringStream<Encoding> is(buffer); \
|
||||||
ParseStringHandler<Encoding> h; \
|
ParseStringHandler<Encoding> h; \
|
||||||
GenericReader<Encoding> reader; \
|
GenericReader<Encoding> reader; \
|
||||||
reader.ParseString<kParseInsituFlag>(is, h); \
|
reader.ParseString<kParseInsituFlag | kParseValidateEncodingFlag>(is, h); \
|
||||||
EXPECT_EQ(0, StrCmp<Encoding::Ch>(e, h.str_)); \
|
EXPECT_EQ(0, StrCmp<Encoding::Ch>(e, h.str_)); \
|
||||||
EXPECT_EQ(StrLen(e), h.length_); \
|
EXPECT_EQ(StrLen(e), h.length_); \
|
||||||
free(buffer); \
|
free(buffer); \
|
||||||
@ -286,7 +286,6 @@ TEST(Reader, ParseString_NonDestructive) {
|
|||||||
EXPECT_EQ(11, h.length_);
|
EXPECT_EQ(11, h.length_);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef RAPIDJSON_USE_EXCEPTION
|
|
||||||
TEST(Reader, ParseString_Error) {
|
TEST(Reader, ParseString_Error) {
|
||||||
#define TEST_STRING_ERROR(str) \
|
#define TEST_STRING_ERROR(str) \
|
||||||
{ \
|
{ \
|
||||||
@ -295,18 +294,28 @@ TEST(Reader, ParseString_Error) {
|
|||||||
InsituStringStream s(buffer); \
|
InsituStringStream s(buffer); \
|
||||||
BaseReaderHandler<> h; \
|
BaseReaderHandler<> h; \
|
||||||
Reader reader; \
|
Reader reader; \
|
||||||
EXPECT_ERROR(reader.ParseString<0>(s, h), ParseException); \
|
EXPECT_FALSE(reader.Parse<kParseValidateEncodingFlag>(s, h)); \
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_STRING_ERROR("\"\\a\""); // Unknown escape character
|
#define ARRAY(...) { __VA_ARGS__ }
|
||||||
TEST_STRING_ERROR("\"\\uABCG\""); // Incorrect hex digit after \\u escape
|
#define TEST_STRINGARRAY_ERROR(Encoding, array) \
|
||||||
TEST_STRING_ERROR("\"\\uD800X\""); // Missing the second \\u in surrogate pair
|
{ \
|
||||||
TEST_STRING_ERROR("\"\\uD800\\uFFFF\""); // The second \\u in surrogate pair is invalid
|
static const Encoding::Ch e[] = array; \
|
||||||
TEST_STRING_ERROR("\"Test"); // lacks ending quotation before the end of string
|
TEST_STRING_ERROR(e); \
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_STRING_ERROR("[\"\\a\"]"); // Unknown escape character
|
||||||
|
TEST_STRING_ERROR("[\"\\uABCG\"]"); // Incorrect hex digit after \\u escape
|
||||||
|
TEST_STRING_ERROR("[\"\\uD800X\"]"); // Missing the second \\u in surrogate pair
|
||||||
|
TEST_STRING_ERROR("[\"\\uD800\\uFFFF\"]"); // The second \\u in surrogate pair is invalid
|
||||||
|
TEST_STRING_ERROR("[\"Test]"); // lacks ending quotation before the end of string
|
||||||
|
// The bad bytes must sit inside a quoted JSON string, otherwise the parse fails
// for a reason other than encoding validation. The explicit '\0' terminates the
// array, since it is handed to a null-terminated string stream. Character
// literals avoid narrowing an unsigned constant into a char array.
TEST_STRINGARRAY_ERROR(UTF8<>, ARRAY('[', '\"', '\x80', '\"', ']', '\0')); // Incorrect UTF8 sequence: lone continuation byte
TEST_STRINGARRAY_ERROR(UTF8<>, ARRAY('[', '\"', '\xC0', '\x40', '\"', ']', '\0')); // Incorrect UTF8 sequence: invalid lead byte 0xC0
|
||||||
|
|
||||||
|
#undef ARRAY
|
||||||
|
#undef TEST_STRINGARRAY_ERROR
|
||||||
#undef TEST_STRING_ERROR
|
#undef TEST_STRING_ERROR
|
||||||
}
|
}
|
||||||
#endif // RAPIDJSON_USE_EXCEPTION
|
|
||||||
|
|
||||||
template <unsigned count>
|
template <unsigned count>
|
||||||
struct ParseArrayHandler : BaseReaderHandler<> {
|
struct ParseArrayHandler : BaseReaderHandler<> {
|
||||||
@ -340,7 +349,6 @@ TEST(Reader, ParseArray) {
|
|||||||
free(json);
|
free(json);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef RAPIDJSON_USE_EXCEPTION
|
|
||||||
TEST(Reader, ParseArray_Error) {
|
TEST(Reader, ParseArray_Error) {
|
||||||
#define TEST_ARRAY_ERROR(str) \
|
#define TEST_ARRAY_ERROR(str) \
|
||||||
{ \
|
{ \
|
||||||
@ -348,8 +356,8 @@ TEST(Reader, ParseArray_Error) {
|
|||||||
strncpy(buffer, str, 1000); \
|
strncpy(buffer, str, 1000); \
|
||||||
InsituStringStream s(buffer); \
|
InsituStringStream s(buffer); \
|
||||||
BaseReaderHandler<> h; \
|
BaseReaderHandler<> h; \
|
||||||
Reader<UTF8<>, CrtAllocator> reader; \
|
GenericReader<UTF8<>, CrtAllocator> reader; \
|
||||||
EXPECT_ERROR(reader.ParseArray<0>(s, h), ParseException); \
|
EXPECT_FALSE(reader.Parse<0>(s, h)); \
|
||||||
}
|
}
|
||||||
|
|
||||||
// Must be a comma or ']' after an array element.
|
// Must be a comma or ']' after an array element.
|
||||||
@ -359,7 +367,6 @@ TEST(Reader, ParseArray_Error) {
|
|||||||
|
|
||||||
#undef TEST_ARRAY_ERROR
|
#undef TEST_ARRAY_ERROR
|
||||||
}
|
}
|
||||||
#endif // RAPIDJSON_USE_EXCEPTION
|
|
||||||
|
|
||||||
struct ParseObjectHandler : BaseReaderHandler<> {
|
struct ParseObjectHandler : BaseReaderHandler<> {
|
||||||
ParseObjectHandler() : step_(0) {}
|
ParseObjectHandler() : step_(0) {}
|
||||||
@ -446,7 +453,6 @@ TEST(Reader, Parse_EmptyObject) {
|
|||||||
EXPECT_EQ(2, h.step_);
|
EXPECT_EQ(2, h.step_);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef RAPIDJSON_USE_EXCEPTION
|
|
||||||
TEST(Reader, ParseObject_Error) {
|
TEST(Reader, ParseObject_Error) {
|
||||||
#define TEST_OBJECT_ERROR(str) \
|
#define TEST_OBJECT_ERROR(str) \
|
||||||
{ \
|
{ \
|
||||||
@ -454,8 +460,8 @@ TEST(Reader, ParseObject_Error) {
|
|||||||
strncpy(buffer, str, 1000); \
|
strncpy(buffer, str, 1000); \
|
||||||
InsituStringStream s(buffer); \
|
InsituStringStream s(buffer); \
|
||||||
BaseReaderHandler<> h; \
|
BaseReaderHandler<> h; \
|
||||||
Reader<UTF8<>, CrtAllocator> reader; \
|
GenericReader<UTF8<>, CrtAllocator> reader; \
|
||||||
EXPECT_ERROR(reader.ParseObject<0>(s, h), ParseException); \
|
EXPECT_FALSE(reader.Parse<0>(s, h)); \
|
||||||
}
|
}
|
||||||
|
|
||||||
// Name of an object member must be a string
|
// Name of an object member must be a string
|
||||||
@ -477,9 +483,7 @@ TEST(Reader, ParseObject_Error) {
|
|||||||
|
|
||||||
#undef TEST_OBJECT_ERROR
|
#undef TEST_OBJECT_ERROR
|
||||||
}
|
}
|
||||||
#endif // RAPIDJSON_USE_EXCEPTION
|
|
||||||
|
|
||||||
#ifdef RAPIDJSON_USE_EXCEPTION
|
|
||||||
TEST(Reader, Parse_Error) {
|
TEST(Reader, Parse_Error) {
|
||||||
#define TEST_ERROR(str) \
|
#define TEST_ERROR(str) \
|
||||||
{ \
|
{ \
|
||||||
@ -488,7 +492,7 @@ TEST(Reader, Parse_Error) {
|
|||||||
InsituStringStream s(buffer); \
|
InsituStringStream s(buffer); \
|
||||||
BaseReaderHandler<> h; \
|
BaseReaderHandler<> h; \
|
||||||
Reader reader; \
|
Reader reader; \
|
||||||
EXPECT_ERROR(reader.Parse<0>(s, h), ParseException); \
|
EXPECT_FALSE(reader.Parse<0>(s, h)); \
|
||||||
}
|
}
|
||||||
|
|
||||||
// Text only contains white space(s)
|
// Text only contains white space(s)
|
||||||
@ -514,4 +518,3 @@ TEST(Reader, Parse_Error) {
|
|||||||
|
|
||||||
#undef TEST_ERROR
|
#undef TEST_ERROR
|
||||||
}
|
}
|
||||||
#endif // RAPIDJSON_USE_EXCEPTION
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user