Merge pull request #907 from almavi/master
Fixed bug on space hexadecimal encoding
This commit is contained in:
commit
b8f0414b9a
@ -955,7 +955,7 @@ private:
|
|||||||
if (c == '\0')
|
if (c == '\0')
|
||||||
RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell());
|
RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell());
|
||||||
else
|
else
|
||||||
RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, is.Tell());
|
RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, is.Tell());
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
size_t offset = is.Tell();
|
size_t offset = is.Tell();
|
||||||
@ -990,7 +990,7 @@ private:
|
|||||||
// The rest of string using SIMD
|
// The rest of string using SIMD
|
||||||
static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
|
static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
|
||||||
static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
|
static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
|
||||||
static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 };
|
static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
|
||||||
const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
|
const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
|
||||||
const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
|
const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
|
||||||
const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
|
const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
|
||||||
@ -999,7 +999,7 @@ private:
|
|||||||
const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
|
const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
|
||||||
const __m128i t1 = _mm_cmpeq_epi8(s, dq);
|
const __m128i t1 = _mm_cmpeq_epi8(s, dq);
|
||||||
const __m128i t2 = _mm_cmpeq_epi8(s, bs);
|
const __m128i t2 = _mm_cmpeq_epi8(s, bs);
|
||||||
const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19
|
const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
|
||||||
const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
|
const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
|
||||||
unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
|
unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
|
||||||
if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
|
if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
|
||||||
@ -1053,7 +1053,7 @@ private:
|
|||||||
// The rest of string using SIMD
|
// The rest of string using SIMD
|
||||||
static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
|
static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
|
||||||
static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
|
static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
|
||||||
static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 };
|
static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
|
||||||
const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
|
const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
|
||||||
const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
|
const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
|
||||||
const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
|
const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
|
||||||
@ -1062,7 +1062,7 @@ private:
|
|||||||
const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
|
const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
|
||||||
const __m128i t1 = _mm_cmpeq_epi8(s, dq);
|
const __m128i t1 = _mm_cmpeq_epi8(s, dq);
|
||||||
const __m128i t2 = _mm_cmpeq_epi8(s, bs);
|
const __m128i t2 = _mm_cmpeq_epi8(s, bs);
|
||||||
const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19
|
const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
|
||||||
const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
|
const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
|
||||||
unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
|
unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
|
||||||
if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
|
if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
|
||||||
@ -1101,7 +1101,7 @@ private:
|
|||||||
// The rest of string using SIMD
|
// The rest of string using SIMD
|
||||||
static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
|
static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
|
||||||
static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
|
static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
|
||||||
static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 };
|
static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
|
||||||
const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
|
const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
|
||||||
const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
|
const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
|
||||||
const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
|
const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
|
||||||
@ -1110,7 +1110,7 @@ private:
|
|||||||
const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
|
const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
|
||||||
const __m128i t1 = _mm_cmpeq_epi8(s, dq);
|
const __m128i t1 = _mm_cmpeq_epi8(s, dq);
|
||||||
const __m128i t2 = _mm_cmpeq_epi8(s, bs);
|
const __m128i t2 = _mm_cmpeq_epi8(s, bs);
|
||||||
const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19
|
const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
|
||||||
const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
|
const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
|
||||||
unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
|
unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
|
||||||
if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
|
if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
|
||||||
|
@ -585,7 +585,7 @@ inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, siz
|
|||||||
// The rest of string using SIMD
|
// The rest of string using SIMD
|
||||||
static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
|
static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
|
||||||
static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
|
static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
|
||||||
static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 };
|
static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
|
||||||
const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
|
const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
|
||||||
const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
|
const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
|
||||||
const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
|
const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
|
||||||
@ -594,7 +594,7 @@ inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, siz
|
|||||||
const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
|
const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
|
||||||
const __m128i t1 = _mm_cmpeq_epi8(s, dq);
|
const __m128i t1 = _mm_cmpeq_epi8(s, dq);
|
||||||
const __m128i t2 = _mm_cmpeq_epi8(s, bs);
|
const __m128i t2 = _mm_cmpeq_epi8(s, bs);
|
||||||
const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19
|
const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
|
||||||
const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
|
const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
|
||||||
unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
|
unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
|
||||||
if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
|
if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
|
||||||
|
@ -725,6 +725,8 @@ TEST(Reader, ParseString_Error) {
|
|||||||
|
|
||||||
// Malform ASCII sequence
|
// Malform ASCII sequence
|
||||||
TEST_STRINGENCODING_ERROR(ASCII<>, UTF8<>, char, ARRAY('[', '\"', char(0x80u), '\"', ']', '\0'));
|
TEST_STRINGENCODING_ERROR(ASCII<>, UTF8<>, char, ARRAY('[', '\"', char(0x80u), '\"', ']', '\0'));
|
||||||
|
TEST_STRINGENCODING_ERROR(ASCII<>, UTF8<>, char, ARRAY('[', '\"', char(0x01u), '\"', ']', '\0'));
|
||||||
|
TEST_STRINGENCODING_ERROR(ASCII<>, UTF8<>, char, ARRAY('[', '\"', char(0x1Cu), '\"', ']', '\0'));
|
||||||
|
|
||||||
#undef ARRAY
|
#undef ARRAY
|
||||||
#undef TEST_STRINGARRAY_ERROR
|
#undef TEST_STRINGARRAY_ERROR
|
||||||
|
@ -412,8 +412,10 @@ TEST(Writer, ValidateEncoding) {
|
|||||||
EXPECT_TRUE(writer.String("\xC2\xA2")); // Cents sign U+00A2
|
EXPECT_TRUE(writer.String("\xC2\xA2")); // Cents sign U+00A2
|
||||||
EXPECT_TRUE(writer.String("\xE2\x82\xAC")); // Euro sign U+20AC
|
EXPECT_TRUE(writer.String("\xE2\x82\xAC")); // Euro sign U+20AC
|
||||||
EXPECT_TRUE(writer.String("\xF0\x9D\x84\x9E")); // G clef sign U+1D11E
|
EXPECT_TRUE(writer.String("\xF0\x9D\x84\x9E")); // G clef sign U+1D11E
|
||||||
|
EXPECT_TRUE(writer.String("\x01")); // SOH control U+0001
|
||||||
|
EXPECT_TRUE(writer.String("\x1B")); // Escape control U+001B
|
||||||
writer.EndArray();
|
writer.EndArray();
|
||||||
EXPECT_STREQ("[\"\x24\",\"\xC2\xA2\",\"\xE2\x82\xAC\",\"\xF0\x9D\x84\x9E\"]", buffer.GetString());
|
EXPECT_STREQ("[\"\x24\",\"\xC2\xA2\",\"\xE2\x82\xAC\",\"\xF0\x9D\x84\x9E\",\"\\u0001\",\"\\u001B\"]", buffer.GetString());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fail in decoding invalid UTF-8 sequence http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
|
// Fail in decoding invalid UTF-8 sequence http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
|
||||||
|
Loading…
x
Reference in New Issue
Block a user