Merge pull request #1744 from lklein53/improve-surrogate-handling

Improve surrogate handling (#1738)
This commit is contained in:
Milo Yip 2020-07-13 11:10:39 +08:00 committed by GitHub
commit f56928de85
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 19 additions and 8 deletions

View File

@ -1023,7 +1023,9 @@ private:
is.Take(); is.Take();
unsigned codepoint = ParseHex4(is, escapeOffset); unsigned codepoint = ParseHex4(is, escapeOffset);
RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDBFF)) { if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDFFF)) {
// high surrogate, check if followed by valid low surrogate
if (RAPIDJSON_LIKELY(codepoint <= 0xDBFF)) {
// Handle UTF-16 surrogate pair // Handle UTF-16 surrogate pair
if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u'))) if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset); RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
@ -1033,6 +1035,12 @@ private:
RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset); RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000; codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
} }
// single low surrogate
else
{
RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
}
}
TEncoding::Encode(os, codepoint); TEncoding::Encode(os, codepoint);
} }
else else

View File

@ -944,6 +944,9 @@ TEST(Reader, ParseString_Error) {
TEST_STRING_ERROR(kParseErrorStringUnicodeSurrogateInvalid, "[\"\\uD800X\"]", 2u, 8u); TEST_STRING_ERROR(kParseErrorStringUnicodeSurrogateInvalid, "[\"\\uD800X\"]", 2u, 8u);
TEST_STRING_ERROR(kParseErrorStringUnicodeSurrogateInvalid, "[\"\\uD800\\uFFFF\"]", 2u, 14u); TEST_STRING_ERROR(kParseErrorStringUnicodeSurrogateInvalid, "[\"\\uD800\\uFFFF\"]", 2u, 14u);
// Single low surrogate pair in string is invalid.
TEST_STRING_ERROR(kParseErrorStringUnicodeSurrogateInvalid, "[\"\\udc4d\"]", 2u, 8u);
// Missing a closing quotation mark in string. // Missing a closing quotation mark in string.
TEST_STRING_ERROR(kParseErrorStringMissQuotationMark, "[\"Test]", 7u, 7u); TEST_STRING_ERROR(kParseErrorStringMissQuotationMark, "[\"Test]", 7u, 7u);