Merge pull request #1744 from lklein53/improve-surrogate-handling
Improve surrogate handling (#1738)
This commit is contained in:
commit
f56928de85
@ -1023,15 +1023,23 @@ private:
|
||||
is.Take();
|
||||
unsigned codepoint = ParseHex4(is, escapeOffset);
|
||||
RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
|
||||
if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDBFF)) {
|
||||
// Handle UTF-16 surrogate pair
|
||||
if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
|
||||
if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDFFF)) {
|
||||
// high surrogate, check if followed by valid low surrogate
|
||||
if (RAPIDJSON_LIKELY(codepoint <= 0xDBFF)) {
|
||||
// Handle UTF-16 surrogate pair
|
||||
if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
|
||||
RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
|
||||
unsigned codepoint2 = ParseHex4(is, escapeOffset);
|
||||
RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
|
||||
if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
|
||||
RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
|
||||
codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
|
||||
}
|
||||
// single low surrogate
|
||||
else
|
||||
{
|
||||
RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
|
||||
unsigned codepoint2 = ParseHex4(is, escapeOffset);
|
||||
RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
|
||||
if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
|
||||
RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
|
||||
codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
|
||||
}
|
||||
}
|
||||
TEncoding::Encode(os, codepoint);
|
||||
}
|
||||
|
@ -944,6 +944,9 @@ TEST(Reader, ParseString_Error) {
|
||||
TEST_STRING_ERROR(kParseErrorStringUnicodeSurrogateInvalid, "[\"\\uD800X\"]", 2u, 8u);
|
||||
TEST_STRING_ERROR(kParseErrorStringUnicodeSurrogateInvalid, "[\"\\uD800\\uFFFF\"]", 2u, 14u);
|
||||
|
||||
// A single low surrogate (not preceded by a high surrogate) in a string is invalid.
|
||||
TEST_STRING_ERROR(kParseErrorStringUnicodeSurrogateInvalid, "[\"\\udc4d\"]", 2u, 8u);
|
||||
|
||||
// Missing a closing quotation mark in string.
|
||||
TEST_STRING_ERROR(kParseErrorStringMissQuotationMark, "[\"Test]", 7u, 7u);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user