diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index 0f85032..30e45e1 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -1023,15 +1023,23 @@ private: is.Take(); unsigned codepoint = ParseHex4(is, escapeOffset); RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; - if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDBFF)) { - // Handle UTF-16 surrogate pair - if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u'))) + if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDFFF)) { + // high surrogate, check if followed by valid low surrogate + if (RAPIDJSON_LIKELY(codepoint <= 0xDBFF)) { + // Handle UTF-16 surrogate pair + if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u'))) + RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset); + unsigned codepoint2 = ParseHex4(is, escapeOffset); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF)) + RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset); + codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000; + } + // single low surrogate + else + { RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset); - unsigned codepoint2 = ParseHex4(is, escapeOffset); - RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; - if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF)) - RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset); - codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000; + } } TEncoding::Encode(os, codepoint); } diff --git a/test/unittest/readertest.cpp b/test/unittest/readertest.cpp index 2795766..2a4a626 100644 --- a/test/unittest/readertest.cpp +++ b/test/unittest/readertest.cpp @@ -944,6 +944,9 @@ TEST(Reader, ParseString_Error) { TEST_STRING_ERROR(kParseErrorStringUnicodeSurrogateInvalid, "[\"\\uD800X\"]", 2u, 8u); TEST_STRING_ERROR(kParseErrorStringUnicodeSurrogateInvalid, "[\"\\uD800\\uFFFF\"]", 2u, 14u); + // Single low surrogate pair in string is invalid. + TEST_STRING_ERROR(kParseErrorStringUnicodeSurrogateInvalid, "[\"\\udc4d\"]", 2u, 8u); + // Missing a closing quotation mark in string. TEST_STRING_ERROR(kParseErrorStringMissQuotationMark, "[\"Test]", 7u, 7u);