From a5990f3eea871e4390c93da95f850d762c372605 Mon Sep 17 00:00:00 2001
From: Milo Yip
Date: Fri, 22 Jan 2016 18:26:24 +0800
Subject: [PATCH] Optimize ScanCopyUnescapedString for insitu parsing

---
Review notes (not part of the commit message):
- The early exits from the scalar alignment loops now store the stream
  pointers and return instead of using `goto exit`. The jump skipped the
  initialisation of the local `__m128i` constants that are still in scope at
  the label, which is ill-formed C++ (rejected by GCC and Clang).
- The SIMD control-character mask in the two new functions is 0x1F instead of
  0x19, so that it agrees with the scalar `< 0x20` test:
  max(s, 0x19) == 0x19 only holds for s <= 0x19, which lets 0x1A..0x1F through.
  The pre-existing StringStream overload is outside this patch; check whether
  it carries the same constant.
- Removed the unused local `found` from SkipUnescapedString.
- This copy of the patch had lost its line breaks and most template argument
  lists. They were restored where the surrounding code determines them. The
  last context line of the fourth reader.h hunk (`template`) could not be
  reconstructed and must be completed from reader.h before applying.
  Indentation was re-created and may need `--ignore-whitespace`.
- NOTE(review): the new in-situ perf test calls reader.Parse(s, h) with no
  explicit parse flags as shown here; confirm whether kParseInsituFlag was
  intended.

 include/rapidjson/reader.h      | 114 ++++++++++++++++++++++++++++++--
 test/perftest/perftest.h        |   9 +--
 test/perftest/rapidjsontest.cpp |   9 +++
 3 files changed, 124 insertions(+), 8 deletions(-)

diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h
index a629f5b..3930808 100644
--- a/include/rapidjson/reader.h
+++ b/include/rapidjson/reader.h
@@ -705,6 +705,9 @@ private:
         internal::StreamLocalCopy<InputStream> copy(is);
         InputStream& s(copy.s);
 
+        RAPIDJSON_ASSERT(s.Peek() == '\"');
+        s.Take();  // Skip '\"'
+
         bool success = false;
         if (parseFlags & kParseInsituFlag) {
             typename InputStream::Ch *head = s.PutBegin();
@@ -743,9 +746,6 @@ private:
 #undef Z16
 //!@endcond
 
-        RAPIDJSON_ASSERT(is.Peek() == '\"');
-        is.Take();  // Skip '\"'
-
         for (;;) {
             // Scan and copy string before "\\\"" or < 0x20. This is an optional optimzation.
             if (!(parseFlags & kParseValidateEncodingFlag))
@@ -801,13 +801,14 @@ private:
     }
 
 #if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
+    // StringStream -> StackStream
     static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
         const char* p = is.src_;
 
         // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
         const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
         while (p != nextAligned)
-            if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(*p < 0x20)) {
+            if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
                 is.src_ = p;
                 return;
             }
@@ -850,6 +851,111 @@ private:
 
         is.src_ = p;
     }
+
+    // InsituStringStream -> InsituStringStream
+    static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
+        RAPIDJSON_ASSERT(&is == &os);
+
+        if (is.src_ == is.dst_) {
+            SkipUnescapedString(is);
+            return;
+        }
+
+        char* p = is.src_;
+        char *q = is.dst_;
+
+        // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
+        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+        while (p != nextAligned)
+            if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
+                // Store the positions and return here; a goto to the end would skip the __m128i initializations below
+                is.src_ = p;
+                is.dst_ = q;
+                return;
+            }
+            else
+                *q++ = *p++;
+
+        // The rest of string using SIMD
+        static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
+        static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
+        static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
+        const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
+        const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
+        const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
+
+        for (;; p += 16, q += 16) {
+            const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
+            const __m128i t1 = _mm_cmpeq_epi8(s, dq);
+            const __m128i t2 = _mm_cmpeq_epi8(s, bs);
+            const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
+            const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
+            unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
+            if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
+                size_t length;
+#ifdef _MSC_VER // Find the index of first escaped
+                unsigned long offset;
+                _BitScanForward(&offset, r);
+                length = offset;
+#else
+                length = static_cast<size_t>(__builtin_ffs(r) - 1);
+#endif
+                for (const char* pend = p + length; p != pend; )
+                    *q++ = *p++;
+                break;
+            }
+            _mm_storeu_si128(reinterpret_cast<__m128i *>(q), s);
+        }
+
+        is.src_ = p;
+        is.dst_ = q;
+    }
+
+    // When read/write pointers are the same for insitu stream, just skip unescaped characters
+    static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
+        RAPIDJSON_ASSERT(is.src_ == is.dst_);
+        char* p = is.src_;
+
+        // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
+        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+        for (; p != nextAligned; p++)
+            if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
+                // Store the position and return here; a goto to the end would skip the __m128i initializations below
+                is.src_ = is.dst_ = p;
+                return;
+            }
+
+        // The rest of string using SIMD
+        static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
+        static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
+        static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
+        const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
+        const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
+        const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
+
+        for (;; p += 16) {
+            const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
+            const __m128i t1 = _mm_cmpeq_epi8(s, dq);
+            const __m128i t2 = _mm_cmpeq_epi8(s, bs);
+            const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
+            const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
+            unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
+            if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped
+                size_t length;
+#ifdef _MSC_VER // Find the index of first escaped
+                unsigned long offset;
+                _BitScanForward(&offset, r);
+                length = offset;
+#else
+                length = static_cast<size_t>(__builtin_ffs(r) - 1);
+#endif
+                p += length;
+                break;
+            }
+        }
+
+        is.src_ = is.dst_ = p;
+    }
 #endif
 
     template
diff --git a/test/perftest/perftest.h b/test/perftest/perftest.h
index 2afe641..0d31602 100644
--- a/test/perftest/perftest.h
+++ b/test/perftest/perftest.h
@@ -135,11 +135,11 @@ public:
                 sprintf(filename, "%s/%s", typespaths[i], typesfilenames[j]);
                 if (FILE* fp = fopen(filename, "rb")) {
                     fseek(fp, 0, SEEK_END);
-                    size_t length = (size_t)ftell(fp);
+                    typesLength_[j] = (size_t)ftell(fp);
                     fseek(fp, 0, SEEK_SET);
-                    types_[j] = (char*)malloc(length + 1);
-                    ASSERT_EQ(length, fread(types_[j], 1, length, fp));
-                    types_[j][length] = '\0';
+                    types_[j] = (char*)malloc(typesLength_[j] + 1);
+                    ASSERT_EQ(typesLength_[j], fread(types_[j], 1, typesLength_[j], fp));
+                    types_[j][typesLength_[j]] = '\0';
                     fclose(fp);
                     break;
                 }
@@ -170,6 +170,7 @@ protected:
     char *whitespace_;
     size_t whitespace_length_;
     char *types_[7];
+    size_t typesLength_[7];
 
     static const size_t kTrialCount = 1000;
 };
diff --git a/test/perftest/rapidjsontest.cpp b/test/perftest/rapidjsontest.cpp
index 4693755..7f5fc08 100644
--- a/test/perftest/rapidjsontest.cpp
+++ b/test/perftest/rapidjsontest.cpp
@@ -103,6 +103,15 @@ TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_##Name)) {\
         Reader reader;\
         EXPECT_TRUE(reader.Parse(s, h));\
     }\
+}\
+TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseInsitu_DummyHandler_##Name)) {\
+    for (size_t i = 0; i < kTrialCount * 10; i++) {\
+        memcpy(temp_, types_[index], typesLength_[index] + 1);\
+        InsituStringStream s(temp_);\
+        BaseReaderHandler<> h;\
+        Reader reader;\
+        EXPECT_TRUE(reader.Parse(s, h));\
+    }\
 }
 
 TEST_TYPED(0, Booleans)