From 6a6d9c7e05d2c312b0a9058ee143f168719d93af Mon Sep 17 00:00:00 2001
From: Milo Yip
Date: Sun, 14 Feb 2016 17:37:53 +0800
Subject: [PATCH] Optimize Writer::WriteString() with SIMD

Writer::WriteString() now uses ScanWriteUnescapedString() as its loop
condition. The generic version only checks for remaining input; the
SSE2/SSE4.2 specialization for Writer<StringBuffer> additionally copies
runs of characters that need no escaping, 16 bytes at a time.

The control-character mask is 0x1F so that every byte below 0x20 is
detected (a mask of 0x19 would let 0x1A..0x1F through unescaped). The
unit test covers 0x1F as a regression case.
---
 include/rapidjson/stringbuffer.h |  1 +
 include/rapidjson/writer.h       | 77 +++++++++++++++++++++++++++++++-
 test/perftest/rapidjsontest.cpp  |  6 +--
 test/unittest/simdtest.cpp       | 55 +++++++++++++++++++++
 4 files changed, 135 insertions(+), 4 deletions(-)

diff --git a/include/rapidjson/stringbuffer.h b/include/rapidjson/stringbuffer.h
index 41c8dfc..bb939a9 100644
--- a/include/rapidjson/stringbuffer.h
+++ b/include/rapidjson/stringbuffer.h
@@ -67,6 +67,7 @@ public:
 
     void Reserve(size_t count) { stack_.template Reserve<Ch>(count); }
     Ch* Push(size_t count) { return stack_.template Push<Ch>(count); }
+    Ch* PushUnsafe(size_t count) { return stack_.template PushUnsafe<Ch>(count); }
     void Pop(size_t count) { stack_.template Pop<Ch>(count); }
 
     const Ch* GetString() const {
diff --git a/include/rapidjson/writer.h b/include/rapidjson/writer.h
index 6e6f2fd..f61e5af 100644
--- a/include/rapidjson/writer.h
+++ b/include/rapidjson/writer.h
@@ -294,7 +294,7 @@ protected:
 
         PutUnsafe(*os_, '\"');
         GenericStringStream<SourceEncoding> is(str);
-        while (RAPIDJSON_LIKELY(is.Tell() < length)) {
+        while (ScanWriteUnescapedString(is, length)) {
             const Ch c = is.Peek();
             if (!TargetEncoding::supportUnicode && static_cast<unsigned>(c) >= 0x80) {
                 // Unicode escaping
@@ -347,6 +347,10 @@ protected:
         return true;
     }
 
+    bool ScanWriteUnescapedString(GenericStringStream<SourceEncoding>& is, size_t length) {
+        return RAPIDJSON_LIKELY(is.Tell() < length);
+    }
+
     bool WriteStartObject() { os_->Put('{'); return true; }
     bool WriteEndObject()   { os_->Put('}'); return true; }
     bool WriteStartArray()  { os_->Put('['); return true; }
@@ -427,6 +431,77 @@ inline bool Writer<StringBuffer>::WriteDouble(double d) {
     return true;
 }
 
+#if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
+// SSE2/SSE4.2 specialization for Writer<StringBuffer>: copies the leading run of
+// characters that need no escaping from `is` to the output in 16-byte blocks,
+// stopping at the first '"', '\\' or control character (< 0x20) so that the
+// caller can escape it. Returns true while input remains (is.Tell() < length).
+template<>
+inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, size_t length) {
+    // Strings shorter than one 16-byte block are left entirely to the caller's scalar loop.
+    if (length < 16)
+        return RAPIDJSON_LIKELY(is.Tell() < length);
+
+    if (!RAPIDJSON_LIKELY(is.Tell() < length))
+        return false;
+
+    const char* p = is.src_;
+    const char* end = is.head_ + length;
+    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+    const char* endAligned = reinterpret_cast<const char*>(reinterpret_cast<size_t>(end) & static_cast<size_t>(~15));
+    // Rounding p up to a 16-byte boundary overshoots the string: no aligned block to process.
+    if (nextAligned > end)
+        return true;
+
+    // Scalar copy up to the first 16-byte boundary so the loop below can use aligned loads.
+    while (p != nextAligned)
+        if (*p < 0x20 || *p == '\"' || *p == '\\') {
+            is.src_ = p;
+            return RAPIDJSON_LIKELY(is.Tell() < length);
+        }
+        else
+            os_->PutUnsafe(*p++);
+
+    // The rest of the string, one aligned 16-byte block at a time, using SIMD
+    static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
+    static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
+    static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
+    const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
+    const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
+    const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
+
+    for (; p != endAligned; p += 16) {
+        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
+        const __m128i t1 = _mm_cmpeq_epi8(s, dq);
+        const __m128i t2 = _mm_cmpeq_epi8(s, bs);
+        const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F (unsigned compare)
+        const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
+        unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
+        if (RAPIDJSON_UNLIKELY(r != 0)) {   // some of characters is escaped
+            SizeType len;
+#ifdef _MSC_VER         // Find the index of first escaped
+            unsigned long offset;
+            _BitScanForward(&offset, r);
+            len = offset;
+#else
+            len = static_cast<SizeType>(__builtin_ffs(r) - 1);
+#endif
+            // Emit only the bytes that precede the first character needing an escape.
+            char* q = reinterpret_cast<char*>(os_->PushUnsafe(len));
+            for (size_t i = 0; i < len; i++)
+                q[i] = p[i];
+
+            p += len;
+            break;
+        }
+        _mm_storeu_si128(reinterpret_cast<__m128i *>(os_->PushUnsafe(16)), s);
+    }
+
+    is.src_ = p;
+    return RAPIDJSON_LIKELY(is.Tell() < length);
+}
+#endif // defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
+
 RAPIDJSON_NAMESPACE_END
 
 #ifdef _MSC_VER
diff --git a/test/perftest/rapidjsontest.cpp b/test/perftest/rapidjsontest.cpp
index 7f5fc08..5584178 100644
--- a/test/perftest/rapidjsontest.cpp
+++ b/test/perftest/rapidjsontest.cpp
@@ -301,7 +301,7 @@ TEST_F(RapidJson, Writer_NullStream) {
     }
 }
 
-TEST_F(RapidJson, Writer_StringBuffer) {
+TEST_F(RapidJson, SIMD_SUFFIX(Writer_StringBuffer)) {
     for (size_t i = 0; i < kTrialCount; i++) {
         StringBuffer s(0, 1024 * 1024);
         Writer<StringBuffer> writer(s);
@@ -314,7 +314,7 @@ TEST_F(RapidJson, Writer_StringBuffer) {
 }
 
 #define TEST_TYPED(index, Name)\
-TEST_F(RapidJson, Writer_StringBuffer_##Name) {\
+TEST_F(RapidJson, SIMD_SUFFIX(Writer_StringBuffer_##Name)) {\
     for (size_t i = 0; i < kTrialCount * 10; i++) {\
         StringBuffer s(0, 1024 * 1024);\
         Writer<StringBuffer> writer(s);\
@@ -334,7 +334,7 @@ TEST_TYPED(6, Paragraphs)
 
 #undef TEST_TYPED
 
-TEST_F(RapidJson, PrettyWriter_StringBuffer) {
+TEST_F(RapidJson, SIMD_SUFFIX(PrettyWriter_StringBuffer)) {
     for (size_t i = 0; i < kTrialCount; i++) {
         StringBuffer s(0, 2048 * 1024);
         PrettyWriter<StringBuffer> writer(s);
diff --git a/test/unittest/simdtest.cpp b/test/unittest/simdtest.cpp
index 3dfb5b3..6ded740 100644
--- a/test/unittest/simdtest.cpp
+++ b/test/unittest/simdtest.cpp
@@ -28,6 +28,7 @@
 
 #include "unittest.h"
 #include "rapidjson/reader.h"
+#include "rapidjson/writer.h"
 
 #ifdef __GNUC__
 RAPIDJSON_DIAG_PUSH
@@ -108,6 +109,60 @@ TEST(SIMD, SIMD_SUFFIX(ScanCopyUnescapedString)) {
     TestScanCopyUnescapedString<kParseInsituFlag, InsituStringStream>();
 }
 
+TEST(SIMD, SIMD_SUFFIX(ScanWriteUnescapedString)) {
+    for (size_t step = 0; step < 1024; step++) {
+        char s[2048 + 1];
+        char *p = s;
+        for (size_t i = 0; i < step; i++)
+            *p++= "ABCD"[i % 4];
+        // 0x1F is the largest control character; it guards the "< 0x20" SIMD mask.
+        char escape = "\0\n\\\"\x1F"[step % 5];
+        *p++ = escape;
+        for (size_t i = 0; i < step; i++)
+            *p++= "ABCD"[i % 4];
+
+        StringBuffer sb;
+        Writer<StringBuffer> writer(sb);
+        writer.String(s, SizeType(step * 2 + 1));
+        const char* q = sb.GetString();
+        EXPECT_EQ('\"', *q++);
+        for (size_t i = 0; i < step; i++)
+            EXPECT_EQ("ABCD"[i % 4], *q++);
+        if (escape == '\0') {
+            EXPECT_EQ('\\', *q++);
+            EXPECT_EQ('u', *q++);
+            EXPECT_EQ('0', *q++);
+            EXPECT_EQ('0', *q++);
+            EXPECT_EQ('0', *q++);
+            EXPECT_EQ('0', *q++);
+        }
+        else if (escape == '\n') {
+            EXPECT_EQ('\\', *q++);
+            EXPECT_EQ('n', *q++);
+        }
+        else if (escape == '\\') {
+            EXPECT_EQ('\\', *q++);
+            EXPECT_EQ('\\', *q++);
+        }
+        else if (escape == '\"') {
+            EXPECT_EQ('\\', *q++);
+            EXPECT_EQ('\"', *q++);
+        }
+        else if (escape == '\x1F') {
+            EXPECT_EQ('\\', *q++);
+            EXPECT_EQ('u', *q++);
+            EXPECT_EQ('0', *q++);
+            EXPECT_EQ('0', *q++);
+            EXPECT_EQ('1', *q++);
+            EXPECT_EQ('F', *q++);
+        }
+        for (size_t i = 0; i < step; i++)
+            EXPECT_EQ("ABCD"[i % 4], *q++);
+        EXPECT_EQ('\"', *q++);
+        EXPECT_EQ('\0', *q++);
+    }
+}
+
 #ifdef __GNUC__
 RAPIDJSON_DIAG_POP
 #endif