Optimize ScanCopyUnescapedString for insitu parsing
This commit is contained in:
parent
f13caadded
commit
a5990f3eea
@ -705,6 +705,9 @@ private:
|
|||||||
internal::StreamLocalCopy<InputStream> copy(is);
|
internal::StreamLocalCopy<InputStream> copy(is);
|
||||||
InputStream& s(copy.s);
|
InputStream& s(copy.s);
|
||||||
|
|
||||||
|
RAPIDJSON_ASSERT(s.Peek() == '\"');
|
||||||
|
s.Take(); // Skip '\"'
|
||||||
|
|
||||||
bool success = false;
|
bool success = false;
|
||||||
if (parseFlags & kParseInsituFlag) {
|
if (parseFlags & kParseInsituFlag) {
|
||||||
typename InputStream::Ch *head = s.PutBegin();
|
typename InputStream::Ch *head = s.PutBegin();
|
||||||
@ -743,9 +746,6 @@ private:
|
|||||||
#undef Z16
|
#undef Z16
|
||||||
//!@endcond
|
//!@endcond
|
||||||
|
|
||||||
RAPIDJSON_ASSERT(is.Peek() == '\"');
|
|
||||||
is.Take(); // Skip '\"'
|
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
// Scan and copy string before "\\\"" or < 0x20. This is an optional optimization.
|
// Scan and copy string before "\\\"" or < 0x20. This is an optional optimization.
|
||||||
if (!(parseFlags & kParseValidateEncodingFlag))
|
if (!(parseFlags & kParseValidateEncodingFlag))
|
||||||
@ -801,13 +801,14 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
|
#if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
|
||||||
|
// StringStream -> StackStream<char>
|
||||||
static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
|
static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
|
||||||
const char* p = is.src_;
|
const char* p = is.src_;
|
||||||
|
|
||||||
// Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
|
// Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
|
||||||
const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
|
const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
|
||||||
while (p != nextAligned)
|
while (p != nextAligned)
|
||||||
if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(*p < 0x20)) {
|
if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
|
||||||
is.src_ = p;
|
is.src_ = p;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -850,6 +851,107 @@ private:
|
|||||||
|
|
||||||
is.src_ = p;
|
is.src_ = p;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// InsituStringStream -> InsituStringStream
|
||||||
|
//! Copies the leading run of unescaped characters of an in-situ string from
//! the read cursor (src_) down to the write cursor (dst_) of the same stream.
/*!
    Scanning stops at the first '\"', '\\' or control character (< 0x20).
    On return, is.src_ points at that stopping character and is.dst_ points
    just past the last character copied; the stopping character itself is
    neither copied nor consumed.

    \param is Stream to read from.
    \param os Stream to write to; must be the same object as \c is.

    \note The SIMD loop has no length limit; it ends only when a stopping
          character is found. Presumably the buffer is null-terminated
          ('\0' is < 0x20) -- confirm against callers.
*/
static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
    RAPIDJSON_ASSERT(&is == &os);
    (void)os; // Only referenced by the assertion; avoids an unused-parameter warning when it is compiled out.

    // Nothing has been removed from the string yet, so there is nothing to
    // move: just advance both cursors over the unescaped run.
    if (is.src_ == is.dst_) {
        SkipUnescapedString(is);
        return;
    }

    char* p = is.src_;
    char* q = is.dst_;

    // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    while (p != nextAligned) {
        if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
            // Write back and return directly: a goto to a label placed after
            // the SIMD constants below would jump past their initialization,
            // which is ill-formed C++.
            is.src_ = p;
            is.dst_ = q;
            return;
        }
        *q++ = *p++;
    }

    // The rest of string using SIMD
    static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
    static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
    // Largest control character. It must be 0x1F (not 0x19), otherwise
    // bytes 0x1A..0x1F would be copied as if they were ordinary characters.
    static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
    const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
    const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
    const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));

    for (;; p += 16, q += 16) {
        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
        const __m128i t1 = _mm_cmpeq_epi8(s, dq);
        const __m128i t2 = _mm_cmpeq_epi8(s, bs);
        const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
        const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
        unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
        if (RAPIDJSON_UNLIKELY(r != 0)) {   // at least one character in this chunk stops the scan
            size_t length;
#ifdef _MSC_VER         // Find the index of the first stopping character
            unsigned long offset;
            _BitScanForward(&offset, r);
            length = offset;
#else
            length = static_cast<size_t>(__builtin_ffs(r) - 1);
#endif
            // Copy only the characters that precede the stopping character.
            for (const char* pend = p + length; p != pend; )
                *q++ = *p++;
            break;
        }
        // The whole chunk is plain text. The chunk was already loaded into
        // s, so storing it at q is safe even if [q, q+16) overlaps [p, p+16)
        // (assumes dst_ trails src_ here -- confirm).
        _mm_storeu_si128(reinterpret_cast<__m128i *>(q), s);
    }

    is.src_ = p;
    is.dst_ = q;
}
|
||||||
|
|
||||||
|
// When read/write pointers are the same for insitu stream, just skip unescaped characters
|
||||||
|
//! Advances both cursors of an in-situ stream over the leading run of
//! unescaped characters, without copying anything.
/*!
    Requires is.src_ == is.dst_ on entry. Scanning stops at the first '\"',
    '\\' or control character (< 0x20); on return both is.src_ and is.dst_
    point at that stopping character.

    \param is In-situ stream whose read and write cursors coincide.

    \note The SIMD loop has no length limit; it ends only when a stopping
          character is found. Presumably the buffer is null-terminated
          ('\0' is < 0x20) -- confirm against callers.
*/
static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
    RAPIDJSON_ASSERT(is.src_ == is.dst_);
    char* p = is.src_;

    // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
    for (; p != nextAligned; p++) {
        if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
            // Write back and return directly: a goto to a label placed after
            // the SIMD constants below would jump past their initialization,
            // which is ill-formed C++.
            is.src_ = is.dst_ = p;
            return;
        }
    }

    // The rest of string using SIMD
    static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
    static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
    // Largest control character. It must be 0x1F (not 0x19), otherwise
    // bytes 0x1A..0x1F would be skipped as if they were ordinary characters.
    static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
    const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
    const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
    const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));

    for (;; p += 16) {
        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
        const __m128i t1 = _mm_cmpeq_epi8(s, dq);
        const __m128i t2 = _mm_cmpeq_epi8(s, bs);
        const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
        const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
        unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
        if (RAPIDJSON_UNLIKELY(r != 0)) {   // at least one character in this chunk stops the scan
            size_t length;
#ifdef _MSC_VER         // Find the index of the first stopping character
            unsigned long offset;
            _BitScanForward(&offset, r);
            length = offset;
#else
            length = static_cast<size_t>(__builtin_ffs(r) - 1);
#endif
            p += length;
            break;
        }
    }

    is.src_ = is.dst_ = p;
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template<typename InputStream, bool backup>
|
template<typename InputStream, bool backup>
|
||||||
|
@ -135,11 +135,11 @@ public:
|
|||||||
sprintf(filename, "%s/%s", typespaths[i], typesfilenames[j]);
|
sprintf(filename, "%s/%s", typespaths[i], typesfilenames[j]);
|
||||||
if (FILE* fp = fopen(filename, "rb")) {
|
if (FILE* fp = fopen(filename, "rb")) {
|
||||||
fseek(fp, 0, SEEK_END);
|
fseek(fp, 0, SEEK_END);
|
||||||
size_t length = (size_t)ftell(fp);
|
typesLength_[j] = (size_t)ftell(fp);
|
||||||
fseek(fp, 0, SEEK_SET);
|
fseek(fp, 0, SEEK_SET);
|
||||||
types_[j] = (char*)malloc(length + 1);
|
types_[j] = (char*)malloc(typesLength_[j] + 1);
|
||||||
ASSERT_EQ(length, fread(types_[j], 1, length, fp));
|
ASSERT_EQ(typesLength_[j], fread(types_[j], 1, typesLength_[j], fp));
|
||||||
types_[j][length] = '\0';
|
types_[j][typesLength_[j]] = '\0';
|
||||||
fclose(fp);
|
fclose(fp);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -170,6 +170,7 @@ protected:
|
|||||||
char *whitespace_;
|
char *whitespace_;
|
||||||
size_t whitespace_length_;
|
size_t whitespace_length_;
|
||||||
char *types_[7];
|
char *types_[7];
|
||||||
|
size_t typesLength_[7];
|
||||||
|
|
||||||
static const size_t kTrialCount = 1000;
|
static const size_t kTrialCount = 1000;
|
||||||
};
|
};
|
||||||
|
@ -103,6 +103,15 @@ TEST_F(RapidJson, SIMD_SUFFIX(ReaderParse_DummyHandler_##Name)) {\
|
|||||||
Reader reader;\
|
Reader reader;\
|
||||||
EXPECT_TRUE(reader.Parse(s, h));\
|
EXPECT_TRUE(reader.Parse(s, h));\
|
||||||
}\
|
}\
|
||||||
|
}\
|
||||||
|
TEST_F(RapidJson, SIMD_SUFFIX(ReaderParseInsitu_DummyHandler_##Name)) {\
|
||||||
|
for (size_t i = 0; i < kTrialCount * 10; i++) {\
|
||||||
|
memcpy(temp_, types_[index], typesLength_[index] + 1);\
|
||||||
|
InsituStringStream s(temp_);\
|
||||||
|
BaseReaderHandler<> h;\
|
||||||
|
Reader reader;\
|
||||||
|
EXPECT_TRUE(reader.Parse<kParseInsituFlag>(s, h));\
|
||||||
|
}\
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_TYPED(0, Booleans)
|
TEST_TYPED(0, Booleans)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user