From d5d7171f6d21f2894e67bbe47c9a48e01114700f Mon Sep 17 00:00:00 2001 From: Charles Milette Date: Thu, 25 Jul 2019 16:21:11 -0400 Subject: [PATCH] Fix ARM NEON under MSVC --- include/rapidjson/internal/clzll.h | 72 ++++++++++++++++++++++++++++++ include/rapidjson/internal/diyfp.h | 18 +------- include/rapidjson/reader.h | 43 +++++++++--------- include/rapidjson/writer.h | 11 ++--- 4 files changed, 102 insertions(+), 42 deletions(-) create mode 100644 include/rapidjson/internal/clzll.h diff --git a/include/rapidjson/internal/clzll.h b/include/rapidjson/internal/clzll.h new file mode 100644 index 0000000..7c2fa48 --- /dev/null +++ b/include/rapidjson/internal/clzll.h @@ -0,0 +1,72 @@ +// Tencent is pleased to support the open source community by making RapidJSON available. +// +// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. +// +// Licensed under the MIT License (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at +// +// http://opensource.org/licenses/MIT +// +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +#ifndef RAPIDJSON_CLZLL_H_ +#define RAPIDJSON_CLZLL_H_ + +#include "../rapidjson.h" + +#if defined(_MSC_VER) +#include +#if defined(_WIN64) +#pragma intrinsic(_BitScanReverse64) +#else +#pragma intrinsic(_BitScanReverse) +#endif +#endif + +RAPIDJSON_NAMESPACE_BEGIN +namespace internal { + +#if defined(__GNUC__) && __GNUC__ >= 4 +#define RAPIDJSON_CLZLL __builtin_clzll +#else + +inline uint32_t clzll(uint64_t x) { + // Passing 0 to __builtin_clzll is UB in GCC and results in an + // infinite loop in the software implementation. 
+ RAPIDJSON_ASSERT(x != 0); + +#ifdef _MSC_VER + unsigned long r = 0; +#ifdef _WIN64 + _BitScanReverse64(&r, x); +#else + // Scan the high 32 bits. + if (_BitScanReverse(&r, static_cast(x >> 32))) + return 63 - (r + 32); + + // Scan the low 32 bits. + _BitScanReverse(&r, static_cast(x & 0xFFFFFFFF)); +#endif // _WIN64 + + return 63 - r; +#else + uint32_t r = 0; // leading-zero count starts at zero; the loop below increments it + while (!(x & (static_cast(1) << 63))) { + x <<= 1; + ++r; + } + + return r; +#endif // _MSC_VER +} + +#define RAPIDJSON_CLZLL RAPIDJSON_NAMESPACE::internal::clzll +#endif // defined(__GNUC__) && __GNUC__ >= 4 + +} // namespace internal +RAPIDJSON_NAMESPACE_END + +#endif // RAPIDJSON_CLZLL_H_ \ No newline at end of file diff --git a/include/rapidjson/internal/diyfp.h b/include/rapidjson/internal/diyfp.h index b6c2cf5..ab5ee07 100644 --- a/include/rapidjson/internal/diyfp.h +++ b/include/rapidjson/internal/diyfp.h @@ -20,11 +20,11 @@ #define RAPIDJSON_DIYFP_H_ #include "../rapidjson.h" +#include "clzll.h" #include #if defined(_MSC_VER) && defined(_M_AMD64) && !defined(__INTEL_COMPILER) #include -#pragma intrinsic(_BitScanReverse64) #pragma intrinsic(_umul128) #endif @@ -100,22 +100,8 @@ struct DiyFp { } DiyFp Normalize() const { - RAPIDJSON_ASSERT(f != 0); // https://stackoverflow.com/a/26809183/291737 -#if defined(_MSC_VER) && defined(_M_AMD64) - unsigned long index; - _BitScanReverse64(&index, f); - return DiyFp(f << (63 - index), e - (63 - index)); -#elif defined(__GNUC__) && __GNUC__ >= 4 - int s = __builtin_clzll(f); + uint32_t s = RAPIDJSON_CLZLL(f); return DiyFp(f << s, e - s); -#else - DiyFp res = *this; - while (!(res.f & (static_cast(1) << 63))) { - res.f <<= 1; - res.e--; - } - return res; -#endif } DiyFp NormalizeBoundary() const { diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index 44a6bcd..13d27c2 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -20,6 +20,7 @@ #include "allocators.h" #include 
"stream.h" #include 
"encodedstream.h" +#include "internal/clzll.h" #include "internal/meta.h" #include "internal/stack.h" #include "internal/strtod.h" @@ -443,16 +444,16 @@ inline const char *SkipWhitespace_SIMD(const char* p) { x = vmvnq_u8(x); // Negate x = vrev64q_u8(x); // Rev in 64 - uint64_t low = vgetq_lane_u64(reinterpret_cast(x), 0); // extract - uint64_t high = vgetq_lane_u64(reinterpret_cast(x), 1); // extract + uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract + uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract if (low == 0) { if (high != 0) { - int lz =__builtin_clzll(high);; + uint32_t lz = RAPIDJSON_CLZLL(high); return p + 8 + (lz >> 3); } } else { - int lz = __builtin_clzll(low);; + uint32_t lz = RAPIDJSON_CLZLL(low); return p + (lz >> 3); } } @@ -479,16 +480,16 @@ inline const char *SkipWhitespace_SIMD(const char* p, const char* end) { x = vmvnq_u8(x); // Negate x = vrev64q_u8(x); // Rev in 64 - uint64_t low = vgetq_lane_u64(reinterpret_cast(x), 0); // extract - uint64_t high = vgetq_lane_u64(reinterpret_cast(x), 1); // extract + uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract + uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract if (low == 0) { if (high != 0) { - int lz = __builtin_clzll(high); + uint32_t lz = RAPIDJSON_CLZLL(high); return p + 8 + (lz >> 3); } } else { - int lz = __builtin_clzll(low); + uint32_t lz = RAPIDJSON_CLZLL(low); return p + (lz >> 3); } } @@ -1244,19 +1245,19 @@ private: x = vorrq_u8(x, vcltq_u8(s, s3)); x = vrev64q_u8(x); // Rev in 64 - uint64_t low = vgetq_lane_u64(reinterpret_cast(x), 0); // extract - uint64_t high = vgetq_lane_u64(reinterpret_cast(x), 1); // extract + uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract + uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract SizeType length = 0; bool escaped = false; if (low == 0) { if (high != 0) { - unsigned lz = (unsigned)__builtin_clzll(high);; + uint32_t lz = 
RAPIDJSON_CLZLL(high); length = 8 + (lz >> 3); escaped = true; } } else { - unsigned lz = (unsigned)__builtin_clzll(low);; + uint32_t lz = RAPIDJSON_CLZLL(low); length = lz >> 3; escaped = true; } @@ -1314,19 +1315,19 @@ private: x = vorrq_u8(x, vcltq_u8(s, s3)); x = vrev64q_u8(x); // Rev in 64 - uint64_t low = vgetq_lane_u64(reinterpret_cast(x), 0); // extract - uint64_t high = vgetq_lane_u64(reinterpret_cast(x), 1); // extract + uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract + uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract SizeType length = 0; bool escaped = false; if (low == 0) { if (high != 0) { - unsigned lz = (unsigned)__builtin_clzll(high); + uint32_t lz = RAPIDJSON_CLZLL(high); length = 8 + (lz >> 3); escaped = true; } } else { - unsigned lz = (unsigned)__builtin_clzll(low); + uint32_t lz = RAPIDJSON_CLZLL(low); length = lz >> 3; escaped = true; } @@ -1370,17 +1371,17 @@ private: x = vorrq_u8(x, vcltq_u8(s, s3)); x = vrev64q_u8(x); // Rev in 64 - uint64_t low = vgetq_lane_u64(reinterpret_cast(x), 0); // extract - uint64_t high = vgetq_lane_u64(reinterpret_cast(x), 1); // extract + uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract + uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract if (low == 0) { if (high != 0) { - int lz = __builtin_clzll(high); + uint32_t lz = RAPIDJSON_CLZLL(high); p += 8 + (lz >> 3); break; } } else { - int lz = __builtin_clzll(low); + uint32_t lz = RAPIDJSON_CLZLL(low); p += lz >> 3; break; } @@ -1403,7 +1404,7 @@ private: RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); } RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); } RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); } - RAPIDJSON_FORCEINLINE void Push(char) {} + RAPIDJSON_FORCEINLINE void Push(char) {} size_t Tell() { return is.Tell(); } size_t Length() { return 0; } diff --git a/include/rapidjson/writer.h b/include/rapidjson/writer.h index 6f5b690..ce39e76 100644 --- 
a/include/rapidjson/writer.h +++ b/include/rapidjson/writer.h @@ -16,6 +16,7 @@ #define RAPIDJSON_WRITER_H_ #include "stream.h" +#include "internal/clzll.h" #include "internal/meta.h" #include "internal/stack.h" #include "internal/strfunc.h" @@ -226,7 +227,7 @@ public: return Key(str.data(), SizeType(str.size())); } #endif - + bool EndObject(SizeType memberCount = 0) { (void)memberCount; RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level)); // not inside an Object @@ -668,19 +669,19 @@ inline bool Writer::ScanWriteUnescapedString(StringStream& is, siz x = vorrq_u8(x, vcltq_u8(s, s3)); x = vrev64q_u8(x); // Rev in 64 - uint64_t low = vgetq_lane_u64(reinterpret_cast(x), 0); // extract - uint64_t high = vgetq_lane_u64(reinterpret_cast(x), 1); // extract + uint64_t low = vgetq_lane_u64(vreinterpretq_u64_u8(x), 0); // extract + uint64_t high = vgetq_lane_u64(vreinterpretq_u64_u8(x), 1); // extract SizeType len = 0; bool escaped = false; if (low == 0) { if (high != 0) { - unsigned lz = (unsigned)__builtin_clzll(high); + uint32_t lz = RAPIDJSON_CLZLL(high); len = 8 + (lz >> 3); escaped = true; } } else { - unsigned lz = (unsigned)__builtin_clzll(low); + uint32_t lz = RAPIDJSON_CLZLL(low); len = lz >> 3; escaped = true; }