From 6978778884ea112fd07b4680c43a269c4cb636eb Mon Sep 17 00:00:00 2001 From: Milo Yip Date: Sat, 9 Aug 2014 21:11:37 +0800 Subject: [PATCH 1/5] Change double parsing with fast-path conversion Accurate rounding in normal numerical ranges, also reduce lookup table size. --- include/rapidjson/internal/pow10.h | 25 +++++-------------------- include/rapidjson/reader.h | 18 +++++++++++++++--- 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/include/rapidjson/internal/pow10.h b/include/rapidjson/internal/pow10.h index 0852539..d6b92f0 100644 --- a/include/rapidjson/internal/pow10.h +++ b/include/rapidjson/internal/pow10.h @@ -6,27 +6,12 @@ namespace internal { //! Computes integer powers of 10 in double (10.0^n). /*! This function uses lookup table for fast and accurate results. - \param n positive/negative exponent. Must <= 308. + \param n non-negative exponent. Must <= 308. \return 10.0^n */ inline double Pow10(int n) { - static const double e[] = { // 1e-308...1e308: 617 * 8 bytes = 4936 bytes - 1e-308,1e-307,1e-306,1e-305,1e-304,1e-303,1e-302,1e-301,1e-300, - 1e-299,1e-298,1e-297,1e-296,1e-295,1e-294,1e-293,1e-292,1e-291,1e-290,1e-289,1e-288,1e-287,1e-286,1e-285,1e-284,1e-283,1e-282,1e-281,1e-280, - 1e-279,1e-278,1e-277,1e-276,1e-275,1e-274,1e-273,1e-272,1e-271,1e-270,1e-269,1e-268,1e-267,1e-266,1e-265,1e-264,1e-263,1e-262,1e-261,1e-260, - 1e-259,1e-258,1e-257,1e-256,1e-255,1e-254,1e-253,1e-252,1e-251,1e-250,1e-249,1e-248,1e-247,1e-246,1e-245,1e-244,1e-243,1e-242,1e-241,1e-240, - 1e-239,1e-238,1e-237,1e-236,1e-235,1e-234,1e-233,1e-232,1e-231,1e-230,1e-229,1e-228,1e-227,1e-226,1e-225,1e-224,1e-223,1e-222,1e-221,1e-220, - 1e-219,1e-218,1e-217,1e-216,1e-215,1e-214,1e-213,1e-212,1e-211,1e-210,1e-209,1e-208,1e-207,1e-206,1e-205,1e-204,1e-203,1e-202,1e-201,1e-200, - 1e-199,1e-198,1e-197,1e-196,1e-195,1e-194,1e-193,1e-192,1e-191,1e-190,1e-189,1e-188,1e-187,1e-186,1e-185,1e-184,1e-183,1e-182,1e-181,1e-180, - 
1e-179,1e-178,1e-177,1e-176,1e-175,1e-174,1e-173,1e-172,1e-171,1e-170,1e-169,1e-168,1e-167,1e-166,1e-165,1e-164,1e-163,1e-162,1e-161,1e-160, - 1e-159,1e-158,1e-157,1e-156,1e-155,1e-154,1e-153,1e-152,1e-151,1e-150,1e-149,1e-148,1e-147,1e-146,1e-145,1e-144,1e-143,1e-142,1e-141,1e-140, - 1e-139,1e-138,1e-137,1e-136,1e-135,1e-134,1e-133,1e-132,1e-131,1e-130,1e-129,1e-128,1e-127,1e-126,1e-125,1e-124,1e-123,1e-122,1e-121,1e-120, - 1e-119,1e-118,1e-117,1e-116,1e-115,1e-114,1e-113,1e-112,1e-111,1e-110,1e-109,1e-108,1e-107,1e-106,1e-105,1e-104,1e-103,1e-102,1e-101,1e-100, - 1e-99, 1e-98, 1e-97, 1e-96, 1e-95, 1e-94, 1e-93, 1e-92, 1e-91, 1e-90, 1e-89, 1e-88, 1e-87, 1e-86, 1e-85, 1e-84, 1e-83, 1e-82, 1e-81, 1e-80, - 1e-79, 1e-78, 1e-77, 1e-76, 1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65, 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, - 1e-59, 1e-58, 1e-57, 1e-56, 1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, - 1e-39, 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, - 1e-19, 1e-18, 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e+0, + static const double e[] = { // 1e-0...1e308: 309 * 8 bytes = 2472 bytes + 1e+0, 1e+1, 1e+2, 1e+3, 1e+4, 1e+5, 1e+6, 1e+7, 1e+8, 1e+9, 1e+10, 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17, 1e+18, 1e+19, 1e+20, 1e+21, 1e+22, 1e+23, 1e+24, 1e+25, 1e+26, 1e+27, 1e+28, 1e+29, 1e+30, 1e+31, 1e+32, 1e+33, 1e+34, 1e+35, 1e+36, 1e+37, 1e+38, 1e+39, 1e+40, 1e+41, 1e+42, 1e+43, 1e+44, 1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50, 1e+51, 1e+52, 1e+53, 1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60, @@ -44,8 +29,8 @@ inline double Pow10(int n) { 1e+281,1e+282,1e+283,1e+284,1e+285,1e+286,1e+287,1e+288,1e+289,1e+290,1e+291,1e+292,1e+293,1e+294,1e+295,1e+296,1e+297,1e+298,1e+299,1e+300, 
1e+301,1e+302,1e+303,1e+304,1e+305,1e+306,1e+307,1e+308 }; - RAPIDJSON_ASSERT(n <= 308); - return n < -308 ? 0.0 : e[n + 308]; + RAPIDJSON_ASSERT(n >= 0 && n <= 308); + return e[n]; } } // namespace internal diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h index 8d4c1d1..647ecde 100644 --- a/include/rapidjson/reader.h +++ b/include/rapidjson/reader.h @@ -668,6 +668,18 @@ private: } } + inline double StrtodFastPath(double significand, int exp) { + // Fast path only works on limited range of values. + // But for simplicity and performance, currently only implement this. + // see http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/ + if (exp < -308) + return 0.0; + else if (exp >= 0) + return significand * internal::Pow10(exp); + else + return significand / internal::Pow10(-exp); + } + template void ParseNumber(InputStream& is, Handler& handler) { internal::StreamLocalCopy copy(is); @@ -813,11 +825,11 @@ private: int expSum = exp + expFrac; if (expSum < -308) { // Prevent expSum < -308, making Pow10(expSum) = 0 - d *= internal::Pow10(exp); - d *= internal::Pow10(expFrac); + d = StrtodFastPath(d, exp); + d = StrtodFastPath(d, expFrac); } else - d *= internal::Pow10(expSum); + d = StrtodFastPath(d, expSum); cont = handler.Double(minus ? -d : d); } From a7762a345336cdeb57cbfd03f48350c16b55b6f2 Mon Sep 17 00:00:00 2001 From: Milo Yip Date: Sat, 9 Aug 2014 21:12:58 +0800 Subject: [PATCH 2/5] Custom dtoa() implementation Modified from Milo's Grisu2 implementation. 99.9% of cases return the shortest decimal format. 
--- include/rapidjson/internal/dtoa.h | 414 ++++++++++++++++++++++++++++++ include/rapidjson/writer.h | 30 +-- test/unittest/writertest.cpp | 111 ++++---- 3 files changed, 487 insertions(+), 68 deletions(-) create mode 100644 include/rapidjson/internal/dtoa.h diff --git a/include/rapidjson/internal/dtoa.h b/include/rapidjson/internal/dtoa.h new file mode 100644 index 0000000..a2f8aae --- /dev/null +++ b/include/rapidjson/internal/dtoa.h @@ -0,0 +1,414 @@ +// Modified from https://github.com/miloyip/dtoa-benchmark/blob/master/src/milo/dtoa_milo.h +// API is changed to return the character past the end of string, without writing '\0' + +#ifndef RAPIDJSON_DTOA_ +#define RAPIDJSON_DTOA_ + +#if defined(_MSC_VER) +#include +#if defined(_M_AMD64) +#pragma intrinsic(_BitScanReverse64) +#endif +#endif + +#include "itoa.h" // GetDigitsLut() + +namespace rapidjson { +namespace internal { + +struct DiyFp { + DiyFp() {} + + DiyFp(uint64_t f, int e) : f(f), e(e) {} + + DiyFp(double d) { + union { + double d; + uint64_t u64; + } u = { d }; + + int biased_e = (u.u64 & kDpExponentMask) >> kDpSignificandSize; + uint64_t significand = (u.u64 & kDpSignificandMask); + if (biased_e != 0) { + f = significand + kDpHiddenBit; + e = biased_e - kDpExponentBias; + } + else { + f = significand; + e = kDpMinExponent + 1; + } + } + + DiyFp operator-(const DiyFp& rhs) const { + return DiyFp(f - rhs.f, e); + } + + DiyFp operator*(const DiyFp& rhs) const { +#if defined(_MSC_VER) && defined(_M_AMD64) + uint64_t h; + uint64_t l = _umul128(f, rhs.f, &h); + if (l & (uint64_t(1) << 63)) // rounding + h++; + return DiyFp(h, e + rhs.e + 64); +#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__) + unsigned __int128 p = static_cast(f) * static_cast(rhs.f); + uint64_t h = p >> 64; + uint64_t l = static_cast(p); + if (l & (uint64_t(1) << 63)) // rounding + h++; + return DiyFp(h, e + rhs.e + 64); +#else + const uint64_t M32 = 0xFFFFFFFF; + const uint64_t a = f >> 32; + const 
uint64_t b = f & M32; + const uint64_t c = rhs.f >> 32; + const uint64_t d = rhs.f & M32; + const uint64_t ac = a * c; + const uint64_t bc = b * c; + const uint64_t ad = a * d; + const uint64_t bd = b * d; + uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32); + tmp += 1U << 31; /// mult_round + return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64); +#endif + } + + DiyFp Normalize() const { // shift f left until bit 63 is set, lowering e by the same amount so the value is unchanged +#if defined(_MSC_VER) && defined(_M_AMD64) + unsigned long index; + _BitScanReverse64(&index, f); + return DiyFp(f << (63 - index), e - (63 - index)); +#elif defined(__GNUC__) + int s = __builtin_clzll(f); // clz(f) == 63 - index, same shift as the MSVC branch; one more would push the top set bit out of f + return DiyFp(f << s, e - s); +#else + DiyFp res = *this; + while (!(res.f & kDpHiddenBit)) { + res.f <<= 1; + res.e--; + } + res.f <<= (kDiySignificandSize - kDpSignificandSize - 1); + res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 1); + return res; +#endif + } + + DiyFp NormalizeBoundary() const { +#if defined(_MSC_VER) && defined(_M_AMD64) + unsigned long index; + _BitScanReverse64(&index, f); + return DiyFp (f << (63 - index), e - (63 - index)); +#else + DiyFp res = *this; + while (!(res.f & (kDpHiddenBit << 1))) { + res.f <<= 1; + res.e--; + } + res.f <<= (kDiySignificandSize - kDpSignificandSize - 2); + res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2); + return res; +#endif + } + + void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const { + DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary(); + DiyFp mi = (f == kDpHiddenBit) ? 
DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1); + mi.f <<= mi.e - pl.e; + mi.e = pl.e; + *plus = pl; + *minus = mi; + } + + static const int kDiySignificandSize = 64; + static const int kDpSignificandSize = 52; + static const int kDpExponentBias = 0x3FF + kDpSignificandSize; + static const int kDpMinExponent = -kDpExponentBias; + static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000); + static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF); + static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000); + + uint64_t f; + int e; +}; + +inline DiyFp GetCachedPower(int e, int* K) { + // 10^-348, 10^-340, ..., 10^340 + static const uint64_t kCachedPowers_F[] = { + RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76), + RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea), + RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df), + RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f), + RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c), + RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5), + RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d), + RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637), + RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7), + RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5), + RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b), + RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996), + RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6), + RAPIDJSON_UINT64_C2(0xf3e2f893, 
0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8), + RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053), + RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd), + RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94), + RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b), + RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac), + RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3), + RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb), + RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c), + RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000), + RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984), + RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70), + RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245), + RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8), + RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a), + RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea), + RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85), + RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2), + RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3), + RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25), + RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece), + RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5), + 
RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a), + RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c), + RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a), + RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129), + RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429), + RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d), + RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841), + RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9), + RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b) + }; + static const int16_t kCachedPowers_E[] = { + -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, + -954, -927, -901, -874, -847, -821, -794, -768, -741, -715, + -688, -661, -635, -608, -582, -555, -529, -502, -475, -449, + -422, -396, -369, -343, -316, -289, -263, -236, -210, -183, + -157, -130, -103, -77, -50, -24, 3, 30, 56, 83, + 109, 136, 162, 189, 216, 242, 269, 295, 322, 348, + 375, 402, 428, 455, 481, 508, 534, 561, 588, 614, + 641, 667, 694, 720, 747, 774, 800, 827, 853, 880, + 907, 933, 960, 986, 1013, 1039, 1066 + }; + + //int k = static_cast(ceil((-61 - e) * 0.30102999566398114)) + 374; + double dk = (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive + int k = static_cast(dk); + if (k != dk) + k++; + + unsigned index = static_cast((k >> 3) + 1); + *K = -(-348 + static_cast(index << 3)); // decimal exponent no need lookup table + + return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]); +} + +inline void GrisuRound(char* buffer, int len, uint64_t delta, uint64_t rest, uint64_t ten_kappa, uint64_t wp_w) { + while (rest < wp_w && delta - rest >= ten_kappa && + (rest + ten_kappa < wp_w || /// closer + wp_w - rest > rest + 
ten_kappa - wp_w)) { + buffer[len - 1]--; + rest += ten_kappa; + } +} + +inline unsigned CountDecimalDigit32(uint32_t n) { + static const uint32_t powers_of_10[] = { + 0, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000, + 1000000000 + }; + +#ifdef _MSC_VER + unsigned long i = 0; + _BitScanReverse(&i, n | 1); + uint32_t t = (i + 1) * 1233 >> 12; +#elif __GNUC__ + uint32_t t = (32 - __builtin_clz(n | 1)) * 1233 >> 12; +#endif + return t - (n < powers_of_10[t]) + 1; +} + +inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buffer, int* len, int* K) { + static const uint32_t kPow10[] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 }; + const DiyFp one(uint64_t(1) << -Mp.e, Mp.e); + const DiyFp wp_w = Mp - W; + uint32_t p1 = static_cast(Mp.f >> -one.e); + uint64_t p2 = Mp.f & (one.f - 1); + int kappa = CountDecimalDigit32(p1); + *len = 0; + + while (kappa > 0) { + uint32_t d; + switch (kappa) { + case 10: d = p1 / 1000000000; p1 %= 1000000000; break; + case 9: d = p1 / 100000000; p1 %= 100000000; break; + case 8: d = p1 / 10000000; p1 %= 10000000; break; + case 7: d = p1 / 1000000; p1 %= 1000000; break; + case 6: d = p1 / 100000; p1 %= 100000; break; + case 5: d = p1 / 10000; p1 %= 10000; break; + case 4: d = p1 / 1000; p1 %= 1000; break; + case 3: d = p1 / 100; p1 %= 100; break; + case 2: d = p1 / 10; p1 %= 10; break; + case 1: d = p1; p1 = 0; break; + default: +#if defined(_MSC_VER) + __assume(0); +#elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5) + __builtin_unreachable(); +#else + d = 0; +#endif + } + if (d || *len) + buffer[(*len)++] = '0' + static_cast(d); + kappa--; + uint64_t tmp = (static_cast(p1) << -one.e) + p2; + if (tmp <= delta) { + *K += kappa; + GrisuRound(buffer, *len, delta, tmp, static_cast(kPow10[kappa]) << -one.e, wp_w.f); + return; + } + } + + // kappa = 0 + for (;;) { + p2 *= 10; + delta *= 10; + char d = static_cast(p2 >> -one.e); + if (d || *len) + 
buffer[(*len)++] = '0' + d; + p2 &= one.f - 1; + kappa--; + if (p2 < delta) { + *K += kappa; + GrisuRound(buffer, *len, delta, p2, one.f, wp_w.f * kPow10[-kappa]); + return; + } + } +} + +inline void Grisu2(double value, char* buffer, int* length, int* K) { + const DiyFp v(value); + DiyFp w_m, w_p; + v.NormalizedBoundaries(&w_m, &w_p); + + const DiyFp c_mk = GetCachedPower(w_p.e, K); + const DiyFp W = v.Normalize() * c_mk; + DiyFp Wp = w_p * c_mk; + DiyFp Wm = w_m * c_mk; + Wm.f++; + Wp.f--; + DigitGen(W, Wp, Wp.f - Wm.f, buffer, length, K); +} + +//inline const char* GetDigitsLut() { +// static const char cDigitsLut[200] = { +// '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', +// '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', +// '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9', +// '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', +// '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9', +// '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9', +// '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9', +// '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7', '7', '8', '7', '9', +// '8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9', +// '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9' +// }; +// return cDigitsLut; +//} + +inline char* WriteExponent(int K, char* buffer) { + if (K < 0) { + *buffer++ = '-'; + K = -K; + } + + if (K >= 100) { + *buffer++ = '0' + static_cast(K / 100); + K %= 100; + const char* d = GetDigitsLut() + K * 2; + *buffer++ = d[0]; + *buffer++ = d[1]; + } + else if (K >= 10) { + const 
char* d = GetDigitsLut() + K * 2; + *buffer++ = d[0]; + *buffer++ = d[1]; + } + else + *buffer++ = '0' + static_cast(K); + + return buffer; +} + +inline char* Prettify(char* buffer, int length, int k) { + const int kk = length + k; // 10^(kk-1) <= v < 10^kk + + if (length <= kk && kk <= 21) { + // 1234e7 -> 12340000000 + for (int i = length; i < kk; i++) + buffer[i] = '0'; + buffer[kk] = '.'; + buffer[kk + 1] = '0'; + return &buffer[kk + 2]; + } + else if (0 < kk && kk <= 21) { + // 1234e-2 -> 12.34 + memmove(&buffer[kk + 1], &buffer[kk], length - kk); + buffer[kk] = '.'; + return &buffer[length + 1]; + } + else if (-6 < kk && kk <= 0) { + // 1234e-6 -> 0.001234 + const int offset = 2 - kk; + memmove(&buffer[offset], &buffer[0], length); + buffer[0] = '0'; + buffer[1] = '.'; + for (int i = 2; i < offset; i++) + buffer[i] = '0'; + return &buffer[length + offset]; + } + else if (length == 1) { + // 1e30 + buffer[1] = 'e'; + return WriteExponent(kk - 1, &buffer[2]); + } + else { + // 1234e30 -> 1.234e33 + memmove(&buffer[2], &buffer[1], length - 1); + buffer[1] = '.'; + buffer[length + 1] = 'e'; + return WriteExponent(kk - 1, &buffer[0 + length + 2]); + } +} + +inline char* dtoa(double value, char* buffer) { + if (value == 0) { + buffer[0] = '0'; + buffer[1] = '.'; + buffer[2] = '0'; + return &buffer[3]; + } + else { + if (value < 0) { + *buffer++ = '-'; + value = -value; + } + int length, K; + Grisu2(value, buffer, &length, &K); + return Prettify(buffer, length, K); + } +} + +} // namespace internal +} // namespace rapidjson + +#endif // RAPIDJSON_DTOA_ diff --git a/include/rapidjson/writer.h b/include/rapidjson/writer.h index d72d274..af9fea3 100644 --- a/include/rapidjson/writer.h +++ b/include/rapidjson/writer.h @@ -4,9 +4,9 @@ #include "rapidjson.h" #include "internal/stack.h" #include "internal/strfunc.h" +#include "internal/dtoa.h" #include "internal/itoa.h" #include "stringbuffer.h" -#include // snprintf() or _sprintf_s() #include // placement new #ifdef 
_MSC_VER @@ -239,25 +239,17 @@ protected: bool WriteUint64(uint64_t u64) { char buffer[20]; - const char* end = internal::u64toa(u64, buffer); - for (const char* p = buffer; p != end; ++p) + char* end = internal::u64toa(u64, buffer); + for (char* p = buffer; p != end; ++p) os_->Put(*p); return true; } -#ifdef _MSC_VER -#define RAPIDJSON_SNPRINTF sprintf_s -#else -#define RAPIDJSON_SNPRINTF snprintf -#endif - - //! \todo Optimization with custom double-to-string converter. bool WriteDouble(double d) { - char buffer[100]; - int ret = RAPIDJSON_SNPRINTF(buffer, sizeof(buffer), "%.*g", doublePrecision_, d); - RAPIDJSON_ASSERT(ret >= 1); - for (int i = 0; i < ret; i++) - os_->Put(buffer[i]); + char buffer[25]; + char* end = internal::dtoa(d, buffer); + for (char* p = buffer; p != end; ++p) + os_->Put(*p); return true; } #undef RAPIDJSON_SNPRINTF @@ -403,6 +395,14 @@ inline bool Writer::WriteUint64(uint64_t u) { return true; } +template<> +inline bool Writer::WriteDouble(double d) { + char *buffer = os_->Push(25); + char* end = internal::dtoa(d, buffer); + os_->Pop(25 - (end - buffer)); + return true; +} + } // namespace rapidjson #ifdef _MSC_VER diff --git a/test/unittest/writertest.cpp b/test/unittest/writertest.cpp index 48d7c4e..c834227 100644 --- a/test/unittest/writertest.cpp +++ b/test/unittest/writertest.cpp @@ -60,62 +60,67 @@ TEST(Writer, String) { TEST_ROUNDTRIP("[\"\\\"\\\\/\\b\\f\\n\\r\\t\"]"); } -TEST(Writer,DoublePrecision) { - const char json[] = "[1.2345,1.2345678,0.123456789012,1234567.8]"; +TEST(Writer, Double) { + TEST_ROUNDTRIP("[1.2345,1.2345678,0.123456789012,1234567.8]"); - StringBuffer buffer; - Writer writer(buffer); - - const int kDefaultDoublePrecision = 6; - // handling the double precision - EXPECT_EQ(writer.GetDoublePrecision(), kDefaultDoublePrecision); - writer.SetDoublePrecision(17); - EXPECT_EQ(writer.GetDoublePrecision(), 17); - writer.SetDoublePrecision(-1); // negative equivalent to reset - EXPECT_EQ(writer.GetDoublePrecision(), 
kDefaultDoublePrecision); - writer.SetDoublePrecision(1); - writer.SetDoublePrecision(); // reset again - EXPECT_EQ(writer.GetDoublePrecision(), kDefaultDoublePrecision); - - { // write with explicitly increased precision - StringStream s(json); - Reader reader; - reader.Parse<0>(s, writer.SetDoublePrecision(12)); - EXPECT_EQ(writer.GetDoublePrecision(), 12); - EXPECT_STREQ(json, buffer.GetString()); - } - { // explicit individual double precisions - buffer.Clear(); - writer.Reset(buffer); - writer.SetDoublePrecision(2); - writer.StartArray(); - writer.Double(1.2345, 5); - writer.Double(1.2345678, 9); - writer.Double(0.123456789012, 12); - writer.Double(1234567.8, 8); - writer.EndArray(); - - EXPECT_EQ(writer.GetDoublePrecision(), 2); - EXPECT_STREQ(json, buffer.GetString()); - } - { // write with default precision (output with precision loss) - Document d; - d.Parse<0>(json); - buffer.Clear(); - writer.Reset(buffer); - d.Accept(writer.SetDoublePrecision()); - - // parsed again to avoid platform-dependent floating point outputs - // (e.g. 
width of exponents) - d.Parse<0>(buffer.GetString()); - EXPECT_EQ(writer.GetDoublePrecision(), kDefaultDoublePrecision); - EXPECT_DOUBLE_EQ(d[0u].GetDouble(), 1.2345); - EXPECT_DOUBLE_EQ(d[1u].GetDouble(), 1.23457); - EXPECT_DOUBLE_EQ(d[2u].GetDouble(), 0.123457); - EXPECT_DOUBLE_EQ(d[3u].GetDouble(), 1234570); - } } +//TEST(Writer,DoublePrecision) { +// const char json[] = "[1.2345,1.2345678,0.123456789012,1234567.8]"; +// +// StringBuffer buffer; +// Writer writer(buffer); +// +// const int kDefaultDoublePrecision = 6; +// // handling the double precision +// EXPECT_EQ(writer.GetDoublePrecision(), kDefaultDoublePrecision); +// writer.SetDoublePrecision(17); +// EXPECT_EQ(writer.GetDoublePrecision(), 17); +// writer.SetDoublePrecision(-1); // negative equivalent to reset +// EXPECT_EQ(writer.GetDoublePrecision(), kDefaultDoublePrecision); +// writer.SetDoublePrecision(1); +// writer.SetDoublePrecision(); // reset again +// EXPECT_EQ(writer.GetDoublePrecision(), kDefaultDoublePrecision); +// +// { // write with explicitly increased precision +// StringStream s(json); +// Reader reader; +// reader.Parse<0>(s, writer.SetDoublePrecision(12)); +// EXPECT_EQ(writer.GetDoublePrecision(), 12); +// EXPECT_STREQ(json, buffer.GetString()); +// } +// { // explicit individual double precisions +// buffer.Clear(); +// writer.Reset(buffer); +// writer.SetDoublePrecision(2); +// writer.StartArray(); +// writer.Double(1.2345, 5); +// writer.Double(1.2345678, 9); +// writer.Double(0.123456789012, 12); +// writer.Double(1234567.8, 8); +// writer.EndArray(); +// +// EXPECT_EQ(writer.GetDoublePrecision(), 2); +// EXPECT_STREQ(json, buffer.GetString()); +// } +// { // write with default precision (output with precision loss) +// Document d; +// d.Parse<0>(json); +// buffer.Clear(); +// writer.Reset(buffer); +// d.Accept(writer.SetDoublePrecision()); +// +// // parsed again to avoid platform-dependent floating point outputs +// // (e.g. 
width of exponents) +// d.Parse<0>(buffer.GetString()); +// EXPECT_EQ(writer.GetDoublePrecision(), kDefaultDoublePrecision); +// EXPECT_DOUBLE_EQ(d[0u].GetDouble(), 1.2345); +// EXPECT_DOUBLE_EQ(d[1u].GetDouble(), 1.23457); +// EXPECT_DOUBLE_EQ(d[2u].GetDouble(), 0.123457); +// EXPECT_DOUBLE_EQ(d[3u].GetDouble(), 1234570); +// } +//} + TEST(Writer, Transcode) { const char json[] = "{\"hello\":\"world\",\"t\":true,\"f\":false,\"n\":null,\"i\":123,\"pi\":3.1416,\"a\":[1,2,3],\"dollar\":\"\x24\",\"cents\":\"\xC2\xA2\",\"euro\":\"\xE2\x82\xAC\",\"gclef\":\"\xF0\x9D\x84\x9E\"}"; From 0d915644a4cf03ef14ecffe54e6b8cb506fd40e9 Mon Sep 17 00:00:00 2001 From: Milo Yip Date: Sat, 9 Aug 2014 21:27:32 +0800 Subject: [PATCH 3/5] Fixed gcc effc++ warning in dtoa.h --- include/rapidjson/internal/dtoa.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/rapidjson/internal/dtoa.h b/include/rapidjson/internal/dtoa.h index a2f8aae..044e2fb 100644 --- a/include/rapidjson/internal/dtoa.h +++ b/include/rapidjson/internal/dtoa.h @@ -16,6 +16,11 @@ namespace rapidjson { namespace internal { +#ifdef __GNUC__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(effc++) +#endif + struct DiyFp { DiyFp() {} @@ -408,6 +413,10 @@ inline char* dtoa(double value, char* buffer) { } } +#ifdef __GNUC__ +RAPIDJSON_DIAG_POP +#endif + } // namespace internal } // namespace rapidjson From 1900b7bacea0d3329cd3ecc7aba4262b4bea8307 Mon Sep 17 00:00:00 2001 From: Milo Yip Date: Sat, 9 Aug 2014 21:37:02 +0800 Subject: [PATCH 4/5] Remove double precision settings API in Writer --- doc/sax.md | 16 +-------- include/rapidjson/prettywriter.h | 12 ------- include/rapidjson/writer.h | 48 ++------------------------- test/unittest/writertest.cpp | 56 -------------------------------- 4 files changed, 3 insertions(+), 129 deletions(-) diff --git a/doc/sax.md b/doc/sax.md index 9ad2e4b..bbdc550 100644 --- a/doc/sax.md +++ b/doc/sax.md @@ -225,7 +225,7 @@ You may doubt that, why not just using `sprintf()` or 
`std::stringstream` to bui There are various reasons: 1. `Writer` must output a well-formed JSON. If there is incorrect event sequence (e.g. `Int()` just after `StartObject()`), it generates assertion fail in debug mode. 2. `Writer::String()` can handle string escaping (e.g. converting code point `U+000A` to `\n`) and Unicode transcoding. -3. `Writer` handles number output consistently. For example, user can set precision for `Double()`. +3. `Writer` handles number output consistently. 4. `Writer` implements the event handler concept. It can be used to handle events from `Reader`, `Document` or other event publisher. 5. `Writer` can be optimized for different platforms. @@ -258,20 +258,6 @@ The last one, `Allocator` is the type of allocator, which is used for allocating Besides, the constructor of `Writer` has a `levelDepth` parameter. This parameter affects the initial memory allocated for storing information per hierarchy level. -## Precision (#WriterPrecision) - -When using `Double()`, the precision of output can be specified, for example: - -~~~~~~~~~~cpp -writer.SetDoublePrecision(4); -writer.StartArary(); -writer.Double(3.14159265359); -writer.EndArray(); -~~~~~~~~~~ -~~~~~~~~~~ -[3.1416] -~~~~~~~~~~ - ## PrettyWriter {#PrettyWriter} While the output of `Writer` is the most condensed JSON without white-spaces, suitable for network transfer or storage, it is not easily readable by human. diff --git a/include/rapidjson/prettywriter.h b/include/rapidjson/prettywriter.h index 02b9420..d351468 100644 --- a/include/rapidjson/prettywriter.h +++ b/include/rapidjson/prettywriter.h @@ -31,9 +31,6 @@ public: PrettyWriter(OutputStream& os, Allocator* allocator = 0, size_t levelDepth = Base::kDefaultLevelDepth) : Base(os, allocator, levelDepth), indentChar_(' '), indentCharCount_(4) {} - //! Overridden for fluent API, see \ref Writer::SetDoublePrecision() - PrettyWriter& SetDoublePrecision(int p) { Base::SetDoublePrecision(p); return *this; } - //! Set custom indentation. 
/*! \param indentChar Character for indentation. Must be whitespace character (' ', '\\t', '\\n', '\\r'). \param indentCharCount Number of indent characters for each indentation level. @@ -119,15 +116,6 @@ public: //! Simpler but slower overload. bool String(const Ch* str) { return String(str, internal::StrLen(str)); } - //! Overridden for fluent API, see \ref Writer::Double() - bool Double(double d, int precision) { - int oldPrecision = Base::GetDoublePrecision(); - SetDoublePrecision(precision); - bool ret = Double(d); - SetDoublePrecision(oldPrecision); - return ret; - } - //@} protected: void PrettyPrefix(Type type) { diff --git a/include/rapidjson/writer.h b/include/rapidjson/writer.h index af9fea3..9d25b27 100644 --- a/include/rapidjson/writer.h +++ b/include/rapidjson/writer.h @@ -43,12 +43,10 @@ public: \param levelDepth Initial capacity of stack. */ Writer(OutputStream& os, Allocator* allocator = 0, size_t levelDepth = kDefaultLevelDepth) : - os_(&os), level_stack_(allocator, levelDepth * sizeof(Level)), - doublePrecision_(kDefaultDoublePrecision), hasRoot_(false) {} + os_(&os), level_stack_(allocator, levelDepth * sizeof(Level)), hasRoot_(false) {} Writer(Allocator* allocator = 0, size_t levelDepth = kDefaultLevelDepth) : - os_(0), level_stack_(allocator, levelDepth * sizeof(Level)), - doublePrecision_(kDefaultDoublePrecision), hasRoot_(false) {} + os_(0), level_stack_(allocator, levelDepth * sizeof(Level)), hasRoot_(false) {} //! Reset the writer with a new stream. /*! @@ -70,7 +68,6 @@ public: */ void Reset(OutputStream& os) { os_ = &os; - doublePrecision_ = kDefaultDoublePrecision; hasRoot_ = false; level_stack_.Clear(); } @@ -83,21 +80,6 @@ public: return hasRoot_ && level_stack_.Empty(); } - //! Set the number of significant digits for \c double values - /*! When writing a \c double value to the \c OutputStream, the number - of significant digits is limited to 6 by default. 
- \param p maximum number of significant digits (default: 6) - \return The Writer itself for fluent API. - */ - Writer& SetDoublePrecision(int p = kDefaultDoublePrecision) { - if (p < 0) p = kDefaultDoublePrecision; // negative precision is ignored - doublePrecision_ = p; - return *this; - } - - //! \see SetDoublePrecision() - int GetDoublePrecision() const { return doublePrecision_; } - /*!@name Implementation of Handler \see Handler */ @@ -112,12 +94,6 @@ public: //! Writes the given \c double value to the stream /*! - The number of significant digits (the precision) to be written - can be set by \ref SetDoublePrecision() for the Writer: - \code - Writer<...> writer(...); - writer.SetDoublePrecision(12).Double(M_PI); - \endcode \param d The value to be written. \return Whether it is succeed. */ @@ -167,23 +143,6 @@ public: /*! @name Convenience extensions */ //@{ - //! Writes the given \c double value to the stream (explicit precision) - /*! - The currently set double precision is ignored in favor of the explicitly - given precision for this value. - \see Double(), SetDoublePrecision(), GetDoublePrecision() - \param d The value to be written - \param precision The number of significant digits for this value - \return Whether it is succeeded. - */ - bool Double(double d, int precision) { - int oldPrecision = GetDoublePrecision(); - SetDoublePrecision(precision); - bool ret = Double(d); - SetDoublePrecision(oldPrecision); - return ret; - } - //! Simpler but slower overload. bool String(const Ch* str) { return String(str, internal::StrLen(str)); } @@ -350,11 +309,8 @@ protected: OutputStream* os_; internal::Stack level_stack_; - int doublePrecision_; bool hasRoot_; - static const int kDefaultDoublePrecision = 6; - private: // Prohibit copy constructor & assignment operator. 
Writer(const Writer&); diff --git a/test/unittest/writertest.cpp b/test/unittest/writertest.cpp index c834227..59543b0 100644 --- a/test/unittest/writertest.cpp +++ b/test/unittest/writertest.cpp @@ -65,62 +65,6 @@ TEST(Writer, Double) { } -//TEST(Writer,DoublePrecision) { -// const char json[] = "[1.2345,1.2345678,0.123456789012,1234567.8]"; -// -// StringBuffer buffer; -// Writer writer(buffer); -// -// const int kDefaultDoublePrecision = 6; -// // handling the double precision -// EXPECT_EQ(writer.GetDoublePrecision(), kDefaultDoublePrecision); -// writer.SetDoublePrecision(17); -// EXPECT_EQ(writer.GetDoublePrecision(), 17); -// writer.SetDoublePrecision(-1); // negative equivalent to reset -// EXPECT_EQ(writer.GetDoublePrecision(), kDefaultDoublePrecision); -// writer.SetDoublePrecision(1); -// writer.SetDoublePrecision(); // reset again -// EXPECT_EQ(writer.GetDoublePrecision(), kDefaultDoublePrecision); -// -// { // write with explicitly increased precision -// StringStream s(json); -// Reader reader; -// reader.Parse<0>(s, writer.SetDoublePrecision(12)); -// EXPECT_EQ(writer.GetDoublePrecision(), 12); -// EXPECT_STREQ(json, buffer.GetString()); -// } -// { // explicit individual double precisions -// buffer.Clear(); -// writer.Reset(buffer); -// writer.SetDoublePrecision(2); -// writer.StartArray(); -// writer.Double(1.2345, 5); -// writer.Double(1.2345678, 9); -// writer.Double(0.123456789012, 12); -// writer.Double(1234567.8, 8); -// writer.EndArray(); -// -// EXPECT_EQ(writer.GetDoublePrecision(), 2); -// EXPECT_STREQ(json, buffer.GetString()); -// } -// { // write with default precision (output with precision loss) -// Document d; -// d.Parse<0>(json); -// buffer.Clear(); -// writer.Reset(buffer); -// d.Accept(writer.SetDoublePrecision()); -// -// // parsed again to avoid platform-dependent floating point outputs -// // (e.g. 
width of exponents) -// d.Parse<0>(buffer.GetString()); -// EXPECT_EQ(writer.GetDoublePrecision(), kDefaultDoublePrecision); -// EXPECT_DOUBLE_EQ(d[0u].GetDouble(), 1.2345); -// EXPECT_DOUBLE_EQ(d[1u].GetDouble(), 1.23457); -// EXPECT_DOUBLE_EQ(d[2u].GetDouble(), 0.123457); -// EXPECT_DOUBLE_EQ(d[3u].GetDouble(), 1234570); -// } -//} - TEST(Writer, Transcode) { const char json[] = "{\"hello\":\"world\",\"t\":true,\"f\":false,\"n\":null,\"i\":123,\"pi\":3.1416,\"a\":[1,2,3],\"dollar\":\"\x24\",\"cents\":\"\xC2\xA2\",\"euro\":\"\xE2\x82\xAC\",\"gclef\":\"\xF0\x9D\x84\x9E\"}"; From c54915297b530639ae0eac97bff34df4693c7e15 Mon Sep 17 00:00:00 2001 From: Milo Yip Date: Mon, 11 Aug 2014 00:30:31 +0800 Subject: [PATCH 5/5] Change CountDecimalDigit32() to simple implementation It is simple and pure C++. And it is found in performance test that it is even faster than the original version, due to distribution of n. But the performance gain is not obvious in RapidJSON. --- include/rapidjson/internal/dtoa.h | 32 +++++++++++-------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/include/rapidjson/internal/dtoa.h b/include/rapidjson/internal/dtoa.h index 044e2fb..44f5c11 100644 --- a/include/rapidjson/internal/dtoa.h +++ b/include/rapidjson/internal/dtoa.h @@ -218,27 +218,17 @@ inline void GrisuRound(char* buffer, int len, uint64_t delta, uint64_t rest, uin } inline unsigned CountDecimalDigit32(uint32_t n) { - static const uint32_t powers_of_10[] = { - 0, - 10, - 100, - 1000, - 10000, - 100000, - 1000000, - 10000000, - 100000000, - 1000000000 - }; - -#ifdef _MSC_VER - unsigned long i = 0; - _BitScanReverse(&i, n | 1); - uint32_t t = (i + 1) * 1233 >> 12; -#elif __GNUC__ - uint32_t t = (32 - __builtin_clz(n | 1)) * 1233 >> 12; -#endif - return t - (n < powers_of_10[t]) + 1; + // Simple pure C++ implementation was faster than __builtin_clz version in this situation. 
+ if (n < 10) return 1; + if (n < 100) return 2; + if (n < 1000) return 3; + if (n < 10000) return 4; + if (n < 100000) return 5; + if (n < 1000000) return 6; + if (n < 10000000) return 7; + if (n < 100000000) return 8; + if (n < 1000000000) return 9; + return 10; } inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buffer, int* len, int* K) {