Make custom strtod work for denormal numbers and some boundary cases [ci skip]

2014-09-13 23:24:40 +08:00 · 2014-09-13 23:24:40 +08:00 · 98dd0a0a64
commit 98dd0a0a64
parent 4bd240abee
2 changed files with 68 additions and 82 deletions
--- a/include/rapidjson/internal/strtod.h
+++ b/include/rapidjson/internal/strtod.h
@ -29,9 +29,13 @@ namespace internal {
 class Double {
 public:
    Double() {}
    Double(double d) : d(d) {}
    Double(uint64_t u) : u(u) {}
    double Value() const { return d; }
    uint64_t Uint64Value() const { return u; }
    double NextDouble() const {
        RAPIDJSON_ASSERT(!Sign());
        return Double(u + 1).Value();
@ -47,12 +51,20 @@ public:
    bool Sign() const { return (u & kSignMask) != 0; }
    uint64_t Significand() const { return u & kSignificandMask; }
-    int Exponent() const { return (u & kExponentMask) - kExponentBias; }
+    int Exponent() const { return ((u & kExponentMask) >> kSignificandSize) - kExponentBias; }
-    double Value() const { return d;}
+
    bool IsNan() const { return (u & kExponentMask) == kExponentMask && Significand() != 0; }
    bool IsInf() const { return (u & kExponentMask) == kExponentMask && Significand() == 0; }
    bool IsNormal() const { return (u & kExponentMask) != 0 || Significand() == 0; }
    uint64_t IntegerSignificand() const { return IsNormal() ? Significand() | kHiddenBit : Significand(); }
    int IntegerExponent() const { return (IsNormal() ? Exponent() : kDenormalExponent) - kSignificandSize; }
    uint64_t ToBias() const { return (u & kSignMask) ? ~u + 1 : u | kSignMask; }
 private:
    static const int kSignificandSize = 52;
    static const int kExponentBias = 0x3FF;
    static const int kDenormalExponent = 1 - kExponentBias;
    static const uint64_t kSignMask = RAPIDJSON_UINT64_C2(0x80000000, 0x00000000);
    static const uint64_t kExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
    static const uint64_t kSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
@ -328,19 +340,10 @@ inline double NormalPrecision(double d, int p) {
 }
 inline int CheckWithinHalfULP(double b, const BigInteger& d, int dExp, bool* adjustToNegative) {
-    static const int kSignificandSize = 52;
+    const Double db(b);
-    static const int kExponentBias = 0x3FF;
+    const uint64_t bInt = db.IntegerSignificand();
-    static const uint64_t kExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
+    const int bExp = db.IntegerExponent();
    static const uint64_t kSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
    static const uint64_t kHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
    union {
        double d;
        uint64_t u;
    }u;
    u.d = b;
    const uint64_t bInt = (u.u & kSignificandMask) | kHiddenBit;
    const int bExp = ((u.u & kExponentMask) >> kSignificandSize) - kExponentBias - kSignificandSize;
    const int hExp = bExp - 1;
    int dS_Exp2 = 0;
@ -396,7 +399,19 @@ inline int CheckWithinHalfULP(double b, const BigInteger& d, int dExp, bool* adj
    BigInteger delta(0);
    *adjustToNegative = dS.Difference(bS, &delta);
-    return delta.Compare(hS);
+    int cmp = delta.Compare(hS);
    // If delta is within 1/2 ULP, check for special case when significand is power of two.
    // In this case, need to compare with 1/2h in the lower bound.
    if (cmp < 0 && *adjustToNegative && db.IsNormal() && (bInt & (bInt - 1)) == 0) {
        delta <<= 1;
        if (delta.Compare(hS) <= 0)
            return -1;
        else
            return 1;
    }
    return cmp;
 }
 inline double FullPrecision(double d, int dExp, const char* decimals, size_t length) {
@ -413,17 +428,16 @@ inline double FullPrecision(double d, int dExp, const char* decimals, size_t len
        }
    }
-    if (p >= -22 && d <= 9007199254740991.0) // 2^53 - 1
+    if (p >= -22 && p <= 22 && d <= 9007199254740991.0) // 2^53 - 1
        return StrtodFastPath(d, p);
    if (p + int(length) < -324)
        return 0.0;
    //printf("s=%s p=%d\n", decimals, p);
    const BigInteger dInt(decimals, length);
    Double approx = NormalPrecision(d, p);
-    for (;;) {
+    bool lastAdjustToNegative;
-        //printf("approx=%.17g\n", approx.Value());
+    for (int i = 0; i < 10; i++) {
        bool adjustToNegative;
        int cmp = CheckWithinHalfULP(approx.Value(), dInt, dExp, &adjustToNegative);
        if (cmp < 0)
@ -436,13 +450,21 @@ inline double FullPrecision(double d, int dExp, const char* decimals, size_t len
                return approx.Value();
        }
        else {
            // If oscillate between negative/postive, terminate
            if (i > 0 && adjustToNegative != lastAdjustToNegative)
                break;
            // adjustment
            if (adjustToNegative)
                approx = approx.PreviousDouble();
            else
                approx = approx.NextDouble();
        }
        lastAdjustToNegative = adjustToNegative;
    }
    // This should not happen, but in case there is really a bug, break the infinite-loop
    return approx.Value();
 }
 } // namespace internal
--- a/test/unittest/readertest.cpp
+++ b/test/unittest/readertest.cpp
@ -31,36 +31,6 @@ RAPIDJSON_DIAG_PUSH
 RAPIDJSON_DIAG_OFF(effc++)
 #endif
 // Implement functions that may not be provided pre-C++11
 bool IsNan(double x) {
 	union {
 		double x;
 		uint64_t u;
 	}u = { x };
 	// E == 0x7FF && M != 0
 	return (u.u & RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000)) == RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000) && 
 		(u.u & RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF)) != 0;
 }
 bool IsInf(double x) {
 	union {
 		double x;
 		uint64_t u;
 	}u = { x };
 	// E = 0x7FF and M == 0
 	return (u.u & RAPIDJSON_UINT64_C2(0x7FFFFFFF, 0xFFFFFFFF)) == RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
 }
 bool IsNormal(double x) {
 	union {
 		double x;
 		uint64_t u;
 	}u = { x };
 	// E != 0 || M == 0
 	return (u.u & RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000)) != 0 || 
 		(u.u & RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF)) == 0;
 }
 template<bool expect>
 struct ParseBoolHandler : BaseReaderHandler<UTF8<>, ParseBoolHandler<expect> > {
    ParseBoolHandler() : step_(0) {}
@ -242,16 +212,22 @@ static void TestParseDouble() {
    TEST_DOUBLE(fullPrecision, "1.234E+10", 1.234E+10);
    TEST_DOUBLE(fullPrecision, "1.234E-10", 1.234E-10);
    TEST_DOUBLE(fullPrecision, "1.79769e+308", 1.79769e+308);
-    //TEST_DOUBLE(fullPrecision, "2.22507e-308", 2.22507e-308);
+    TEST_DOUBLE(fullPrecision, "2.22507e-308", 2.22507e-308);
    TEST_DOUBLE(fullPrecision, "-1.79769e+308", -1.79769e+308);
-    //TEST_DOUBLE(fullPrecision, "-2.22507e-308", -2.22507e-308);
+    TEST_DOUBLE(fullPrecision, "-2.22507e-308", -2.22507e-308);
-    //TEST_DOUBLE(fullPrecision, "4.9406564584124654e-324", 4.9406564584124654e-324); // minimum denormal
+    TEST_DOUBLE(fullPrecision, "4.9406564584124654e-324", 4.9406564584124654e-324); // minimum denormal
    TEST_DOUBLE(fullPrecision, "2.2250738585072009e-308", 2.2250738585072009e-308); // Max subnormal double
    TEST_DOUBLE(fullPrecision, "2.2250738585072014e-308", 2.2250738585072014e-308); // Min normal positive double
    TEST_DOUBLE(fullPrecision, "1.7976931348623157e+308", 1.7976931348623157e+308); // Max double
    TEST_DOUBLE(fullPrecision, "1e-10000", 0.0);                                   // must underflow
    TEST_DOUBLE(fullPrecision, "18446744073709551616", 18446744073709551616.0);    // 2^64 (max of uint64_t + 1, force to use double)
    TEST_DOUBLE(fullPrecision, "-9223372036854775809", -9223372036854775809.0);    // -2^63 - 1(min of int64_t + 1, force to use double)
    TEST_DOUBLE(fullPrecision, "0.9868011474609375", 0.9868011474609375);          // https://github.com/miloyip/rapidjson/issues/120
    TEST_DOUBLE(fullPrecision, "123e34", 123e34);                                  // Fast Path Cases In Disguise
    TEST_DOUBLE(fullPrecision, "45913141877270640000.0", 45913141877270640000.0);
    TEST_DOUBLE(fullPrecision, "2.2250738585072011e-308", 2.2250738585072011e-308); // http://www.exploringbinary.com/php-hangs-on-numeric-value-2-2250738585072011e-308/
    TEST_DOUBLE(fullPrecision, "2.2250738585072012e-308", 2.2250738585072012e-308); // http://www.exploringbinary.com/java-hangs-when-converting-2-2250738585072012e-308/
    TEST_DOUBLE(fullPrecision, "4503599627370495.9999999999", 4503599627370495.9999999999); // slightly smaller than 2^52 for power of two case
    {
        char n1e308[310];   // '1' followed by 308 '0'
@ -263,24 +239,23 @@ static void TestParseDouble() {
    }
 #if 1
    static const unsigned count = 1000000;
    // Random test for double
    {
        union {
            double d;
            uint64_t u;
        }u;
        Random r;
-        for (unsigned i = 0; i < 100000; i++) {
+        for (unsigned i = 0; i < count; i++) {
            internal::Double d;
            do {
                // Need to call r() in two statements for cross-platform coherent sequence.
-                u.u = uint64_t(r()) << 32;
+                uint64_t u = uint64_t(r()) << 32;
-                u.u |= uint64_t(r());
+                u |= uint64_t(r());
-            } while (IsNan(u.d) || IsInf(u.d) || !IsNormal(u.d));
+                d = internal::Double(u);
            } while (d.IsNan() || d.IsInf()/* || !d.IsNormal()*/);  // Also work for subnormal now
            char buffer[32];
-            *internal::dtoa(u.d, buffer) = '\0';
+            *internal::dtoa(d.Value(), buffer) = '\0';
-            TEST_DOUBLE(fullPrecision, buffer, u.d);
+            TEST_DOUBLE(fullPrecision, buffer, d.Value());
        }
    }
 #endif
@ -298,32 +273,21 @@ TEST(Reader, ParseNumber_FullPrecisionDouble) {
 TEST(Reader, ParseNumber_NormalPrecisionError) {
    static unsigned count = 1000000;
    static const uint64_t kSignMask = RAPIDJSON_UINT64_C2(0x80000000, 0);
    union {
        double d;
        uint64_t u;
        uint64_t ToBias() const {
            if (u & kSignMask)
                return ~u + 1;
            else
                return u | kSignMask;
        }
    }u, a;
    Random r;
    double ulpSum = 0.0;
    double ulpMax = 0.0;
    for (unsigned i = 0; i < count; i++) {
        internal::Double e, a;
        do {
            // Need to call r() in two statements for cross-platform coherent sequence.
-            u.u = uint64_t(r()) << 32;
+            uint64_t u = uint64_t(r()) << 32;
-            u.u |= uint64_t(r());
+            u |= uint64_t(r());
-        } while (IsNan(u.d) || IsInf(u.d) || !IsNormal(u.d));
+            e = u;
        } while (e.IsNan() || e.IsInf() || !e.IsNormal());
        char buffer[32];
-        *internal::dtoa(u.d, buffer) = '\0';
+        *internal::dtoa(e.Value(), buffer) = '\0';
        StringStream s(buffer);
        ParseDoubleHandler h;
@ -331,8 +295,8 @@ TEST(Reader, ParseNumber_NormalPrecisionError) {
        ASSERT_EQ(kParseErrorNone, reader.Parse(s, h).Code());
        EXPECT_EQ(1u, h.step_);
-        a.d = h.actual_;
+        a = h.actual_;
-        uint64_t bias1 = u.ToBias();
+        uint64_t bias1 = e.ToBias();
        uint64_t bias2 = a.ToBias();
        double ulp = bias1 >= bias2 ? bias1 - bias2 : bias2 - bias1;
        ulpMax = std::max(ulpMax, ulp);