From 6978778884ea112fd07b4680c43a269c4cb636eb Mon Sep 17 00:00:00 2001
From: Milo Yip <miloyip@gmail.com>
Date: Sat, 9 Aug 2014 21:11:37 +0800
Subject: [PATCH 1/5] Change double parsing with fast-path conversion

Accurate rounding in normal numerical ranges, also reduce lookup table
size.
---
 include/rapidjson/internal/pow10.h | 25 +++++--------------------
 include/rapidjson/reader.h         | 18 +++++++++++++++---
 2 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/include/rapidjson/internal/pow10.h b/include/rapidjson/internal/pow10.h
index 0852539..d6b92f0 100644
--- a/include/rapidjson/internal/pow10.h
+++ b/include/rapidjson/internal/pow10.h
@@ -6,27 +6,12 @@ namespace internal {
 
 //! Computes integer powers of 10 in double (10.0^n).
 /*! This function uses lookup table for fast and accurate results.
-	\param n positive/negative exponent. Must <= 308.
+	\param n non-negative exponent. Must <= 308.
 	\return 10.0^n
 */
 inline double Pow10(int n) {
-	static const double e[] = { // 1e-308...1e308: 617 * 8 bytes = 4936 bytes
-		1e-308,1e-307,1e-306,1e-305,1e-304,1e-303,1e-302,1e-301,1e-300,
-		1e-299,1e-298,1e-297,1e-296,1e-295,1e-294,1e-293,1e-292,1e-291,1e-290,1e-289,1e-288,1e-287,1e-286,1e-285,1e-284,1e-283,1e-282,1e-281,1e-280,
-		1e-279,1e-278,1e-277,1e-276,1e-275,1e-274,1e-273,1e-272,1e-271,1e-270,1e-269,1e-268,1e-267,1e-266,1e-265,1e-264,1e-263,1e-262,1e-261,1e-260,
-		1e-259,1e-258,1e-257,1e-256,1e-255,1e-254,1e-253,1e-252,1e-251,1e-250,1e-249,1e-248,1e-247,1e-246,1e-245,1e-244,1e-243,1e-242,1e-241,1e-240,
-		1e-239,1e-238,1e-237,1e-236,1e-235,1e-234,1e-233,1e-232,1e-231,1e-230,1e-229,1e-228,1e-227,1e-226,1e-225,1e-224,1e-223,1e-222,1e-221,1e-220,
-		1e-219,1e-218,1e-217,1e-216,1e-215,1e-214,1e-213,1e-212,1e-211,1e-210,1e-209,1e-208,1e-207,1e-206,1e-205,1e-204,1e-203,1e-202,1e-201,1e-200,
-		1e-199,1e-198,1e-197,1e-196,1e-195,1e-194,1e-193,1e-192,1e-191,1e-190,1e-189,1e-188,1e-187,1e-186,1e-185,1e-184,1e-183,1e-182,1e-181,1e-180,
-		1e-179,1e-178,1e-177,1e-176,1e-175,1e-174,1e-173,1e-172,1e-171,1e-170,1e-169,1e-168,1e-167,1e-166,1e-165,1e-164,1e-163,1e-162,1e-161,1e-160,
-		1e-159,1e-158,1e-157,1e-156,1e-155,1e-154,1e-153,1e-152,1e-151,1e-150,1e-149,1e-148,1e-147,1e-146,1e-145,1e-144,1e-143,1e-142,1e-141,1e-140,
-		1e-139,1e-138,1e-137,1e-136,1e-135,1e-134,1e-133,1e-132,1e-131,1e-130,1e-129,1e-128,1e-127,1e-126,1e-125,1e-124,1e-123,1e-122,1e-121,1e-120,
-		1e-119,1e-118,1e-117,1e-116,1e-115,1e-114,1e-113,1e-112,1e-111,1e-110,1e-109,1e-108,1e-107,1e-106,1e-105,1e-104,1e-103,1e-102,1e-101,1e-100,
-		1e-99, 1e-98, 1e-97, 1e-96, 1e-95, 1e-94, 1e-93, 1e-92, 1e-91, 1e-90, 1e-89, 1e-88, 1e-87, 1e-86, 1e-85, 1e-84, 1e-83, 1e-82, 1e-81, 1e-80, 
-		1e-79, 1e-78, 1e-77, 1e-76, 1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65, 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 
-		1e-59, 1e-58, 1e-57, 1e-56, 1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 
-		1e-39, 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 
-		1e-19, 1e-18, 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9,  1e-8,  1e-7,  1e-6,  1e-5,  1e-4,  1e-3,  1e-2,  1e-1,  1e+0,  
+	static const double e[] = { // 1e-0...1e308: 309 * 8 bytes = 2472 bytes
+		1e+0,  
 		1e+1,  1e+2,  1e+3,  1e+4,  1e+5,  1e+6,  1e+7,  1e+8,  1e+9,  1e+10, 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17, 1e+18, 1e+19, 1e+20, 
 		1e+21, 1e+22, 1e+23, 1e+24, 1e+25, 1e+26, 1e+27, 1e+28, 1e+29, 1e+30, 1e+31, 1e+32, 1e+33, 1e+34, 1e+35, 1e+36, 1e+37, 1e+38, 1e+39, 1e+40,
 		1e+41, 1e+42, 1e+43, 1e+44, 1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50, 1e+51, 1e+52, 1e+53, 1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60,
@@ -44,8 +29,8 @@ inline double Pow10(int n) {
 		1e+281,1e+282,1e+283,1e+284,1e+285,1e+286,1e+287,1e+288,1e+289,1e+290,1e+291,1e+292,1e+293,1e+294,1e+295,1e+296,1e+297,1e+298,1e+299,1e+300,
 		1e+301,1e+302,1e+303,1e+304,1e+305,1e+306,1e+307,1e+308
 	};
-	RAPIDJSON_ASSERT(n <= 308);
-	return n < -308 ? 0.0 : e[n + 308];
+	RAPIDJSON_ASSERT(n >= 0 && n <= 308);
+	return e[n];
 }
 
 } // namespace internal
diff --git a/include/rapidjson/reader.h b/include/rapidjson/reader.h
index 8d4c1d1..647ecde 100644
--- a/include/rapidjson/reader.h
+++ b/include/rapidjson/reader.h
@@ -668,6 +668,18 @@ private:
 		}
 	}
 
+	inline double StrtodFastPath(double significand, int exp) {
+		// Fast path only works on limited range of values.
+		// But for simplicity and performance, currently only implement this.
+		// see http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/
+		if (exp < -308)
+			return 0.0;
+		else if (exp >= 0)
+			return significand * internal::Pow10(exp);
+		else
+			return significand / internal::Pow10(-exp);
+	}
+
 	template<unsigned parseFlags, typename InputStream, typename Handler>
 	void ParseNumber(InputStream& is, Handler& handler) {
 		internal::StreamLocalCopy<InputStream> copy(is);
@@ -813,11 +825,11 @@ private:
 			int expSum = exp + expFrac;
 			if (expSum < -308) {
 				// Prevent expSum < -308, making Pow10(expSum) = 0
-				d *= internal::Pow10(exp);
-				d *= internal::Pow10(expFrac);
+				d = StrtodFastPath(d, exp);
+				d = StrtodFastPath(d, expFrac);
 			}
 			else
-				d *= internal::Pow10(expSum);
+				d = StrtodFastPath(d, expSum);
 
 			cont = handler.Double(minus ? -d : d);
 		}

From a7762a345336cdeb57cbfd03f48350c16b55b6f2 Mon Sep 17 00:00:00 2001
From: Milo Yip <miloyip@gmail.com>
Date: Sat, 9 Aug 2014 21:12:58 +0800
Subject: [PATCH 2/5] Custom dtoa() implementation

Modified from Milo's Grisu2 implementation. In 99.9% of cases it returns the
shortest decimal representation.
---
 include/rapidjson/internal/dtoa.h | 414 ++++++++++++++++++++++++++++++
 include/rapidjson/writer.h        |  30 +--
 test/unittest/writertest.cpp      | 111 ++++----
 3 files changed, 487 insertions(+), 68 deletions(-)
 create mode 100644 include/rapidjson/internal/dtoa.h

diff --git a/include/rapidjson/internal/dtoa.h b/include/rapidjson/internal/dtoa.h
new file mode 100644
index 0000000..a2f8aae
--- /dev/null
+++ b/include/rapidjson/internal/dtoa.h
@@ -0,0 +1,414 @@
+// Modified from https://github.com/miloyip/dtoa-benchmark/blob/master/src/milo/dtoa_milo.h
+// API is changed to return the character past the end of string, without writing '\0'
+
+#ifndef RAPIDJSON_DTOA_
+#define RAPIDJSON_DTOA_
+
+#if defined(_MSC_VER)
+#include <intrin.h>
+#if defined(_M_AMD64)
+#pragma intrinsic(_BitScanReverse64)
+#endif
+#endif
+
+#include "itoa.h" // GetDigitsLut()
+
+namespace rapidjson {
+namespace internal {
+
+struct DiyFp {
+	DiyFp() {}
+
+	DiyFp(uint64_t f, int e) : f(f), e(e) {}
+
+	DiyFp(double d) {
+		union {
+			double d;
+			uint64_t u64;
+		} u = { d };
+
+		int biased_e = (u.u64 & kDpExponentMask) >> kDpSignificandSize;
+		uint64_t significand = (u.u64 & kDpSignificandMask);
+		if (biased_e != 0) {
+			f = significand + kDpHiddenBit;
+			e = biased_e - kDpExponentBias;
+		} 
+		else {
+			f = significand;
+			e = kDpMinExponent + 1;
+		}
+	}
+
+	DiyFp operator-(const DiyFp& rhs) const {
+		return DiyFp(f - rhs.f, e);
+	}
+
+	DiyFp operator*(const DiyFp& rhs) const {
+#if defined(_MSC_VER) && defined(_M_AMD64)
+		uint64_t h;
+		uint64_t l = _umul128(f, rhs.f, &h);
+		if (l & (uint64_t(1) << 63)) // rounding
+			h++;
+		return DiyFp(h, e + rhs.e + 64);
+#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
+		unsigned __int128 p = static_cast<unsigned __int128>(f) * static_cast<unsigned __int128>(rhs.f);
+		uint64_t h = p >> 64;
+		uint64_t l = static_cast<uint64_t>(p);
+		if (l & (uint64_t(1) << 63)) // rounding
+			h++;
+		return DiyFp(h, e + rhs.e + 64);
+#else
+		const uint64_t M32 = 0xFFFFFFFF;
+		const uint64_t a = f >> 32;
+		const uint64_t b = f & M32;
+		const uint64_t c = rhs.f >> 32;
+		const uint64_t d = rhs.f & M32;
+		const uint64_t ac = a * c;
+		const uint64_t bc = b * c;
+		const uint64_t ad = a * d;
+		const uint64_t bd = b * d;
+		uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
+		tmp += 1U << 31;  /// mult_round
+		return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
+#endif
+	}
+
+	DiyFp Normalize() const { // Returns the same value with f shifted left so that bit 63 is set; f must be non-zero.
+#if defined(_MSC_VER) && defined(_M_AMD64)
+		unsigned long index;
+		_BitScanReverse64(&index, f); // index of the highest set bit of f
+		return DiyFp(f << (63 - index), e - (63 - index));
+#elif defined(__GNUC__)
+		int s = __builtin_clzll(f); // leading zero count == 63 - index; one more would shift the top bit out
+		return DiyFp(f << s, e - s);
+#else
+		DiyFp res = *this;
+		while (!(res.f & kDpHiddenBit)) { // bring the leading 1 up to the hidden-bit position (bit 52)
+			res.f <<= 1;
+			res.e--;
+		}
+		res.f <<= (kDiySignificandSize - kDpSignificandSize - 1); // then move it from bit 52 to bit 63
+		res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 1);
+		return res;
+#endif
+	}
+
+	DiyFp NormalizeBoundary() const {
+#if defined(_MSC_VER) && defined(_M_AMD64)
+		unsigned long index;
+		_BitScanReverse64(&index, f);
+		return DiyFp (f << (63 - index), e - (63 - index));
+#else
+		DiyFp res = *this;
+		while (!(res.f & (kDpHiddenBit << 1))) {
+			res.f <<= 1;
+			res.e--;
+		}
+		res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
+		res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
+		return res;
+#endif
+	}
+
+	void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const {
+		DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
+		DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
+		mi.f <<= mi.e - pl.e;
+		mi.e = pl.e;
+		*plus = pl;
+		*minus = mi;
+	}
+
+	static const int kDiySignificandSize = 64;
+	static const int kDpSignificandSize = 52;
+	static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
+	static const int kDpMinExponent = -kDpExponentBias;
+	static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
+	static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
+	static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
+
+	uint64_t f;
+	int e;
+};
+
+inline DiyFp GetCachedPower(int e, int* K) {
+	// 10^-348, 10^-340, ..., 10^340
+	static const uint64_t kCachedPowers_F[] = {
+		RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76),
+		RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea),
+		RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df),
+		RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f),
+		RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c),
+		RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5),
+		RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d),
+		RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637),
+		RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7),
+		RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5),
+		RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b),
+		RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996),
+		RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6),
+		RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8),
+		RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053),
+		RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd),
+		RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94),
+		RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b),
+		RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac),
+		RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3),
+		RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb),
+		RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c),
+		RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000),
+		RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984),
+		RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70),
+		RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245),
+		RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8),
+		RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a),
+		RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea),
+		RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85),
+		RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2),
+		RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3),
+		RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25),
+		RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece),
+		RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5),
+		RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a),
+		RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c),
+		RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a),
+		RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129),
+		RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429),
+		RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d),
+		RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841),
+		RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9),
+		RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b)
+	};
+	static const int16_t kCachedPowers_E[] = {
+		-1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007,  -980,
+		 -954,  -927,  -901,  -874,  -847,  -821,  -794,  -768,  -741,  -715,
+		 -688,  -661,  -635,  -608,  -582,  -555,  -529,  -502,  -475,  -449,
+		 -422,  -396,  -369,  -343,  -316,  -289,  -263,  -236,  -210,  -183,
+		 -157,  -130,  -103,   -77,   -50,   -24,     3,    30,    56,    83,
+		  109,   136,   162,   189,   216,   242,   269,   295,   322,   348,
+		  375,   402,   428,   455,   481,   508,   534,   561,   588,   614,
+		  641,   667,   694,   720,   747,   774,   800,   827,   853,   880,
+		  907,   933,   960,   986,  1013,  1039,  1066
+	};
+
+	//int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 347;
+	double dk = (-61 - e) * 0.30102999566398114 + 347;	// dk must be positive, so can do ceiling in positive
+	int k = static_cast<int>(dk);
+	if (k != dk)
+		k++;
+
+	unsigned index = static_cast<unsigned>((k >> 3) + 1);
+	*K = -(-348 + static_cast<int>(index << 3));	// decimal exponent no need lookup table
+
+	return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
+}
+
+inline void GrisuRound(char* buffer, int len, uint64_t delta, uint64_t rest, uint64_t ten_kappa, uint64_t wp_w) {
+	while (rest < wp_w && delta - rest >= ten_kappa &&
+		   (rest + ten_kappa < wp_w ||  /// closer
+			wp_w - rest > rest + ten_kappa - wp_w)) {
+		buffer[len - 1]--;
+		rest += ten_kappa;
+	}
+}
+
+inline unsigned CountDecimalDigit32(uint32_t n) {
+	static const uint32_t powers_of_10[] = {
+		0,
+		10,
+		100,
+		1000,
+		10000,
+		100000,
+		1000000,
+		10000000,
+		100000000,
+		1000000000
+	};
+
+#ifdef _MSC_VER
+	unsigned long i = 0;
+	_BitScanReverse(&i, n | 1);
+	uint32_t t = (i + 1) * 1233 >> 12;
+#elif __GNUC__
+	uint32_t t = (32 - __builtin_clz(n | 1)) * 1233 >> 12;
+#endif
+	return t - (n < powers_of_10[t]) + 1;
+}
+
+inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buffer, int* len, int* K) {
+	static const uint32_t kPow10[] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 };
+	const DiyFp one(uint64_t(1) << -Mp.e, Mp.e);
+	const DiyFp wp_w = Mp - W;
+	uint32_t p1 = static_cast<uint32_t>(Mp.f >> -one.e);
+	uint64_t p2 = Mp.f & (one.f - 1);
+	int kappa = CountDecimalDigit32(p1);
+	*len = 0;
+
+	while (kappa > 0) {
+		uint32_t d;
+		switch (kappa) {
+			case 10: d = p1 / 1000000000; p1 %= 1000000000; break;
+			case  9: d = p1 /  100000000; p1 %=  100000000; break;
+			case  8: d = p1 /   10000000; p1 %=   10000000; break;
+			case  7: d = p1 /    1000000; p1 %=    1000000; break;
+			case  6: d = p1 /     100000; p1 %=     100000; break;
+			case  5: d = p1 /      10000; p1 %=      10000; break;
+			case  4: d = p1 /       1000; p1 %=       1000; break;
+			case  3: d = p1 /        100; p1 %=        100; break;
+			case  2: d = p1 /         10; p1 %=         10; break;
+			case  1: d = p1;              p1 =           0; break;
+			default: 
+#if defined(_MSC_VER)
+				__assume(0);
+#elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
+				__builtin_unreachable();
+#else
+				d = 0;
+#endif
+		}
+		if (d || *len)
+			buffer[(*len)++] = '0' + static_cast<char>(d);
+		kappa--;
+		uint64_t tmp = (static_cast<uint64_t>(p1) << -one.e) + p2;
+		if (tmp <= delta) {
+			*K += kappa;
+			GrisuRound(buffer, *len, delta, tmp, static_cast<uint64_t>(kPow10[kappa]) << -one.e, wp_w.f);
+			return;
+		}
+	}
+
+	// kappa = 0
+	for (;;) {
+		p2 *= 10;
+		delta *= 10;
+		char d = static_cast<char>(p2 >> -one.e);
+		if (d || *len)
+			buffer[(*len)++] = '0' + d;
+		p2 &= one.f - 1;
+		kappa--;
+		if (p2 < delta) {
+			*K += kappa;
+			GrisuRound(buffer, *len, delta, p2, one.f, wp_w.f * kPow10[-kappa]);
+			return;
+		}
+	}
+}
+
+inline void Grisu2(double value, char* buffer, int* length, int* K) {
+	const DiyFp v(value);
+	DiyFp w_m, w_p;
+	v.NormalizedBoundaries(&w_m, &w_p);
+
+	const DiyFp c_mk = GetCachedPower(w_p.e, K);
+	const DiyFp W = v.Normalize() * c_mk;
+	DiyFp Wp = w_p * c_mk;
+	DiyFp Wm = w_m * c_mk;
+	Wm.f++;
+	Wp.f--;
+	DigitGen(W, Wp, Wp.f - Wm.f, buffer, length, K);
+}
+
+//inline const char* GetDigitsLut() {
+//	static const char cDigitsLut[200] = {
+//		'0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9',
+//		'1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9',
+//		'2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9',
+//		'3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9',
+//		'4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9',
+//		'5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9',
+//		'6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9',
+//		'7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7', '7', '8', '7', '9',
+//		'8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9',
+//		'9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9'
+//	};
+//	return cDigitsLut;
+//}
+
+inline char* WriteExponent(int K, char* buffer) {
+	if (K < 0) {
+		*buffer++ = '-';
+		K = -K;
+	}
+
+	if (K >= 100) {
+		*buffer++ = '0' + static_cast<char>(K / 100);
+		K %= 100;
+		const char* d = GetDigitsLut() + K * 2;
+		*buffer++ = d[0];
+		*buffer++ = d[1];
+	}
+	else if (K >= 10) {
+		const char* d = GetDigitsLut() + K * 2;
+		*buffer++ = d[0];
+		*buffer++ = d[1];
+	}
+	else
+		*buffer++ = '0' + static_cast<char>(K);
+
+	return buffer;
+}
+
+inline char* Prettify(char* buffer, int length, int k) {
+	const int kk = length + k;	// 10^(kk-1) <= v < 10^kk
+
+	if (length <= kk && kk <= 21) {
+		// 1234e7 -> 12340000000
+		for (int i = length; i < kk; i++)
+			buffer[i] = '0';
+		buffer[kk] = '.';
+		buffer[kk + 1] = '0';
+		return &buffer[kk + 2];
+	}
+	else if (0 < kk && kk <= 21) {
+		// 1234e-2 -> 12.34
+		memmove(&buffer[kk + 1], &buffer[kk], length - kk);
+		buffer[kk] = '.';
+		return &buffer[length + 1];
+	}
+	else if (-6 < kk && kk <= 0) {
+		// 1234e-6 -> 0.001234
+		const int offset = 2 - kk;
+		memmove(&buffer[offset], &buffer[0], length);
+		buffer[0] = '0';
+		buffer[1] = '.';
+		for (int i = 2; i < offset; i++)
+			buffer[i] = '0';
+		return &buffer[length + offset];
+	}
+	else if (length == 1) {
+		// 1e30
+		buffer[1] = 'e';
+		return WriteExponent(kk - 1, &buffer[2]);
+	}
+	else {
+		// 1234e30 -> 1.234e33
+		memmove(&buffer[2], &buffer[1], length - 1);
+		buffer[1] = '.';
+		buffer[length + 1] = 'e';
+		return WriteExponent(kk - 1, &buffer[0 + length + 2]);
+	}
+}
+
+inline char* dtoa(double value, char* buffer) {
+	if (value == 0) {
+		buffer[0] = '0';
+		buffer[1] = '.';
+		buffer[2] = '0';
+		return &buffer[3];
+	}
+	else {
+		if (value < 0) {
+			*buffer++ = '-';
+			value = -value;
+		}
+		int length, K;
+		Grisu2(value, buffer, &length, &K);
+		return Prettify(buffer, length, K);
+	}
+}
+
+} // namespace internal
+} // namespace rapidjson
+
+#endif // RAPIDJSON_DTOA_
diff --git a/include/rapidjson/writer.h b/include/rapidjson/writer.h
index d72d274..af9fea3 100644
--- a/include/rapidjson/writer.h
+++ b/include/rapidjson/writer.h
@@ -4,9 +4,9 @@
 #include "rapidjson.h"
 #include "internal/stack.h"
 #include "internal/strfunc.h"
+#include "internal/dtoa.h"
 #include "internal/itoa.h"
 #include "stringbuffer.h"
-#include <cstdio>	// snprintf() or _sprintf_s()
 #include <new>		// placement new
 
 #ifdef _MSC_VER
@@ -239,25 +239,17 @@ protected:
 
 	bool WriteUint64(uint64_t u64) {
 		char buffer[20];
-		const char* end = internal::u64toa(u64, buffer);
-		for (const char* p = buffer; p != end; ++p)
+		char* end = internal::u64toa(u64, buffer);
+		for (char* p = buffer; p != end; ++p)
 			os_->Put(*p);
 		return true;
 	}
 
-#ifdef _MSC_VER
-#define RAPIDJSON_SNPRINTF sprintf_s
-#else
-#define RAPIDJSON_SNPRINTF snprintf
-#endif
-
-	//! \todo Optimization with custom double-to-string converter.
 	bool WriteDouble(double d) {
-		char buffer[100];
-		int ret = RAPIDJSON_SNPRINTF(buffer, sizeof(buffer), "%.*g", doublePrecision_, d);
-		RAPIDJSON_ASSERT(ret >= 1);
-		for (int i = 0; i < ret; i++)
-			os_->Put(buffer[i]);
+		char buffer[25];
+		char* end = internal::dtoa(d, buffer);
+		for (char* p = buffer; p != end; ++p)
+			os_->Put(*p);
 		return true;
 	}
 #undef RAPIDJSON_SNPRINTF
@@ -403,6 +395,14 @@ inline bool Writer<StringBuffer>::WriteUint64(uint64_t u) {
 	return true;
 }
 
+template<>
+inline bool Writer<StringBuffer>::WriteDouble(double d) {
+	char *buffer = os_->Push(25);
+	char* end = internal::dtoa(d, buffer);
+	os_->Pop(25 - (end - buffer));
+	return true;
+}
+
 } // namespace rapidjson
 
 #ifdef _MSC_VER
diff --git a/test/unittest/writertest.cpp b/test/unittest/writertest.cpp
index 48d7c4e..c834227 100644
--- a/test/unittest/writertest.cpp
+++ b/test/unittest/writertest.cpp
@@ -60,62 +60,67 @@ TEST(Writer, String) {
 	TEST_ROUNDTRIP("[\"\\\"\\\\/\\b\\f\\n\\r\\t\"]");
 }
 
-TEST(Writer,DoublePrecision) {
-	const char json[] = "[1.2345,1.2345678,0.123456789012,1234567.8]";
+TEST(Writer, Double) {
+	TEST_ROUNDTRIP("[1.2345,1.2345678,0.123456789012,1234567.8]");
 
-	StringBuffer buffer;
-	Writer<StringBuffer> writer(buffer);
-
-	const int kDefaultDoublePrecision = 6;
-	// handling the double precision
-	EXPECT_EQ(writer.GetDoublePrecision(), kDefaultDoublePrecision);
-	writer.SetDoublePrecision(17);
-	EXPECT_EQ(writer.GetDoublePrecision(), 17);
-	writer.SetDoublePrecision(-1); // negative equivalent to reset
-	EXPECT_EQ(writer.GetDoublePrecision(), kDefaultDoublePrecision);
-	writer.SetDoublePrecision(1);
-	writer.SetDoublePrecision();   // reset again
-	EXPECT_EQ(writer.GetDoublePrecision(), kDefaultDoublePrecision);
-
-	{ // write with explicitly increased precision
-		StringStream s(json);
-		Reader reader;
-		reader.Parse<0>(s, writer.SetDoublePrecision(12));
-		EXPECT_EQ(writer.GetDoublePrecision(), 12);
-		EXPECT_STREQ(json, buffer.GetString());
-	}
-	{ // explicit individual double precisions
-		buffer.Clear();
-		writer.Reset(buffer);
-		writer.SetDoublePrecision(2);
-		writer.StartArray();
-		writer.Double(1.2345, 5);
-		writer.Double(1.2345678, 9);
-		writer.Double(0.123456789012, 12);
-		writer.Double(1234567.8, 8);
-		writer.EndArray();
-
-		EXPECT_EQ(writer.GetDoublePrecision(), 2);
-		EXPECT_STREQ(json, buffer.GetString());
-	}
-	{ // write with default precision (output with precision loss)
-		Document d;
-		d.Parse<0>(json);
-		buffer.Clear();
-		writer.Reset(buffer);
-		d.Accept(writer.SetDoublePrecision());
-
-		// parsed again to avoid platform-dependent floating point outputs
-		// (e.g. width of exponents)
-		d.Parse<0>(buffer.GetString());
-		EXPECT_EQ(writer.GetDoublePrecision(), kDefaultDoublePrecision);
-		EXPECT_DOUBLE_EQ(d[0u].GetDouble(), 1.2345);
-		EXPECT_DOUBLE_EQ(d[1u].GetDouble(), 1.23457);
-		EXPECT_DOUBLE_EQ(d[2u].GetDouble(), 0.123457);
-		EXPECT_DOUBLE_EQ(d[3u].GetDouble(), 1234570);
-	}
 }
 
+//TEST(Writer,DoublePrecision) {
+//	const char json[] = "[1.2345,1.2345678,0.123456789012,1234567.8]";
+//
+//	StringBuffer buffer;
+//	Writer<StringBuffer> writer(buffer);
+//
+//	const int kDefaultDoublePrecision = 6;
+//	// handling the double precision
+//	EXPECT_EQ(writer.GetDoublePrecision(), kDefaultDoublePrecision);
+//	writer.SetDoublePrecision(17);
+//	EXPECT_EQ(writer.GetDoublePrecision(), 17);
+//	writer.SetDoublePrecision(-1); // negative equivalent to reset
+//	EXPECT_EQ(writer.GetDoublePrecision(), kDefaultDoublePrecision);
+//	writer.SetDoublePrecision(1);
+//	writer.SetDoublePrecision();   // reset again
+//	EXPECT_EQ(writer.GetDoublePrecision(), kDefaultDoublePrecision);
+//
+//	{ // write with explicitly increased precision
+//		StringStream s(json);
+//		Reader reader;
+//		reader.Parse<0>(s, writer.SetDoublePrecision(12));
+//		EXPECT_EQ(writer.GetDoublePrecision(), 12);
+//		EXPECT_STREQ(json, buffer.GetString());
+//	}
+//	{ // explicit individual double precisions
+//		buffer.Clear();
+//		writer.Reset(buffer);
+//		writer.SetDoublePrecision(2);
+//		writer.StartArray();
+//		writer.Double(1.2345, 5);
+//		writer.Double(1.2345678, 9);
+//		writer.Double(0.123456789012, 12);
+//		writer.Double(1234567.8, 8);
+//		writer.EndArray();
+//
+//		EXPECT_EQ(writer.GetDoublePrecision(), 2);
+//		EXPECT_STREQ(json, buffer.GetString());
+//	}
+//	{ // write with default precision (output with precision loss)
+//		Document d;
+//		d.Parse<0>(json);
+//		buffer.Clear();
+//		writer.Reset(buffer);
+//		d.Accept(writer.SetDoublePrecision());
+//
+//		// parsed again to avoid platform-dependent floating point outputs
+//		// (e.g. width of exponents)
+//		d.Parse<0>(buffer.GetString());
+//		EXPECT_EQ(writer.GetDoublePrecision(), kDefaultDoublePrecision);
+//		EXPECT_DOUBLE_EQ(d[0u].GetDouble(), 1.2345);
+//		EXPECT_DOUBLE_EQ(d[1u].GetDouble(), 1.23457);
+//		EXPECT_DOUBLE_EQ(d[2u].GetDouble(), 0.123457);
+//		EXPECT_DOUBLE_EQ(d[3u].GetDouble(), 1234570);
+//	}
+//}
+
 TEST(Writer, Transcode) {
 	const char json[] = "{\"hello\":\"world\",\"t\":true,\"f\":false,\"n\":null,\"i\":123,\"pi\":3.1416,\"a\":[1,2,3],\"dollar\":\"\x24\",\"cents\":\"\xC2\xA2\",\"euro\":\"\xE2\x82\xAC\",\"gclef\":\"\xF0\x9D\x84\x9E\"}";
 

From 0d915644a4cf03ef14ecffe54e6b8cb506fd40e9 Mon Sep 17 00:00:00 2001
From: Milo Yip <miloyip@gmail.com>
Date: Sat, 9 Aug 2014 21:27:32 +0800
Subject: [PATCH 3/5] Fixed gcc effc++ warning in dtoa.h

---
 include/rapidjson/internal/dtoa.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/include/rapidjson/internal/dtoa.h b/include/rapidjson/internal/dtoa.h
index a2f8aae..044e2fb 100644
--- a/include/rapidjson/internal/dtoa.h
+++ b/include/rapidjson/internal/dtoa.h
@@ -16,6 +16,11 @@
 namespace rapidjson {
 namespace internal {
 
+#ifdef __GNUC__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(effc++)
+#endif
+
 struct DiyFp {
 	DiyFp() {}
 
@@ -408,6 +413,10 @@ inline char* dtoa(double value, char* buffer) {
 	}
 }
 
+#ifdef __GNUC__
+RAPIDJSON_DIAG_POP
+#endif
+
 } // namespace internal
 } // namespace rapidjson
 

From 1900b7bacea0d3329cd3ecc7aba4262b4bea8307 Mon Sep 17 00:00:00 2001
From: Milo Yip <miloyip@gmail.com>
Date: Sat, 9 Aug 2014 21:37:02 +0800
Subject: [PATCH 4/5] Remove double precision settings API in Writer

---
 doc/sax.md                       | 16 +--------
 include/rapidjson/prettywriter.h | 12 -------
 include/rapidjson/writer.h       | 48 ++-------------------------
 test/unittest/writertest.cpp     | 56 --------------------------------
 4 files changed, 3 insertions(+), 129 deletions(-)

diff --git a/doc/sax.md b/doc/sax.md
index 9ad2e4b..bbdc550 100644
--- a/doc/sax.md
+++ b/doc/sax.md
@@ -225,7 +225,7 @@ You may doubt that, why not just using `sprintf()` or `std::stringstream` to bui
 There are various reasons:
 1. `Writer` must output a well-formed JSON. If there is incorrect event sequence (e.g. `Int()` just after `StartObject()`), it generates assertion fail in debug mode.
 2. `Writer::String()` can handle string escaping (e.g. converting code point `U+000A` to `\n`) and Unicode transcoding.
-3. `Writer` handles number output consistently. For example, user can set precision for `Double()`.
+3. `Writer` handles number output consistently.
 4. `Writer` implements the event handler concept. It can be used to handle events from `Reader`, `Document` or other event publisher.
 5. `Writer` can be optimized for different platforms.
 
@@ -258,20 +258,6 @@ The last one, `Allocator` is the type of allocator, which is used for allocating
 
 Besides, the constructor of `Writer` has a `levelDepth` parameter. This parameter affects the initial memory allocated for storing information per hierarchy level.
 
-## Precision (#WriterPrecision)
-
-When using `Double()`, the precision of output can be specified, for example:
-
-~~~~~~~~~~cpp
-writer.SetDoublePrecision(4);
-writer.StartArary();
-writer.Double(3.14159265359);
-writer.EndArray();
-~~~~~~~~~~
-~~~~~~~~~~
-[3.1416]
-~~~~~~~~~~
-
 ## PrettyWriter {#PrettyWriter}
 
 While the output of `Writer` is the most condensed JSON without white-spaces, suitable for network transfer or storage, it is not easily readable by human.
diff --git a/include/rapidjson/prettywriter.h b/include/rapidjson/prettywriter.h
index 02b9420..d351468 100644
--- a/include/rapidjson/prettywriter.h
+++ b/include/rapidjson/prettywriter.h
@@ -31,9 +31,6 @@ public:
 	PrettyWriter(OutputStream& os, Allocator* allocator = 0, size_t levelDepth = Base::kDefaultLevelDepth) : 
 		Base(os, allocator, levelDepth), indentChar_(' '), indentCharCount_(4) {}
 
-	//! Overridden for fluent API, see \ref Writer::SetDoublePrecision()
-	PrettyWriter& SetDoublePrecision(int p) { Base::SetDoublePrecision(p); return *this; }
-
 	//! Set custom indentation.
 	/*! \param indentChar		Character for indentation. Must be whitespace character (' ', '\\t', '\\n', '\\r').
 		\param indentCharCount	Number of indent characters for each indentation level.
@@ -119,15 +116,6 @@ public:
 	//! Simpler but slower overload.
 	bool String(const Ch* str) { return String(str, internal::StrLen(str)); }
 
-	//! Overridden for fluent API, see \ref Writer::Double()
-	bool Double(double d, int precision) {
-		int oldPrecision = Base::GetDoublePrecision();
-		SetDoublePrecision(precision);
-		bool ret = Double(d);
-		SetDoublePrecision(oldPrecision);
-		return ret;
-	}
-
 	//@}
 protected:
 	void PrettyPrefix(Type type) {
diff --git a/include/rapidjson/writer.h b/include/rapidjson/writer.h
index af9fea3..9d25b27 100644
--- a/include/rapidjson/writer.h
+++ b/include/rapidjson/writer.h
@@ -43,12 +43,10 @@ public:
 		\param levelDepth Initial capacity of stack.
 	*/
 	Writer(OutputStream& os, Allocator* allocator = 0, size_t levelDepth = kDefaultLevelDepth) : 
-		os_(&os), level_stack_(allocator, levelDepth * sizeof(Level)),
-		doublePrecision_(kDefaultDoublePrecision), hasRoot_(false) {}
+		os_(&os), level_stack_(allocator, levelDepth * sizeof(Level)), hasRoot_(false) {}
 
 	Writer(Allocator* allocator = 0, size_t levelDepth = kDefaultLevelDepth) :
-		os_(0), level_stack_(allocator, levelDepth * sizeof(Level)),
-		doublePrecision_(kDefaultDoublePrecision), hasRoot_(false) {}
+		os_(0), level_stack_(allocator, levelDepth * sizeof(Level)), hasRoot_(false) {}
 
 	//! Reset the writer with a new stream.
 	/*!
@@ -70,7 +68,6 @@ public:
 	*/
 	void Reset(OutputStream& os) {
 		os_ = &os;
-		doublePrecision_ = kDefaultDoublePrecision;
 		hasRoot_ = false;
 		level_stack_.Clear();
 	}
@@ -83,21 +80,6 @@ public:
 		return hasRoot_ && level_stack_.Empty();
 	}
 
-	//! Set the number of significant digits for \c double values
-	/*! When writing a \c double value to the \c OutputStream, the number
-		of significant digits is limited to 6 by default.
-		\param p maximum number of significant digits (default: 6)
-		\return The Writer itself for fluent API.
-	*/
-	Writer& SetDoublePrecision(int p = kDefaultDoublePrecision) {
-		if (p < 0) p = kDefaultDoublePrecision; // negative precision is ignored
-		doublePrecision_ = p;
-		return *this;
-	}
-
-	//! \see SetDoublePrecision()
-	int GetDoublePrecision() const { return doublePrecision_; }
-
 	/*!@name Implementation of Handler
 		\see Handler
 	*/
@@ -112,12 +94,6 @@ public:
 
 	//! Writes the given \c double value to the stream
 	/*!
-		The number of significant digits (the precision) to be written
-		can be set by \ref SetDoublePrecision() for the Writer:
-		\code
-		Writer<...> writer(...);
-		writer.SetDoublePrecision(12).Double(M_PI);
-		\endcode
 		\param d The value to be written.
 		\return Whether it is succeed.
 	*/
@@ -167,23 +143,6 @@ public:
 	/*! @name Convenience extensions */
 	//@{
 
-	//! Writes the given \c double value to the stream (explicit precision)
-	/*!
-		The currently set double precision is ignored in favor of the explicitly
-		given precision for this value.
-		\see Double(), SetDoublePrecision(), GetDoublePrecision()
-		\param d The value to be written
-		\param precision The number of significant digits for this value
-		\return Whether it is succeeded.
-	*/
-	bool Double(double d, int precision) {
-		int oldPrecision = GetDoublePrecision();
-		SetDoublePrecision(precision);
-		bool ret = Double(d);
-		SetDoublePrecision(oldPrecision);
-		return ret;
-	}
-
 	//! Simpler but slower overload.
 	bool String(const Ch* str) { return String(str, internal::StrLen(str)); }
 
@@ -350,11 +309,8 @@ protected:
 
 	OutputStream* os_;
 	internal::Stack<Allocator> level_stack_;
-	int doublePrecision_;
 	bool hasRoot_;
 
-	static const int kDefaultDoublePrecision = 6;
-
 private:
 	// Prohibit copy constructor & assignment operator.
 	Writer(const Writer&);
diff --git a/test/unittest/writertest.cpp b/test/unittest/writertest.cpp
index c834227..59543b0 100644
--- a/test/unittest/writertest.cpp
+++ b/test/unittest/writertest.cpp
@@ -65,62 +65,6 @@ TEST(Writer, Double) {
 
 }
 
-//TEST(Writer,DoublePrecision) {
-//	const char json[] = "[1.2345,1.2345678,0.123456789012,1234567.8]";
-//
-//	StringBuffer buffer;
-//	Writer<StringBuffer> writer(buffer);
-//
-//	const int kDefaultDoublePrecision = 6;
-//	// handling the double precision
-//	EXPECT_EQ(writer.GetDoublePrecision(), kDefaultDoublePrecision);
-//	writer.SetDoublePrecision(17);
-//	EXPECT_EQ(writer.GetDoublePrecision(), 17);
-//	writer.SetDoublePrecision(-1); // negative equivalent to reset
-//	EXPECT_EQ(writer.GetDoublePrecision(), kDefaultDoublePrecision);
-//	writer.SetDoublePrecision(1);
-//	writer.SetDoublePrecision();   // reset again
-//	EXPECT_EQ(writer.GetDoublePrecision(), kDefaultDoublePrecision);
-//
-//	{ // write with explicitly increased precision
-//		StringStream s(json);
-//		Reader reader;
-//		reader.Parse<0>(s, writer.SetDoublePrecision(12));
-//		EXPECT_EQ(writer.GetDoublePrecision(), 12);
-//		EXPECT_STREQ(json, buffer.GetString());
-//	}
-//	{ // explicit individual double precisions
-//		buffer.Clear();
-//		writer.Reset(buffer);
-//		writer.SetDoublePrecision(2);
-//		writer.StartArray();
-//		writer.Double(1.2345, 5);
-//		writer.Double(1.2345678, 9);
-//		writer.Double(0.123456789012, 12);
-//		writer.Double(1234567.8, 8);
-//		writer.EndArray();
-//
-//		EXPECT_EQ(writer.GetDoublePrecision(), 2);
-//		EXPECT_STREQ(json, buffer.GetString());
-//	}
-//	{ // write with default precision (output with precision loss)
-//		Document d;
-//		d.Parse<0>(json);
-//		buffer.Clear();
-//		writer.Reset(buffer);
-//		d.Accept(writer.SetDoublePrecision());
-//
-//		// parsed again to avoid platform-dependent floating point outputs
-//		// (e.g. width of exponents)
-//		d.Parse<0>(buffer.GetString());
-//		EXPECT_EQ(writer.GetDoublePrecision(), kDefaultDoublePrecision);
-//		EXPECT_DOUBLE_EQ(d[0u].GetDouble(), 1.2345);
-//		EXPECT_DOUBLE_EQ(d[1u].GetDouble(), 1.23457);
-//		EXPECT_DOUBLE_EQ(d[2u].GetDouble(), 0.123457);
-//		EXPECT_DOUBLE_EQ(d[3u].GetDouble(), 1234570);
-//	}
-//}
-
 TEST(Writer, Transcode) {
 	const char json[] = "{\"hello\":\"world\",\"t\":true,\"f\":false,\"n\":null,\"i\":123,\"pi\":3.1416,\"a\":[1,2,3],\"dollar\":\"\x24\",\"cents\":\"\xC2\xA2\",\"euro\":\"\xE2\x82\xAC\",\"gclef\":\"\xF0\x9D\x84\x9E\"}";
 

From c54915297b530639ae0eac97bff34df4693c7e15 Mon Sep 17 00:00:00 2001
From: Milo Yip <miloyip@gmail.com>
Date: Mon, 11 Aug 2014 00:30:31 +0800
Subject: [PATCH 5/5] Change CountDecimalDigit32() to simple implementation

It is simple and pure C++. Performance tests also show that it is even
faster than the original version, owing to the distribution of n, although
the performance gain is not noticeable in RapidJSON.
---
 include/rapidjson/internal/dtoa.h | 32 +++++++++++--------------------
 1 file changed, 11 insertions(+), 21 deletions(-)

diff --git a/include/rapidjson/internal/dtoa.h b/include/rapidjson/internal/dtoa.h
index 044e2fb..44f5c11 100644
--- a/include/rapidjson/internal/dtoa.h
+++ b/include/rapidjson/internal/dtoa.h
@@ -218,27 +218,17 @@ inline void GrisuRound(char* buffer, int len, uint64_t delta, uint64_t rest, uin
 }
 
 inline unsigned CountDecimalDigit32(uint32_t n) {
-	static const uint32_t powers_of_10[] = {
-		0,
-		10,
-		100,
-		1000,
-		10000,
-		100000,
-		1000000,
-		10000000,
-		100000000,
-		1000000000
-	};
-
-#ifdef _MSC_VER
-	unsigned long i = 0;
-	_BitScanReverse(&i, n | 1);
-	uint32_t t = (i + 1) * 1233 >> 12;
-#elif __GNUC__
-	uint32_t t = (32 - __builtin_clz(n | 1)) * 1233 >> 12;
-#endif
-	return t - (n < powers_of_10[t]) + 1;
+	// A simple pure C++ implementation was faster than the __builtin_clz / _BitScanReverse version in this situation.
+	if (n < 10) return 1;
+	if (n < 100) return 2;
+	if (n < 1000) return 3;
+	if (n < 10000) return 4;
+	if (n < 100000) return 5;
+	if (n < 1000000) return 6;
+	if (n < 10000000) return 7;
+	if (n < 100000000) return 8;
+	if (n < 1000000000) return 9;
+	return 10;
 }
 
 inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buffer, int* len, int* K) {