From 54f0156e8cbf1a58243d3a2b35f61311e1a034a4 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 14 Apr 2014 16:15:55 +0200 Subject: [PATCH] Set HLL_SPARSE_MAX to 3000. After running a few benchmarks, 3000 looks like a reasonable value to keep HLLs with a few thousand elements small while the CPU cost is still not huge. This covers all the cases where the dense representation would use N orders of magnitude more space, like in the case of many HLLs with cardinality of a few tens or hundreds. It is not impossible that in the future this becomes user configurable; however, it is easy to pick an unreasonable value by just looking at savings in the space dimension without checking what happens in the time dimension. --- src/hyperloglog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index c4cb5674d..f5df8fc31 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -202,7 +202,7 @@ struct hllhdr { #define HLL_SPARSE 1 /* Sparse encoding */ #define HLL_MAX_ENCODING 1 -#define HLL_SPARSE_MAX 12000 +#define HLL_SPARSE_MAX 3000 static char *invalid_hll_err = "Corrupted HLL object detected";