From 76f102e460540379c9bbe39ee88410706ce9ba35 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 7 Apr 2014 08:57:05 +0200 Subject: [PATCH 01/58] Add casting to match printf format. adjustOpenFilesLimit() and clusterUpdateSlotsWithConfig() that were assuming uint64_t is the same as unsigned long long, which is true probably for all the systems out there that we target, but still GCC emitted a warning since technically they are two different types. --- src/cluster.c | 5 +++-- src/redis.c | 14 +++++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index 0e2f41456..106c5a41b 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -1178,8 +1178,9 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc "I've still keys about this slot! " "Putting the slot in IMPORTING state. " "Please run the 'redis-trib fix' command.", - j, sender->name, senderConfigEpoch, - myself->configEpoch); + j, sender->name, + (unsigned long long) senderConfigEpoch, + (unsigned long long) myself->configEpoch); server.cluster->importing_slots_from[j] = sender; } diff --git a/src/redis.c b/src/redis.c index cc48f1de8..da821abf7 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1555,24 +1555,28 @@ void adjustOpenFilesLimit(void) { redisLog(REDIS_WARNING,"Your current 'ulimit -n' " "of %llu is not enough for Redis to start. " "Please increase your open file limit to at least " - "%llu. Exiting.", oldlimit, maxfiles); + "%llu. Exiting.", + (unsigned long long) oldlimit, + (unsigned long long) maxfiles); exit(1); } redisLog(REDIS_WARNING,"You requested maxclients of %d " "requiring at least %llu max file descriptors.", - old_maxclients, maxfiles); + old_maxclients, + (unsigned long long) maxfiles); redisLog(REDIS_WARNING,"Redis can't set maximum open files " "to %llu because of OS error: %s.", - maxfiles, strerror(setrlimit_error)); + (unsigned long long) maxfiles, strerror(setrlimit_error)); redisLog(REDIS_WARNING,"Current maximum open files is %llu. " "maxclients has been reduced to %d to compensate for " "low ulimit. " "If you need higher maxclients increase 'ulimit -n'.", - oldlimit, server.maxclients); + (unsigned long long) oldlimit, server.maxclients); } else { redisLog(REDIS_NOTICE,"Increased maximum number of open files " "to %llu (it was originally set to %llu).", - maxfiles, oldlimit); + (unsigned long long) maxfiles, + (unsigned long long) oldlimit); } } } From a13ac3588983aab17312b8a17f54cf0362512411 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 9 Apr 2014 18:56:00 +0200 Subject: [PATCH 02/58] HyperLogLog sparse representation description and macros. --- src/hyperloglog.c | 107 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 104 insertions(+), 3 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index aba74803a..07846ad33 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -53,7 +53,18 @@ * [2] P. Flajolet, Éric Fusy, O. Gandouet, and F. Meunier. Hyperloglog: The * analysis of a near-optimal cardinality estimation algorithm. * - * The representation used by Redis is the following: + * Redis uses two representations: + * + * 1) A "dense" representation where every entry is represented by + * a 6-bit integer. + * 2) A "sparse" representation using run length compression suitable + * for representing HyperLogLogs with many registers set to 0 in + * a memory efficient way. + * + * Dense representation + * === + * + * The dense representation used by Redis is the following: * * +--------+--------+--------+------// //--+----------+------+-----+ * |11000000|22221111|33333322|55444444 .... | uint64_t | HYLL | Ver | @@ -75,7 +86,85 @@ * * When the most significant bit in the most significant byte of the cached * cardinality is set, it means that the data structure was modified and - * we can't reuse the cached value that must be recomputed. */ + * we can't reuse the cached value that must be recomputed. + * + * Sparse representation + * === + * + * The sparse representation encodes registers using three possible + * kind of "opcodes", two composed of just one byte, and one composed + * of two bytes. The opcodes are called ZERO, XZERO and VAL. + * + * ZERO opcode is represented as 00xxxxxx. The 6-bit integer represented + * by the six bits 'xxxxxx', plus 1, means that there are N registers set + * to 0. This opcode can represent from 1 to 64 contiguous registers set + * to the value of 0. + * + * XZERO opcode is represented by two bytes 01xxxxxx yyyyyyyy. The 14-bit + * integer represented by the bits 'xxxxxx' as most significant bits and + * 'yyyyyyyy' as least significant bits, plus 1, means that there are N + * registers set to 0. This opcode can represent from 65 to 16384 contiguous + * registers set to the value of 0. + * + * VAL opcode is represented as 1vvvvxxx. It contains a 4-bit integer + * representing the value of a register, and a 3-bit integer representing + * the number of contiguous registers set to that value 'vvvv'. + * As with the other opcodes, to obtain the value and run length, the + * integers vvvv and xxx must be additioned to 1. + * This opcode can represent values from 1 to 16, repeated from 1 to 8 times. + * + * The sparse representation can't represent registers with a value greater + * than 16, however it is very unlikely that we find such a register in an + * HLL with a cardinality where the sparse representation is still more + * memory efficient than the dense representation. When this happens the + * HLL is converted to the dense representation. + * + * The sparse representation is purely positional. For example a sparse + * representation of an empty HLL is just: XZERO:16384. + * + * An HLL having only 3 non-zero registers at position 1000, 1020, 1021 + * respectively set to 2, 3, 3, is represented by the following three + * opcodes: + * + * XZERO:1000 (Registers 0-999 are set to 0) + * VAL:2,1 (1 register set to value 2, that is register 1000) + * ZERO:19 (Registers 1001-1019 set to 0) + * VAL:3,2 (2 registers set to value 3, that is registers 1020,1021) + * XZERO:15362 (Registers 1022-16383 set to 0) + * + * In the example the sparse representation used just 7 bytes instead + * of 12k in order to represent the HLL registers. In general for low + * cardinality there is a big win in terms of space efficiency, traded + * with CPU time since the sparse representation is slower to access: + * + * The following table shows real-world space savings obtained: + * + * cardinality 1: 5 bytes (0.00244140625 bits/reg, 1 registers) + * cardinality 10: 31 bytes (0.01513671875 bits/reg, 10 registers) + * cardinality 100: 271 bytes (0.13232421875 bits/reg, 100 registers) + * cardinality 1000: 1906 bytes (0.9306640625 bits/reg, 971 registers) + * cardinality 2000: 3517 bytes (1.71728515625 bits/reg, 1888 registers) + * cardinality 3000: 4918 bytes (2.4013671875 bits/reg, 2745 registers) + * cardinality 4000: 6129 bytes (2.99267578125 bits/reg, 3552 registers) + * cardinality 5000: 7206 bytes (3.5185546875 bits/reg, 4297 registers) + * cardinality 6000: 8099 bytes (3.95458984375 bits/reg, 5013 registers) + * cardinality 7000: 8868 bytes (4.330078125 bits/reg, 5673 registers) + * cardinality 8000: 9571 bytes (4.67333984375 bits/reg, 6312 registers) + * cardinality 9000: 10138 bytes (4.9501953125 bits/reg, 6901 registers) + * cardinality 10000: 10717 bytes (5.23291015625 bits/reg, 7473 registers}) + * cardinality 11000: 11137 bytes (5.43798828125 bits/reg, 8005 registers}) + * cardinality 12000: 11514 bytes (5.6220703125 bits/reg, 8517 registers}) + * cardinality 13000: 11809 bytes (5.76611328125 bits/reg, 8962 registers}) + * cardinality 14000: 12055 bytes (5.88623046875 bits/reg, 9384 registers}) + * cardinality 15000: 12285 bytes (5.99853515625 bits/reg, 9790 registers}) + * cardinality 16000: 12459 bytes (6.08349609375 bits/reg, 10180 registers}) + * + * At cardinality around ~16000 is when it is no longer more space efficient + * to use the sparse representation. However the exact maximum length of the + * sparse representation when this implementation switches to the dense + * representation is configured via the define REDIS_HLL_SPARSE_MAX and + * can be smaller than 12k in order to save CPU time. + */ #define REDIS_HLL_P 14 /* The greater is P, the smaller the error. */ #define REDIS_HLL_REGISTERS (1<> _fb8; \ } while(0) +/* Macros to access the sparse representation. + * The macros parameter is expected to be an uint8_t pointer. */ +#define HLL_SPARSE_IS_ZERO(p) (((*p) & 0xc0) == 0) /* 00xxxxxx */ +#define HLL_SPARSE_IS_XZERO(p) (((*p) & 0xc0) == 0x40) /* 01xxxxxx */ +#define HLL_SPARSE_IS_VAL(p) ((*p) & 0x80) /* 1vvvvxxx */ +#define HLL_SPARSE_ZERO_LEN(p) ((*p) & 0x3f) +#define HLL_SPARSE_XZERO_LEN(p) ((((*p) & 0x3f) << 6) | (*p)) +#define HLL_SPARSE_VAL_VALUE(p) (((*p) >> 3) & 0xf) +#define HLL_SPARSE_VAL_LEN(p) ((*p) & 0x7) + /* ========================= HyperLogLog algorithm ========================= */ /* Our hash function is MurmurHash2, 64 bit version. From 1ea25e7cdb12e3569eb18a2c930eab3ecb635455 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 10 Apr 2014 16:36:31 +0200 Subject: [PATCH 03/58] HyperLogLog sparse representation slightly modified. After running a few simulations with different alternative encodings, it was found that the VAL opcode performs better using 5 bits for the value and 2 bits for the run length, at least for cardinalities in the range of interest. --- src/hyperloglog.c | 78 +++++++++++++++++++++++++---------------------- 1 file changed, 41 insertions(+), 37 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 07846ad33..6cc627461 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -91,8 +91,8 @@ * Sparse representation * === * - * The sparse representation encodes registers using three possible - * kind of "opcodes", two composed of just one byte, and one composed + * The sparse representation encodes registers using a run length + * encoding composed of three opcodes, two using one byte, and one using * of two bytes. The opcodes are called ZERO, XZERO and VAL. * * ZERO opcode is represented as 00xxxxxx. The 6-bit integer represented @@ -106,15 +106,15 @@ * registers set to 0. This opcode can represent from 65 to 16384 contiguous * registers set to the value of 0. * - * VAL opcode is represented as 1vvvvxxx. It contains a 4-bit integer - * representing the value of a register, and a 3-bit integer representing - * the number of contiguous registers set to that value 'vvvv'. - * As with the other opcodes, to obtain the value and run length, the - * integers vvvv and xxx must be additioned to 1. - * This opcode can represent values from 1 to 16, repeated from 1 to 8 times. + * VAL opcode is represented as 1vvvvvxx. It contains a 5-bit integer + * representing the value of a register, and a 2-bit integer representing + * the number of contiguous registers set to that value 'vvvvv'. + * To obtain the value and run length, the integers vvvvv and xx must be + * incremented by one. This opcode can represent values from 1 to 32, + * repeated from 1 to 4 times. * * The sparse representation can't represent registers with a value greater - * than 16, however it is very unlikely that we find such a register in an + * than 32, however it is very unlikely that we find such a register in an * HLL with a cardinality where the sparse representation is still more * memory efficient than the dense representation. When this happens the * HLL is converted to the dense representation. @@ -137,33 +137,37 @@ * cardinality there is a big win in terms of space efficiency, traded * with CPU time since the sparse representation is slower to access: * - * The following table shows real-world space savings obtained: + * The following table shows average cardinality vs bytes used, 100 + * samples per cardinality (when the set was not representable because + * of registers with too big value, the dense representation size was used + * as a sample). * - * cardinality 1: 5 bytes (0.00244140625 bits/reg, 1 registers) - * cardinality 10: 31 bytes (0.01513671875 bits/reg, 10 registers) - * cardinality 100: 271 bytes (0.13232421875 bits/reg, 100 registers) - * cardinality 1000: 1906 bytes (0.9306640625 bits/reg, 971 registers) - * cardinality 2000: 3517 bytes (1.71728515625 bits/reg, 1888 registers) - * cardinality 3000: 4918 bytes (2.4013671875 bits/reg, 2745 registers) - * cardinality 4000: 6129 bytes (2.99267578125 bits/reg, 3552 registers) - * cardinality 5000: 7206 bytes (3.5185546875 bits/reg, 4297 registers) - * cardinality 6000: 8099 bytes (3.95458984375 bits/reg, 5013 registers) - * cardinality 7000: 8868 bytes (4.330078125 bits/reg, 5673 registers) - * cardinality 8000: 9571 bytes (4.67333984375 bits/reg, 6312 registers) - * cardinality 9000: 10138 bytes (4.9501953125 bits/reg, 6901 registers) - * cardinality 10000: 10717 bytes (5.23291015625 bits/reg, 7473 registers}) - * cardinality 11000: 11137 bytes (5.43798828125 bits/reg, 8005 registers}) - * cardinality 12000: 11514 bytes (5.6220703125 bits/reg, 8517 registers}) - * cardinality 13000: 11809 bytes (5.76611328125 bits/reg, 8962 registers}) - * cardinality 14000: 12055 bytes (5.88623046875 bits/reg, 9384 registers}) - * cardinality 15000: 12285 bytes (5.99853515625 bits/reg, 9790 registers}) - * cardinality 16000: 12459 bytes (6.08349609375 bits/reg, 10180 registers}) + * 100 267 + * 200 485 + * 300 678 + * 400 859 + * 500 1033 + * 600 1205 + * 700 1375 + * 800 1544 + * 900 1713 + * 1000 1882 + * 2000 3480 + * 3000 4879 + * 4000 6089 + * 5000 7138 + * 6000 8042 + * 7000 8823 + * 8000 9500 + * 9000 10088 + * 10000 10591 * - * At cardinality around ~16000 is when it is no longer more space efficient - * to use the sparse representation. However the exact maximum length of the - * sparse representation when this implementation switches to the dense - * representation is configured via the define REDIS_HLL_SPARSE_MAX and - * can be smaller than 12k in order to save CPU time. + * The dense representation uses 12288 bytes, so there is a big win up to + * a cardinality of ~2000-3000. For bigger cardinalities the constant times + * involved in updating the sparse representation is not justified by the + * memory savings. The exact maximum length of the sparse representation + * when this implementation switches to the dense representation is + * configured via the define REDIS_HLL_SPARSE_MAX. */ #define REDIS_HLL_P 14 /* The greater is P, the smaller the error. */ @@ -332,11 +336,11 @@ * The macros parameter is expected to be an uint8_t pointer. */ #define HLL_SPARSE_IS_ZERO(p) (((*p) & 0xc0) == 0) /* 00xxxxxx */ #define HLL_SPARSE_IS_XZERO(p) (((*p) & 0xc0) == 0x40) /* 01xxxxxx */ -#define HLL_SPARSE_IS_VAL(p) ((*p) & 0x80) /* 1vvvvxxx */ +#define HLL_SPARSE_IS_VAL(p) ((*p) & 0x80) /* 1vvvvvxx */ #define HLL_SPARSE_ZERO_LEN(p) ((*p) & 0x3f) #define HLL_SPARSE_XZERO_LEN(p) ((((*p) & 0x3f) << 6) | (*p)) -#define HLL_SPARSE_VAL_VALUE(p) (((*p) >> 3) & 0xf) -#define HLL_SPARSE_VAL_LEN(p) ((*p) & 0x7) +#define HLL_SPARSE_VAL_VALUE(p) (((*p) >> 2) & 0x1f) +#define HLL_SPARSE_VAL_LEN(p) ((*p) & 0x3) /* ========================= HyperLogLog algorithm ========================= */ From 4a86f708475894038470e67d381fd52b5cb1ccd1 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 11 Apr 2014 09:26:45 +0200 Subject: [PATCH 04/58] HyperLogLog refactoring to support different encodings. Metadata are now placed at the start of the representation as an header. There is a proper structure to access the representation. Still work to do in order to truly abstract the implementation from the representation, commands still work assuming dense representation. --- src/hyperloglog.c | 236 ++++++++++++++++++++++++++-------------------- 1 file changed, 136 insertions(+), 100 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 6cc627461..98ea4f006 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -61,33 +61,42 @@ * for representing HyperLogLogs with many registers set to 0 in * a memory efficient way. * + * + * HLL header + * === + * + * Both the dense and sparse representation have a 16 byte header as follows: + * + * +------+---+-----+----------+ + * | HYLL | E | N/U | Cardin. | + * +------+---+-----+----------+ + * + * The first 4 bytes are a magic string set to the bytes "HYLL". + * "E" is one byte encoding, currently set to HLL_DENSE or + * HLL_SPARSE. N/U are three not used bytes. + * + * The "Cardin." field is a 64 bit integer stored in little endian format + * with the latest cardinality computed that can be reused if the data + * structure was not modified since the last computation (this is useful + * because there are high probabilities that HLLADD operations don't + * modify the actual data structure and hence the approximated cardinality). + * + * When the most significant bit in the most significant byte of the cached + * cardinality is set, it means that the data structure was modified and + * we can't reuse the cached value that must be recomputed. + * * Dense representation * === * * The dense representation used by Redis is the following: * - * +--------+--------+--------+------// //--+----------+------+-----+ - * |11000000|22221111|33333322|55444444 .... | uint64_t | HYLL | Ver | - * +--------+--------+--------+------// //--+----------+------+-----+ + * +--------+--------+--------+------// //--+ + * |11000000|22221111|33333322|55444444 .... | + * +--------+--------+--------+------// //--+ * * The 6 bits counters are encoded one after the other starting from the * LSB to the MSB, and using the next bytes as needed. * - * At the end of the 16k counters, there is an additional 64 bit integer - * stored in little endian format with the latest cardinality computed that - * can be reused if the data structure was not modified since the last - * computation (this is useful because there are high probabilities that - * HLLADD operations don't modify the actual data structure and hence the - * approximated cardinality). - * - * After the cached cardinality there are 4 bytes of magic set to the - * string "HYLL", and a 4 bytes version field that is reserved for - * future uses and is currently set to 0. - * - * When the most significant bit in the most significant byte of the cached - * cardinality is set, it means that the data structure was modified and - * we can't reuse the cached value that must be recomputed. - * * Sparse representation * === * @@ -167,17 +176,31 @@ * involved in updating the sparse representation is not justified by the * memory savings. The exact maximum length of the sparse representation * when this implementation switches to the dense representation is - * configured via the define REDIS_HLL_SPARSE_MAX. + * configured via the define HLL_SPARSE_MAX. */ -#define REDIS_HLL_P 14 /* The greater is P, the smaller the error. */ -#define REDIS_HLL_REGISTERS (1<card[0] |= (1<<7) +#define HLL_VALID_CACHE(hdr) (((hdr)->card[0] & (1<<7)) == 0) + +#define HLL_P 14 /* The greater is P, the smaller the error. */ +#define HLL_REGISTERS (1<> _fb) | (b1 << _fb8)) & REDIS_HLL_REGISTER_MAX; \ + target = ((b0 >> _fb) | (b1 << _fb8)) & HLL_REGISTER_MAX; \ } while(0) /* Set the value of the register at position 'regnum' to 'val'. * 'p' is an array of unsigned bytes. */ #define HLL_SET_REGISTER(p,regnum,val) do { \ uint8_t *_p = (uint8_t*) p; \ - unsigned long _byte = regnum*REDIS_HLL_BITS/8; \ - unsigned long _fb = regnum*REDIS_HLL_BITS&7; \ + unsigned long _byte = regnum*HLL_BITS/8; \ + unsigned long _fb = regnum*HLL_BITS&7; \ unsigned long _fb8 = 8 - _fb; \ unsigned long _v = val; \ - _p[_byte] &= ~(REDIS_HLL_REGISTER_MAX << _fb); \ + _p[_byte] &= ~(HLL_REGISTER_MAX << _fb); \ _p[_byte] |= _v << _fb; \ - _p[_byte+1] &= ~(REDIS_HLL_REGISTER_MAX >> _fb8); \ + _p[_byte+1] &= ~(HLL_REGISTER_MAX >> _fb8); \ _p[_byte+1] |= _v >> _fb8; \ } while(0) @@ -399,7 +422,7 @@ uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) { * Actually nothing is added, but the max 0 pattern counter of the subset * the element belongs to is incremented if needed. * - * 'registers' is expected to have room for REDIS_HLL_REGISTERS plus an + * 'registers' is expected to have room for HLL_REGISTERS plus an * additional byte on the right. This requirement is met by sds strings * automatically since they are implicitly null terminated. * @@ -410,7 +433,7 @@ int hllAdd(uint8_t *registers, unsigned char *ele, size_t elesize) { uint64_t hash, bit, index; uint8_t oldcount, count; - /* Count the number of zeroes starting from bit REDIS_HLL_REGISTERS + /* Count the number of zeroes starting from bit HLL_REGISTERS * (that is a power of two corresponding to the first bit we don't use * as index). The max run can be 64-P+1 bits. * @@ -423,7 +446,7 @@ int hllAdd(uint8_t *registers, unsigned char *ele, size_t elesize) { * there are high probabilities to find a 1 after a few iterations. */ hash = MurmurHash64A(ele,elesize,0xadc83b19ULL); hash |= ((uint64_t)1<<63); /* Make sure the loop terminates. */ - bit = REDIS_HLL_REGISTERS; /* First bit not used to address the register. */ + bit = HLL_REGISTERS; /* First bit not used to address the register. */ count = 1; /* Initialized to 1 since we count the "00000...1" pattern. */ while((hash & bit) == 0) { count++; @@ -431,7 +454,7 @@ int hllAdd(uint8_t *registers, unsigned char *ele, size_t elesize) { } /* Update the register if this element produced a longer run of zeroes. */ - index = hash & REDIS_HLL_P_MASK; /* Index a register inside registers. */ + index = hash & HLL_P_MASK; /* Index a register inside registers. */ HLL_GET_REGISTER(oldcount,registers,index); if (count > oldcount) { HLL_SET_REGISTER(registers,index,count); @@ -444,7 +467,7 @@ int hllAdd(uint8_t *registers, unsigned char *ele, size_t elesize) { /* Return the approximated cardinality of the set based on the armonic * mean of the registers values. */ uint64_t hllCount(uint8_t *registers) { - double m = REDIS_HLL_REGISTERS; + double m = HLL_REGISTERS; double alpha = 0.7213/(1+1.079/m); double E = 0; int ez = 0; /* Number of registers equal to 0. */ @@ -467,7 +490,7 @@ uint64_t hllCount(uint8_t *registers) { * Redis default is to use 16384 registers 6 bits each. The code works * with other values by modifying the defines, but for our target value * we take a faster path with unrolled loops. */ - if (REDIS_HLL_REGISTERS == 16384 && REDIS_HLL_BITS == 6) { + if (HLL_REGISTERS == 16384 && HLL_BITS == 6) { uint8_t *r = registers; unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15; @@ -499,7 +522,7 @@ uint64_t hllCount(uint8_t *registers) { r += 12; } } else { - for (j = 0; j < REDIS_HLL_REGISTERS; j++) { + for (j = 0; j < HLL_REGISTERS; j++) { unsigned long reg; HLL_GET_REGISTER(reg,registers,j); @@ -552,35 +575,49 @@ robj *createHLLObject(void) { /* Create a string of the right size filled with zero bytes. * Note that the cached cardinality is set to 0 as a side effect * that is exactly the cardinality of an empty HLL. */ - o = createObject(REDIS_STRING,sdsnewlen(NULL,REDIS_HLL_SIZE)); + o = createObject(REDIS_STRING,sdsnewlen(NULL,HLL_DENSE_SIZE)); p = o->ptr; - memcpy(p+REDIS_HLL_SIZE-8,"HYLL",4); + memcpy(p,"HYLL",4); return o; } -/* Check if the object is a String of REDIS_HLL_SIZE bytes. +/* Check if the object is a String with a valid HLL representation. * Return REDIS_OK if this is true, otherwise reply to the client * with an error and return REDIS_ERR. */ int isHLLObjectOrReply(redisClient *c, robj *o) { + struct hllhdr *hdr; + /* Key exists, check type */ if (checkType(c,o,REDIS_STRING)) return REDIS_ERR; /* Error already sent. */ - /* If this is a string representing an HLL, the size should match - * exactly. */ - if (stringObjectLen(o) != REDIS_HLL_SIZE) { - addReplySds(c, - sdsnew("-WRONGTYPE Key is not a valid " - "HyperLogLog string value.\r\n")); - return REDIS_ERR; - } + if (stringObjectLen(o) < sizeof(*hdr)) goto invalid; + hdr = o->ptr; + + /* Magic should be "HYLL". */ + if (hdr->magic[0] != 'H' || hdr->magic[1] != 'Y' || + hdr->magic[2] != 'L' || hdr->magic[3] != 'L') goto invalid; + + if (hdr->encoding > HLL_MAX_ENCODING) goto invalid; + + /* Dense representation string length should match exactly. */ + if (hdr->encoding == HLL_DENSE && + stringObjectLen(o) != HLL_DENSE_SIZE) goto invalid; + + /* All tests passed. */ return REDIS_OK; + +invalid: + addReplySds(c, + sdsnew("-WRONGTYPE Key is not a valid " + "HyperLogLog string value.\r\n")); + return REDIS_ERR; } /* PFADD var ele ele ele ... ele => :0 or :1 */ void pfaddCommand(redisClient *c) { robj *o = lookupKeyWrite(c->db,c->argv[1]); - uint8_t *registers; + struct hllhdr *hdr; int updated = 0, j; if (o == NULL) { @@ -595,9 +632,9 @@ void pfaddCommand(redisClient *c) { o = dbUnshareStringValue(c->db,c->argv[1],o); } /* Perform the low level ADD operation for every element. */ - registers = o->ptr; + hdr = o->ptr; for (j = 2; j < c->argc; j++) { - if (hllAdd(registers, (unsigned char*)c->argv[j]->ptr, + if (hllAdd(hdr->registers, (unsigned char*)c->argv[j]->ptr, sdslen(c->argv[j]->ptr))) { updated++; @@ -607,8 +644,7 @@ void pfaddCommand(redisClient *c) { signalModifiedKey(c->db,c->argv[1]); notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"pfadd",c->argv[1],c->db->id); server.dirty++; - /* Invalidate the cached cardinality. */ - registers[REDIS_HLL_SIZE-9] |= (1<<7); + HLL_INVALIDATE_CACHE(hdr); } addReply(c, updated ? shared.cone : shared.czero); } @@ -616,7 +652,7 @@ void pfaddCommand(redisClient *c) { /* PFCOUNT var -> approximated cardinality of set. */ void pfcountCommand(redisClient *c) { robj *o = lookupKeyRead(c->db,c->argv[1]); - uint8_t *registers; + struct hllhdr *hdr; uint64_t card; if (o == NULL) { @@ -628,28 +664,28 @@ void pfcountCommand(redisClient *c) { o = dbUnshareStringValue(c->db,c->argv[1],o); /* Check if the cached cardinality is valid. */ - registers = o->ptr; - if ((registers[REDIS_HLL_SIZE-9] & (1<<7)) == 0) { + hdr = o->ptr; + if (HLL_VALID_CACHE(hdr)) { /* Just return the cached value. */ - card = (uint64_t)registers[REDIS_HLL_SIZE-16]; - card |= (uint64_t)registers[REDIS_HLL_SIZE-15] << 8; - card |= (uint64_t)registers[REDIS_HLL_SIZE-14] << 16; - card |= (uint64_t)registers[REDIS_HLL_SIZE-13] << 24; - card |= (uint64_t)registers[REDIS_HLL_SIZE-12] << 32; - card |= (uint64_t)registers[REDIS_HLL_SIZE-11] << 40; - card |= (uint64_t)registers[REDIS_HLL_SIZE-10] << 48; - card |= (uint64_t)registers[REDIS_HLL_SIZE-9] << 56; + card = (uint64_t)hdr->card[0]; + card |= (uint64_t)hdr->card[1] << 8; + card |= (uint64_t)hdr->card[2] << 16; + card |= (uint64_t)hdr->card[3] << 24; + card |= (uint64_t)hdr->card[4] << 32; + card |= (uint64_t)hdr->card[5] << 40; + card |= (uint64_t)hdr->card[6] << 48; + card |= (uint64_t)hdr->card[7] << 56; } else { /* Recompute it and update the cached value. */ - card = hllCount(registers); - registers[REDIS_HLL_SIZE-16] = card & 0xff; - registers[REDIS_HLL_SIZE-15] = (card >> 8) & 0xff; - registers[REDIS_HLL_SIZE-14] = (card >> 16) & 0xff; - registers[REDIS_HLL_SIZE-13] = (card >> 24) & 0xff; - registers[REDIS_HLL_SIZE-12] = (card >> 32) & 0xff; - registers[REDIS_HLL_SIZE-11] = (card >> 40) & 0xff; - registers[REDIS_HLL_SIZE-10] = (card >> 48) & 0xff; - registers[REDIS_HLL_SIZE-9] = (card >> 56) & 0xff; + card = hllCount(hdr->registers); + hdr->card[0] = card & 0xff; + hdr->card[1] = (card >> 8) & 0xff; + hdr->card[2] = (card >> 16) & 0xff; + hdr->card[3] = (card >> 24) & 0xff; + hdr->card[4] = (card >> 32) & 0xff; + hdr->card[5] = (card >> 40) & 0xff; + hdr->card[6] = (card >> 48) & 0xff; + hdr->card[7] = (card >> 56) & 0xff; /* This is not considered a read-only command even if the * data structure is not modified, since the cached value * may be modified and given that the HLL is a Redis string @@ -663,8 +699,8 @@ void pfcountCommand(redisClient *c) { /* PFMERGE dest src1 src2 src3 ... srcN => OK */ void pfmergeCommand(redisClient *c) { - uint8_t max[REDIS_HLL_REGISTERS]; - uint8_t *registers; + uint8_t max[HLL_REGISTERS]; + struct hllhdr *hdr; int j, i; /* Compute an HLL with M[i] = MAX(M[i]_j). @@ -681,9 +717,9 @@ void pfmergeCommand(redisClient *c) { /* Merge with this HLL with our 'max' HHL by setting max[i] * to MAX(max[i],hll[i]). */ - registers = o->ptr; - for (i = 0; i < REDIS_HLL_REGISTERS; i++) { - HLL_GET_REGISTER(val,registers,i); + hdr = o->ptr; + for (i = 0; i < HLL_REGISTERS; i++) { + HLL_GET_REGISTER(val,hdr->registers,i); if (val > max[i]) max[i] = val; } } @@ -705,11 +741,11 @@ void pfmergeCommand(redisClient *c) { /* Write the resulting HLL to the destination HLL registers and * invalidate the cached value. */ - registers = o->ptr; - for (j = 0; j < REDIS_HLL_REGISTERS; j++) { - HLL_SET_REGISTER(registers,j,max[j]); + hdr = o->ptr; + for (j = 0; j < HLL_REGISTERS; j++) { + HLL_SET_REGISTER(hdr->registers,j,max[j]); } - registers[REDIS_HLL_SIZE-9] |= (1<<7); + HLL_INVALIDATE_CACHE(hdr); signalModifiedKey(c->db,c->argv[1]); /* We generate an HLLADD event for HLLMERGE for semantical simplicity @@ -724,27 +760,27 @@ void pfmergeCommand(redisClient *c) { /* PFSELFTEST * This command performs a self-test of the HLL registers implementation. * Something that is not easy to test from within the outside. */ -#define REDIS_HLL_TEST_CYCLES 1000 +#define HLL_TEST_CYCLES 1000 void pfselftestCommand(redisClient *c) { int j, i; - sds bitcounters = sdsnewlen(NULL,REDIS_HLL_SIZE); - uint8_t bytecounters[REDIS_HLL_REGISTERS]; + sds bitcounters = sdsnewlen(NULL,HLL_DENSE_SIZE); + uint8_t bytecounters[HLL_REGISTERS]; /* Test 1: access registers. * The test is conceived to test that the different counters of our data * structure are accessible and that setting their values both result in * the correct value to be retained and not affect adjacent values. */ - for (j = 0; j < REDIS_HLL_TEST_CYCLES; j++) { + for (j = 0; j < HLL_TEST_CYCLES; j++) { /* Set the HLL counters and an array of unsigned byes of the * same size to the same set of random values. */ - for (i = 0; i < REDIS_HLL_REGISTERS; i++) { - unsigned int r = rand() & REDIS_HLL_REGISTER_MAX; + for (i = 0; i < HLL_REGISTERS; i++) { + unsigned int r = rand() & HLL_REGISTER_MAX; bytecounters[i] = r; HLL_SET_REGISTER(bitcounters,i,r); } /* Check that we are able to retrieve the same values. */ - for (i = 0; i < REDIS_HLL_REGISTERS; i++) { + for (i = 0; i < HLL_REGISTERS; i++) { unsigned int val; HLL_GET_REGISTER(val,bitcounters,i); @@ -764,8 +800,8 @@ void pfselftestCommand(redisClient *c) { * We check that the error is smaller than 4 times than the expected * standard error, to make it very unlikely for the test to fail because * of a "bad" run. */ - memset(bitcounters,0,REDIS_HLL_SIZE); - double relerr = 1.04/sqrt(REDIS_HLL_REGISTERS); + memset(bitcounters,0,HLL_DENSE_SIZE); + double relerr = 1.04/sqrt(HLL_REGISTERS); int64_t checkpoint = 1000; uint64_t seed = (uint64_t)rand() | (uint64_t)rand() << 32; uint64_t ele; @@ -798,7 +834,7 @@ cleanup: * Return the registers values of the specified HLL. */ void pfgetregCommand(redisClient *c) { robj *o = lookupKeyRead(c->db,c->argv[1]); - uint8_t *registers; + struct hllhdr *hdr; int j; if (o == NULL) { @@ -807,12 +843,12 @@ void pfgetregCommand(redisClient *c) { } else { if (isHLLObjectOrReply(c,o) != REDIS_OK) return; - registers = o->ptr; - addReplyMultiBulkLen(c,REDIS_HLL_REGISTERS); - for (j = 0; j < REDIS_HLL_REGISTERS; j++) { + hdr = o->ptr; + addReplyMultiBulkLen(c,HLL_REGISTERS); + for (j = 0; j < HLL_REGISTERS; j++) { uint8_t val; - HLL_GET_REGISTER(val,registers,j); + HLL_GET_REGISTER(val,hdr->registers,j); addReplyLongLong(c,val); } } From c9405002fcb9caacc59c3bbd1c6b1d0a15144579 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 11 Apr 2014 09:47:52 +0200 Subject: [PATCH 05/58] hllAdd() refactored into two functions. Also dense representation access macro renamed accordingly. --- src/hyperloglog.c | 59 ++++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 98ea4f006..32d2bbd91 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -331,7 +331,7 @@ struct hllhdr { /* Store the value of the register at position 'regnum' into variable 'target'. * 'p' is an array of unsigned bytes. */ -#define HLL_GET_REGISTER(target,p,regnum) do { \ +#define HLL_DENSE_GET_REGISTER(target,p,regnum) do { \ uint8_t *_p = (uint8_t*) p; \ unsigned long _byte = regnum*HLL_BITS/8; \ unsigned long _fb = regnum*HLL_BITS&7; \ @@ -343,7 +343,7 @@ struct hllhdr { /* Set the value of the register at position 'regnum' to 'val'. * 'p' is an array of unsigned bytes. */ -#define HLL_SET_REGISTER(p,regnum,val) do { \ +#define HLL_DENSE_SET_REGISTER(p,regnum,val) do { \ uint8_t *_p = (uint8_t*) p; \ unsigned long _byte = regnum*HLL_BITS/8; \ unsigned long _fb = regnum*HLL_BITS&7; \ @@ -418,20 +418,12 @@ uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) { return h; } -/* "Add" the element in the hyperloglog data structure. - * Actually nothing is added, but the max 0 pattern counter of the subset - * the element belongs to is incremented if needed. - * - * 'registers' is expected to have room for HLL_REGISTERS plus an - * additional byte on the right. This requirement is met by sds strings - * automatically since they are implicitly null terminated. - * - * The function always succeed, however if as a result of the operation - * the approximated cardinality changed, 1 is returned. Otherwise 0 - * is returned. */ -int hllAdd(uint8_t *registers, unsigned char *ele, size_t elesize) { +/* Given a string element to add to the HyperLogLog, returns the length + * of the pattern 000..1 of the element hash. As a side effect 'regp' is + * set to the register index this element hashes to. */ +int hllPatLen(unsigned char *ele, size_t elesize, int *regp) { uint64_t hash, bit, index; - uint8_t oldcount, count; + int count; /* Count the number of zeroes starting from bit HLL_REGISTERS * (that is a power of two corresponding to the first bit we don't use @@ -445,6 +437,7 @@ int hllAdd(uint8_t *registers, unsigned char *ele, size_t elesize) { * This may sound like inefficient, but actually in the average case * there are high probabilities to find a 1 after a few iterations. */ hash = MurmurHash64A(ele,elesize,0xadc83b19ULL); + index = hash & HLL_P_MASK; /* Register index. */ hash |= ((uint64_t)1<<63); /* Make sure the loop terminates. */ bit = HLL_REGISTERS; /* First bit not used to address the register. */ count = 1; /* Initialized to 1 since we count the "00000...1" pattern. */ @@ -452,12 +445,30 @@ int hllAdd(uint8_t *registers, unsigned char *ele, size_t elesize) { count++; bit <<= 1; } + if (regp) *regp = (int) index; + return count; +} + +/* "Add" the element in the hyperloglog data structure. + * Actually nothing is added, but the max 0 pattern counter of the subset + * the element belongs to is incremented if needed. + * + * 'registers' is expected to have room for HLL_REGISTERS plus an + * additional byte on the right. This requirement is met by sds strings + * automatically since they are implicitly null terminated. + * + * The function always succeed, however if as a result of the operation + * the approximated cardinality changed, 1 is returned. Otherwise 0 + * is returned. */ +int hllAdd(uint8_t *registers, unsigned char *ele, size_t elesize) { + uint8_t oldcount, count; + int index; /* Update the register if this element produced a longer run of zeroes. */ - index = hash & HLL_P_MASK; /* Index a register inside registers. */ - HLL_GET_REGISTER(oldcount,registers,index); + count = hllPatLen(ele,elesize,&index); + HLL_DENSE_GET_REGISTER(oldcount,registers,index); if (count > oldcount) { - HLL_SET_REGISTER(registers,index,count); + HLL_DENSE_SET_REGISTER(registers,index,count); return 1; } else { return 0; @@ -525,7 +536,7 @@ uint64_t hllCount(uint8_t *registers) { for (j = 0; j < HLL_REGISTERS; j++) { unsigned long reg; - HLL_GET_REGISTER(reg,registers,j); + HLL_DENSE_GET_REGISTER(reg,registers,j); if (reg == 0) { ez++; E += 1; /* 2^(-reg[j]) is 1 when m is 0. */ @@ -719,7 +730,7 @@ void pfmergeCommand(redisClient *c) { * to MAX(max[i],hll[i]). */ hdr = o->ptr; for (i = 0; i < HLL_REGISTERS; i++) { - HLL_GET_REGISTER(val,hdr->registers,i); + HLL_DENSE_GET_REGISTER(val,hdr->registers,i); if (val > max[i]) max[i] = val; } } @@ -743,7 +754,7 @@ void pfmergeCommand(redisClient *c) { * invalidate the cached value. */ hdr = o->ptr; for (j = 0; j < HLL_REGISTERS; j++) { - HLL_SET_REGISTER(hdr->registers,j,max[j]); + HLL_DENSE_SET_REGISTER(hdr->registers,j,max[j]); } HLL_INVALIDATE_CACHE(hdr); @@ -777,13 +788,13 @@ void pfselftestCommand(redisClient *c) { unsigned int r = rand() & HLL_REGISTER_MAX; bytecounters[i] = r; - HLL_SET_REGISTER(bitcounters,i,r); + HLL_DENSE_SET_REGISTER(bitcounters,i,r); } /* Check that we are able to retrieve the same values. */ for (i = 0; i < HLL_REGISTERS; i++) { unsigned int val; - HLL_GET_REGISTER(val,bitcounters,i); + HLL_DENSE_GET_REGISTER(val,bitcounters,i); if (val != bytecounters[i]) { addReplyErrorFormat(c, "TESTFAILED Register %d should be %d but is %d", @@ -848,7 +859,7 @@ void pfgetregCommand(redisClient *c) { for (j = 0; j < HLL_REGISTERS; j++) { uint8_t val; - HLL_GET_REGISTER(val,hdr->registers,j); + HLL_DENSE_GET_REGISTER(val,hdr->registers,j); addReplyLongLong(c,val); } } From 0bce12ac5c18a717cce5b309adbcb5d62278b50f Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 11 Apr 2014 10:25:07 +0200 Subject: [PATCH 06/58] hllCount() refactored to support multiple representations. --- src/hyperloglog.c | 96 ++++++++++++++++++++++++++++++----------------- 1 file changed, 62 insertions(+), 34 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 32d2bbd91..0cd92a0a5 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -445,11 +445,13 @@ int hllPatLen(unsigned char *ele, size_t elesize, int *regp) { count++; bit <<= 1; } - if (regp) *regp = (int) index; + *regp = (int) index; return count; } -/* "Add" the element in the hyperloglog data structure. +/* ================== Dense representation implementation ================== */ + +/* "Add" the element in the dense hyperloglog data structure. * Actually nothing is added, but the max 0 pattern counter of the subset * the element belongs to is incremented if needed. * @@ -460,7 +462,7 @@ int hllPatLen(unsigned char *ele, size_t elesize, int *regp) { * The function always succeed, however if as a result of the operation * the approximated cardinality changed, 1 is returned. Otherwise 0 * is returned. */ -int hllAdd(uint8_t *registers, unsigned char *ele, size_t elesize) { +int hllDenseAdd(uint8_t *registers, unsigned char *ele, size_t elesize) { uint8_t oldcount, count; int index; @@ -475,30 +477,15 @@ int hllAdd(uint8_t *registers, unsigned char *ele, size_t elesize) { } } -/* Return the approximated cardinality of the set based on the armonic - * mean of the registers values. */ -uint64_t hllCount(uint8_t *registers) { - double m = HLL_REGISTERS; - double alpha = 0.7213/(1+1.079/m); +/* Compute SUM(2^-reg) in the dense representation. + * PE is an array with a pre-computer table of values 2^-reg indexed by reg. + * As a side effect the integer pointed by 'ezp' is set to the number + * of zero registers. */ +double hllDenseSum(uint8_t *registers, double *PE, int *ezp) { double E = 0; - int ez = 0; /* Number of registers equal to 0. */ - int j; + int j, ez = 0; - /* We precompute 2^(-reg[j]) in a small table in order to - * speedup the computation of SUM(2^-register[0..i]). */ - static int initialized = 0; - static double PE[64]; - if (!initialized) { - PE[0] = 1; /* 2^(-reg[j]) is 1 when m is 0. */ - for (j = 1; j < 64; j++) { - /* 2^(-reg[j]) is the same as 1/2^reg[j]. */ - PE[j] = 1.0/(1ULL << j); - } - initialized = 1; - } - - /* Compute SUM(2^-register[0..i]). - * Redis default is to use 16384 registers 6 bits each. The code works + /* Redis default is to use 16384 registers 6 bits each. The code works * with other values by modifying the defines, but for our target value * we take a faster path with unrolled loops. */ if (HLL_REGISTERS == 16384 && HLL_BITS == 6) { @@ -545,6 +532,47 @@ uint64_t hllCount(uint8_t *registers) { } } } + *ezp = ez; + return E; +} + +/* ================== Sparse representation implementation ================= */ + +/* ========================= HyperLogLog Count ============================== + * This is the core of the algorithm where the approximated count is computed. + * The function uses the lower level hllDenseSum() and hllSparseSum() functions + * as helpers to compute the SUM(2^-reg) part of the computation, which is + * representation-specific, while all the rest is common. */ + +/* Return the approximated cardinality of the set based on the armonic + * mean of the registers values. */ +uint64_t hllCount(struct hllhdr *hdr) { + double m = HLL_REGISTERS; + double alpha = 0.7213/(1+1.079/m); + double E; + int ez; /* Number of registers equal to 0. */ + int j; + + /* We precompute 2^(-reg[j]) in a small table in order to + * speedup the computation of SUM(2^-register[0..i]). */ + static int initialized = 0; + static double PE[64]; + if (!initialized) { + PE[0] = 1; /* 2^(-reg[j]) is 1 when m is 0. */ + for (j = 1; j < 64; j++) { + /* 2^(-reg[j]) is the same as 1/2^reg[j]. */ + PE[j] = 1.0/(1ULL << j); + } + initialized = 1; + } + + /* Compute SUM(2^-register[0..i]). */ + if (hdr->encoding == HLL_DENSE) { + E = hllDenseSum(hdr->registers,PE,&ez); + } else { + E = 0; /* FIXME */ + } + /* Muliply the inverse of E for alpha_m * m^2 to have the raw estimate. */ E = (1/E)*alpha*m*m; @@ -645,8 +673,8 @@ void pfaddCommand(redisClient *c) { /* Perform the low level ADD operation for every element. */ hdr = o->ptr; for (j = 2; j < c->argc; j++) { - if (hllAdd(hdr->registers, (unsigned char*)c->argv[j]->ptr, - sdslen(c->argv[j]->ptr))) + if (hllDenseAdd(hdr->registers, (unsigned char*)c->argv[j]->ptr, + sdslen(c->argv[j]->ptr))) { updated++; } @@ -688,7 +716,7 @@ void pfcountCommand(redisClient *c) { card |= (uint64_t)hdr->card[7] << 56; } else { /* Recompute it and update the cached value. */ - card = hllCount(hdr->registers); + card = hllCount(hdr); hdr->card[0] = card & 0xff; hdr->card[1] = (card >> 8) & 0xff; hdr->card[2] = (card >> 16) & 0xff; @@ -775,6 +803,7 @@ void pfmergeCommand(redisClient *c) { void pfselftestCommand(redisClient *c) { int j, i; sds bitcounters = sdsnewlen(NULL,HLL_DENSE_SIZE); + struct hllhdr *hdr = (struct hllhdr*) bitcounters; uint8_t bytecounters[HLL_REGISTERS]; /* Test 1: access registers. @@ -788,13 +817,13 @@ void pfselftestCommand(redisClient *c) { unsigned int r = rand() & HLL_REGISTER_MAX; bytecounters[i] = r; - HLL_DENSE_SET_REGISTER(bitcounters,i,r); + HLL_DENSE_SET_REGISTER(hdr->registers,i,r); } /* Check that we are able to retrieve the same values. */ for (i = 0; i < HLL_REGISTERS; i++) { unsigned int val; - HLL_DENSE_GET_REGISTER(val,bitcounters,i); + HLL_DENSE_GET_REGISTER(val,hdr->registers,i); if (val != bytecounters[i]) { addReplyErrorFormat(c, "TESTFAILED Register %d should be %d but is %d", @@ -811,17 +840,16 @@ void pfselftestCommand(redisClient *c) { * We check that the error is smaller than 4 times than the expected * standard error, to make it very unlikely for the test to fail because * of a "bad" run. */ - memset(bitcounters,0,HLL_DENSE_SIZE); + memset(hdr->registers,0,HLL_DENSE_SIZE-HLL_HDR_SIZE); double relerr = 1.04/sqrt(HLL_REGISTERS); int64_t checkpoint = 1000; uint64_t seed = (uint64_t)rand() | (uint64_t)rand() << 32; uint64_t ele; for (j = 1; j <= 10000000; j++) { ele = j ^ seed; - hllAdd((uint8_t*)bitcounters,(unsigned char*)&ele,sizeof(ele)); + hllDenseAdd(hdr->registers,(unsigned char*)&ele,sizeof(ele)); if (j == checkpoint) { - int64_t abserr = checkpoint- - (int64_t)hllCount((uint8_t*)bitcounters); + int64_t abserr = checkpoint- (int64_t)hllCount(hdr); if (abserr < 0) abserr = -abserr; if (abserr > (uint64_t)(relerr*4*checkpoint)) { addReplyErrorFormat(c, From b609192ddf44fa693223208d0e3fa7307828c5ce Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 11 Apr 2014 17:27:40 +0200 Subject: [PATCH 07/58] HyperLogLog sparse representation initial implementation. Code never tested, but the basic layout is shaped in this commit. Also missing: 1) Sparse -> Dense conversion function. 2) New HLL object creation using the sparse representation. 3) Implementation of PFMERGE for the sparse representation. --- src/hyperloglog.c | 278 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 269 insertions(+), 9 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 0cd92a0a5..a95a21c91 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -112,7 +112,7 @@ * XZERO opcode is represented by two bytes 01xxxxxx yyyyyyyy. The 14-bit * integer represented by the bits 'xxxxxx' as most significant bits and * 'yyyyyyyy' as least significant bits, plus 1, means that there are N - * registers set to 0. This opcode can represent from 65 to 16384 contiguous + * registers set to 0. This opcode can represent from 0 to 16384 contiguous * registers set to the value of 0. * * VAL opcode is represented as 1vvvvvxx. It contains a 5-bit integer @@ -357,13 +357,30 @@ struct hllhdr { /* Macros to access the sparse representation. * The macros parameter is expected to be an uint8_t pointer. */ +#define HLL_SPARSE_XZERO_BIT 0x40 /* 01xxxxxx */ +#define HLL_SPARSE_VAL_BIT 0x80 /* 1vvvvvxx */ #define HLL_SPARSE_IS_ZERO(p) (((*p) & 0xc0) == 0) /* 00xxxxxx */ -#define HLL_SPARSE_IS_XZERO(p) (((*p) & 0xc0) == 0x40) /* 01xxxxxx */ -#define HLL_SPARSE_IS_VAL(p) ((*p) & 0x80) /* 1vvvvvxx */ -#define HLL_SPARSE_ZERO_LEN(p) ((*p) & 0x3f) -#define HLL_SPARSE_XZERO_LEN(p) ((((*p) & 0x3f) << 6) | (*p)) -#define HLL_SPARSE_VAL_VALUE(p) (((*p) >> 2) & 0x1f) -#define HLL_SPARSE_VAL_LEN(p) ((*p) & 0x3) +#define HLL_SPARSE_IS_XZERO(p) (((*p) & 0xc0) == HLL_SPARSE_XZERO_BIT) +#define HLL_SPARSE_IS_VAL(p) ((*p) & HLL_SPARSE_VAL_BIT) +#define HLL_SPARSE_ZERO_LEN(p) (((*p) & 0x3f)+1) +#define HLL_SPARSE_XZERO_LEN(p) (((((*p) & 0x3f) << 6) | (*p))+1) +#define HLL_SPARSE_VAL_VALUE(p) ((((*p) >> 2) & 0x1f)+1) +#define HLL_SPARSE_VAL_LEN(p) (((*p) & 0x3)+1) +#define HLL_SPARSE_VAL_MAX_VALUE 32 +#define HLL_SPARSE_VAL_MAX_LEN 4 +#define HLL_SPARSE_ZERO_MAX_LEN 64 +#define HLL_SPARSE_XZERO_MAX_LEN 16384 +#define HLL_SPARSE_VAL_SET(p,val,len) do { \ + *(p) = (((val)-1)<<2|((len)-1))|HLL_SPARSE_VAL_BIT; \ +} while(0) +#define HLL_SPARSE_ZERO_SET(p,len) do { \ + *(p) = (len)-1; \ +} while(0) +#define HLL_SPARSE_XZERO_SET(p,len) do { \ + int _l = (len)-1; \ + *(p) = (_l>>8) | HLL_SPARSE_XZERO_BIT; \ + *(p+1) = (_l&0xff); \ +} while(0) /* ========================= HyperLogLog algorithm ========================= */ @@ -538,6 +555,248 @@ double hllDenseSum(uint8_t *registers, double *PE, int *ezp) { /* ================== Sparse representation implementation ================= */ +sds hllSparseToDense(sds *sparse) { + return sdsnew("TODO"); +} + +/* "Add" the element in the sparse hyperloglog data structure. + * Actually nothing is added, but the max 0 pattern counter of the subset + * the element belongs to is incremented if needed. + * + * The object 'o' is the String object holding the HLL. The function requires + * a reference to the object in order to be able to enlarge the string if + * needed. + * + * On success, the function returns 1 if the cardinality changed, or 0 + * if the register for this element was not updated. + * + * As a side effect the function may promote the HLL representation from + * sparse to dense: this happens when a register requires to be set to a value + * not representable with the sparse representation, or when the resulting + * size would be greater than HLL_SPARSE_MAX. */ +int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) { + struct hllhdr *hdr; + uint8_t oldcount, count, *sparse, *end, *p, *prev, *next; + int index, first, span; + int is_zero = 0, is_xzero = 0, is_val = 0, runlen = 0; + + /* Update the register if this element produced a longer run of zeroes. */ + count = hllPatLen(ele,elesize,&index); + + /* If the count is too big to be representable by the sparse representation + * switch to dense representation. */ + if (count > HLL_SPARSE_VAL_MAX_VALUE) goto promote; + + /* When updating a sparse representation, sometimes we may need to + * enlarge the buffer for up to 3 bytes in the worst case (XZERO split + * into XZERO-VAL-XZERO). Make sure there is enough space right now + * so that the pointers we take during the execution of the function + * will be valid all the time. */ + o->ptr = sdsMakeRoomFor(o->ptr,3); + + /* Step 1: we need to locate the opcode we need to modify to check + * if a value update is actually needed. */ + sparse = p = ((uint8_t*)o->ptr) + HLL_HDR_SIZE; + end = p + sdslen(o->ptr) - HLL_HDR_SIZE; + + first = 0; + prev = NULL; /* Points to previos opcode at the end of the loop. */ + next = NULL; /* Points to the next opcode at the end of the loop. */ + while(p < end) { + /* Set span to the number of registers covered by this opcode. */ + if (HLL_SPARSE_IS_ZERO(p)) span = HLL_SPARSE_ZERO_LEN(p); + else if (HLL_SPARSE_IS_XZERO(p)) span = HLL_SPARSE_XZERO_LEN(p); + else span = HLL_SPARSE_VAL_LEN(p); + /* Break if this opcode covers the register as 'index'. */ + if (first+span >= index) break; + prev = p; + p += (HLL_SPARSE_IS_XZERO(p)) ? 2 : 1; + first += span; + } + + next = HLL_SPARSE_IS_XZERO(p) ? p+2 : p+1; + if (next >= end) next = NULL; + + /* Cache current opcode type to avoid using the macro again and + * again for something that will not change. + * Also cache the run-length of the opcode. */ + if (HLL_SPARSE_IS_ZERO(p)) { + is_zero = 1; + runlen = HLL_SPARSE_ZERO_LEN(p); + } else if (HLL_SPARSE_IS_XZERO(p)) { + is_xzero = 1; + runlen = HLL_SPARSE_XZERO_LEN(p); + } else { + is_val = 1; + runlen = HLL_SPARSE_VAL_LEN(p); + } + + /* Step 2: After the loop: + * + * 'first' stores to the index of the first register covered + * by the current opcode, which is pointed by 'p'. + * + * 'next' ad 'prev' store respectively the next and previous opcode, + * or NULL if the opcode at 'p' is respectively the last or first. + * + * 'span' is set to the number of registers covered by the current + * opcode. + * + * There are different cases in order to update the data structure + * in place without generating it from scratch: + * + * A) If it is a VAL opcode already set to a value >= our 'count' + * no update is needed, regardless of the VAL run-length field. + * In this case PFADD returns 0 since no changes are performed. + * + * B) If it is a VAL opcode with len = 1 (representing only our + * register) and the value is less than 'count', we just update it + * since this is a trivial case. */ + if (is_val) { + oldcount = HLL_SPARSE_VAL_VALUE(p); + /* Case A. */ + if (oldcount >= count) return 0; + + /* Case B. */ + if (runlen == 1) { + HLL_SPARSE_VAL_SET(p,count,1); + goto updated; + } + } + + /* C) Another trivial to handle case is a ZERO opcode with a len of 1. + * We can just replace it with a VAL opcode with our value and len of 1. */ + if (is_zero && runlen == 1) { + HLL_SPARSE_VAL_SET(p,count,1); + goto updated; + } + + /* D) General case. + * + * The other cases are more complex: our register requires to be updated + * and is either currently represented by a VAL opcode with len > 1, + * by a ZERO opcode with len > 1, or by an XZERO opcode. + * + * In those cases the original opcode must be split into muliple + * opcodes. The worst case is an XZERO split in the middle resuling into + * XZERO - VAL - XZERO, so the resulting sequence max length is + * 5 bytes. + * + * We perform the split writing the new sequence into the 'new' buffer + * with 'newlen' as length. Later the new sequence is inserted in place + * of the old one, possibly moving what is on the right a few bytes + * if the new sequence is longer than the older one. */ + uint8_t seq[5], *n = seq; + int last = first+span-1; /* Last register covered by the sequence. */ + int len; + + if (is_zero || is_xzero) { + /* Handle splitting of ZERO / XZERO. */ + if (index != first) { + len = index-first; + if (len > HLL_SPARSE_ZERO_MAX_LEN) { + HLL_SPARSE_XZERO_SET(n,len); + n += 2; + } else { + HLL_SPARSE_ZERO_SET(n,len); + n++; + } + } + HLL_SPARSE_VAL_SET(n,count,1); + n++; + if (index != last) { + len = last-index; + if (len > HLL_SPARSE_ZERO_MAX_LEN) { + HLL_SPARSE_XZERO_SET(n,len); + n += 2; + } else { + HLL_SPARSE_ZERO_SET(n,len); + n++; + } + } + } else { + /* Handle splitting of VAL. */ + int curval = HLL_SPARSE_VAL_VALUE(p); + + if (index != first) { + len = index-first; + HLL_SPARSE_VAL_SET(n,curval,len); + n++; + } + HLL_SPARSE_VAL_SET(n,count,1); + n++; + if (index != last) { + len = last-index; + HLL_SPARSE_VAL_SET(n,curval,len); + n++; + } + } + + /* Step 3: substitute the new sequence with the old one. + * + * Note that we already allocated space on the sds string + * calling sdsMakeRoomFor(). */ + int seqlen = seq-n; + int oldlen = is_xzero ? 2 : 1; + int deltalen = seqlen-oldlen; + if (deltalen && next) { + memmove(next+deltalen,next,next-sparse); + sdsIncrLen(o->ptr,deltalen); + } + memcpy(p,seq,seqlen); + +updated: + /* Step 4: Merge adjacent values if possible. + * + * The representation was updated, however the resulting representation + * may not be optimal: adjacent opcodes may be merged into a single one. + * We start from the opcode before the one we updated trying to merge + * opcodes up to the next 5 opcodes (since we need to consider the three + * opcodes resuling from the worst-case split of the updated opcode, + * plus the two opcodes at the left and right of the original one). */ + hdr = o->ptr; + HLL_INVALIDATE_CACHE(hdr); + return 1; + +promote: /* Promote to dense representation. */ + o->ptr = hllSparseToDense(o->ptr); + hdr = o->ptr; + return hllDenseAdd(hdr->registers, ele, elesize); +} + +/* Compute SUM(2^-reg) in the sparse representation. + * PE is an array with a pre-computer table of values 2^-reg indexed by reg. + * As a side effect the integer pointed by 'ezp' is set to the number + * of zero registers. */ +double hllSparseSum(uint8_t *sparse, int sparselen, double *PE, int *ezp) { + double E = 0; + int ez = 0, idx = 0, runlen, regval; + uint8_t *end = sparse+sparselen, *p = sparse; + + while(p < end) { + /* Set span to the number of registers covered by this opcode. */ + if (HLL_SPARSE_IS_ZERO(p)) { + runlen = HLL_SPARSE_ZERO_LEN(p); + idx += runlen; + ez += runlen; + E += 1; /* 2^(-reg[j]) is 1 when m is 0. */ + } else if (HLL_SPARSE_IS_XZERO(p)) { + runlen = HLL_SPARSE_XZERO_LEN(p); + idx += runlen; + ez += runlen; + E += 1; /* 2^(-reg[j]) is 1 when m is 0. */ + } else { + runlen = HLL_SPARSE_VAL_LEN(p); + regval = HLL_SPARSE_VAL_VALUE(p); + idx += runlen; + E += PE[regval]*runlen; + } + } + redisAssert(idx == HLL_REGISTERS); + *ezp = ez; + return E; +} + /* ========================= HyperLogLog Count ============================== * This is the core of the algorithm where the approximated count is computed. * The function uses the lower level hllDenseSum() and hllSparseSum() functions @@ -545,7 +804,8 @@ double hllDenseSum(uint8_t *registers, double *PE, int *ezp) { * representation-specific, while all the rest is common. */ /* Return the approximated cardinality of the set based on the armonic - * mean of the registers values. */ + * mean of the registers values. 'hdr' points to the start of the SDS + * representing the String object holding the HLL representation. */ uint64_t hllCount(struct hllhdr *hdr) { double m = HLL_REGISTERS; double alpha = 0.7213/(1+1.079/m); @@ -570,7 +830,7 @@ uint64_t hllCount(struct hllhdr *hdr) { if (hdr->encoding == HLL_DENSE) { E = hllDenseSum(hdr->registers,PE,&ez); } else { - E = 0; /* FIXME */ + E = hllSparseSum(hdr->registers,sdslen((sds)hdr)-HLL_HDR_SIZE,PE,&ez); } /* Muliply the inverse of E for alpha_m * m^2 to have the raw estimate. */ From 10b534899f829587c2201a64cedda46d1342c8ca Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 12 Apr 2014 10:55:28 +0200 Subject: [PATCH 08/58] HyperLogLog sparse to dense conversion function. --- src/hyperloglog.c | 47 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index a95a21c91..5e5064b1b 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -202,6 +202,8 @@ struct hllhdr { #define HLL_SPARSE 1 /* Sparse encoding */ #define HLL_MAX_ENCODING 1 +#define HLL_SPARSE_MAX 3000 + /* =========================== Low level bit macros ========================= */ /* Macros to access the dense representation. @@ -555,8 +557,46 @@ double hllDenseSum(uint8_t *registers, double *PE, int *ezp) { /* ================== Sparse representation implementation ================= */ -sds hllSparseToDense(sds *sparse) { - return sdsnew("TODO"); +/* Convert the HLL with sparse representation given as input in its dense + * representation. Both representations are represented by SDS strings, and + * the input representation is freed as a side effect. */ +sds hllSparseToDense(sds sparse) { + sds dense; + struct hllhdr *hdr, *oldhdr = (struct hllhdr*)sparse; + int idx = 0, runlen, regval; + uint8_t *p = (uint8_t*)sparse, *end = p+sdslen(sparse); + + /* Create a string of the right size filled with zero bytes. + * Note that the cached cardinality is set to 0 as a side effect + * that is exactly the cardinality of an empty HLL. */ + dense = sdsnewlen(NULL,HLL_DENSE_SIZE); + hdr = (struct hllhdr*) dense; + *hdr = *oldhdr; /* This will copy the magic and cached cardinality. */ + hdr->encoding = HLL_DENSE; + + /* Now read the sparse representation and set non-zero registers + * accordingly. */ + p += HLL_HDR_SIZE; + while(p < end) { + if (HLL_SPARSE_IS_ZERO(p)) { + runlen = HLL_SPARSE_ZERO_LEN(p); + idx += runlen; + } else if (HLL_SPARSE_IS_XZERO(p)) { + runlen = HLL_SPARSE_XZERO_LEN(p); + idx += runlen; + } else { + runlen = HLL_SPARSE_VAL_LEN(p); + regval = HLL_SPARSE_VAL_VALUE(p); + while(runlen--) { + HLL_DENSE_SET_REGISTER(hdr->registers,idx,regval); + idx++; + } + } + } + + /* Free the old representation and return the new one. */ + sdsfree(sparse); + return dense; } /* "Add" the element in the sparse hyperloglog data structure. @@ -739,6 +779,8 @@ int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) { int seqlen = seq-n; int oldlen = is_xzero ? 2 : 1; int deltalen = seqlen-oldlen; + + if (deltalen > 0 && sdslen(o->ptr) > HLL_SPARSE_MAX) goto promote; if (deltalen && next) { memmove(next+deltalen,next,next-sparse); sdsIncrLen(o->ptr,deltalen); @@ -774,7 +816,6 @@ double hllSparseSum(uint8_t *sparse, int sparselen, double *PE, int *ezp) { uint8_t *end = sparse+sparselen, *p = sparse; while(p < end) { - /* Set span to the number of registers covered by this opcode. */ if (HLL_SPARSE_IS_ZERO(p)) { runlen = HLL_SPARSE_ZERO_LEN(p); idx += runlen; From 071c4d0fe53a71bfc04bf2cf665971eb86f69d80 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 12 Apr 2014 10:56:11 +0200 Subject: [PATCH 09/58] Create HyperLogLog objects with sparse encoding. --- src/hyperloglog.c | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 5e5064b1b..22d73ce44 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -905,19 +905,37 @@ uint64_t hllCount(struct hllhdr *hdr) { /* ========================== HyperLogLog commands ========================== */ -/* An HyperLogLog object is a string with space for 16k 6-bit integers, - * a cached 64 bit cardinality value, and a 4 byte "magic" and additional - * 4 bytes for version reserved for future use. */ +/* Create an HLL object. We always create the HLL using sparse encoding. + * This will be upgraded to the dense representation as needed. */ robj *createHLLObject(void) { robj *o; - char *p; + struct hllhdr *hdr; + sds s; + uint8_t *p; + int sparselen = HLL_HDR_SIZE + + ((HLL_REGISTERS+(HLL_SPARSE_XZERO_MAX_LEN-1)) / + HLL_SPARSE_XZERO_MAX_LEN); + int aux; - /* Create a string of the right size filled with zero bytes. - * Note that the cached cardinality is set to 0 as a side effect - * that is exactly the cardinality of an empty HLL. */ - o = createObject(REDIS_STRING,sdsnewlen(NULL,HLL_DENSE_SIZE)); - p = o->ptr; - memcpy(p,"HYLL",4); + /* Populate the sparse representation with as many XZERO opcodes as + * needed to represent all the registers. */ + aux = sparselen; + s = sdsnewlen(NULL,sparselen); + p = (uint8_t*)s + HLL_HDR_SIZE; + while(aux) { + int xzero = HLL_SPARSE_XZERO_MAX_LEN-1; + if (xzero > aux) xzero = aux; + HLL_SPARSE_XZERO_SET(p,xzero); + p += 2; + aux -= xzero; + } + redisAssert((p-(uint8_t*)s) == sparselen); + + /* Create the actual object. */ + o = createObject(REDIS_STRING,s); + hdr = o->ptr; + memcpy(hdr->magic,"HYLL",4); + hdr->encoding = HLL_SPARSE; return o; } From 6df05dcf7b494fcb1bb4f6050a06934936ed5495 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 12 Apr 2014 10:59:12 +0200 Subject: [PATCH 10/58] Fix HLL sparse object creation. The function didn't considered the fact that each XZERO opcode is two bytes. --- src/hyperloglog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 22d73ce44..5157cd8ef 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -913,8 +913,8 @@ robj *createHLLObject(void) { sds s; uint8_t *p; int sparselen = HLL_HDR_SIZE + - ((HLL_REGISTERS+(HLL_SPARSE_XZERO_MAX_LEN-1)) / - HLL_SPARSE_XZERO_MAX_LEN); + (((HLL_REGISTERS+(HLL_SPARSE_XZERO_MAX_LEN-1)) / + HLL_SPARSE_XZERO_MAX_LEN)*2); int aux; /* Populate the sparse representation with as many XZERO opcodes as From 179e37b6b061d788eff102f13d21e22ad713a1d8 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 12 Apr 2014 11:02:14 +0200 Subject: [PATCH 11/58] Increment pointer while iterating sparse HLL object. --- src/hyperloglog.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 5157cd8ef..969a86599 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -581,9 +581,11 @@ sds hllSparseToDense(sds sparse) { if (HLL_SPARSE_IS_ZERO(p)) { runlen = HLL_SPARSE_ZERO_LEN(p); idx += runlen; + p++; } else if (HLL_SPARSE_IS_XZERO(p)) { runlen = HLL_SPARSE_XZERO_LEN(p); idx += runlen; + p += 2; } else { runlen = HLL_SPARSE_VAL_LEN(p); regval = HLL_SPARSE_VAL_VALUE(p); @@ -591,6 +593,7 @@ sds hllSparseToDense(sds sparse) { HLL_DENSE_SET_REGISTER(hdr->registers,idx,regval); idx++; } + p++; } } @@ -821,16 +824,19 @@ double hllSparseSum(uint8_t *sparse, int sparselen, double *PE, int *ezp) { idx += runlen; ez += runlen; E += 1; /* 2^(-reg[j]) is 1 when m is 0. */ + p++; } else if (HLL_SPARSE_IS_XZERO(p)) { runlen = HLL_SPARSE_XZERO_LEN(p); idx += runlen; ez += runlen; E += 1; /* 2^(-reg[j]) is 1 when m is 0. */ + p += 2; } else { runlen = HLL_SPARSE_VAL_LEN(p); regval = HLL_SPARSE_VAL_VALUE(p); idx += runlen; E += PE[regval]*runlen; + p++; } } redisAssert(idx == HLL_REGISTERS); From 231325f260aa3e8d00fdd4a84db91afe0bc1f8a8 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 12 Apr 2014 16:37:11 +0200 Subject: [PATCH 12/58] Fix HLL sparse object creation #2. Two vars initialized to wrong values in createHLLObject(). --- src/hyperloglog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 969a86599..704c51376 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -925,11 +925,11 @@ robj *createHLLObject(void) { /* Populate the sparse representation with as many XZERO opcodes as * needed to represent all the registers. */ - aux = sparselen; + aux = HLL_REGISTERS; s = sdsnewlen(NULL,sparselen); p = (uint8_t*)s + HLL_HDR_SIZE; while(aux) { - int xzero = HLL_SPARSE_XZERO_MAX_LEN-1; + int xzero = HLL_SPARSE_XZERO_MAX_LEN; if (xzero > aux) xzero = aux; HLL_SPARSE_XZERO_SET(p,xzero); p += 2; From 2c3256769c7d1b911ff297863c3a9ac061cd4152 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 12 Apr 2014 16:46:08 +0200 Subject: [PATCH 13/58] Macro HLL_SPARSE_XZERO_LEN fixed. --- src/hyperloglog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 704c51376..beacee887 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -365,7 +365,7 @@ struct hllhdr { #define HLL_SPARSE_IS_XZERO(p) (((*p) & 0xc0) == HLL_SPARSE_XZERO_BIT) #define HLL_SPARSE_IS_VAL(p) ((*p) & HLL_SPARSE_VAL_BIT) #define HLL_SPARSE_ZERO_LEN(p) (((*p) & 0x3f)+1) -#define HLL_SPARSE_XZERO_LEN(p) (((((*p) & 0x3f) << 6) | (*p))+1) +#define HLL_SPARSE_XZERO_LEN(p) (((((*p) & 0x3f) << 8) | (*(p+1)))+1) #define HLL_SPARSE_VAL_VALUE(p) ((((*p) >> 2) & 0x1f)+1) #define HLL_SPARSE_VAL_LEN(p) (((*p) & 0x3)+1) #define HLL_SPARSE_VAL_MAX_VALUE 32 From 30476ea26f9fb18c904c8a1597e8c919201d6746 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 12 Apr 2014 16:47:50 +0200 Subject: [PATCH 14/58] hllSparseSum(): multiply 1 * runlen for zero entries. --- src/hyperloglog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index beacee887..7dfc34c19 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -823,13 +823,13 @@ double hllSparseSum(uint8_t *sparse, int sparselen, double *PE, int *ezp) { runlen = HLL_SPARSE_ZERO_LEN(p); idx += runlen; ez += runlen; - E += 1; /* 2^(-reg[j]) is 1 when m is 0. */ + E += 1*runlen; /* 2^(-reg[j]) is 1 when m is 0. */ p++; } else if (HLL_SPARSE_IS_XZERO(p)) { runlen = HLL_SPARSE_XZERO_LEN(p); idx += runlen; ez += runlen; - E += 1; /* 2^(-reg[j]) is 1 when m is 0. */ + E += 1*runlen; /* 2^(-reg[j]) is 1 when m is 0. */ p += 2; } else { runlen = HLL_SPARSE_VAL_LEN(p); From 5c770c87eeb6f5539fba7498cbd8d3a83234c789 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 12 Apr 2014 23:42:48 +0200 Subject: [PATCH 15/58] Abstract hllSparseAdd() / hllDenseAdd() via hllAdd(). --- src/hyperloglog.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 7dfc34c19..8e0cba7ae 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -909,6 +909,16 @@ uint64_t hllCount(struct hllhdr *hdr) { return (uint64_t) E; } +/* Call hllDenseAdd() or hllSparseAdd() according to the HLL encoding. */ +int hllAdd(robj *o, unsigned char *ele, size_t elesize) { + struct hllhdr *hdr = o->ptr; + switch(hdr->encoding) { + case HLL_DENSE: return hllDenseAdd(hdr->registers,ele,elesize); + case HLL_SPARSE: return hllSparseAdd(o,ele,elesize); + default: return -1; /* Invalid representation. */ + } +} + /* ========================== HyperLogLog commands ========================== */ /* Create an HLL object. We always create the HLL using sparse encoding. @@ -996,14 +1006,19 @@ void pfaddCommand(redisClient *c) { o = dbUnshareStringValue(c->db,c->argv[1],o); } /* Perform the low level ADD operation for every element. */ - hdr = o->ptr; for (j = 2; j < c->argc; j++) { - if (hllDenseAdd(hdr->registers, (unsigned char*)c->argv[j]->ptr, - sdslen(c->argv[j]->ptr))) - { + int retval = hllAdd(o, (unsigned char*)c->argv[j]->ptr, + sdslen(c->argv[j]->ptr)); + switch(retval) { + case 1: updated++; + break; + case -1: + addReplyError(c,"Invalid HyperLogLog representation"); + return; } } + hdr = o->ptr; if (updated) { signalModifiedKey(c->db,c->argv[1]); notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"pfadd",c->argv[1],c->db->id); From c66e5e83a8c82d89cbdab0480e11feb91cb452fb Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 12 Apr 2014 23:52:36 +0200 Subject: [PATCH 16/58] Fix seqlen computation in hllSparseAdd(). --- src/hyperloglog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 8e0cba7ae..119cbab1c 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -779,7 +779,7 @@ int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) { * * Note that we already allocated space on the sds string * calling sdsMakeRoomFor(). */ - int seqlen = seq-n; + int seqlen = n-seq; int oldlen = is_xzero ? 2 : 1; int deltalen = seqlen-oldlen; From 2b4f24e7466d94cdc41a2e65fa24e0dae5bf5779 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 12 Apr 2014 23:55:29 +0200 Subject: [PATCH 17/58] Fix hllSparseAdd() new sequence replacement when next is NULL. sdsIncrLen() must be called anyway even if we are replacing the last oppcode of the sparse representation. --- src/hyperloglog.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 119cbab1c..5a19443b4 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -784,10 +784,8 @@ int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) { int deltalen = seqlen-oldlen; if (deltalen > 0 && sdslen(o->ptr) > HLL_SPARSE_MAX) goto promote; - if (deltalen && next) { - memmove(next+deltalen,next,next-sparse); - sdsIncrLen(o->ptr,deltalen); - } + if (deltalen && next) memmove(next+deltalen,next,next-sparse); + sdsIncrLen(o->ptr,deltalen); memcpy(p,seq,seqlen); updated: From fd412fcb557237997b5310a60a9d7bb845f8b925 Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 13 Apr 2014 10:19:12 +0200 Subject: [PATCH 18/58] hllSparseAdd() sanity check for span != 0 added. --- src/hyperloglog.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 5a19443b4..b3235e1fb 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -612,6 +612,7 @@ sds hllSparseToDense(sds sparse) { * * On success, the function returns 1 if the cardinality changed, or 0 * if the register for this element was not updated. + * On error (if the representation is invalid) -1 is returned. * * As a side effect the function may promote the HLL representation from * sparse to dense: this happens when a register requires to be set to a value @@ -645,6 +646,7 @@ int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) { first = 0; prev = NULL; /* Points to previos opcode at the end of the loop. */ next = NULL; /* Points to the next opcode at the end of the loop. */ + span = 0; while(p < end) { /* Set span to the number of registers covered by this opcode. */ if (HLL_SPARSE_IS_ZERO(p)) span = HLL_SPARSE_ZERO_LEN(p); @@ -656,6 +658,7 @@ int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) { p += (HLL_SPARSE_IS_XZERO(p)) ? 2 : 1; first += span; } + if (span == 0) return -1; /* Invalid format. */ next = HLL_SPARSE_IS_XZERO(p) ? p+2 : p+1; if (next >= end) next = NULL; From 5349296f2992f86a936a86643b049c0c09489cac Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 13 Apr 2014 22:59:27 +0200 Subject: [PATCH 19/58] hllSparseToDense API changed to take ref to object. The new API takes directly the object doing everything needed to turn it into a dense representation, including setting the new representation as object->ptr. --- src/hyperloglog.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index b3235e1fb..0f92ff50a 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -560,12 +560,16 @@ double hllDenseSum(uint8_t *registers, double *PE, int *ezp) { /* Convert the HLL with sparse representation given as input in its dense * representation. Both representations are represented by SDS strings, and * the input representation is freed as a side effect. */ -sds hllSparseToDense(sds sparse) { - sds dense; +void hllSparseToDense(robj *o) { + sds sparse = o->ptr, dense; struct hllhdr *hdr, *oldhdr = (struct hllhdr*)sparse; int idx = 0, runlen, regval; uint8_t *p = (uint8_t*)sparse, *end = p+sdslen(sparse); + /* If the representation is already the right one return ASAP. */ + hdr = (struct hllhdr*) sparse; + if (hdr->encoding == HLL_DENSE) return; + /* Create a string of the right size filled with zero bytes. * Note that the cached cardinality is set to 0 as a side effect * that is exactly the cardinality of an empty HLL. */ @@ -597,9 +601,9 @@ sds hllSparseToDense(sds sparse) { } } - /* Free the old representation and return the new one. */ - sdsfree(sparse); - return dense; + /* Free the old representation and set the new one. */ + sdsfree(o->ptr); + o->ptr = dense; } /* "Add" the element in the sparse hyperloglog data structure. @@ -805,7 +809,7 @@ updated: return 1; promote: /* Promote to dense representation. */ - o->ptr = hllSparseToDense(o->ptr); + hllSparseToDense(o); hdr = o->ptr; return hllDenseAdd(hdr->registers, ele, elesize); } From 62106ad517960982010af4bb5480d844d29aa6dd Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 13 Apr 2014 23:01:21 +0200 Subject: [PATCH 20/58] PFDEBUG added, PFGETREG removed. PFDEBUG will be the interface to do debugging tasks with a key containing an HLL object. --- src/hyperloglog.c | 30 +++++++++++++++++++++++------- src/redis.c | 2 +- src/redis.h | 2 +- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 0f92ff50a..f99d188f8 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -1214,26 +1214,42 @@ cleanup: sdsfree(bitcounters); } -/* PFGETREG - * Return the registers values of the specified HLL. */ -void pfgetregCommand(redisClient *c) { - robj *o = lookupKeyRead(c->db,c->argv[1]); +/* PFDEBUG ... args ... + * Different debugging related operations about the HLL implementation. */ +void pfdebugCommand(redisClient *c) { + char *cmd = c->argv[1]->ptr; struct hllhdr *hdr; + robj *o; int j; + o = lookupKeyRead(c->db,c->argv[2]); if (o == NULL) { addReplyError(c,"The specified key does not exist"); return; - } else { - if (isHLLObjectOrReply(c,o) != REDIS_OK) return; + } + if (isHLLObjectOrReply(c,o) != REDIS_OK) return; + o = dbUnshareStringValue(c->db,c->argv[2],o); + hdr = o->ptr; + + /* PFDEBUG GETREG */ + if (!strcasecmp(cmd,"getreg")) { + if (c->argc != 3) goto arityerr; - hdr = o->ptr; addReplyMultiBulkLen(c,HLL_REGISTERS); + hllSparseToDense(o); for (j = 0; j < HLL_REGISTERS; j++) { uint8_t val; HLL_DENSE_GET_REGISTER(val,hdr->registers,j); addReplyLongLong(c,val); } + } else { + addReplyErrorFormat(c,"Unknown PFDEBUG subcommand '%s'", cmd); } + return; + +arityerr: + addReplyErrorFormat(c, + "Wrong number of arguments for the '%s' subcommand",cmd); } + diff --git a/src/redis.c b/src/redis.c index da821abf7..0c0106d75 100644 --- a/src/redis.c +++ b/src/redis.c @@ -274,7 +274,7 @@ struct redisCommand redisCommandTable[] = { {"pfadd",pfaddCommand,-2,"wm",0,NULL,1,1,1,0,0}, {"pfcount",pfcountCommand,2,"w",0,NULL,1,1,1,0,0}, {"pfmerge",pfmergeCommand,-2,"wm",0,NULL,1,-1,1,0,0}, - {"pfgetreg",pfgetregCommand,2,"r",0,NULL,0,0,0,0,0} + {"pfdebug",pfdebugCommand,-3,"r",0,NULL,0,0,0,0,0} }; struct evictionPoolEntry *evictionPoolAlloc(void); diff --git a/src/redis.h b/src/redis.h index edd37bc18..850ec1684 100644 --- a/src/redis.h +++ b/src/redis.h @@ -1460,7 +1460,7 @@ void pfselftestCommand(redisClient *c); void pfaddCommand(redisClient *c); void pfcountCommand(redisClient *c); void pfmergeCommand(redisClient *c); -void pfgetregCommand(redisClient *c); +void pfdebugCommand(redisClient *c); #if defined(__GNUC__) void *calloc(size_t count, size_t size) __attribute__ ((deprecated)); From 2800054f347aa8f4bea6ea64711fa76f94074c32 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 14 Apr 2014 08:59:13 +0200 Subject: [PATCH 21/58] PFDEBUG DECODE added. Provides a human readable description of the opcodes composing a run-length encoded HLL (sparse encoding). The command is only useful for debugging / development tasks. --- src/hyperloglog.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index f99d188f8..9f1827033 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -1243,6 +1243,41 @@ void pfdebugCommand(redisClient *c) { HLL_DENSE_GET_REGISTER(val,hdr->registers,j); addReplyLongLong(c,val); } + } + /* PFDEBUG DECODE */ + else if (!strcasecmp(cmd,"decode")) { + if (c->argc != 3) goto arityerr; + + uint8_t *p = o->ptr, *end = p+sdslen(o->ptr); + sds decoded = sdsempty(); + + if (hdr->encoding != HLL_SPARSE) { + addReplyError(c,"HLL encoding is not sparse"); + return; + } + + p += HLL_HDR_SIZE; + while(p < end) { + int runlen, regval; + + if (HLL_SPARSE_IS_ZERO(p)) { + runlen = HLL_SPARSE_ZERO_LEN(p); + p++; + decoded = sdscatprintf(decoded,"z:%d ",runlen); + } else if (HLL_SPARSE_IS_XZERO(p)) { + runlen = HLL_SPARSE_XZERO_LEN(p); + p += 2; + decoded = sdscatprintf(decoded,"Z:%d ",runlen); + } else { + runlen = HLL_SPARSE_VAL_LEN(p); + regval = HLL_SPARSE_VAL_VALUE(p); + p++; + decoded = sdscatprintf(decoded,"v:%d,%d ",regval,runlen); + } + } + decoded = sdstrim(decoded," "); + addReplyBulkCBuffer(c,decoded,sdslen(decoded)); + sdsfree(decoded); } else { addReplyErrorFormat(c,"Unknown PFDEBUG subcommand '%s'", cmd); } From 4fb59e800d235bc5e1c342ba7abebe1007a877ec Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 14 Apr 2014 09:27:01 +0200 Subject: [PATCH 22/58] hllSparseToDense(): sanity check added. The function checks if all the HLL_REGISTERS were processed during the convertion from sparse to dense encoding, returning REDIS_OK or REDIS_ERR to signal a corruption problem. A bug in PFDEBUG GETREG was fixed: when the object is converted to the dense representation we need to reassign the new pointer to the header structure pointer. --- src/hyperloglog.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 9f1827033..38495a0b3 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -559,8 +559,11 @@ double hllDenseSum(uint8_t *registers, double *PE, int *ezp) { /* Convert the HLL with sparse representation given as input in its dense * representation. Both representations are represented by SDS strings, and - * the input representation is freed as a side effect. */ -void hllSparseToDense(robj *o) { + * the input representation is freed as a side effect. + * + * The function returns REDIS_OK if the sparse representation was valid, + * otherwise REDIS_ERR is returned if the representation was corrupted. */ +int hllSparseToDense(robj *o) { sds sparse = o->ptr, dense; struct hllhdr *hdr, *oldhdr = (struct hllhdr*)sparse; int idx = 0, runlen, regval; @@ -568,7 +571,7 @@ void hllSparseToDense(robj *o) { /* If the representation is already the right one return ASAP. */ hdr = (struct hllhdr*) sparse; - if (hdr->encoding == HLL_DENSE) return; + if (hdr->encoding == HLL_DENSE) return REDIS_OK; /* Create a string of the right size filled with zero bytes. * Note that the cached cardinality is set to 0 as a side effect @@ -601,9 +604,17 @@ void hllSparseToDense(robj *o) { } } + /* If the sparse representation was valid, we expect to find idx + * set to HLL_REGISTERS. */ + if (idx != HLL_REGISTERS) { + sdsfree(dense); + return REDIS_ERR; + } + /* Free the old representation and set the new one. */ sdsfree(o->ptr); o->ptr = dense; + return REDIS_OK; } /* "Add" the element in the sparse hyperloglog data structure. @@ -809,7 +820,7 @@ updated: return 1; promote: /* Promote to dense representation. */ - hllSparseToDense(o); + if (hllSparseToDense(o) == REDIS_ERR) return -1; /* Corrupted HLL. */ hdr = o->ptr; return hllDenseAdd(hdr->registers, ele, elesize); } @@ -1236,7 +1247,11 @@ void pfdebugCommand(redisClient *c) { if (c->argc != 3) goto arityerr; addReplyMultiBulkLen(c,HLL_REGISTERS); - hllSparseToDense(o); + if (hllSparseToDense(o) == REDIS_ERR) { + addReplyError(c,"HLL sparse encoding is corrupted"); + return; + } + hdr = o->ptr; for (j = 0; j < HLL_REGISTERS; j++) { uint8_t val; From ac3655f2a1b05c70ee756cc77e128efda640bbca Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 14 Apr 2014 09:36:27 +0200 Subject: [PATCH 23/58] hllSparseAdd(): more correct dense conversion conditional. We want to promote if the total string size exceeds the resulting size after the upgrade. --- src/hyperloglog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 38495a0b3..df79fc970 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -801,7 +801,7 @@ int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) { int oldlen = is_xzero ? 2 : 1; int deltalen = seqlen-oldlen; - if (deltalen > 0 && sdslen(o->ptr) > HLL_SPARSE_MAX) goto promote; + if (deltalen > 0 && sdslen(o->ptr)+deltalen > HLL_SPARSE_MAX) goto promote; if (deltalen && next) memmove(next+deltalen,next,next-sparse); sdsIncrLen(o->ptr,deltalen); memcpy(p,seq,seqlen); From 70a3bcf3a32501885d73c560b72e4f655615475f Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 14 Apr 2014 09:40:07 +0200 Subject: [PATCH 24/58] Fixed memmove() count in hllSparseAdd(). --- src/hyperloglog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index df79fc970..d4c0a48d4 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -802,7 +802,7 @@ int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) { int deltalen = seqlen-oldlen; if (deltalen > 0 && sdslen(o->ptr)+deltalen > HLL_SPARSE_MAX) goto promote; - if (deltalen && next) memmove(next+deltalen,next,next-sparse); + if (deltalen && next) memmove(next+deltalen,next,end-next); sdsIncrLen(o->ptr,deltalen); memcpy(p,seq,seqlen); From c9ee98b388f08bcea0d3ff2bdcb8c9c9f8229801 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 14 Apr 2014 10:25:19 +0200 Subject: [PATCH 25/58] Fixed error message generation in PFDEBUG GETREG. Bulk length for registers was emitted too early, so if there was a bug the reply looked like a long array with just one element, blocking the client as result. --- src/hyperloglog.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index d4c0a48d4..1b675b2e8 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -1246,12 +1246,13 @@ void pfdebugCommand(redisClient *c) { if (!strcasecmp(cmd,"getreg")) { if (c->argc != 3) goto arityerr; - addReplyMultiBulkLen(c,HLL_REGISTERS); if (hllSparseToDense(o) == REDIS_ERR) { addReplyError(c,"HLL sparse encoding is corrupted"); return; } + hdr = o->ptr; + addReplyMultiBulkLen(c,HLL_REGISTERS); for (j = 0; j < HLL_REGISTERS; j++) { uint8_t val; From d15cd39717d44669c04f474be25c3156024cc582 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 14 Apr 2014 11:04:11 +0200 Subject: [PATCH 26/58] hllSparseAdd() opcode seek stop condition fixed. --- src/hyperloglog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 1b675b2e8..92427ef5c 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -668,7 +668,7 @@ int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) { else if (HLL_SPARSE_IS_XZERO(p)) span = HLL_SPARSE_XZERO_LEN(p); else span = HLL_SPARSE_VAL_LEN(p); /* Break if this opcode covers the register as 'index'. */ - if (first+span >= index) break; + if (index <= first+span-1) break; prev = p; p += (HLL_SPARSE_IS_XZERO(p)) ? 2 : 1; first += span; From cba3a0416051331b018bf5e9306d53fbf4e0e442 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 14 Apr 2014 11:49:53 +0200 Subject: [PATCH 27/58] More robust HLL_SPARSE macros protecting 'p' with parens. Now the macros will work with arguments such as "ptr+1". --- src/hyperloglog.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 92427ef5c..f7db2da21 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -361,13 +361,13 @@ struct hllhdr { * The macros parameter is expected to be an uint8_t pointer. */ #define HLL_SPARSE_XZERO_BIT 0x40 /* 01xxxxxx */ #define HLL_SPARSE_VAL_BIT 0x80 /* 1vvvvvxx */ -#define HLL_SPARSE_IS_ZERO(p) (((*p) & 0xc0) == 0) /* 00xxxxxx */ -#define HLL_SPARSE_IS_XZERO(p) (((*p) & 0xc0) == HLL_SPARSE_XZERO_BIT) -#define HLL_SPARSE_IS_VAL(p) ((*p) & HLL_SPARSE_VAL_BIT) -#define HLL_SPARSE_ZERO_LEN(p) (((*p) & 0x3f)+1) -#define HLL_SPARSE_XZERO_LEN(p) (((((*p) & 0x3f) << 8) | (*(p+1)))+1) -#define HLL_SPARSE_VAL_VALUE(p) ((((*p) >> 2) & 0x1f)+1) -#define HLL_SPARSE_VAL_LEN(p) (((*p) & 0x3)+1) +#define HLL_SPARSE_IS_ZERO(p) (((*(p)) & 0xc0) == 0) /* 00xxxxxx */ +#define HLL_SPARSE_IS_XZERO(p) (((*(p)) & 0xc0) == HLL_SPARSE_XZERO_BIT) +#define HLL_SPARSE_IS_VAL(p) ((*(p)) & HLL_SPARSE_VAL_BIT) +#define HLL_SPARSE_ZERO_LEN(p) (((*(p)) & 0x3f)+1) +#define HLL_SPARSE_XZERO_LEN(p) (((((*(p)) & 0x3f) << 8) | (*((p)+1)))+1) +#define HLL_SPARSE_VAL_VALUE(p) ((((*(p)) >> 2) & 0x1f)+1) +#define HLL_SPARSE_VAL_LEN(p) (((*(p)) & 0x3)+1) #define HLL_SPARSE_VAL_MAX_VALUE 32 #define HLL_SPARSE_VAL_MAX_LEN 4 #define HLL_SPARSE_ZERO_MAX_LEN 64 @@ -381,7 +381,7 @@ struct hllhdr { #define HLL_SPARSE_XZERO_SET(p,len) do { \ int _l = (len)-1; \ *(p) = (_l>>8) | HLL_SPARSE_XZERO_BIT; \ - *(p+1) = (_l&0xff); \ + *((p)+1) = (_l&0xff); \ } while(0) /* ========================= HyperLogLog algorithm ========================= */ From 8670ab5e11f74e8f0a9a637854ed850e12593b1a Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 14 Apr 2014 12:09:37 +0200 Subject: [PATCH 28/58] Merge adjacent VAL opcodes in hllSparseAdd(). As more values are added splitting ZERO or XZERO opcodes, try to merge adjacent VAL opcodes if they have the same value. --- src/hyperloglog.c | 41 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index f7db2da21..c600c3c08 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -805,16 +805,47 @@ int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) { if (deltalen && next) memmove(next+deltalen,next,end-next); sdsIncrLen(o->ptr,deltalen); memcpy(p,seq,seqlen); + end += deltalen; updated: /* Step 4: Merge adjacent values if possible. * * The representation was updated, however the resulting representation - * may not be optimal: adjacent opcodes may be merged into a single one. - * We start from the opcode before the one we updated trying to merge - * opcodes up to the next 5 opcodes (since we need to consider the three - * opcodes resuling from the worst-case split of the updated opcode, - * plus the two opcodes at the left and right of the original one). */ + * may not be optimal: adjacent VAL opcodes can sometimes be merged into + * a single one. */ + p = prev ? prev : sparse; + int scanlen = 5; /* Scan up to 5 upcodes starting from prev. */ + while (p < end && scanlen--) { + if (HLL_SPARSE_IS_XZERO(p)) { + p += 2; + continue; + } else if (HLL_SPARSE_IS_ZERO(p)) { + p++; + continue; + } + /* We need two adjacent VAL opcodes to try a merge, having + * the same value, and a len that first the VAL opcode max len. */ + if (p+1 < end && HLL_SPARSE_IS_VAL(p+1)) { + int v1 = HLL_SPARSE_VAL_VALUE(p); + int v2 = HLL_SPARSE_VAL_VALUE(p+1); + if (v1 == v2) { + int len = HLL_SPARSE_VAL_LEN(p)+HLL_SPARSE_VAL_LEN(p+1); + if (len <= HLL_SPARSE_VAL_MAX_LEN) { + HLL_SPARSE_VAL_SET(p+1,v1,len); + memmove(p,p+1,end-p); + sdsIncrLen(o->ptr,-1); + end--; + /* After a merge we reiterate without incrementing 'p' + * in order to try to merge the just merged value with + * a value on its right. */ + continue; + } + } + } + p++; + } + + /* Invalidate the cached cardinality. */ hdr = o->ptr; HLL_INVALIDATE_CACHE(hdr); return 1; From 6c0f0eb21fc1502a81aa63d0549e55c6f5dd7825 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 14 Apr 2014 12:12:53 +0200 Subject: [PATCH 29/58] Comment typo in hllSparseAdd(). first -> fits. --- src/hyperloglog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index c600c3c08..06afc3637 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -824,7 +824,7 @@ updated: continue; } /* We need two adjacent VAL opcodes to try a merge, having - * the same value, and a len that first the VAL opcode max len. */ + * the same value, and a len that fits the VAL opcode max len. */ if (p+1 < end && HLL_SPARSE_IS_VAL(p+1)) { int v1 = HLL_SPARSE_VAL_VALUE(p); int v2 = HLL_SPARSE_VAL_VALUE(p+1); From a84b91d05210adecd1f60d168eafbc58c0ff0139 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 14 Apr 2014 12:58:46 +0200 Subject: [PATCH 30/58] hllSparseAdd(): faster code removing conditional. Bottleneck found profiling. Big run time improvement found when testing after the change. --- src/hyperloglog.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 06afc3637..aa0ae64c9 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -202,7 +202,7 @@ struct hllhdr { #define HLL_SPARSE 1 /* Sparse encoding */ #define HLL_MAX_ENCODING 1 -#define HLL_SPARSE_MAX 3000 +#define HLL_SPARSE_MAX 12000 /* =========================== Low level bit macros ========================= */ @@ -663,14 +663,23 @@ int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) { next = NULL; /* Points to the next opcode at the end of the loop. */ span = 0; while(p < end) { + int oplen; + /* Set span to the number of registers covered by this opcode. */ - if (HLL_SPARSE_IS_ZERO(p)) span = HLL_SPARSE_ZERO_LEN(p); - else if (HLL_SPARSE_IS_XZERO(p)) span = HLL_SPARSE_XZERO_LEN(p); - else span = HLL_SPARSE_VAL_LEN(p); + if (HLL_SPARSE_IS_ZERO(p)) { + span = HLL_SPARSE_ZERO_LEN(p); + oplen = 1; + } else if (HLL_SPARSE_IS_XZERO(p)) { + span = HLL_SPARSE_XZERO_LEN(p); + oplen = 2; + } else { + span = HLL_SPARSE_VAL_LEN(p); + oplen = 1; + } /* Break if this opcode covers the register as 'index'. */ if (index <= first+span-1) break; prev = p; - p += (HLL_SPARSE_IS_XZERO(p)) ? 2 : 1; + p += oplen; first += span; } if (span == 0) return -1; /* Invalid format. */ From d2174e6c9be3969e2c243c018d047be7191996f1 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 14 Apr 2014 15:20:26 +0200 Subject: [PATCH 31/58] Detect corrupted sparse HLLs in hllSparseSum(). --- src/hyperloglog.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index aa0ae64c9..50d0133f6 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -869,7 +869,7 @@ promote: /* Promote to dense representation. */ * PE is an array with a pre-computer table of values 2^-reg indexed by reg. * As a side effect the integer pointed by 'ezp' is set to the number * of zero registers. */ -double hllSparseSum(uint8_t *sparse, int sparselen, double *PE, int *ezp) { +double hllSparseSum(uint8_t *sparse, int sparselen, double *PE, int *ezp, int *invalid) { double E = 0; int ez = 0, idx = 0, runlen, regval; uint8_t *end = sparse+sparselen, *p = sparse; @@ -895,7 +895,7 @@ double hllSparseSum(uint8_t *sparse, int sparselen, double *PE, int *ezp) { p++; } } - redisAssert(idx == HLL_REGISTERS); + if (idx != HLL_REGISTERS && invalid) *invalid = 1; *ezp = ez; return E; } @@ -908,13 +908,14 @@ double hllSparseSum(uint8_t *sparse, int sparselen, double *PE, int *ezp) { /* Return the approximated cardinality of the set based on the armonic * mean of the registers values. 'hdr' points to the start of the SDS - * representing the String object holding the HLL representation. */ -uint64_t hllCount(struct hllhdr *hdr) { + * representing the String object holding the HLL representation. + * + * If the sparse representation of the HLL object is not valid, the integer + * pointed by 'invalid' is set to non-zero, otherwise it is left untouched. */ +uint64_t hllCount(struct hllhdr *hdr, int *invalid) { double m = HLL_REGISTERS; - double alpha = 0.7213/(1+1.079/m); - double E; - int ez; /* Number of registers equal to 0. */ - int j; + double E, alpha = 0.7213/(1+1.079/m); + int j, ez; /* Number of registers equal to 0. */ /* We precompute 2^(-reg[j]) in a small table in order to * speedup the computation of SUM(2^-register[0..i]). */ @@ -933,7 +934,8 @@ uint64_t hllCount(struct hllhdr *hdr) { if (hdr->encoding == HLL_DENSE) { E = hllDenseSum(hdr->registers,PE,&ez); } else { - E = hllSparseSum(hdr->registers,sdslen((sds)hdr)-HLL_HDR_SIZE,PE,&ez); + E = hllSparseSum(hdr->registers, + sdslen((sds)hdr)-HLL_HDR_SIZE,PE,&ez,invalid); } /* Muliply the inverse of E for alpha_m * m^2 to have the raw estimate. */ @@ -1111,8 +1113,13 @@ void pfcountCommand(redisClient *c) { card |= (uint64_t)hdr->card[6] << 48; card |= (uint64_t)hdr->card[7] << 56; } else { + int invalid = 0; /* Recompute it and update the cached value. */ - card = hllCount(hdr); + card = hllCount(hdr,&invalid); + if (invalid) { + addReplyError(c,"Invalid HLL object"); + return; + } hdr->card[0] = card & 0xff; hdr->card[1] = (card >> 8) & 0xff; hdr->card[2] = (card >> 16) & 0xff; @@ -1245,7 +1252,7 @@ void pfselftestCommand(redisClient *c) { ele = j ^ seed; hllDenseAdd(hdr->registers,(unsigned char*)&ele,sizeof(ele)); if (j == checkpoint) { - int64_t abserr = checkpoint- (int64_t)hllCount(hdr); + int64_t abserr = checkpoint - (int64_t)hllCount(hdr,NULL); if (abserr < 0) abserr = -abserr; if (abserr > (uint64_t)(relerr*4*checkpoint)) { addReplyErrorFormat(c, From f009069d7c6866162d1885a9689811ee5c3132a6 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 14 Apr 2014 15:42:05 +0200 Subject: [PATCH 32/58] hllSparseAdd(): speed optimization. Mostly by reordering opcodes check conditional by frequency of opcodes in larger sparse-encoded HLLs. --- src/hyperloglog.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 50d0133f6..fe1321513 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -440,7 +440,7 @@ uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) { /* Given a string element to add to the HyperLogLog, returns the length * of the pattern 000..1 of the element hash. As a side effect 'regp' is * set to the register index this element hashes to. */ -int hllPatLen(unsigned char *ele, size_t elesize, int *regp) { +int hllPatLen(unsigned char *ele, size_t elesize, long *regp) { uint64_t hash, bit, index; int count; @@ -483,7 +483,7 @@ int hllPatLen(unsigned char *ele, size_t elesize, int *regp) { * is returned. */ int hllDenseAdd(uint8_t *registers, unsigned char *ele, size_t elesize) { uint8_t oldcount, count; - int index; + long index; /* Update the register if this element produced a longer run of zeroes. */ count = hllPatLen(ele,elesize,&index); @@ -636,8 +636,8 @@ int hllSparseToDense(robj *o) { int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) { struct hllhdr *hdr; uint8_t oldcount, count, *sparse, *end, *p, *prev, *next; - int index, first, span; - int is_zero = 0, is_xzero = 0, is_val = 0, runlen = 0; + long index, first, span; + long is_zero = 0, is_xzero = 0, is_val = 0, runlen = 0; /* Update the register if this element produced a longer run of zeroes. */ count = hllPatLen(ele,elesize,&index); @@ -663,18 +663,21 @@ int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) { next = NULL; /* Points to the next opcode at the end of the loop. */ span = 0; while(p < end) { - int oplen; + long oplen; - /* Set span to the number of registers covered by this opcode. */ + /* Set span to the number of registers covered by this opcode. + * + * This is the most performance critical loop of the sparse + * representation. Sorting the conditionals from the most to the + * least frequent opcode in many-bytes sparse HLLs is faster. */ + oplen = 1; if (HLL_SPARSE_IS_ZERO(p)) { span = HLL_SPARSE_ZERO_LEN(p); - oplen = 1; - } else if (HLL_SPARSE_IS_XZERO(p)) { + } else if (HLL_SPARSE_IS_VAL(p)) { + span = HLL_SPARSE_VAL_LEN(p); + } else { /* XZERO. */ span = HLL_SPARSE_XZERO_LEN(p); oplen = 2; - } else { - span = HLL_SPARSE_VAL_LEN(p); - oplen = 1; } /* Break if this opcode covers the register as 'index'. */ if (index <= first+span-1) break; From 1e7f95441fc1c779690ea4ec9addc8bc2bd50a19 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 14 Apr 2014 15:55:21 +0200 Subject: [PATCH 33/58] Added assertion in hllSparseAdd() when promotion to dense occurs. If we converted to dense, a register must be updated in the dense representation. --- src/hyperloglog.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index fe1321513..b7c4b6cfe 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -865,7 +865,17 @@ updated: promote: /* Promote to dense representation. */ if (hllSparseToDense(o) == REDIS_ERR) return -1; /* Corrupted HLL. */ hdr = o->ptr; - return hllDenseAdd(hdr->registers, ele, elesize); + + /* We need to call hllDenseAdd() to perform the operation after the + * conversion. However the result must be 1, since if we need to + * convert from sparse to dense a register requires to be updated. + * + * Note that this in turn means that PFADD will make sure the command + * is propagated to slaves / AOF, so if there is a sparse -> dense + * convertion, it will be performed in all the slaves as well. */ + int dense_retval = hllDenseAdd(hdr->registers, ele, elesize); + redisAssert(dense_retval == 1); + return dense_retval; } /* Compute SUM(2^-reg) in the sparse representation. From 4a43c113c5f6b67f0c14daeb932590f955b6d401 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 14 Apr 2014 15:56:30 +0200 Subject: [PATCH 34/58] Correctly replicate PFDEBUG GETREG. Even if it is a debugging command, make sure that when it forces a change in encoding, the command is propagated. --- src/hyperloglog.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index b7c4b6cfe..fe0a5d7a0 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -1306,9 +1306,12 @@ void pfdebugCommand(redisClient *c) { if (!strcasecmp(cmd,"getreg")) { if (c->argc != 3) goto arityerr; - if (hllSparseToDense(o) == REDIS_ERR) { - addReplyError(c,"HLL sparse encoding is corrupted"); - return; + if (hdr->encoding == HLL_SPARSE) { + if (hllSparseToDense(o) == REDIS_ERR) { + addReplyError(c,"HLL sparse encoding is corrupted"); + return; + } + server.dirty++; /* Force propagation on encoding change. */ } hdr = o->ptr; From f5fa2985f8a273feff812a9bf4dc8d726234a8fd Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 14 Apr 2014 15:57:50 +0200 Subject: [PATCH 35/58] Mark PFDEBUG as write command in the commands table. It is safer since it is able to have side effects. --- src/redis.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redis.c b/src/redis.c index 0c0106d75..899bed2eb 100644 --- a/src/redis.c +++ b/src/redis.c @@ -274,7 +274,7 @@ struct redisCommand redisCommandTable[] = { {"pfadd",pfaddCommand,-2,"wm",0,NULL,1,1,1,0,0}, {"pfcount",pfcountCommand,2,"w",0,NULL,1,1,1,0,0}, {"pfmerge",pfmergeCommand,-2,"wm",0,NULL,1,-1,1,0,0}, - {"pfdebug",pfdebugCommand,-3,"r",0,NULL,0,0,0,0,0} + {"pfdebug",pfdebugCommand,-3,"w",0,NULL,0,0,0,0,0} }; struct evictionPoolEntry *evictionPoolAlloc(void); From cd73060972242236d1a2d3351118b03206a6f933 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 14 Apr 2014 16:09:32 +0200 Subject: [PATCH 36/58] PFMERGE fixed to work with sparse encoding. --- src/hyperloglog.c | 53 ++++++++++++++++++++++++++++++++------ tests/unit/hyperloglog.tcl | 4 +-- 2 files changed, 47 insertions(+), 10 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index fe0a5d7a0..9b8968c1e 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -1130,7 +1130,7 @@ void pfcountCommand(redisClient *c) { /* Recompute it and update the cached value. */ card = hllCount(hdr,&invalid); if (invalid) { - addReplyError(c,"Invalid HLL object"); + addReplyError(c,"Invalid HLL object detected"); return; } hdr->card[0] = card & 0xff; @@ -1163,8 +1163,6 @@ void pfmergeCommand(redisClient *c) { * it to the target variable later. */ memset(max,0,sizeof(max)); for (j = 1; j < c->argc; j++) { - uint8_t val; - /* Check type and size. */ robj *o = lookupKeyRead(c->db,c->argv[j]); if (o == NULL) continue; /* Assume empty HLL for non existing var. */ @@ -1173,14 +1171,47 @@ void pfmergeCommand(redisClient *c) { /* Merge with this HLL with our 'max' HHL by setting max[i] * to MAX(max[i],hll[i]). */ hdr = o->ptr; - for (i = 0; i < HLL_REGISTERS; i++) { - HLL_DENSE_GET_REGISTER(val,hdr->registers,i); - if (val > max[i]) max[i] = val; + if (hdr->encoding == HLL_DENSE) { + uint8_t val; + + for (i = 0; i < HLL_REGISTERS; i++) { + HLL_DENSE_GET_REGISTER(val,hdr->registers,i); + if (val > max[i]) max[i] = val; + } + } else { + uint8_t *p = o->ptr, *end = p + sdslen(o->ptr); + long runlen, regval; + + p += HLL_HDR_SIZE; + i = 0; + while(p < end) { + if (HLL_SPARSE_IS_ZERO(p)) { + runlen = HLL_SPARSE_ZERO_LEN(p); + i += runlen; + p++; + } else if (HLL_SPARSE_IS_XZERO(p)) { + runlen = HLL_SPARSE_XZERO_LEN(p); + i += runlen; + p += 2; + } else { + runlen = HLL_SPARSE_VAL_LEN(p); + regval = HLL_SPARSE_VAL_VALUE(p); + while(runlen--) { + if (regval > max[i]) max[i] = regval; + i++; + } + p++; + } + } + if (i != HLL_REGISTERS) { + addReplyError(c,"Invalid HLL object detected"); + return; + } } } /* Create / unshare the destination key's value if needed. */ - robj *o = lookupKeyRead(c->db,c->argv[1]); + robj *o = lookupKeyWrite(c->db,c->argv[1]); if (o == NULL) { /* Create the key with a string value of the exact length to * hold our HLL data structure. sdsnewlen() when NULL is passed @@ -1194,6 +1225,12 @@ void pfmergeCommand(redisClient *c) { o = dbUnshareStringValue(c->db,c->argv[1],o); } + /* Only support dense objects as destination. */ + if (hllSparseToDense(o) == REDIS_ERR) { + addReplyError(c,"Invalid HLL object detected"); + return; + } + /* Write the resulting HLL to the destination HLL registers and * invalidate the cached value. */ hdr = o->ptr; @@ -1308,7 +1345,7 @@ void pfdebugCommand(redisClient *c) { if (hdr->encoding == HLL_SPARSE) { if (hllSparseToDense(o) == REDIS_ERR) { - addReplyError(c,"HLL sparse encoding is corrupted"); + addReplyError(c,"Invalid HLL object detected"); return; } server.dirty++; /* Force propagation on encoding change. */ diff --git a/tests/unit/hyperloglog.tcl b/tests/unit/hyperloglog.tcl index e241288dd..07f8b7772 100644 --- a/tests/unit/hyperloglog.tcl +++ b/tests/unit/hyperloglog.tcl @@ -60,9 +60,9 @@ start_server {tags {"hll"}} { r pfcount hll } {5} - test {PFGETREG returns the HyperLogLog raw registers} { + test {PFDEBUG GETREG returns the HyperLogLog raw registers} { r del hll r pfadd hll 1 2 3 - llength [r pfgetreg hll] + llength [r pfdebug getreg hll] } {16384} } From bff89bd0a348c8d488e2af38400a910930ef4fb0 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 14 Apr 2014 16:11:54 +0200 Subject: [PATCH 37/58] Error message for invalid HLL objects unified. --- src/hyperloglog.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 9b8968c1e..c4cb5674d 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -204,6 +204,8 @@ struct hllhdr { #define HLL_SPARSE_MAX 12000 +static char *invalid_hll_err = "Corrupted HLL object detected"; + /* =========================== Low level bit macros ========================= */ /* Macros to access the dense representation. @@ -1085,7 +1087,7 @@ void pfaddCommand(redisClient *c) { updated++; break; case -1: - addReplyError(c,"Invalid HyperLogLog representation"); + addReplyError(c,invalid_hll_err); return; } } @@ -1130,7 +1132,7 @@ void pfcountCommand(redisClient *c) { /* Recompute it and update the cached value. */ card = hllCount(hdr,&invalid); if (invalid) { - addReplyError(c,"Invalid HLL object detected"); + addReplyError(c,invalid_hll_err); return; } hdr->card[0] = card & 0xff; @@ -1204,7 +1206,7 @@ void pfmergeCommand(redisClient *c) { } } if (i != HLL_REGISTERS) { - addReplyError(c,"Invalid HLL object detected"); + addReplyError(c,invalid_hll_err); return; } } @@ -1227,7 +1229,7 @@ void pfmergeCommand(redisClient *c) { /* Only support dense objects as destination. */ if (hllSparseToDense(o) == REDIS_ERR) { - addReplyError(c,"Invalid HLL object detected"); + addReplyError(c,invalid_hll_err); return; } @@ -1345,7 +1347,7 @@ void pfdebugCommand(redisClient *c) { if (hdr->encoding == HLL_SPARSE) { if (hllSparseToDense(o) == REDIS_ERR) { - addReplyError(c,"Invalid HLL object detected"); + addReplyError(c,invalid_hll_err); return; } server.dirty++; /* Force propagation on encoding change. */ From ddc1186dbfac40b02438f79c7f12846dc5193b2b Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 14 Apr 2014 16:15:55 +0200 Subject: [PATCH 38/58] Set HLL_SPARSE_MAX to 3000. After running a few benchmarks, 3000 looks like a reasonable value to keep HLLs with a few thousand elements small while the CPU cost is still not huge. This covers all the cases where the dense representation would use N orders of magnitude more space, like in the case of many HLLs with carinality of a few tens or hundreds. It is not impossible that in the future this gets user configurable, however it is easy to pick an unreasoable value just looking at savings in the space dimension without checking what happens in the time dimension. --- src/hyperloglog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index c4cb5674d..f5df8fc31 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -202,7 +202,7 @@ struct hllhdr { #define HLL_SPARSE 1 /* Sparse encoding */ #define HLL_MAX_ENCODING 1 -#define HLL_SPARSE_MAX 12000 +#define HLL_SPARSE_MAX 3000 static char *invalid_hll_err = "Corrupted HLL object detected"; From c5d86e9db976b3d0cdf9fdf136c55231617767d8 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 14 Apr 2014 19:35:00 +0200 Subject: [PATCH 39/58] PFDEBUG ENCODING added. --- src/hyperloglog.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index f5df8fc31..393cc418d 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -1396,6 +1396,13 @@ void pfdebugCommand(redisClient *c) { decoded = sdstrim(decoded," "); addReplyBulkCBuffer(c,decoded,sdslen(decoded)); sdsfree(decoded); + } + /* PFDEBUG ENCODING */ + else if (!strcasecmp(cmd,"encoding")) { + char *encodingstr[2] = {"dense","sparse"}; + if (c->argc != 3) goto arityerr; + + addReplyStatus(c,encodingstr[hdr->encoding]); } else { addReplyErrorFormat(c,"Unknown PFDEBUG subcommand '%s'", cmd); } From 2736ec0d0f66b5da0d51768c01a55ec2dd564f1d Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 15 Apr 2014 10:10:38 +0200 Subject: [PATCH 40/58] PFSELFTEST improved with sparse encoding checks. --- src/hyperloglog.c | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 393cc418d..b1b330310 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -1258,7 +1258,8 @@ void pfmergeCommand(redisClient *c) { void pfselftestCommand(redisClient *c) { int j, i; sds bitcounters = sdsnewlen(NULL,HLL_DENSE_SIZE); - struct hllhdr *hdr = (struct hllhdr*) bitcounters; + struct hllhdr *hdr = (struct hllhdr*) bitcounters, *hdr2; + robj *o = NULL; uint8_t bytecounters[HLL_REGISTERS]; /* Test 1: access registers. @@ -1289,20 +1290,43 @@ void pfselftestCommand(redisClient *c) { } /* Test 2: approximation error. - * The test is adds unique elements and check that the estimated value + * The test adds unique elements and check that the estimated value * is always reasonable bounds. * * We check that the error is smaller than 4 times than the expected * standard error, to make it very unlikely for the test to fail because - * of a "bad" run. */ + * of a "bad" run. + * + * The test is performed with both dense and sparse HLLs at the same + * time also verifying that the computed cardinality is the same. */ memset(hdr->registers,0,HLL_DENSE_SIZE-HLL_HDR_SIZE); + o = createHLLObject(); double relerr = 1.04/sqrt(HLL_REGISTERS); - int64_t checkpoint = 1000; + int64_t checkpoint = 1; uint64_t seed = (uint64_t)rand() | (uint64_t)rand() << 32; uint64_t ele; for (j = 1; j <= 10000000; j++) { ele = j ^ seed; hllDenseAdd(hdr->registers,(unsigned char*)&ele,sizeof(ele)); + hllAdd(o,(unsigned char*)&ele,sizeof(ele)); + + /* Make sure that for small cardinalities we use sparse + * encoding. */ + if (j == checkpoint && j < HLL_SPARSE_MAX/2) { + hdr2 = o->ptr; + if (hdr2->encoding != HLL_SPARSE) { + addReplyError(c, "TESTFAILED sparse encoding not used"); + goto cleanup; + } + } + + /* Check that dense and sparse representations agree. */ + if (j == checkpoint && hllCount(hdr,NULL) != hllCount(o->ptr,NULL)) { + addReplyError(c, "TESTFAILED dense/sparse disagree"); + goto cleanup; + } + + /* Check error. */ if (j == checkpoint) { int64_t abserr = checkpoint - (int64_t)hllCount(hdr,NULL); if (abserr < 0) abserr = -abserr; @@ -1322,6 +1346,7 @@ void pfselftestCommand(redisClient *c) { cleanup: sdsfree(bitcounters); + if (o) decrRefCount(o); } /* PFDEBUG ... args ... From 1dca87d69c0adb9ca6c905aee993d02fc582d742 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 15 Apr 2014 17:46:51 +0200 Subject: [PATCH 41/58] User-defined switch point between sparse-dense HLL encodings. --- redis.conf | 14 ++++++++++++++ src/config.c | 8 ++++++++ src/hyperloglog.c | 11 +++++------ src/redis.c | 1 + src/redis.h | 4 ++++ 5 files changed, 32 insertions(+), 6 deletions(-) diff --git a/redis.conf b/redis.conf index 1e53917b6..ee3a3a3ce 100644 --- a/redis.conf +++ b/redis.conf @@ -682,6 +682,20 @@ set-max-intset-entries 512 zset-max-ziplist-entries 128 zset-max-ziplist-value 64 +# HyperLogLog sparse representation bytes limit. The limit includes the +# 16 bytes header. When an HyperLogLog using the sparse representation crosses +# this limit, it is convereted into the dense representation. +# +# A value greater than 16000 is totally useless, since at that point the +# dense representation is more memory efficient. +# +# The suggested value is ~ 3000 in order to have the benefits of +# the space efficient encoding without slowing down too much PFADD, +# which is O(N) with the sparse encoding. Thev value can be raised to +# ~ 10000 when CPU is not a concern, but space is, and the data set is +# composed of many HyperLogLogs with cardinality in the 0 - 15000 range. +hll-sparse-max-bytes 3000 + # Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in # order to help rehashing the main Redis hash table (the one mapping top-level # keys to values). The hash table implementation Redis uses (see dict.c) diff --git a/src/config.c b/src/config.c index 15ea1c5c3..25068cb9f 100644 --- a/src/config.c +++ b/src/config.c @@ -391,6 +391,8 @@ void loadServerConfigFromString(char *config) { server.zset_max_ziplist_entries = memtoll(argv[1], NULL); } else if (!strcasecmp(argv[0],"zset-max-ziplist-value") && argc == 2) { server.zset_max_ziplist_value = memtoll(argv[1], NULL); + } else if (!strcasecmp(argv[0],"hll-sparse-max-bytes") && argc == 2) { + server.hll_sparse_max_bytes = memtoll(argv[1], NULL); } else if (!strcasecmp(argv[0],"rename-command") && argc == 3) { struct redisCommand *cmd = lookupCommand(argv[1]); int retval; @@ -765,6 +767,9 @@ void configSetCommand(redisClient *c) { } else if (!strcasecmp(c->argv[2]->ptr,"zset-max-ziplist-value")) { if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt; server.zset_max_ziplist_value = ll; + } else if (!strcasecmp(c->argv[2]->ptr,"hll-sparse-max-bytes")) { + if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt; + server.hll_sparse_max_bytes = ll; } else if (!strcasecmp(c->argv[2]->ptr,"lua-time-limit")) { if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll < 0) goto badfmt; server.lua_time_limit = ll; @@ -974,6 +979,8 @@ void configGetCommand(redisClient *c) { server.zset_max_ziplist_entries); config_get_numerical_field("zset-max-ziplist-value", server.zset_max_ziplist_value); + config_get_numerical_field("hll-sparse-max-bytes", + server.hll_sparse_max_bytes); config_get_numerical_field("lua-time-limit",server.lua_time_limit); config_get_numerical_field("slowlog-log-slower-than", server.slowlog_log_slower_than); @@ -1773,6 +1780,7 @@ int rewriteConfig(char *path) { rewriteConfigNumericalOption(state,"set-max-intset-entries",server.set_max_intset_entries,REDIS_SET_MAX_INTSET_ENTRIES); rewriteConfigNumericalOption(state,"zset-max-ziplist-entries",server.zset_max_ziplist_entries,REDIS_ZSET_MAX_ZIPLIST_ENTRIES); rewriteConfigNumericalOption(state,"zset-max-ziplist-value",server.zset_max_ziplist_value,REDIS_ZSET_MAX_ZIPLIST_VALUE); + rewriteConfigNumericalOption(state,"hll-sparse-max-bytes",server.hll_sparse_max_bytes,REDIS_DEFAULT_HLL_SPARSE_MAX_BYTES); rewriteConfigYesNoOption(state,"activerehashing",server.activerehashing,REDIS_DEFAULT_ACTIVE_REHASHING); rewriteConfigClientoutputbufferlimitOption(state); rewriteConfigNumericalOption(state,"hz",server.hz,REDIS_DEFAULT_HZ); diff --git a/src/hyperloglog.c b/src/hyperloglog.c index b1b330310..bb07aa380 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -176,7 +176,7 @@ * involved in updating the sparse representation is not justified by the * memory savings. The exact maximum length of the sparse representation * when this implementation switches to the dense representation is - * configured via the define HLL_SPARSE_MAX. + * configured via the define server.hll_sparse_max_bytes. */ struct hllhdr { @@ -202,8 +202,6 @@ struct hllhdr { #define HLL_SPARSE 1 /* Sparse encoding */ #define HLL_MAX_ENCODING 1 -#define HLL_SPARSE_MAX 3000 - static char *invalid_hll_err = "Corrupted HLL object detected"; /* =========================== Low level bit macros ========================= */ @@ -634,7 +632,7 @@ int hllSparseToDense(robj *o) { * As a side effect the function may promote the HLL representation from * sparse to dense: this happens when a register requires to be set to a value * not representable with the sparse representation, or when the resulting - * size would be greater than HLL_SPARSE_MAX. */ + * size would be greater than server.hll_sparse_max_bytes. */ int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) { struct hllhdr *hdr; uint8_t oldcount, count, *sparse, *end, *p, *prev, *next; @@ -815,7 +813,8 @@ int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) { int oldlen = is_xzero ? 2 : 1; int deltalen = seqlen-oldlen; - if (deltalen > 0 && sdslen(o->ptr)+deltalen > HLL_SPARSE_MAX) goto promote; + if (deltalen > 0 && + sdslen(o->ptr)+deltalen > server.hll_sparse_max_bytes) goto promote; if (deltalen && next) memmove(next+deltalen,next,end-next); sdsIncrLen(o->ptr,deltalen); memcpy(p,seq,seqlen); @@ -1312,7 +1311,7 @@ void pfselftestCommand(redisClient *c) { /* Make sure that for small cardinalities we use sparse * encoding. */ - if (j == checkpoint && j < HLL_SPARSE_MAX/2) { + if (j == checkpoint && j < server.hll_sparse_max_bytes/2) { hdr2 = o->ptr; if (hdr2->encoding != HLL_SPARSE) { addReplyError(c, "TESTFAILED sparse encoding not used"); diff --git a/src/redis.c b/src/redis.c index 899bed2eb..0978b2b75 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1421,6 +1421,7 @@ void initServerConfig() { server.set_max_intset_entries = REDIS_SET_MAX_INTSET_ENTRIES; server.zset_max_ziplist_entries = REDIS_ZSET_MAX_ZIPLIST_ENTRIES; server.zset_max_ziplist_value = REDIS_ZSET_MAX_ZIPLIST_VALUE; + server.hll_sparse_max_bytes = REDIS_DEFAULT_HLL_SPARSE_MAX_BYTES; server.shutdown_asap = 0; server.repl_ping_slave_period = REDIS_REPL_PING_SLAVE_PERIOD; server.repl_timeout = REDIS_REPL_TIMEOUT; diff --git a/src/redis.h b/src/redis.h index 850ec1684..dc13e8f2a 100644 --- a/src/redis.h +++ b/src/redis.h @@ -312,6 +312,9 @@ #define REDIS_ZSET_MAX_ZIPLIST_ENTRIES 128 #define REDIS_ZSET_MAX_ZIPLIST_VALUE 64 +/* HyperLogLog defines */ +#define REDIS_DEFAULT_HLL_SPARSE_MAX_BYTES 3000 + /* Sets operations codes */ #define REDIS_OP_UNION 0 #define REDIS_OP_DIFF 1 @@ -809,6 +812,7 @@ struct redisServer { size_t set_max_intset_entries; size_t zset_max_ziplist_entries; size_t zset_max_ziplist_value; + size_t hll_sparse_max_bytes; time_t unixtime; /* Unix time sampled every cron cycle. */ long long mstime; /* Like 'unixtime' but with milliseconds resolution. */ /* Pubsub */ From e105f2a6456990352edd1415f0514d9ff31d899f Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 16 Apr 2014 09:05:42 +0200 Subject: [PATCH 42/58] PFDEBUG TODENSE added. Converts HyperLogLogs from sparse to dense. Used for testing. --- src/hyperloglog.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index bb07aa380..3cb31f296 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -1427,6 +1427,21 @@ void pfdebugCommand(redisClient *c) { if (c->argc != 3) goto arityerr; addReplyStatus(c,encodingstr[hdr->encoding]); + } + /* PFDEBUG TODENSE */ + else if (!strcasecmp(cmd,"todense")) { + int conv = 0; + if (c->argc != 3) goto arityerr; + + if (hdr->encoding == HLL_SPARSE) { + if (hllSparseToDense(o) == REDIS_ERR) { + addReplyError(c,invalid_hll_err); + return; + } + conv = 1; + server.dirty++; /* Force propagation on encoding change. */ + } + addReply(c,conv ? shared.cone : shared.czero); } else { addReplyErrorFormat(c,"Unknown PFDEBUG subcommand '%s'", cmd); } From fc3426c687847a622d8b825ab641abf3006e0c7d Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 16 Apr 2014 09:10:30 +0200 Subject: [PATCH 43/58] HyperLogLog invalid representation error code set to INVALIDOBJ. --- src/hyperloglog.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 3cb31f296..838c921db 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -202,7 +202,7 @@ struct hllhdr { #define HLL_SPARSE 1 /* Sparse encoding */ #define HLL_MAX_ENCODING 1 -static char *invalid_hll_err = "Corrupted HLL object detected"; +static char *invalid_hll_err = "-INVALIDOBJ Corrupted HLL object detected\r\n"; /* =========================== Low level bit macros ========================= */ @@ -1086,7 +1086,7 @@ void pfaddCommand(redisClient *c) { updated++; break; case -1: - addReplyError(c,invalid_hll_err); + addReplySds(c,sdsnew(invalid_hll_err)); return; } } @@ -1131,7 +1131,7 @@ void pfcountCommand(redisClient *c) { /* Recompute it and update the cached value. */ card = hllCount(hdr,&invalid); if (invalid) { - addReplyError(c,invalid_hll_err); + addReplySds(c,sdsnew(invalid_hll_err)); return; } hdr->card[0] = card & 0xff; @@ -1205,7 +1205,7 @@ void pfmergeCommand(redisClient *c) { } } if (i != HLL_REGISTERS) { - addReplyError(c,invalid_hll_err); + addReplySds(c,sdsnew(invalid_hll_err)); return; } } @@ -1228,7 +1228,7 @@ void pfmergeCommand(redisClient *c) { /* Only support dense objects as destination. */ if (hllSparseToDense(o) == REDIS_ERR) { - addReplyError(c,invalid_hll_err); + addReplySds(c,sdsnew(invalid_hll_err)); return; } @@ -1371,7 +1371,7 @@ void pfdebugCommand(redisClient *c) { if (hdr->encoding == HLL_SPARSE) { if (hllSparseToDense(o) == REDIS_ERR) { - addReplyError(c,invalid_hll_err); + addReplySds(c,sdsnew(invalid_hll_err)); return; } server.dirty++; /* Force propagation on encoding change. */ @@ -1435,7 +1435,7 @@ void pfdebugCommand(redisClient *c) { if (hdr->encoding == HLL_SPARSE) { if (hllSparseToDense(o) == REDIS_ERR) { - addReplyError(c,invalid_hll_err); + addReplySds(c,sdsnew(invalid_hll_err)); return; } conv = 1; From 20d49e3ac92d6d72c5ba0688cb337fe5b7d1666e Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 16 Apr 2014 09:17:38 +0200 Subject: [PATCH 44/58] More HyperLogLog tests. --- tests/unit/hyperloglog.tcl | 76 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/tests/unit/hyperloglog.tcl b/tests/unit/hyperloglog.tcl index 07f8b7772..47e3db2a1 100644 --- a/tests/unit/hyperloglog.tcl +++ b/tests/unit/hyperloglog.tcl @@ -39,6 +39,82 @@ start_server {tags {"hll"}} { set res } {5 10} + test {HyperLogLogs are promote from sparse to dense} { + r del hll + r config set hll-sparse-max-bytes 3000 + set n 0 + while {$n < 100000} { + set elements {} + for {set j 0} {$j < 100} {incr j} {lappend elements [expr rand()]} + incr n 100 + r pfadd hll {*}$elements + set card [r pfcount hll] + set err [expr {abs($card-$n)}] + assert {$err < (double($card)/100)*5} + if {$n < 1000} { + assert {[r pfdebug encoding hll] eq {sparse}} + } elseif {$n > 10000} { + assert {[r pfdebug encoding hll] eq {dense}} + } + } + } + + test {HyperLogLog sparse encoding stress test} { + for {set x 0} {$x < 1000} {incr x} { + r del hll1 hll2 + set numele [randomInt 100] + set elements {} + for {set j 0} {$j < $numele} {incr j} { + lappend elements [expr rand()] + } + # Force dense representation of hll2 + r pfadd hll2 + r pfdebug todense hll2 + r pfadd hll1 {*}$elements + r pfadd hll2 {*}$elements + assert {[r pfdebug encoding hll1] eq {sparse}} + assert {[r pfdebug encoding hll2] eq {dense}} + # Cardinality estimated should match exactly. + assert {[r pfcount hll1] eq [r pfcount hll2]} + } + } + + test {Corrupted sparse HyperLogLogs are detected: Additionl at tail} { + r del hll + r pfadd hll a b c + r append hll "hello" + set e {} + catch {r pfcount hll} e + set e + } {*INVALIDOBJ*} + + test {Corrupted sparse HyperLogLogs are detected: Broken magic} { + r del hll + r pfadd hll a b c + r setrange hll 0 "0123" + set e {} + catch {r pfcount hll} e + set e + } {*WRONGTYPE*} + + test {Corrupted sparse HyperLogLogs are detected: Invalid encoding} { + r del hll + r pfadd hll a b c + r setrange hll 4 "x" + set e {} + catch {r pfcount hll} e + set e + } {*WRONGTYPE*} + + test {Corrupted dense HyperLogLogs are detected: Wrong length} { + r del hll + r pfadd hll a b c + r setrange hll 4 "\x00" + set e {} + catch {r pfcount hll} e + set e + } {*WRONGTYPE*} + test {PFADD, PFCOUNT, PFMERGE type checking works} { r set foo bar catch {r pfadd foo 1} e From ded5cbbd8f7e3aceeefff37349014b8d42215896 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 16 Apr 2014 12:17:00 +0200 Subject: [PATCH 45/58] ZLEXCOUNT implemented. Like ZCOUNT for lexicographical ranges. --- src/redis.c | 1 + src/redis.h | 1 + src/t_zset.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+) diff --git a/src/redis.c b/src/redis.c index 0978b2b75..8898ad3e5 100644 --- a/src/redis.c +++ b/src/redis.c @@ -179,6 +179,7 @@ struct redisCommand redisCommandTable[] = { {"zrangebylex",zrangebylexCommand,-4,"r",0,NULL,1,1,1,0,0}, {"zrevrangebylex",zrevrangebylexCommand,-4,"r",0,NULL,1,1,1,0,0}, {"zcount",zcountCommand,4,"r",0,NULL,1,1,1,0,0}, + {"zlexcount",zlexcountCommand,4,"r",0,NULL,1,1,1,0,0}, {"zrevrange",zrevrangeCommand,-4,"r",0,NULL,1,1,1,0,0}, {"zcard",zcardCommand,2,"r",0,NULL,1,1,1,0,0}, {"zscore",zscoreCommand,3,"r",0,NULL,1,1,1,0,0}, diff --git a/src/redis.h b/src/redis.h index dc13e8f2a..af25d9be8 100644 --- a/src/redis.h +++ b/src/redis.h @@ -1400,6 +1400,7 @@ void zrevrangebyscoreCommand(redisClient *c); void zrangebylexCommand(redisClient *c); void zrevrangebylexCommand(redisClient *c); void zcountCommand(redisClient *c); +void zlexcountCommand(redisClient *c); void zrevrangeCommand(redisClient *c); void zcardCommand(redisClient *c); void zremCommand(redisClient *c); diff --git a/src/t_zset.c b/src/t_zset.c index edeec3252..7c88f715a 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -2359,6 +2359,79 @@ void zcountCommand(redisClient *c) { addReplyLongLong(c, count); } +void zlexcountCommand(redisClient *c) { + robj *key = c->argv[1]; + robj *zobj; + zlexrangespec range; + int count = 0; + + /* Parse the range arguments */ + if (zslParseLexRange(c->argv[2],c->argv[3],&range) != REDIS_OK) { + addReplyError(c,"min or max not valid string range item"); + return; + } + + /* Lookup the sorted set */ + if ((zobj = lookupKeyReadOrReply(c, key, shared.czero)) == NULL || + checkType(c, zobj, REDIS_ZSET)) return; + + if (zobj->encoding == REDIS_ENCODING_ZIPLIST) { + unsigned char *zl = zobj->ptr; + unsigned char *eptr, *sptr; + + /* Use the first element in range as the starting point */ + eptr = zzlFirstInLexRange(zl,range); + + /* No "first" element */ + if (eptr == NULL) { + addReply(c, shared.czero); + return; + } + + /* First element is in range */ + sptr = ziplistNext(zl,eptr); + redisAssertWithInfo(c,zobj,zzlLexValueLteMax(eptr,&range)); + + /* Iterate over elements in range */ + while (eptr) { + /* Abort when the node is no longer in range. */ + if (!zzlLexValueLteMax(eptr,&range)) { + break; + } else { + count++; + zzlNext(zl,&eptr,&sptr); + } + } + } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) { + zset *zs = zobj->ptr; + zskiplist *zsl = zs->zsl; + zskiplistNode *zn; + unsigned long rank; + + /* Find first element in range */ + zn = zslFirstInLexRange(zsl, range); + + /* Use rank of first element, if any, to determine preliminary count */ + if (zn != NULL) { + rank = zslGetRank(zsl, zn->score, zn->obj); + count = (zsl->length - (rank - 1)); + + /* Find last element in range */ + zn = zslLastInLexRange(zsl, range); + + /* Use rank of last element, if any, to determine the actual count */ + if (zn != NULL) { + rank = zslGetRank(zsl, zn->score, zn->obj); + count -= (zsl->length - rank); + } + } + } else { + redisPanic("Unknown sorted set encoding"); + } + + addReplyLongLong(c, count); +} + /* This command implements ZRANGEBYLEX, ZREVRANGEBYLEX. */ void genericZrangebylexCommand(redisClient *c, int reverse) { zlexrangespec range; From 35a4c0c9d3475408549c39b1dfc39bd8d993115c Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 16 Apr 2014 23:55:58 +0200 Subject: [PATCH 46/58] Pass by pointer and release of lex ranges. Given that the code was written with a 2 years pause... something strange happened in the middle. So there was no function to free a lex range min/max objects, and in some places the range was passed by value. --- src/t_zset.c | 57 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 15 deletions(-) diff --git a/src/t_zset.c b/src/t_zset.c index 7c88f715a..a2ef31479 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -444,7 +444,7 @@ static int zslParseRange(robj *min, robj *max, zrangespec *spec) { * respectively if the item is exclusive or inclusive. REDIS_OK will be * returned. * - * If the stirng is not a valid range REDIS_ERR is returned, and the value + * If the string is not a valid range REDIS_ERR is returned, and the value * of *dest and *ex is undefined. */ int zslParseLexRangeItem(robj *item, robj **dest, int *ex) { char *c = item->ptr; @@ -475,8 +475,14 @@ int zslParseLexRangeItem(robj *item, robj **dest, int *ex) { } } -/* Populate the rangespec according to the objects min and max. */ +/* Populate the rangespec according to the objects min and max. + * + * Return REDIS_OK on success. On error REDIS_ERR is returned. + * When OK is returned the structure must be freed with zslFreeLexRange(), + * otherwise no release is needed. */ static int zslParseLexRange(robj *min, robj *max, zlexrangespec *spec) { + /* The range can't be valid if objects are integer encoded. + * Every item must start with ( or [. */ if (min->encoding == REDIS_ENCODING_INT || max->encoding == REDIS_ENCODING_INT) return REDIS_ERR; @@ -491,6 +497,13 @@ static int zslParseLexRange(robj *min, robj *max, zlexrangespec *spec) { } } +/* Free a lex range structure, must be called only after zelParseLexRange() + * populated the structure with success (REDIS_OK returned). */ +void zslFreeLexRange(zlexrangespec *spec) { + decrRefCount(spec->min); + decrRefCount(spec->max); +} + /* This is just a wrapper to compareStringObjects() that is able to * handle shared.minstring and shared.maxstring as the equivalent of * -inf and +inf for strings */ @@ -816,16 +829,16 @@ int zzlIsInLexRange(unsigned char *zl, zlexrangespec *range) { /* Find pointer to the first element contained in the specified lex range. * Returns NULL when no element is contained in the range. */ -unsigned char *zzlFirstInLexRange(unsigned char *zl, zlexrangespec range) { +unsigned char *zzlFirstInLexRange(unsigned char *zl, zlexrangespec *range) { unsigned char *eptr = ziplistIndex(zl,0), *sptr; /* If everything is out of range, return early. */ - if (!zzlIsInLexRange(zl,&range)) return NULL; + if (!zzlIsInLexRange(zl,range)) return NULL; while (eptr != NULL) { - if (zzlLexValueGteMin(eptr,&range)) { + if (zzlLexValueGteMin(eptr,range)) { /* Check if score <= max. */ - if (zzlLexValueLteMax(eptr,&range)) + if (zzlLexValueLteMax(eptr,range)) return eptr; return NULL; } @@ -841,16 +854,16 @@ unsigned char *zzlFirstInLexRange(unsigned char *zl, zlexrangespec range) { /* Find pointer to the last element contained in the specified lex range. * Returns NULL when no element is contained in the range. */ -unsigned char *zzlLastInLexRange(unsigned char *zl, zlexrangespec range) { +unsigned char *zzlLastInLexRange(unsigned char *zl, zlexrangespec *range) { unsigned char *eptr = ziplistIndex(zl,-2), *sptr; /* If everything is out of range, return early. */ - if (!zzlIsInLexRange(zl,&range)) return NULL; + if (!zzlIsInLexRange(zl,range)) return NULL; while (eptr != NULL) { - if (zzlLexValueLteMax(eptr,&range)) { + if (zzlLexValueLteMax(eptr,range)) { /* Check if score >= min. */ - if (zzlLexValueGteMin(eptr,&range)) + if (zzlLexValueGteMin(eptr,range)) return eptr; return NULL; } @@ -2373,17 +2386,22 @@ void zlexcountCommand(redisClient *c) { /* Lookup the sorted set */ if ((zobj = lookupKeyReadOrReply(c, key, shared.czero)) == NULL || - checkType(c, zobj, REDIS_ZSET)) return; + checkType(c, zobj, REDIS_ZSET)) + { + zslFreeLexRange(&range); + return; + } if (zobj->encoding == REDIS_ENCODING_ZIPLIST) { unsigned char *zl = zobj->ptr; unsigned char *eptr, *sptr; /* Use the first element in range as the starting point */ - eptr = zzlFirstInLexRange(zl,range); + eptr = zzlFirstInLexRange(zl,&range); /* No "first" element */ if (eptr == NULL) { + zslFreeLexRange(&range); addReply(c, shared.czero); return; } @@ -2429,6 +2447,7 @@ void zlexcountCommand(redisClient *c) { redisPanic("Unknown sorted set encoding"); } + zslFreeLexRange(&range); addReplyLongLong(c, count); } @@ -2468,6 +2487,7 @@ void genericZrangebylexCommand(redisClient *c, int reverse) { (getLongFromObjectOrReply(c, c->argv[pos+2], &limit, NULL) != REDIS_OK)) return; pos += 3; remaining -= 3; } else { + zslFreeLexRange(&range); addReply(c,shared.syntaxerr); return; } @@ -2476,7 +2496,11 @@ void genericZrangebylexCommand(redisClient *c, int reverse) { /* Ok, lookup the key and get the range */ if ((zobj = lookupKeyReadOrReply(c,key,shared.emptymultibulk)) == NULL || - checkType(c,zobj,REDIS_ZSET)) return; + checkType(c,zobj,REDIS_ZSET)) + { + zslFreeLexRange(&range); + return; + } if (zobj->encoding == REDIS_ENCODING_ZIPLIST) { unsigned char *zl = zobj->ptr; @@ -2487,14 +2511,15 @@ void genericZrangebylexCommand(redisClient *c, int reverse) { /* If reversed, get the last node in range as starting point. */ if (reverse) { - eptr = zzlLastInLexRange(zl,range); + eptr = zzlLastInLexRange(zl,&range); } else { - eptr = zzlFirstInLexRange(zl,range); + eptr = zzlFirstInLexRange(zl,&range); } /* No "first" element in the specified interval. */ if (eptr == NULL) { addReply(c, shared.emptymultibulk); + zslFreeLexRange(&range); return; } @@ -2558,6 +2583,7 @@ void genericZrangebylexCommand(redisClient *c, int reverse) { /* No "first" element in the specified interval. */ if (ln == NULL) { addReply(c, shared.emptymultibulk); + zslFreeLexRange(&range); return; } @@ -2598,6 +2624,7 @@ void genericZrangebylexCommand(redisClient *c, int reverse) { redisPanic("Unknown sorted set encoding"); } + zslFreeLexRange(&range); setDeferredMultiBulkLength(c, replylen, rangelen); } From cd3535267724b54fc5a889fdd0c601531e136ef0 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 17 Apr 2014 00:08:11 +0200 Subject: [PATCH 47/58] Basic ZRANGEBYLEX / ZLEXCOUNT tests. --- tests/unit/type/zset.tcl | 56 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/tests/unit/type/zset.tcl b/tests/unit/type/zset.tcl index 806f4c88b..098f17841 100644 --- a/tests/unit/type/zset.tcl +++ b/tests/unit/type/zset.tcl @@ -296,6 +296,62 @@ start_server {tags {"zset"}} { assert_error "*not*float*" {r zrangebyscore fooz 1 NaN} } + proc create_default_lex_zset {} { + create_zset zset {0 alpha 0 bar 0 cool 0 down + 0 elephant 0 foo 0 great 0 hill + 0 omega} + } + + test "ZRANGEBYLEX/ZREVRANGEBYLEX/ZCOUNT basics" { + create_default_lex_zset + + # inclusive range + assert_equal {alpha bar cool} [r zrangebylex zset - \[cool] + assert_equal {bar cool down} [r zrangebylex zset \[bar \[down] + assert_equal {great hill omega} [r zrangebylex zset \[g +] + assert_equal {cool bar alpha} [r zrevrangebylex zset \[cool -] + assert_equal {down cool bar} [r zrevrangebylex zset \[down \[bar] + assert_equal {omega hill great foo elephant down} [r zrevrangebylex zset + \[d] + assert_equal 3 [r zlexcount zset \[ele \[h] + + # exclusive range + assert_equal {alpha bar} [r zrangebylex zset - (cool] + assert_equal {cool} [r zrangebylex zset (bar (down] + assert_equal {hill omega} [r zrangebylex zset (great +] + assert_equal {bar alpha} [r zrevrangebylex zset (cool -] + assert_equal {cool} [r zrevrangebylex zset (down (bar] + assert_equal {omega hill} [r zrevrangebylex zset + (great] + assert_equal 2 [r zlexcount zset (ele (great] + + # inclusive and exclusive + assert_equal {} [r zrangebylex zset (az (b] + assert_equal {} [r zrangebylex zset (z +] + assert_equal {} [r zrangebylex zset - \[aaaa] + assert_equal {} [r zrevrangebylex zset \[elez \[elex] + assert_equal {} [r zrevrangebylex zset (hill (omega] + } + + test "ZRANGEBYSLEX with LIMIT" { + create_default_lex_zset + assert_equal {alpha bar} [r zrangebylex zset - \[cool LIMIT 0 2] + assert_equal {bar cool} [r zrangebylex zset - \[cool LIMIT 1 2] + assert_equal {} [r zrangebylex zset \[bar \[down LIMIT 0 0] + assert_equal {} [r zrangebylex zset \[bar \[down LIMIT 2 0] + assert_equal {bar} [r zrangebylex zset \[bar \[down LIMIT 0 1] + assert_equal {cool} [r zrangebylex zset \[bar \[down LIMIT 1 1] + assert_equal {bar cool down} [r zrangebylex zset \[bar \[down LIMIT 0 100] + assert_equal {omega hill great foo elephant} [r zrevrangebylex zset + \[d LIMIT 0 5] + assert_equal {omega hill great foo} [r zrevrangebylex zset + \[d LIMIT 0 4] + } + + test "ZRANGEBYLEX with invalid lex range specifiers" { + assert_error "*not*string*" {r zrangebylex fooz foo bar} + assert_error "*not*string*" {r zrangebylex fooz \[foo bar} + assert_error "*not*string*" {r zrangebylex fooz foo \[bar} + assert_error "*not*string*" {r zrangebylex fooz +x \[bar} + assert_error "*not*string*" {r zrangebylex fooz -x \[bar} + } + test "ZREMRANGEBYSCORE basics" { proc remrangebyscore {min max} { create_zset zset {1 a 2 b 3 c 4 d 5 e} From 00e9dc8b7549e4db44ba0e8c4175584530bbde37 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 17 Apr 2014 10:25:58 +0200 Subject: [PATCH 48/58] Sorted set lex ranges stress tester. --- tests/unit/type/zset.tcl | 65 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/tests/unit/type/zset.tcl b/tests/unit/type/zset.tcl index 098f17841..b3d593c3d 100644 --- a/tests/unit/type/zset.tcl +++ b/tests/unit/type/zset.tcl @@ -764,6 +764,71 @@ start_server {tags {"zset"}} { assert_equal {} $err } + test "ZRANGEBYLEX fuzzy test, 200 ranges in $elements element sorted set - $encoding" { + set lexset {} + r del zset + for {set j 0} {$j < $elements} {incr j} { + set e [randstring 0 30 alpha] + lappend lexset $e + r zadd zset 0 $e + } + set lexset [lsort -unique $lexset] + for {set j 0} {$j < 100} {incr j} { + set min [randstring 0 30 alpha] + set max [randstring 0 30 alpha] + set mininc [randomInt 2] + set maxinc [randomInt 2] + if {$mininc} {set cmin "\[$min"} else {set cmin "($min"} + if {$maxinc} {set cmax "\[$max"} else {set cmax "($max"} + set rev [randomInt 2] + if {$rev} { + set cmd zrevrangebylex + } else { + set cmd zrangebylex + } + + # Make sure data is the same in both sides + assert {[r zrange zset 0 -1] eq $lexset} + + # Get the Redis output + set output [r $cmd zset $cmin $cmax] + if {$rev} { + set outlen [r zlexcount zset $cmax $cmin] + } else { + set outlen [r zlexcount zset $cmin $cmax] + } + + # Compute the same output via Tcl + set o {} + set copy $lexset + if {(!$rev && [string compare $min $max] > 0) || + ($rev && [string compare $max $min] > 0)} { + # Empty output when ranges are inverted. + } else { + if {$rev} { + # Invert the Tcl array using Redis itself. + set copy [r zrevrange zset 0 -1] + # Invert min / max as well + lassign [list $min $max $mininc $maxinc] \ + max min maxinc mininc + } + foreach e $copy { + set mincmp [string compare $e $min] + set maxcmp [string compare $e $max] + if { + ($mininc && $mincmp >= 0 || !$mininc && $mincmp > 0) + && + ($maxinc && $maxcmp <= 0 || !$maxinc && $maxcmp < 0) + } { + lappend o $e + } + } + } + assert {$o eq $output} + assert {$outlen eq [llength $output]} + } + } + test "ZSETs skiplist implementation backlink consistency test - $encoding" { set diff 0 for {set j 0} {$j < $elements} {incr j} { From 0711f61616dae8876a927ac9c22d370ca2b65853 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 17 Apr 2014 14:19:14 +0200 Subject: [PATCH 49/58] ZREMRANGE* commands refactored into a single generic function. --- src/t_zset.c | 123 ++++++++++++++++++++++++--------------------------- 1 file changed, 58 insertions(+), 65 deletions(-) diff --git a/src/t_zset.c b/src/t_zset.c index a2ef31479..1214043b4 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -1313,31 +1313,74 @@ void zremCommand(redisClient *c) { addReplyLongLong(c,deleted); } -void zremrangebyscoreCommand(redisClient *c) { +/* Implements ZREMRANGEBYRANK, ZREMRANGEBYSCORE, ZREMRANGEBYLEX commands. */ +#define ZRANGE_RANK 0 +#define ZRANGE_SCORE 1 +#define ZRANGE_LEX 2 +void zremrangeGenericCommand(redisClient *c, int rangetype) { robj *key = c->argv[1]; robj *zobj; - zrangespec range; int keyremoved = 0; unsigned long deleted; + zrangespec range; + long start, end, llen; - /* Parse the range arguments. */ - if (zslParseRange(c->argv[2],c->argv[3],&range) != REDIS_OK) { - addReplyError(c,"min or max is not a float"); - return; + /* Step 1: Parse the range. */ + if (rangetype == ZRANGE_RANK) { + if ((getLongFromObjectOrReply(c,c->argv[2],&start,NULL) != REDIS_OK) || + (getLongFromObjectOrReply(c,c->argv[3],&end,NULL) != REDIS_OK)) + return; + } else if (rangetype == ZRANGE_SCORE) { + if (zslParseRange(c->argv[2],c->argv[3],&range) != REDIS_OK) { + addReplyError(c,"min or max is not a float"); + return; + } } + /* Step 2: Lookup & range sanity checks if needed. */ if ((zobj = lookupKeyWriteOrReply(c,key,shared.czero)) == NULL || checkType(c,zobj,REDIS_ZSET)) return; + if (rangetype == ZRANGE_RANK) { + /* Sanitize indexes. */ + llen = zsetLength(zobj); + if (start < 0) start = llen+start; + if (end < 0) end = llen+end; + if (start < 0) start = 0; + + /* Invariant: start >= 0, so this test will be true when end < 0. + * The range is empty when start > end or start >= length. */ + if (start > end || start >= llen) { + addReply(c,shared.czero); + return; + } + if (end >= llen) end = llen-1; + } + + /* Step 3: Perform the range deletion operation. */ if (zobj->encoding == REDIS_ENCODING_ZIPLIST) { - zobj->ptr = zzlDeleteRangeByScore(zobj->ptr,range,&deleted); + switch(rangetype) { + case ZRANGE_RANK: + zobj->ptr = zzlDeleteRangeByRank(zobj->ptr,start+1,end+1,&deleted); + break; + case ZRANGE_SCORE: + zobj->ptr = zzlDeleteRangeByScore(zobj->ptr,range,&deleted); + break; + } if (zzlLength(zobj->ptr) == 0) { dbDelete(c->db,key); keyremoved = 1; } } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) { zset *zs = zobj->ptr; - deleted = zslDeleteRangeByScore(zs->zsl,range,zs->dict); + switch(rangetype) { + case ZRANGE_RANK: + deleted = zslDeleteRangeByRank(zs->zsl,start+1,end+1,zs->dict); + break; + case ZRANGE_SCORE: + deleted = zslDeleteRangeByScore(zs->zsl,range,zs->dict); + break; + } if (htNeedsResize(zs->dict)) dictResize(zs->dict); if (dictSize(zs->dict) == 0) { dbDelete(c->db,key); @@ -1347,9 +1390,11 @@ void zremrangebyscoreCommand(redisClient *c) { redisPanic("Unknown sorted set encoding"); } + /* Step 4: Notifications and reply. */ if (deleted) { + char *event[3] = {"zremrangebyrank","zremrangebyscore","zremrangebylex"}; signalModifiedKey(c->db,key); - notifyKeyspaceEvent(REDIS_NOTIFY_ZSET,"zrembyscore",key,c->db->id); + notifyKeyspaceEvent(REDIS_NOTIFY_ZSET,event[rangetype],key,c->db->id); if (keyremoved) notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",key,c->db->id); } @@ -1358,63 +1403,11 @@ void zremrangebyscoreCommand(redisClient *c) { } void zremrangebyrankCommand(redisClient *c) { - robj *key = c->argv[1]; - robj *zobj; - long start; - long end; - int llen; - unsigned long deleted; - int keyremoved = 0; + zremrangeGenericCommand(c,ZRANGE_RANK); +} - if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != REDIS_OK) || - (getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != REDIS_OK)) return; - - if ((zobj = lookupKeyWriteOrReply(c,key,shared.czero)) == NULL || - checkType(c,zobj,REDIS_ZSET)) return; - - /* Sanitize indexes. */ - llen = zsetLength(zobj); - if (start < 0) start = llen+start; - if (end < 0) end = llen+end; - if (start < 0) start = 0; - - /* Invariant: start >= 0, so this test will be true when end < 0. - * The range is empty when start > end or start >= length. */ - if (start > end || start >= llen) { - addReply(c,shared.czero); - return; - } - if (end >= llen) end = llen-1; - - if (zobj->encoding == REDIS_ENCODING_ZIPLIST) { - /* Correct for 1-based rank. */ - zobj->ptr = zzlDeleteRangeByRank(zobj->ptr,start+1,end+1,&deleted); - if (zzlLength(zobj->ptr) == 0) { - dbDelete(c->db,key); - keyremoved = 1; - } - } else if (zobj->encoding == REDIS_ENCODING_SKIPLIST) { - zset *zs = zobj->ptr; - - /* Correct for 1-based rank. */ - deleted = zslDeleteRangeByRank(zs->zsl,start+1,end+1,zs->dict); - if (htNeedsResize(zs->dict)) dictResize(zs->dict); - if (dictSize(zs->dict) == 0) { - dbDelete(c->db,key); - keyremoved = 1; - } - } else { - redisPanic("Unknown sorted set encoding"); - } - - if (deleted) { - signalModifiedKey(c->db,key); - notifyKeyspaceEvent(REDIS_NOTIFY_ZSET,"zrembyrank",key,c->db->id); - if (keyremoved) - notifyKeyspaceEvent(REDIS_NOTIFY_GENERIC,"del",key,c->db->id); - } - server.dirty += deleted; - addReplyLongLong(c,deleted); +void zremrangebyscoreCommand(redisClient *c) { + zremrangeGenericCommand(c,ZRANGE_SCORE); } typedef struct { From 07ea1eb0836a84ecf6ba232ab0b2e153b9befb6b Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 17 Apr 2014 14:30:12 +0200 Subject: [PATCH 50/58] Always pass sorted set range objects by reference. --- src/db.c | 6 ++-- src/redis.h | 4 +-- src/t_zset.c | 90 +++++++++++++++++++++++++++------------------------- 3 files changed, 51 insertions(+), 49 deletions(-) diff --git a/src/db.c b/src/db.c index f80ef2aca..6ebfe6363 100644 --- a/src/db.c +++ b/src/db.c @@ -1143,7 +1143,7 @@ unsigned int getKeysInSlot(unsigned int hashslot, robj **keys, unsigned int coun range.min = range.max = hashslot; range.minex = range.maxex = 0; - n = zslFirstInRange(server.cluster->slots_to_keys, range); + n = zslFirstInRange(server.cluster->slots_to_keys, &range); while(n && n->score == hashslot && count--) { keys[j++] = n->obj; n = n->level[0].forward; @@ -1161,7 +1161,7 @@ unsigned int countKeysInSlot(unsigned int hashslot) { range.minex = range.maxex = 0; /* Find first element in range */ - zn = zslFirstInRange(zsl, range); + zn = zslFirstInRange(zsl, &range); /* Use rank of first element, if any, to determine preliminary count */ if (zn != NULL) { @@ -1169,7 +1169,7 @@ unsigned int countKeysInSlot(unsigned int hashslot) { count = (zsl->length - (rank - 1)); /* Find last element in range */ - zn = zslLastInRange(zsl, range); + zn = zslLastInRange(zsl, &range); /* Use rank of last element, if any, to determine the actual count */ if (zn != NULL) { diff --git a/src/redis.h b/src/redis.h index af25d9be8..2166a2557 100644 --- a/src/redis.h +++ b/src/redis.h @@ -1151,8 +1151,8 @@ void zslFree(zskiplist *zsl); zskiplistNode *zslInsert(zskiplist *zsl, double score, robj *obj); unsigned char *zzlInsert(unsigned char *zl, robj *ele, double score); int zslDelete(zskiplist *zsl, double score, robj *obj); -zskiplistNode *zslFirstInRange(zskiplist *zsl, zrangespec range); -zskiplistNode *zslLastInRange(zskiplist *zsl, zrangespec range); +zskiplistNode *zslFirstInRange(zskiplist *zsl, zrangespec *range); +zskiplistNode *zslLastInRange(zskiplist *zsl, zrangespec *range); double zzlGetScore(unsigned char *sptr); void zzlNext(unsigned char *zl, unsigned char **eptr, unsigned char **sptr); void zzlPrev(unsigned char *zl, unsigned char **eptr, unsigned char **sptr); diff --git a/src/t_zset.c b/src/t_zset.c index 1214043b4..5e9354d7a 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -235,18 +235,18 @@ int zslIsInRange(zskiplist *zsl, zrangespec *range) { /* Find the first node that is contained in the specified range. * Returns NULL when no element is contained in the range. */ -zskiplistNode *zslFirstInRange(zskiplist *zsl, zrangespec range) { +zskiplistNode *zslFirstInRange(zskiplist *zsl, zrangespec *range) { zskiplistNode *x; int i; /* If everything is out of range, return early. */ - if (!zslIsInRange(zsl,&range)) return NULL; + if (!zslIsInRange(zsl,range)) return NULL; x = zsl->header; for (i = zsl->level-1; i >= 0; i--) { /* Go forward while *OUT* of range. */ while (x->level[i].forward && - !zslValueGteMin(x->level[i].forward->score,&range)) + !zslValueGteMin(x->level[i].forward->score,range)) x = x->level[i].forward; } @@ -255,24 +255,24 @@ zskiplistNode *zslFirstInRange(zskiplist *zsl, zrangespec range) { redisAssert(x != NULL); /* Check if score <= max. */ - if (!zslValueLteMax(x->score,&range)) return NULL; + if (!zslValueLteMax(x->score,range)) return NULL; return x; } /* Find the last node that is contained in the specified range. * Returns NULL when no element is contained in the range. */ -zskiplistNode *zslLastInRange(zskiplist *zsl, zrangespec range) { +zskiplistNode *zslLastInRange(zskiplist *zsl, zrangespec *range) { zskiplistNode *x; int i; /* If everything is out of range, return early. */ - if (!zslIsInRange(zsl,&range)) return NULL; + if (!zslIsInRange(zsl,range)) return NULL; x = zsl->header; for (i = zsl->level-1; i >= 0; i--) { /* Go forward while *IN* range. */ while (x->level[i].forward && - zslValueLteMax(x->level[i].forward->score,&range)) + zslValueLteMax(x->level[i].forward->score,range)) x = x->level[i].forward; } @@ -280,7 +280,7 @@ zskiplistNode *zslLastInRange(zskiplist *zsl, zrangespec range) { redisAssert(x != NULL); /* Check if score >= min. */ - if (!zslValueGteMin(x->score,&range)) return NULL; + if (!zslValueGteMin(x->score,range)) return NULL; return x; } @@ -288,16 +288,16 @@ zskiplistNode *zslLastInRange(zskiplist *zsl, zrangespec range) { * Min and max are inclusive, so a score >= min || score <= max is deleted. * Note that this function takes the reference to the hash table view of the * sorted set, in order to remove the elements from the hash table too. */ -unsigned long zslDeleteRangeByScore(zskiplist *zsl, zrangespec range, dict *dict) { +unsigned long zslDeleteRangeByScore(zskiplist *zsl, zrangespec *range, dict *dict) { zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x; unsigned long removed = 0; int i; x = zsl->header; for (i = zsl->level-1; i >= 0; i--) { - while (x->level[i].forward && (range.minex ? - x->level[i].forward->score <= range.min : - x->level[i].forward->score < range.min)) + while (x->level[i].forward && (range->minex ? + x->level[i].forward->score <= range->min : + x->level[i].forward->score < range->min)) x = x->level[i].forward; update[i] = x; } @@ -306,7 +306,9 @@ unsigned long zslDeleteRangeByScore(zskiplist *zsl, zrangespec range, dict *dict x = x->level[0].forward; /* Delete nodes while in range. */ - while (x && (range.maxex ? x->score < range.max : x->score <= range.max)) { + while (x && + (range->maxex ? x->score < range->max : x->score <= range->max)) + { zskiplistNode *next = x->level[0].forward; zslDeleteNode(zsl,x,update); dictDelete(dict,x->obj); @@ -547,18 +549,18 @@ int zslIsInLexRange(zskiplist *zsl, zlexrangespec *range) { /* Find the first node that is contained in the specified lex range. * Returns NULL when no element is contained in the range. */ -zskiplistNode *zslFirstInLexRange(zskiplist *zsl, zlexrangespec range) { +zskiplistNode *zslFirstInLexRange(zskiplist *zsl, zlexrangespec *range) { zskiplistNode *x; int i; /* If everything is out of range, return early. */ - if (!zslIsInLexRange(zsl,&range)) return NULL; + if (!zslIsInLexRange(zsl,range)) return NULL; x = zsl->header; for (i = zsl->level-1; i >= 0; i--) { /* Go forward while *OUT* of range. */ while (x->level[i].forward && - !zslLexValueGteMin(x->level[i].forward->obj,&range)) + !zslLexValueGteMin(x->level[i].forward->obj,range)) x = x->level[i].forward; } @@ -567,24 +569,24 @@ zskiplistNode *zslFirstInLexRange(zskiplist *zsl, zlexrangespec range) { redisAssert(x != NULL); /* Check if score <= max. */ - if (!zslLexValueLteMax(x->obj,&range)) return NULL; + if (!zslLexValueLteMax(x->obj,range)) return NULL; return x; } /* Find the last node that is contained in the specified range. * Returns NULL when no element is contained in the range. */ -zskiplistNode *zslLastInLexRange(zskiplist *zsl, zlexrangespec range) { +zskiplistNode *zslLastInLexRange(zskiplist *zsl, zlexrangespec *range) { zskiplistNode *x; int i; /* If everything is out of range, return early. */ - if (!zslIsInLexRange(zsl,&range)) return NULL; + if (!zslIsInLexRange(zsl,range)) return NULL; x = zsl->header; for (i = zsl->level-1; i >= 0; i--) { /* Go forward while *IN* range. */ while (x->level[i].forward && - zslLexValueLteMax(x->level[i].forward->obj,&range)) + zslLexValueLteMax(x->level[i].forward->obj,range)) x = x->level[i].forward; } @@ -592,7 +594,7 @@ zskiplistNode *zslLastInLexRange(zskiplist *zsl, zlexrangespec range) { redisAssert(x != NULL); /* Check if score >= min. */ - if (!zslLexValueGteMin(x->obj,&range)) return NULL; + if (!zslLexValueGteMin(x->obj,range)) return NULL; return x; } @@ -730,21 +732,21 @@ int zzlIsInRange(unsigned char *zl, zrangespec *range) { /* Find pointer to the first element contained in the specified range. * Returns NULL when no element is contained in the range. */ -unsigned char *zzlFirstInRange(unsigned char *zl, zrangespec range) { +unsigned char *zzlFirstInRange(unsigned char *zl, zrangespec *range) { unsigned char *eptr = ziplistIndex(zl,0), *sptr; double score; /* If everything is out of range, return early. */ - if (!zzlIsInRange(zl,&range)) return NULL; + if (!zzlIsInRange(zl,range)) return NULL; while (eptr != NULL) { sptr = ziplistNext(zl,eptr); redisAssert(sptr != NULL); score = zzlGetScore(sptr); - if (zslValueGteMin(score,&range)) { + if (zslValueGteMin(score,range)) { /* Check if score <= max. */ - if (zslValueLteMax(score,&range)) + if (zslValueLteMax(score,range)) return eptr; return NULL; } @@ -758,21 +760,21 @@ unsigned char *zzlFirstInRange(unsigned char *zl, zrangespec range) { /* Find pointer to the last element contained in the specified range. * Returns NULL when no element is contained in the range. */ -unsigned char *zzlLastInRange(unsigned char *zl, zrangespec range) { +unsigned char *zzlLastInRange(unsigned char *zl, zrangespec *range) { unsigned char *eptr = ziplistIndex(zl,-2), *sptr; double score; /* If everything is out of range, return early. */ - if (!zzlIsInRange(zl,&range)) return NULL; + if (!zzlIsInRange(zl,range)) return NULL; while (eptr != NULL) { sptr = ziplistNext(zl,eptr); redisAssert(sptr != NULL); score = zzlGetScore(sptr); - if (zslValueLteMax(score,&range)) { + if (zslValueLteMax(score,range)) { /* Check if score >= min. */ - if (zslValueGteMin(score,&range)) + if (zslValueGteMin(score,range)) return eptr; return NULL; } @@ -977,7 +979,7 @@ unsigned char *zzlInsert(unsigned char *zl, robj *ele, double score) { return zl; } -unsigned char *zzlDeleteRangeByScore(unsigned char *zl, zrangespec range, unsigned long *deleted) { +unsigned char *zzlDeleteRangeByScore(unsigned char *zl, zrangespec *range, unsigned long *deleted) { unsigned char *eptr, *sptr; double score; unsigned long num = 0; @@ -991,7 +993,7 @@ unsigned char *zzlDeleteRangeByScore(unsigned char *zl, zrangespec range, unsign * byte and ziplistNext will return NULL. */ while ((sptr = ziplistNext(zl,eptr)) != NULL) { score = zzlGetScore(sptr); - if (zslValueLteMax(score,&range)) { + if (zslValueLteMax(score,range)) { /* Delete both the element and the score. */ zl = ziplistDelete(zl,&eptr); zl = ziplistDelete(zl,&eptr); @@ -1364,7 +1366,7 @@ void zremrangeGenericCommand(redisClient *c, int rangetype) { zobj->ptr = zzlDeleteRangeByRank(zobj->ptr,start+1,end+1,&deleted); break; case ZRANGE_SCORE: - zobj->ptr = zzlDeleteRangeByScore(zobj->ptr,range,&deleted); + zobj->ptr = zzlDeleteRangeByScore(zobj->ptr,&range,&deleted); break; } if (zzlLength(zobj->ptr) == 0) { @@ -1378,7 +1380,7 @@ void zremrangeGenericCommand(redisClient *c, int rangetype) { deleted = zslDeleteRangeByRank(zs->zsl,start+1,end+1,zs->dict); break; case ZRANGE_SCORE: - deleted = zslDeleteRangeByScore(zs->zsl,range,zs->dict); + deleted = zslDeleteRangeByScore(zs->zsl,&range,zs->dict); break; } if (htNeedsResize(zs->dict)) dictResize(zs->dict); @@ -2153,9 +2155,9 @@ void genericZrangebyscoreCommand(redisClient *c, int reverse) { /* If reversed, get the last node in range as starting point. */ if (reverse) { - eptr = zzlLastInRange(zl,range); + eptr = zzlLastInRange(zl,&range); } else { - eptr = zzlFirstInRange(zl,range); + eptr = zzlFirstInRange(zl,&range); } /* No "first" element in the specified interval. */ @@ -2221,9 +2223,9 @@ void genericZrangebyscoreCommand(redisClient *c, int reverse) { /* If reversed, get the last node in range as starting point. */ if (reverse) { - ln = zslLastInRange(zsl,range); + ln = zslLastInRange(zsl,&range); } else { - ln = zslFirstInRange(zsl,range); + ln = zslFirstInRange(zsl,&range); } /* No "first" element in the specified interval. */ @@ -2310,7 +2312,7 @@ void zcountCommand(redisClient *c) { double score; /* Use the first element in range as the starting point */ - eptr = zzlFirstInRange(zl,range); + eptr = zzlFirstInRange(zl,&range); /* No "first" element */ if (eptr == NULL) { @@ -2342,7 +2344,7 @@ void zcountCommand(redisClient *c) { unsigned long rank; /* Find first element in range */ - zn = zslFirstInRange(zsl, range); + zn = zslFirstInRange(zsl, &range); /* Use rank of first element, if any, to determine preliminary count */ if (zn != NULL) { @@ -2350,7 +2352,7 @@ void zcountCommand(redisClient *c) { count = (zsl->length - (rank - 1)); /* Find last element in range */ - zn = zslLastInRange(zsl, range); + zn = zslLastInRange(zsl, &range); /* Use rank of last element, if any, to determine the actual count */ if (zn != NULL) { @@ -2420,7 +2422,7 @@ void zlexcountCommand(redisClient *c) { unsigned long rank; /* Find first element in range */ - zn = zslFirstInLexRange(zsl, range); + zn = zslFirstInLexRange(zsl, &range); /* Use rank of first element, if any, to determine preliminary count */ if (zn != NULL) { @@ -2428,7 +2430,7 @@ void zlexcountCommand(redisClient *c) { count = (zsl->length - (rank - 1)); /* Find last element in range */ - zn = zslLastInLexRange(zsl, range); + zn = zslLastInLexRange(zsl, &range); /* Use rank of last element, if any, to determine the actual count */ if (zn != NULL) { @@ -2568,9 +2570,9 @@ void genericZrangebylexCommand(redisClient *c, int reverse) { /* If reversed, get the last node in range as starting point. */ if (reverse) { - ln = zslLastInLexRange(zsl,range); + ln = zslLastInLexRange(zsl,&range); } else { - ln = zslFirstInLexRange(zsl,range); + ln = zslFirstInLexRange(zsl,&range); } /* No "first" element in the specified interval. */ From 0ab40d14df04a58df2c6c1d482f79abfcc235611 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 17 Apr 2014 14:47:52 +0200 Subject: [PATCH 51/58] ZREMRANGEBYLEX implemented. --- src/redis.c | 1 + src/redis.h | 1 + src/t_zset.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+) diff --git a/src/redis.c b/src/redis.c index 8898ad3e5..d7b9c72dc 100644 --- a/src/redis.c +++ b/src/redis.c @@ -171,6 +171,7 @@ struct redisCommand redisCommandTable[] = { {"zrem",zremCommand,-3,"w",0,NULL,1,1,1,0,0}, {"zremrangebyscore",zremrangebyscoreCommand,4,"w",0,NULL,1,1,1,0,0}, {"zremrangebyrank",zremrangebyrankCommand,4,"w",0,NULL,1,1,1,0,0}, + {"zremrangebylex",zremrangebylexCommand,4,"w",0,NULL,1,1,1,0,0}, {"zunionstore",zunionstoreCommand,-4,"wm",0,zunionInterGetKeys,0,0,0,0,0}, {"zinterstore",zinterstoreCommand,-4,"wm",0,zunionInterGetKeys,0,0,0,0,0}, {"zrange",zrangeCommand,-4,"r",0,NULL,1,1,1,0,0}, diff --git a/src/redis.h b/src/redis.h index 2166a2557..d0cb88f88 100644 --- a/src/redis.h +++ b/src/redis.h @@ -1406,6 +1406,7 @@ void zcardCommand(redisClient *c); void zremCommand(redisClient *c); void zscoreCommand(redisClient *c); void zremrangebyscoreCommand(redisClient *c); +void zremrangebylexCommand(redisClient *c); void multiCommand(redisClient *c); void execCommand(redisClient *c); void discardCommand(redisClient *c); diff --git a/src/t_zset.c b/src/t_zset.c index 5e9354d7a..40dc00c27 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -52,6 +52,9 @@ #include "redis.h" #include +static int zslLexValueGteMin(robj *value, zlexrangespec *spec); +static int zslLexValueLteMax(robj *value, zlexrangespec *spec); + zskiplistNode *zslCreateNode(int level, double score, robj *obj) { zskiplistNode *zn = zmalloc(sizeof(*zn)+level*sizeof(struct zskiplistLevel)); zn->score = score; @@ -319,6 +322,35 @@ unsigned long zslDeleteRangeByScore(zskiplist *zsl, zrangespec *range, dict *dic return removed; } +unsigned long zslDeleteRangeByLex(zskiplist *zsl, zlexrangespec *range, dict *dict) { + zskiplistNode *update[ZSKIPLIST_MAXLEVEL], *x; + unsigned long removed = 0; + int i; + + + x = zsl->header; + for (i = zsl->level-1; i >= 0; i--) { + while (x->level[i].forward && + !zslLexValueGteMin(x->level[i].forward->obj,range)) + x = x->level[i].forward; + update[i] = x; + } + + /* Current node is the last with score < or <= min. */ + x = x->level[0].forward; + + /* Delete nodes while in range. */ + while (x && zslLexValueLteMax(x->obj,range)) { + zskiplistNode *next = x->level[0].forward; + zslDeleteNode(zsl,x,update); + dictDelete(dict,x->obj); + zslFreeNode(x); + removed++; + x = next; + } + return removed; +} + /* Delete all the elements with rank between start and end from the skiplist. * Start and end are inclusive. Note that start and end need to be 1-based */ unsigned long zslDeleteRangeByRank(zskiplist *zsl, unsigned int start, unsigned int end, dict *dict) { @@ -1008,6 +1040,33 @@ unsigned char *zzlDeleteRangeByScore(unsigned char *zl, zrangespec *range, unsig return zl; } +unsigned char *zzlDeleteRangeByLex(unsigned char *zl, zlexrangespec *range, unsigned long *deleted) { + unsigned char *eptr, *sptr; + unsigned long num = 0; + + if (deleted != NULL) *deleted = 0; + + eptr = zzlFirstInLexRange(zl,range); + if (eptr == NULL) return zl; + + /* When the tail of the ziplist is deleted, eptr will point to the sentinel + * byte and ziplistNext will return NULL. */ + while ((sptr = ziplistNext(zl,eptr)) != NULL) { + if (zzlLexValueLteMax(eptr,range)) { + /* Delete both the element and the score. */ + zl = ziplistDelete(zl,&eptr); + zl = ziplistDelete(zl,&eptr); + num++; + } else { + /* No longer in range. */ + break; + } + } + + if (deleted != NULL) *deleted = num; + return zl; +} + /* Delete all the elements with rank between start and end from the skiplist. * Start and end are inclusive. Note that start and end need to be 1-based */ unsigned char *zzlDeleteRangeByRank(unsigned char *zl, unsigned int start, unsigned int end, unsigned long *deleted) { @@ -1325,6 +1384,7 @@ void zremrangeGenericCommand(redisClient *c, int rangetype) { int keyremoved = 0; unsigned long deleted; zrangespec range; + zlexrangespec lexrange; long start, end, llen; /* Step 1: Parse the range. */ @@ -1337,6 +1397,11 @@ void zremrangeGenericCommand(redisClient *c, int rangetype) { addReplyError(c,"min or max is not a float"); return; } + } else if (rangetype == ZRANGE_LEX) { + if (zslParseLexRange(c->argv[2],c->argv[3],&lexrange) != REDIS_OK) { + addReplyError(c,"min or max not valid string range item"); + return; + } } /* Step 2: Lookup & range sanity checks if needed. */ @@ -1368,6 +1433,9 @@ void zremrangeGenericCommand(redisClient *c, int rangetype) { case ZRANGE_SCORE: zobj->ptr = zzlDeleteRangeByScore(zobj->ptr,&range,&deleted); break; + case ZRANGE_LEX: + zobj->ptr = zzlDeleteRangeByLex(zobj->ptr,&lexrange,&deleted); + break; } if (zzlLength(zobj->ptr) == 0) { dbDelete(c->db,key); @@ -1382,6 +1450,9 @@ void zremrangeGenericCommand(redisClient *c, int rangetype) { case ZRANGE_SCORE: deleted = zslDeleteRangeByScore(zs->zsl,&range,zs->dict); break; + case ZRANGE_LEX: + deleted = zslDeleteRangeByLex(zs->zsl,&lexrange,zs->dict); + break; } if (htNeedsResize(zs->dict)) dictResize(zs->dict); if (dictSize(zs->dict) == 0) { @@ -1412,6 +1483,10 @@ void zremrangebyscoreCommand(redisClient *c) { zremrangeGenericCommand(c,ZRANGE_SCORE); } +void zremrangebylexCommand(redisClient *c) { + zremrangeGenericCommand(c,ZRANGE_LEX); +} + typedef struct { robj *subject; int type; /* Set, sorted set */ From e70c3b6c9ba690df5c0268449316512f85ec3ba6 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 17 Apr 2014 17:08:43 +0200 Subject: [PATCH 52/58] HyperLogLog low level merge extracted from PFMERGE. --- src/hyperloglog.c | 93 +++++++++++++++++++++++++++-------------------- 1 file changed, 54 insertions(+), 39 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 838c921db..1b984c15f 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -991,6 +991,55 @@ int hllAdd(robj *o, unsigned char *ele, size_t elesize) { } } +/* Merge by computing MAX(registers[i],hll[i]) the HyperLogLog 'hll' + * with an array of uint8_t HLL_REGISTERS registers pointed by 'max'. + * + * The hll object must be already validated via isHLLObjectOrReply() + * or in some other way. + * + * If the HyperLogLog is sparse and is found to be invalid, REDIS_ERR + * is returned, otherwise the function always succeeds. */ +int hllMerge(uint8_t *max, robj *hll) { + struct hllhdr *hdr = hll->ptr; + int i; + + if (hdr->encoding == HLL_DENSE) { + uint8_t val; + + for (i = 0; i < HLL_REGISTERS; i++) { + HLL_DENSE_GET_REGISTER(val,hdr->registers,i); + if (val > max[i]) max[i] = val; + } + } else { + uint8_t *p = hll->ptr, *end = p + sdslen(hll->ptr); + long runlen, regval; + + p += HLL_HDR_SIZE; + i = 0; + while(p < end) { + if (HLL_SPARSE_IS_ZERO(p)) { + runlen = HLL_SPARSE_ZERO_LEN(p); + i += runlen; + p++; + } else if (HLL_SPARSE_IS_XZERO(p)) { + runlen = HLL_SPARSE_XZERO_LEN(p); + i += runlen; + p += 2; + } else { + runlen = HLL_SPARSE_VAL_LEN(p); + regval = HLL_SPARSE_VAL_VALUE(p); + while(runlen--) { + if (regval > max[i]) max[i] = regval; + i++; + } + p++; + } + } + if (i != HLL_REGISTERS) return REDIS_ERR; + } + return REDIS_OK; +} + /* ========================== HyperLogLog commands ========================== */ /* Create an HLL object. We always create the HLL using sparse encoding. @@ -1157,7 +1206,7 @@ void pfcountCommand(redisClient *c) { void pfmergeCommand(redisClient *c) { uint8_t max[HLL_REGISTERS]; struct hllhdr *hdr; - int j, i; + int j; /* Compute an HLL with M[i] = MAX(M[i]_j). * We we the maximum into the max array of registers. We'll write @@ -1171,43 +1220,9 @@ void pfmergeCommand(redisClient *c) { /* Merge with this HLL with our 'max' HHL by setting max[i] * to MAX(max[i],hll[i]). */ - hdr = o->ptr; - if (hdr->encoding == HLL_DENSE) { - uint8_t val; - - for (i = 0; i < HLL_REGISTERS; i++) { - HLL_DENSE_GET_REGISTER(val,hdr->registers,i); - if (val > max[i]) max[i] = val; - } - } else { - uint8_t *p = o->ptr, *end = p + sdslen(o->ptr); - long runlen, regval; - - p += HLL_HDR_SIZE; - i = 0; - while(p < end) { - if (HLL_SPARSE_IS_ZERO(p)) { - runlen = HLL_SPARSE_ZERO_LEN(p); - i += runlen; - p++; - } else if (HLL_SPARSE_IS_XZERO(p)) { - runlen = HLL_SPARSE_XZERO_LEN(p); - i += runlen; - p += 2; - } else { - runlen = HLL_SPARSE_VAL_LEN(p); - regval = HLL_SPARSE_VAL_VALUE(p); - while(runlen--) { - if (regval > max[i]) max[i] = regval; - i++; - } - p++; - } - } - if (i != HLL_REGISTERS) { - addReplySds(c,sdsnew(invalid_hll_err)); - return; - } + if (hllMerge(max,o) == REDIS_ERR) { + addReplySds(c,sdsnew(invalid_hll_err)); + return; } } @@ -1241,7 +1256,7 @@ void pfmergeCommand(redisClient *c) { HLL_INVALIDATE_CACHE(hdr); signalModifiedKey(c->db,c->argv[1]); - /* We generate an HLLADD event for HLLMERGE for semantical simplicity + /* We generate an PFADD event for PFMERGE for semantical simplicity * since in theory this is a mass-add of elements. */ notifyKeyspaceEvent(REDIS_NOTIFY_STRING,"pfadd",c->argv[1],c->db->id); server.dirty++; From e841ecc3df801172b0e021e8f2e19110761ef74a Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 17 Apr 2014 17:29:04 +0200 Subject: [PATCH 53/58] PFCOUNT support for multi-key union. --- src/hyperloglog.c | 77 ++++++++++++++++++++++++++++++++++++++++++++--- src/redis.c | 2 +- 2 files changed, 73 insertions(+), 6 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 1b984c15f..a44df87c9 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -198,8 +198,9 @@ struct hllhdr { #define HLL_REGISTER_MAX ((1<registers will point to an uint8_t array of HLL_REGISTERS element. + * This is useful in order to speedup PFCOUNT when called against multiple + * keys (no need to work with 6-bit integers encoding). */ uint64_t hllCount(struct hllhdr *hdr, int *invalid) { double m = HLL_REGISTERS; double E, alpha = 0.7213/(1+1.079/m); @@ -947,9 +973,13 @@ uint64_t hllCount(struct hllhdr *hdr, int *invalid) { /* Compute SUM(2^-register[0..i]). */ if (hdr->encoding == HLL_DENSE) { E = hllDenseSum(hdr->registers,PE,&ez); - } else { + } else if (hdr->encoding == HLL_SPARSE) { E = hllSparseSum(hdr->registers, sdslen((sds)hdr)-HLL_HDR_SIZE,PE,&ez,invalid); + } else if (hdr->encoding == HLL_RAW) { + E = hllRawSum(hdr->registers,PE,&ez); + } else { + redisPanic("Unknown HyperLogLog encoding in hllCount()"); } /* Muliply the inverse of E for alpha_m * m^2 to have the raw estimate. */ @@ -1151,10 +1181,47 @@ void pfaddCommand(redisClient *c) { /* PFCOUNT var -> approximated cardinality of set. */ void pfcountCommand(redisClient *c) { - robj *o = lookupKeyRead(c->db,c->argv[1]); + robj *o; struct hllhdr *hdr; uint64_t card; + /* Case 1: multi-key keys, cardinality of the union. + * + * When multiple keys are specified, PFCOUNT actually computes + * the cardinality of the merge of the N HLLs specified. */ + if (c->argc > 2) { + uint8_t max[HLL_HDR_SIZE+HLL_REGISTERS], *registers; + int j; + + /* Compute an HLL with M[i] = MAX(M[i]_j). */ + memset(max,0,sizeof(max)); + hdr = (struct hllhdr*) max; + hdr->encoding = HLL_RAW; /* Special internal-only encoding. */ + registers = max + HLL_HDR_SIZE; + for (j = 1; j < c->argc; j++) { + /* Check type and size. */ + robj *o = lookupKeyRead(c->db,c->argv[j]); + if (o == NULL) continue; /* Assume empty HLL for non existing var. */ + if (isHLLObjectOrReply(c,o) != REDIS_OK) return; + + /* Merge with this HLL with our 'max' HHL by setting max[i] + * to MAX(max[i],hll[i]). */ + if (hllMerge(registers,o) == REDIS_ERR) { + addReplySds(c,sdsnew(invalid_hll_err)); + return; + } + } + + /* Compute cardinality of the resulting set. */ + addReplyLongLong(c,hllCount(hdr,NULL)); + return; + } + + /* Case 2: cardinality of the single HLL. + * + * The user specified a single key. Either return the cached value + * or compute one and update the cache. */ + o = lookupKeyRead(c->db,c->argv[1]); if (o == NULL) { /* No key? Cardinality is zero since no element was added, otherwise * we would have a key as HLLADD creates it as a side effect. */ diff --git a/src/redis.c b/src/redis.c index d7b9c72dc..88743550f 100644 --- a/src/redis.c +++ b/src/redis.c @@ -274,7 +274,7 @@ struct redisCommand redisCommandTable[] = { {"wait",waitCommand,3,"rs",0,NULL,0,0,0,0,0}, {"pfselftest",pfselftestCommand,1,"r",0,NULL,0,0,0,0,0}, {"pfadd",pfaddCommand,-2,"wm",0,NULL,1,1,1,0,0}, - {"pfcount",pfcountCommand,2,"w",0,NULL,1,1,1,0,0}, + {"pfcount",pfcountCommand,-2,"w",0,NULL,1,1,1,0,0}, {"pfmerge",pfmergeCommand,-2,"wm",0,NULL,1,-1,1,0,0}, {"pfdebug",pfdebugCommand,-3,"w",0,NULL,0,0,0,0,0} }; From bb3241f788c01768eeb6dbb2863232f75f670446 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 17 Apr 2014 17:53:20 +0200 Subject: [PATCH 54/58] Speedup SUM(2^-reg[m]) in HyperLogLog computation. When the register is set to zero, we need to add 2^-0 to E, which is 1, but it is faster to just add 'ez' at the end, which is the number of registers set to zero, a value we need to compute anyway. --- src/hyperloglog.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index a44df87c9..c951c09f7 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -546,11 +546,12 @@ double hllDenseSum(uint8_t *registers, double *PE, int *ezp) { HLL_DENSE_GET_REGISTER(reg,registers,j); if (reg == 0) { ez++; - E += 1; /* 2^(-reg[j]) is 1 when m is 0. */ + /* Increment E at the end of the loop. */ } else { E += PE[reg]; /* Precomputed 2^(-reg[j]). */ } } + E += ez; /* Add 2^0 'ez' times. */ } *ezp = ez; return E; @@ -894,13 +895,13 @@ double hllSparseSum(uint8_t *sparse, int sparselen, double *PE, int *ezp, int *i runlen = HLL_SPARSE_ZERO_LEN(p); idx += runlen; ez += runlen; - E += 1*runlen; /* 2^(-reg[j]) is 1 when m is 0. */ + /* Increment E at the end of the loop. */ p++; } else if (HLL_SPARSE_IS_XZERO(p)) { runlen = HLL_SPARSE_XZERO_LEN(p); idx += runlen; ez += runlen; - E += 1*runlen; /* 2^(-reg[j]) is 1 when m is 0. */ + /* Increment E at the end of the loop. */ p += 2; } else { runlen = HLL_SPARSE_VAL_LEN(p); @@ -911,6 +912,7 @@ double hllSparseSum(uint8_t *sparse, int sparselen, double *PE, int *ezp, int *i } } if (idx != HLL_REGISTERS && invalid) *invalid = 1; + E += ez; /* Add 2^0 'ez' times. */ *ezp = ez; return E; } @@ -932,11 +934,13 @@ double hllRawSum(uint8_t *registers, double *PE, int *ezp) { reg = registers[j]; if (reg == 0) { ez++; - E += 1; /* 2^(-reg[j]) is 1 when m is 0. */ + /* Increment E at the end of the loop. */ } else { E += PE[reg]; /* Precomputed 2^(-reg[j]). */ } } + E += ez; /* 2^(-reg[j]) is 1 when m is 0, add it 'ez' times for every + zero register in the HLL. */ *ezp = ez; return E; } From ca8f491e9c438e950659dc03b8fba4462bb70400 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 17 Apr 2014 18:02:45 +0200 Subject: [PATCH 55/58] Speedup hllRawSum() processing 8 bytes per iteration. The internal HLL raw encoding used by PFCOUNT when merging multiple keys is aligned to 8 bits (1 byte per register) so we can exploit this to improve performances by processing multiple bytes per iteration. In benchmarks the new code was several times faster with HLLs with many registers set to zero, while no slowdown was observed with populated HLLs. --- src/hyperloglog.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index c951c09f7..1c6ed45f3 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -928,16 +928,24 @@ double hllSparseSum(uint8_t *sparse, int sparselen, double *PE, int *ezp, int *i double hllRawSum(uint8_t *registers, double *PE, int *ezp) { double E = 0; int j, ez = 0; - unsigned long reg; + uint64_t *word = (uint64_t*) registers; + uint8_t *bytes; - for (j = 0; j < HLL_REGISTERS; j++) { - reg = registers[j]; - if (reg == 0) { - ez++; - /* Increment E at the end of the loop. */ + for (j = 0; j < HLL_REGISTERS/8; j++) { + if (*word == 0) { + ez += 8; } else { - E += PE[reg]; /* Precomputed 2^(-reg[j]). */ + bytes = (uint8_t*) word; + if (bytes[0]) E += PE[bytes[0]]; else ez++; + if (bytes[1]) E += PE[bytes[1]]; else ez++; + if (bytes[2]) E += PE[bytes[2]]; else ez++; + if (bytes[3]) E += PE[bytes[3]]; else ez++; + if (bytes[4]) E += PE[bytes[4]]; else ez++; + if (bytes[5]) E += PE[bytes[5]]; else ez++; + if (bytes[6]) E += PE[bytes[6]]; else ez++; + if (bytes[7]) E += PE[bytes[7]]; else ez++; } + word++; } E += ez; /* 2^(-reg[j]) is 1 when m is 0, add it 'ez' times for every zero register in the HLL. */ From 531fbda313dd460424ff7dc386c42130eedc9f4b Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 18 Apr 2014 12:36:33 +0200 Subject: [PATCH 56/58] PFCOUNT multi-key test added. --- tests/unit/hyperloglog.tcl | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/unit/hyperloglog.tcl b/tests/unit/hyperloglog.tcl index 47e3db2a1..af86e68e5 100644 --- a/tests/unit/hyperloglog.tcl +++ b/tests/unit/hyperloglog.tcl @@ -136,6 +136,21 @@ start_server {tags {"hll"}} { r pfcount hll } {5} + test {PFCOUNT multiple-keys merge returns cardinality of union} { + r del hll1 hll2 hll3 + for {set x 1} {$x < 10000} {incr x} { + # Force dense representation of hll2 + r pfadd hll1 "foo-$x" + r pfadd hll2 "bar-$x" + r pfadd hll3 "zap-$x" + + set card [r pfcount hll1 hll2 hll3] + set realcard [expr {$x*3}] + set err [expr {abs($card-$realcard)}] + assert {$err < (double($card)/100)*5} + } + } + test {PFDEBUG GETREG returns the HyperLogLog raw registers} { r del hll r pfadd hll 1 2 3 From 2d43e0264dd713c2d18a3fd1afc0e6b7114210f7 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 18 Apr 2014 13:01:04 +0200 Subject: [PATCH 57/58] ZREMRANGEBYLEX memory leak removed calling zslFreeLexRange(). --- src/t_zset.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/t_zset.c b/src/t_zset.c index 40dc00c27..4e946b4f5 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -1406,7 +1406,7 @@ void zremrangeGenericCommand(redisClient *c, int rangetype) { /* Step 2: Lookup & range sanity checks if needed. */ if ((zobj = lookupKeyWriteOrReply(c,key,shared.czero)) == NULL || - checkType(c,zobj,REDIS_ZSET)) return; + checkType(c,zobj,REDIS_ZSET)) goto cleanup; if (rangetype == ZRANGE_RANK) { /* Sanitize indexes. */ @@ -1419,7 +1419,7 @@ void zremrangeGenericCommand(redisClient *c, int rangetype) { * The range is empty when start > end or start >= length. */ if (start > end || start >= llen) { addReply(c,shared.czero); - return; + goto cleanup; } if (end >= llen) end = llen-1; } @@ -1473,6 +1473,9 @@ void zremrangeGenericCommand(redisClient *c, int rangetype) { } server.dirty += deleted; addReplyLongLong(c,deleted); + +cleanup: + if (rangetype == ZRANGE_LEX) zslFreeLexRange(&lexrange); } void zremrangebyrankCommand(redisClient *c) { From ed2cc684ab58a0d427c39ef4764028b2f549d459 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 18 Apr 2014 13:02:16 +0200 Subject: [PATCH 58/58] Fuzzy test for ZREMRANGEBYLEX added. --- tests/unit/type/zset.tcl | 42 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/tests/unit/type/zset.tcl b/tests/unit/type/zset.tcl index b3d593c3d..9cc840be6 100644 --- a/tests/unit/type/zset.tcl +++ b/tests/unit/type/zset.tcl @@ -764,7 +764,7 @@ start_server {tags {"zset"}} { assert_equal {} $err } - test "ZRANGEBYLEX fuzzy test, 200 ranges in $elements element sorted set - $encoding" { + test "ZRANGEBYLEX fuzzy test, 100 ranges in $elements element sorted set - $encoding" { set lexset {} r del zset for {set j 0} {$j < $elements} {incr j} { @@ -829,6 +829,46 @@ start_server {tags {"zset"}} { } } + test "ZREMRANGEBYLEX fuzzy test, 100 ranges in $elements element sorted set - $encoding" { + set lexset {} + r del zset zsetcopy + for {set j 0} {$j < $elements} {incr j} { + set e [randstring 0 30 alpha] + lappend lexset $e + r zadd zset 0 $e + } + set lexset [lsort -unique $lexset] + for {set j 0} {$j < 100} {incr j} { + # Copy... + r zunionstore zsetcopy 1 zset + set lexsetcopy $lexset + + set min [randstring 0 30 alpha] + set max [randstring 0 30 alpha] + set mininc [randomInt 2] + set maxinc [randomInt 2] + if {$mininc} {set cmin "\[$min"} else {set cmin "($min"} + if {$maxinc} {set cmax "\[$max"} else {set cmax "($max"} + + # Make sure data is the same in both sides + assert {[r zrange zset 0 -1] eq $lexset} + + # Get the range we are going to remove + set torem [r zrangebylex zset $cmin $cmax] + set toremlen [r zlexcount zset $cmin $cmax] + r zremrangebylex zsetcopy $cmin $cmax + set output [r zrange zsetcopy 0 -1] + + # Remove the range with Tcl from the original list + if {$toremlen} { + set first [lsearch -exact $lexsetcopy [lindex $torem 0]] + set last [expr {$first+$toremlen-1}] + set lexsetcopy [lreplace $lexsetcopy $first $last] + } + assert {$lexsetcopy eq $output} + } + } + test "ZSETs skiplist implementation backlink consistency test - $encoding" { set diff 0 for {set j 0} {$j < $elements} {incr j} {