From 9f13b2bd4967334b1701c6eccdf53760cb13f79e Mon Sep 17 00:00:00 2001 From: John Sully Date: Thu, 14 Mar 2019 14:02:16 -0400 Subject: [PATCH 01/10] Fix hyperloglog corruption --- src/hyperloglog.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index fc21ea006..e993bf26e 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -614,6 +614,10 @@ int hllSparseToDense(robj *o) { } else { runlen = HLL_SPARSE_VAL_LEN(p); regval = HLL_SPARSE_VAL_VALUE(p); + if ((runlen + idx) > HLL_REGISTERS) { + sdsfree(dense); + return C_ERR; + } while(runlen--) { HLL_DENSE_SET_REGISTER(hdr->registers,idx,regval); idx++; @@ -1088,6 +1092,8 @@ int hllMerge(uint8_t *max, robj *hll) { } else { runlen = HLL_SPARSE_VAL_LEN(p); regval = HLL_SPARSE_VAL_VALUE(p); + if ((runlen + i) > HLL_REGISTERS) + return C_ERR; while(runlen--) { if (regval > max[i]) max[i] = regval; i++; From 4208666797b5831eefc022ae46ab5747200cd671 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 15 Mar 2019 13:52:29 +0100 Subject: [PATCH 02/10] HyperLogLog: dense/sparse repr parsing fuzz test. --- tests/unit/hyperloglog.tcl | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/unit/hyperloglog.tcl b/tests/unit/hyperloglog.tcl index 7d36b7a35..6a9c47b11 100644 --- a/tests/unit/hyperloglog.tcl +++ b/tests/unit/hyperloglog.tcl @@ -115,6 +115,35 @@ start_server {tags {"hll"}} { set e } {*WRONGTYPE*} + test {Fuzzing dense/sparse encoding: Redis should always detect errors} { + for {set j 0} {$j < 10000} {incr j} { + r del hll + set items {} + set numitems [randomInt 3000] + for {set i 0} {$i < $numitems} {incr i} { + lappend items [expr {rand()}] + } + r pfadd hll {*}$items + + # Corrupt it in some random way. + for {set i 0} {$i < 5} {incr i} { + set len [r strlen hll] + set pos [randomInt $len] + set byte [randstring 1 1 binary] + r setrange hll $pos $byte + # Don't modify more bytes 50% of times + if {rand() < 0.5} break + } + + # Use the hyperloglog to check if it crashes + # Redis in some way. + catch { + r pfcount hll + r pfdebug getreg hll + } + } + } + test {PFADD, PFCOUNT, PFMERGE type checking works} { r set foo bar catch {r pfadd foo 1} e From a4b90be9fcd5e1668ac941cabce3b1ab38dbe326 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 15 Mar 2019 17:10:16 +0100 Subject: [PATCH 03/10] HyperLogLog: enlarge reghisto variable for safety. --- src/hyperloglog.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index e993bf26e..526510b43 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -1017,7 +1017,12 @@ uint64_t hllCount(struct hllhdr *hdr, int *invalid) { double m = HLL_REGISTERS; double E; int j; - int reghisto[HLL_Q+2] = {0}; + /* Note that reghisto could be just HLL_Q+1, becuase this is the + * maximum frequency of the "000...1" sequence the hash function is + * able to return. However it is slow to check for sanity of the + * input: instead we history array at a safe size: overflows will + * just write data to wrong, but correctly allocated, places. */ + int reghisto[64] = {0}; /* Compute register histogram */ if (hdr->encoding == HLL_DENSE) { From dca7358279bb6449f93e01f7d2806639b8e9ec4b Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 15 Mar 2019 17:13:19 +0100 Subject: [PATCH 04/10] HyperLogLog: speedup fuzz test. --- tests/unit/hyperloglog.tcl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/unit/hyperloglog.tcl b/tests/unit/hyperloglog.tcl index 6a9c47b11..712fcc641 100644 --- a/tests/unit/hyperloglog.tcl +++ b/tests/unit/hyperloglog.tcl @@ -116,7 +116,7 @@ start_server {tags {"hll"}} { } {*WRONGTYPE*} test {Fuzzing dense/sparse encoding: Redis should always detect errors} { - for {set j 0} {$j < 10000} {incr j} { + for {set j 0} {$j < 1000} {incr j} { r del hll set items {} set numitems [randomInt 3000] @@ -139,7 +139,6 @@ start_server {tags {"hll"}} { # Redis in some way. catch { r pfcount hll - r pfdebug getreg hll } } } From e216ceaf0e099536fe3658a29dcb725d812364e0 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 15 Mar 2019 17:16:06 +0100 Subject: [PATCH 05/10] HyperLogLog: handle wrong offset in the base case. --- src/hyperloglog.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 526510b43..1e7ce3dce 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -614,10 +614,7 @@ int hllSparseToDense(robj *o) { } else { runlen = HLL_SPARSE_VAL_LEN(p); regval = HLL_SPARSE_VAL_VALUE(p); - if ((runlen + idx) > HLL_REGISTERS) { - sdsfree(dense); - return C_ERR; - } + if ((runlen + idx) > HLL_REGISTERS) break; /* Overflow. */ while(runlen--) { HLL_DENSE_SET_REGISTER(hdr->registers,idx,regval); idx++; @@ -1097,8 +1094,7 @@ int hllMerge(uint8_t *max, robj *hll) { } else { runlen = HLL_SPARSE_VAL_LEN(p); regval = HLL_SPARSE_VAL_VALUE(p); - if ((runlen + i) > HLL_REGISTERS) - return C_ERR; + if ((runlen + i) > HLL_REGISTERS) break; /* Overflow. */ while(runlen--) { if (regval > max[i]) max[i] = regval; i++; From 8ea906a3e8f3e125baa9cf54f6027921d3822b02 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 16 Mar 2019 09:15:12 +0100 Subject: [PATCH 06/10] HyperLogLog: fix comment in hllCount(). --- src/hyperloglog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 1e7ce3dce..e01ea6042 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -1014,8 +1014,8 @@ uint64_t hllCount(struct hllhdr *hdr, int *invalid) { double m = HLL_REGISTERS; double E; int j; - /* Note that reghisto could be just HLL_Q+1, becuase this is the - * maximum frequency of the "000...1" sequence the hash function is + /* Note that reghisto size could be just HLL_Q+2, becuase HLL_Q+1 is + * the maximum frequency of the "000...1" sequence the hash function is * able to return. However it is slow to check for sanity of the * input: instead we history array at a safe size: overflows will * just write data to wrong, but correctly allocated, places. */ From b78ac354f41e370a4dc21ac01981cb0ccd0a1b7d Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 18 Mar 2019 11:15:39 +0100 Subject: [PATCH 07/10] redis-check-aof: fix potential overflow. Bug signaled by @vattezhang in PR #5940 but fixed differently. --- src/redis-check-aof.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/redis-check-aof.c b/src/redis-check-aof.c index c4d5a225e..54ed85f0d 100644 --- a/src/redis-check-aof.c +++ b/src/redis-check-aof.c @@ -33,8 +33,8 @@ #define ERROR(...) { \ char __buf[1024]; \ - sprintf(__buf, __VA_ARGS__); \ - sprintf(error, "0x%16llx: %s", (long long)epos, __buf); \ + snprintf(__buf, sizeof(__buf), __VA_ARGS__); \ + snprintf(error, sizeof(error), "0x%16llx: %s", (long long)epos, __buf); \ } static char error[1024]; From 14b17c3615108fdbca5e7fe4d2c3f0e8b7454521 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 18 Mar 2019 11:34:40 +0100 Subject: [PATCH 08/10] replicaofCommand() refactoring: stay into 80 cols. --- src/replication.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/replication.c b/src/replication.c index 3c30999af..f2adc7995 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2053,8 +2053,11 @@ void replicaofCommand(client *c) { /* Check if we are already attached to the specified slave */ if (server.masterhost && !strcasecmp(server.masterhost,c->argv[1]->ptr) && server.masterport == port) { - serverLog(LL_NOTICE,"REPLICAOF would result into synchronization with the master we are already connected with. No operation performed."); - addReplySds(c,sdsnew("+OK Already connected to specified master\r\n")); + serverLog(LL_NOTICE,"REPLICAOF would result into synchronization " + "with the master we are already connected " + "with. No operation performed."); + addReplySds(c,sdsnew("+OK Already connected to specified " + "master\r\n")); return; } /* There was no previous master or the user specified a different one, From a5af648fdddaf93e89735a8577b56f12379d1dd2 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 18 Mar 2019 15:38:43 +0100 Subject: [PATCH 09/10] MANIFESTO v2. --- MANIFESTO | 47 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/MANIFESTO b/MANIFESTO index 2b719057e..d43a58893 100644 --- a/MANIFESTO +++ b/MANIFESTO @@ -34,7 +34,21 @@ Redis Manifesto so that the complexity is obvious and more complex operations can be performed as the sum of the basic operations. -4 - Code is like a poem; it's not just something we write to reach some +4 - We believe in code efficiency. Computers get faster and faster, yet we + believe that abusing computing capabilities is not wise: the amount of + operations you can do for a given amount of energy remains anyway a + significant parameter: it allows to do more with less computers and, at + the same time, having a smaller environmental impact. Similarly Redis is + able to "scale down" to smaller devices. It is perfectly usable in a + Raspberry Pi and other small ARM based computers. Faster code having + just the layers of abstractions that are really needed will also result, + often, in more predictable performances. We think likewise about memory + usage, one of the fundamental goals of the Redis project is to + incrementally build more and more memory efficient data structures, so that + problems that were not approachable in RAM in the past will be perfectly + fine to handle in the future. + +5 - Code is like a poem; it's not just something we write to reach some practical result. Sometimes people that are far from the Redis philosophy suggest using other code written by other authors (frequently in other languages) in order to implement something Redis currently lacks. But to us @@ -45,23 +59,44 @@ Redis Manifesto when needed. At the same time, when writing the Redis story we're trying to write smaller stories that will fit in to other code. -5 - We're against complexity. We believe designing systems is a fight against +6 - We're against complexity. We believe designing systems is a fight against complexity. We'll accept to fight the complexity when it's worthwhile but we'll try hard to recognize when a small feature is not worth 1000s of lines of code. Most of the time the best way to fight complexity is by not creating it at all. -6 - Two levels of API. The Redis API has two levels: 1) a subset of the API fits +7 - Threading is not a silver bullet. Instead of making Redis threaded we + believe on the idea of an efficient (mostly) single threaded Redis core. + Multiple of such cores, that may run in the same computer or may run + in multiple computers, are abstracted away as a single big system by + higher order protocols and features: Redis Cluster and the upcoming + Redis Proxy are our main goals. A shared nothing approach is not just + much simpler (see the previous point in this document), is also optimal + in NUMA systems. In the specific case of Redis it allows for each instance + to have a more limited amount of data, making the Redis persist-by-fork + approach more sounding. In the future we may explore parallelism only for + I/O, which is the low hanging fruit: minimal complexity could provide an + improved single process experience. + +8 - Two levels of API. The Redis API has two levels: 1) a subset of the API fits naturally into a distributed version of Redis and 2) a more complex API that supports multi-key operations. Both are useful if used judiciously but there's no way to make the more complex multi-keys API distributed in an opaque way without violating our other principles. We don't want to provide the illusion of something that will work magically when actually it can't in all cases. Instead we'll provide commands to quickly migrate keys from one - instance to another to perform multi-key operations and expose the tradeoffs - to the user. + instance to another to perform multi-key operations and expose the + trade-offs to the user. -7 - We optimize for joy. We believe writing code is a lot of hard work, and the +9 - We optimize for joy. We believe writing code is a lot of hard work, and the only way it can be worth is by enjoying it. When there is no longer joy in writing code, the best thing to do is stop. To prevent this, we'll avoid taking paths that will make Redis less of a joy to develop. + +10 - All the above points are put together in what we call opportunistic + programming: trying to get the most for the user with minimal increases + in complexity (hanging fruits). Solve 95% of the problem with 5% of the + code when it is acceptable. Avoid a fixed schedule but follow the flow of + user requests, inspiration, Redis internal readiness for certain features + (sometimes many past changes reach a critical point making a previously + complex feature very easy to obtain). From 3eaa2cdc44a9b0742f0695f44911b92547995836 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 18 Mar 2019 15:49:52 +0100 Subject: [PATCH 10/10] MANIFESTO: simplicity and lock-in. --- MANIFESTO | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/MANIFESTO b/MANIFESTO index d43a58893..372789462 100644 --- a/MANIFESTO +++ b/MANIFESTO @@ -63,7 +63,11 @@ Redis Manifesto complexity. We'll accept to fight the complexity when it's worthwhile but we'll try hard to recognize when a small feature is not worth 1000s of lines of code. Most of the time the best way to fight complexity is by not - creating it at all. + creating it at all. Complexity is also a form of lock-in: code that is + very hard to understand cannot be modified by users in an independent way + regardless of the license. One of the main Redis goals is to remain + understandable, enough for a single programmer to have a clear idea of how + it works in detail just reading the source code for a couple of weeks. 7 - Threading is not a silver bullet. Instead of making Redis threaded we believe on the idea of an efficient (mostly) single threaded Redis core.