From 7f6aaa6f650b5ab51f02111e07b3c68c06b9856b Mon Sep 17 00:00:00 2001 From: John Sully Date: Thu, 14 Mar 2019 14:02:16 -0400 Subject: [PATCH 01/10] Fix hyperloglog corruption --- src/hyperloglog.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index fc21ea006..e993bf26e 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -614,6 +614,10 @@ int hllSparseToDense(robj *o) { } else { runlen = HLL_SPARSE_VAL_LEN(p); regval = HLL_SPARSE_VAL_VALUE(p); + if ((runlen + idx) > HLL_REGISTERS) { + sdsfree(dense); + return C_ERR; + } while(runlen--) { HLL_DENSE_SET_REGISTER(hdr->registers,idx,regval); idx++; @@ -1088,6 +1092,8 @@ int hllMerge(uint8_t *max, robj *hll) { } else { runlen = HLL_SPARSE_VAL_LEN(p); regval = HLL_SPARSE_VAL_VALUE(p); + if ((runlen + i) > HLL_REGISTERS) + return C_ERR; while(runlen--) { if (regval > max[i]) max[i] = regval; i++; From a47cd50a0d7478d495612f4c4d166ba564381f04 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 15 Mar 2019 13:52:29 +0100 Subject: [PATCH 02/10] HyperLogLog: dense/sparse repr parsing fuzz test. --- tests/unit/hyperloglog.tcl | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/unit/hyperloglog.tcl b/tests/unit/hyperloglog.tcl index 7d36b7a35..6a9c47b11 100644 --- a/tests/unit/hyperloglog.tcl +++ b/tests/unit/hyperloglog.tcl @@ -115,6 +115,35 @@ start_server {tags {"hll"}} { set e } {*WRONGTYPE*} + test {Fuzzing dense/sparse encoding: Redis should always detect errors} { + for {set j 0} {$j < 10000} {incr j} { + r del hll + set items {} + set numitems [randomInt 3000] + for {set i 0} {$i < $numitems} {incr i} { + lappend items [expr {rand()}] + } + r pfadd hll {*}$items + + # Corrupt it in some random way. + for {set i 0} {$i < 5} {incr i} { + set len [r strlen hll] + set pos [randomInt $len] + set byte [randstring 1 1 binary] + r setrange hll $pos $byte + # Don't modify more bytes 50% of times + if {rand() < 0.5} break + } + + # Use the hyperloglog to check if it crashes + # Redis in some way. + catch { + r pfcount hll + r pfdebug getreg hll + } + } + } + test {PFADD, PFCOUNT, PFMERGE type checking works} { r set foo bar catch {r pfadd foo 1} e From 3765dfdcf76fa37143457580b31c176bc3100dc3 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 15 Mar 2019 17:10:16 +0100 Subject: [PATCH 03/10] HyperLogLog: enlarge reghisto variable for safety. --- src/hyperloglog.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index e993bf26e..526510b43 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -1017,7 +1017,12 @@ uint64_t hllCount(struct hllhdr *hdr, int *invalid) { double m = HLL_REGISTERS; double E; int j; - int reghisto[HLL_Q+2] = {0}; + /* Note that reghisto could be just HLL_Q+1, becuase this is the + * maximum frequency of the "000...1" sequence the hash function is + * able to return. However it is slow to check for sanity of the + * input: instead we history array at a safe size: overflows will + * just write data to wrong, but correctly allocated, places. */ + int reghisto[64] = {0}; /* Compute register histogram */ if (hdr->encoding == HLL_DENSE) { From ca291b0951ff4dd4db4c08e3fe7bea7311d94e47 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 15 Mar 2019 17:13:19 +0100 Subject: [PATCH 04/10] HyperLogLog: speedup fuzz test. --- tests/unit/hyperloglog.tcl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/unit/hyperloglog.tcl b/tests/unit/hyperloglog.tcl index 6a9c47b11..712fcc641 100644 --- a/tests/unit/hyperloglog.tcl +++ b/tests/unit/hyperloglog.tcl @@ -116,7 +116,7 @@ start_server {tags {"hll"}} { } {*WRONGTYPE*} test {Fuzzing dense/sparse encoding: Redis should always detect errors} { - for {set j 0} {$j < 10000} {incr j} { + for {set j 0} {$j < 1000} {incr j} { r del hll set items {} set numitems [randomInt 3000] @@ -139,7 +139,6 @@ start_server {tags {"hll"}} { # Redis in some way. catch { r pfcount hll - r pfdebug getreg hll } } } From 961aa74ff1710b48d0f55da175e0f7665ccb197e Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 15 Mar 2019 17:16:06 +0100 Subject: [PATCH 05/10] HyperLogLog: handle wrong offset in the base case. --- src/hyperloglog.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 526510b43..1e7ce3dce 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -614,10 +614,7 @@ int hllSparseToDense(robj *o) { } else { runlen = HLL_SPARSE_VAL_LEN(p); regval = HLL_SPARSE_VAL_VALUE(p); - if ((runlen + idx) > HLL_REGISTERS) { - sdsfree(dense); - return C_ERR; - } + if ((runlen + idx) > HLL_REGISTERS) break; /* Overflow. */ while(runlen--) { HLL_DENSE_SET_REGISTER(hdr->registers,idx,regval); idx++; @@ -1097,8 +1094,7 @@ int hllMerge(uint8_t *max, robj *hll) { } else { runlen = HLL_SPARSE_VAL_LEN(p); regval = HLL_SPARSE_VAL_VALUE(p); - if ((runlen + i) > HLL_REGISTERS) - return C_ERR; + if ((runlen + i) > HLL_REGISTERS) break; /* Overflow. */ while(runlen--) { if (regval > max[i]) max[i] = regval; i++; From 218091d2e4eb8704d2caccc8f30d12dee9435e2f Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 16 Mar 2019 09:15:12 +0100 Subject: [PATCH 06/10] HyperLogLog: fix comment in hllCount(). --- src/hyperloglog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 1e7ce3dce..e01ea6042 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -1014,8 +1014,8 @@ uint64_t hllCount(struct hllhdr *hdr, int *invalid) { double m = HLL_REGISTERS; double E; int j; - /* Note that reghisto could be just HLL_Q+1, becuase this is the - * maximum frequency of the "000...1" sequence the hash function is + /* Note that reghisto size could be just HLL_Q+2, becuase HLL_Q+1 is + * the maximum frequency of the "000...1" sequence the hash function is * able to return. However it is slow to check for sanity of the * input: instead we history array at a safe size: overflows will * just write data to wrong, but correctly allocated, places. */ From 71d9f4e59606270fb8862803ded3f0bfdd7bb03b Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 18 Mar 2019 11:15:39 +0100 Subject: [PATCH 07/10] redis-check-aof: fix potential overflow. Bug signaled by @vattezhang in PR #5940 but fixed differently. --- src/redis-check-aof.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/redis-check-aof.c b/src/redis-check-aof.c index c4d5a225e..54ed85f0d 100644 --- a/src/redis-check-aof.c +++ b/src/redis-check-aof.c @@ -33,8 +33,8 @@ #define ERROR(...) { \ char __buf[1024]; \ - sprintf(__buf, __VA_ARGS__); \ - sprintf(error, "0x%16llx: %s", (long long)epos, __buf); \ + snprintf(__buf, sizeof(__buf), __VA_ARGS__); \ + snprintf(error, sizeof(error), "0x%16llx: %s", (long long)epos, __buf); \ } static char error[1024]; From 340d03b64bf59a6c0a79a86a51e823397d5a9b57 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 18 Mar 2019 11:34:40 +0100 Subject: [PATCH 08/10] replicaofCommand() refactoring: stay into 80 cols. --- src/replication.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/replication.c b/src/replication.c index 3c30999af..f2adc7995 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2053,8 +2053,11 @@ void replicaofCommand(client *c) { /* Check if we are already attached to the specified slave */ if (server.masterhost && !strcasecmp(server.masterhost,c->argv[1]->ptr) && server.masterport == port) { - serverLog(LL_NOTICE,"REPLICAOF would result into synchronization with the master we are already connected with. No operation performed."); - addReplySds(c,sdsnew("+OK Already connected to specified master\r\n")); + serverLog(LL_NOTICE,"REPLICAOF would result into synchronization " + "with the master we are already connected " + "with. No operation performed."); + addReplySds(c,sdsnew("+OK Already connected to specified " + "master\r\n")); return; } /* There was no previous master or the user specified a different one, From a3b442eb5daece99f5ac52a167ab455d302eda43 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 18 Mar 2019 15:38:43 +0100 Subject: [PATCH 09/10] MANIFESTO v2. --- MANIFESTO | 47 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/MANIFESTO b/MANIFESTO index 2b719057e..d43a58893 100644 --- a/MANIFESTO +++ b/MANIFESTO @@ -34,7 +34,21 @@ Redis Manifesto so that the complexity is obvious and more complex operations can be performed as the sum of the basic operations. -4 - Code is like a poem; it's not just something we write to reach some +4 - We believe in code efficiency. Computers get faster and faster, yet we + believe that abusing computing capabilities is not wise: the amount of + operations you can do for a given amount of energy remains anyway a + significant parameter: it allows to do more with less computers and, at + the same time, having a smaller environmental impact. Similarly Redis is + able to "scale down" to smaller devices. It is perfectly usable in a + Raspberry Pi and other small ARM based computers. Faster code having + just the layers of abstractions that are really needed will also result, + often, in more predictable performances. We think likewise about memory + usage, one of the fundamental goals of the Redis project is to + incrementally build more and more memory efficient data structures, so that + problems that were not approachable in RAM in the past will be perfectly + fine to handle in the future. + +5 - Code is like a poem; it's not just something we write to reach some practical result. Sometimes people that are far from the Redis philosophy suggest using other code written by other authors (frequently in other languages) in order to implement something Redis currently lacks. But to us @@ -45,23 +59,44 @@ Redis Manifesto when needed. At the same time, when writing the Redis story we're trying to write smaller stories that will fit in to other code. -5 - We're against complexity. We believe designing systems is a fight against +6 - We're against complexity. We believe designing systems is a fight against complexity. We'll accept to fight the complexity when it's worthwhile but we'll try hard to recognize when a small feature is not worth 1000s of lines of code. Most of the time the best way to fight complexity is by not creating it at all. -6 - Two levels of API. The Redis API has two levels: 1) a subset of the API fits +7 - Threading is not a silver bullet. Instead of making Redis threaded we + believe on the idea of an efficient (mostly) single threaded Redis core. + Multiple of such cores, that may run in the same computer or may run + in multiple computers, are abstracted away as a single big system by + higher order protocols and features: Redis Cluster and the upcoming + Redis Proxy are our main goals. A shared nothing approach is not just + much simpler (see the previous point in this document), is also optimal + in NUMA systems. In the specific case of Redis it allows for each instance + to have a more limited amount of data, making the Redis persist-by-fork + approach more sounding. In the future we may explore parallelism only for + I/O, which is the low hanging fruit: minimal complexity could provide an + improved single process experience. + +8 - Two levels of API. The Redis API has two levels: 1) a subset of the API fits naturally into a distributed version of Redis and 2) a more complex API that supports multi-key operations. Both are useful if used judiciously but there's no way to make the more complex multi-keys API distributed in an opaque way without violating our other principles. We don't want to provide the illusion of something that will work magically when actually it can't in all cases. Instead we'll provide commands to quickly migrate keys from one - instance to another to perform multi-key operations and expose the tradeoffs - to the user. + instance to another to perform multi-key operations and expose the + trade-offs to the user. -7 - We optimize for joy. We believe writing code is a lot of hard work, and the +9 - We optimize for joy. We believe writing code is a lot of hard work, and the only way it can be worth is by enjoying it. When there is no longer joy in writing code, the best thing to do is stop. To prevent this, we'll avoid taking paths that will make Redis less of a joy to develop. + +10 - All the above points are put together in what we call opportunistic + programming: trying to get the most for the user with minimal increases + in complexity (hanging fruits). Solve 95% of the problem with 5% of the + code when it is acceptable. Avoid a fixed schedule but follow the flow of + user requests, inspiration, Redis internal readiness for certain features + (sometimes many past changes reach a critical point making a previously + complex feature very easy to obtain). From a40a075ada7246b858bda75c8dca4a05f0bcf8b8 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 18 Mar 2019 15:49:52 +0100 Subject: [PATCH 10/10] MANIFESTO: simplicity and lock-in. --- MANIFESTO | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/MANIFESTO b/MANIFESTO index d43a58893..372789462 100644 --- a/MANIFESTO +++ b/MANIFESTO @@ -63,7 +63,11 @@ Redis Manifesto complexity. We'll accept to fight the complexity when it's worthwhile but we'll try hard to recognize when a small feature is not worth 1000s of lines of code. Most of the time the best way to fight complexity is by not - creating it at all. + creating it at all. Complexity is also a form of lock-in: code that is + very hard to understand cannot be modified by users in an independent way + regardless of the license. One of the main Redis goals is to remain + understandable, enough for a single programmer to have a clear idea of how + it works in detail just reading the source code for a couple of weeks. 7 - Threading is not a silver bullet. Instead of making Redis threaded we believe on the idea of an efficient (mostly) single threaded Redis core.