From 3d478f2e3f74792a2cf232440a63e5f86b7a5f85 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 16 May 2020 18:03:28 +0200 Subject: [PATCH 01/95] Improve the PSYNC2 test reliability. --- tests/integration/psync2.tcl | 48 +++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 15 deletions(-) diff --git a/tests/integration/psync2.tcl b/tests/integration/psync2.tcl index 5fe29caba..d6b2d19d0 100644 --- a/tests/integration/psync2.tcl +++ b/tests/integration/psync2.tcl @@ -12,7 +12,7 @@ start_server {} { set no_exit 0 ; # Do not exit at end of the test - set duration 20 ; # Total test seconds + set duration 40 ; # Total test seconds set genload 1 ; # Load master with writes at every cycle @@ -125,14 +125,16 @@ start_server {} { } } - # wait for all the slaves to be in sync with the master, due to pings, we have to re-sample the master constantly too + # wait for all the slaves to be in sync. + set masteroff [status $R($master_id) master_repl_offset] wait_for_condition 500 100 { - [status $R($master_id) master_repl_offset] == [status $R(0) master_repl_offset] && - [status $R($master_id) master_repl_offset] == [status $R(1) master_repl_offset] && - [status $R($master_id) master_repl_offset] == [status $R(2) master_repl_offset] && - [status $R($master_id) master_repl_offset] == [status $R(3) master_repl_offset] && - [status $R($master_id) master_repl_offset] == [status $R(4) master_repl_offset] + [status $R(0) master_repl_offset] >= $masteroff && + [status $R(1) master_repl_offset] >= $masteroff && + [status $R(2) master_repl_offset] >= $masteroff && + [status $R(3) master_repl_offset] >= $masteroff && + [status $R(4) master_repl_offset] >= $masteroff } else { + puts "Master ID is $master_id" for {set j 0} {$j < 5} {incr j} { puts "$j: sync_full: [status $R($j) sync_full]" puts "$j: id1 : [status $R($j) master_replid]:[status $R($j) master_repl_offset]" @@ -140,17 +142,11 @@ start_server {} { puts "$j: backlog : firstbyte=[status $R($j) repl_backlog_first_byte_offset] len=[status $R($j) repl_backlog_histlen]" puts "---" } - fail "Slaves are not in sync with the master after too long time." + fail "Replicas offsets didn't catch up with the master after too long time." } - # Put down the old master so that it cannot generate more - # replication stream, this way in the next master switch, the time at - # which we move slaves away is not important, each will have full - # history (otherwise PINGs will make certain slaves have more history), - # and sometimes a full resync will be needed. - $R($master_id) slaveof 127.0.0.1 0 ;# We use port zero to make it fail. - if {$debug_msg} { + puts "Master ID is $master_id" for {set j 0} {$j < 5} {incr j} { puts "$j: sync_full: [status $R($j) sync_full]" puts "$j: id1 : [status $R($j) master_replid]:[status $R($j) master_repl_offset]" @@ -168,6 +164,28 @@ start_server {} { assert {$sum == 4} } + # In absence of pings, are the instances really able to have + # the exact same offset? 
+ $R($master_id) config set repl-ping-replica-period 3600 + wait_for_condition 500 100 { + [status $R($master_id) master_repl_offset] == [status $R(0) master_repl_offset] && + [status $R($master_id) master_repl_offset] == [status $R(1) master_repl_offset] && + [status $R($master_id) master_repl_offset] == [status $R(2) master_repl_offset] && + [status $R($master_id) master_repl_offset] == [status $R(3) master_repl_offset] && + [status $R($master_id) master_repl_offset] == [status $R(4) master_repl_offset] + } else { + puts "Master ID is $master_id" + for {set j 0} {$j < 5} {incr j} { + puts "$j: sync_full: [status $R($j) sync_full]" + puts "$j: id1 : [status $R($j) master_replid]:[status $R($j) master_repl_offset]" + puts "$j: id2 : [status $R($j) master_replid2]:[status $R($j) second_repl_offset]" + puts "$j: backlog : firstbyte=[status $R($j) repl_backlog_first_byte_offset] len=[status $R($j) repl_backlog_histlen]" + puts "---" + } + fail "Replicas and master offsets were unable to match *exactly*." + } + $R($master_id) config set repl-ping-replica-period 10 + # Limit anyway the maximum number of cycles. This is useful when the # test is skipped via --only option of the test suite. In that case # we don't want to see many seconds of this test being just skipped. From 5e75739bfd7915b0379e3cdf88208e6008e7846f Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Sun, 17 May 2020 18:26:02 +0300 Subject: [PATCH 02/95] add regression test for the race in #7205 with the original version of 6.0.0, this test detects an excessive full sync. with the fix in 146201c69, this test detects memory corruption, especially when using libc allocator with or without valgrind. --- tests/integration/replication.tcl | 52 +++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl index 22b0bfeaf..e5079d9dd 100644 --- a/tests/integration/replication.tcl +++ b/tests/integration/replication.tcl @@ -637,3 +637,55 @@ start_server {tags {"repl"}} { } } } + +test {replicaof right after disconnection} { + # this is a rare race condition that was reproduced sporadically by the psync2 unit. + # see details in #7205 + start_server {tags {"repl"}} { + set replica1 [srv 0 client] + set replica1_host [srv 0 host] + set replica1_port [srv 0 port] + set replica1_log [srv 0 stdout] + start_server {} { + set replica2 [srv 0 client] + set replica2_host [srv 0 host] + set replica2_port [srv 0 port] + set replica2_log [srv 0 stdout] + start_server {} { + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + $replica1 replicaof $master_host $master_port + $replica2 replicaof $master_host $master_port + + wait_for_condition 50 100 { + [string match {*master_link_status:up*} [$replica1 info replication]] && + [string match {*master_link_status:up*} [$replica2 info replication]] + } else { + fail "Can't turn the instance into a replica" + } + + set rd [redis_deferring_client -1] + $rd debug sleep 1 + after 100 + + # when replica2 will wake up from the sleep it will find both disconnection + # from it's master and also a replicaof command at the same event loop + $master client kill type replica + $replica2 replicaof $replica1_host $replica1_port + $rd read + + wait_for_condition 50 100 { + [string match {*master_link_status:up*} [$replica2 info replication]] + } else { + fail "role change failed." + } + + # make sure psync succeeded, and there were no unexpected full syncs. 
+ assert_equal [status $master sync_full] 2 + assert_equal [status $replica1 sync_full] 0 + assert_equal [status $replica2 sync_full] 0 + } + } + } +} From 7d8259d151406daf0f4f4fee9d961d40582214c5 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Mon, 18 May 2020 10:01:30 +0300 Subject: [PATCH 03/95] fix valgrind test failure in replication test in 00323f342 i added more keys to that test to make it run longer but in valgrind this now means the test times out, give valgrind more time. --- tests/integration/replication.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl index e5079d9dd..7c03c4bc6 100644 --- a/tests/integration/replication.tcl +++ b/tests/integration/replication.tcl @@ -612,7 +612,7 @@ start_server {tags {"repl"}} { # Wait that replicas acknowledge they are online so # we are sure that DBSIZE and DEBUG DIGEST will not # fail because of timing issues. - wait_for_condition 50 100 { + wait_for_condition 150 100 { [lindex [$replica role] 3] eq {connected} } else { fail "replicas still not connected after some time" From 902e82efd21d6812a00a7d785d66e497b6c89d40 Mon Sep 17 00:00:00 2001 From: Benjamin Sergeant Date: Wed, 13 May 2020 09:24:51 -0700 Subject: [PATCH 04/95] Redis-cli 6.0.1 `--cluster-yes` doesn't work (fix #7246) This make it so that all prompts for all redis-cli --cluster commands are automatically answered with a yes. --- src/redis-cli.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 880f3d70a..c8d9580c7 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1798,6 +1798,10 @@ static void usage(void) { } static int confirmWithYes(char *msg) { + if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_YES) { + return 1; + } + printf("%s (type 'yes' to accept): ", msg); fflush(stdout); char buf[4]; @@ -8071,7 +8075,7 @@ int main(int argc, char **argv) { if (cliConnect(0) == REDIS_ERR) exit(1); if (config.interval == 0) config.interval = 1000000; statMode(); - } +( } /* Scan mode */ if (config.scan_mode) { From dc2f7e0ad75c20fa697b8ad180db5ab48804eb36 Mon Sep 17 00:00:00 2001 From: Benjamin Sergeant Date: Wed, 13 May 2020 09:32:27 -0700 Subject: [PATCH 05/95] fix typo ... 
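For clarity: the typo being fixed here is the stray '(' that the previous commit accidentally left in front of the closing brace after the statMode() call in redis-cli's main(), which breaks compilation; this commit simply removes it.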
--- src/redis-cli.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index c8d9580c7..4b0f4d2d5 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -8075,7 +8075,7 @@ int main(int argc, char **argv) { if (cliConnect(0) == REDIS_ERR) exit(1); if (config.interval == 0) config.interval = 1000000; statMode(); -( } + } /* Scan mode */ if (config.scan_mode) { From 5d88c23555cfa962a9851085a3f8994bf27cd97b Mon Sep 17 00:00:00 2001 From: Benjamin Sergeant Date: Thu, 14 May 2020 15:29:06 -0700 Subject: [PATCH 06/95] do not handle --cluster-yes for cluster fix mode --- src/redis-cli.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 4b0f4d2d5..96eb3c3dd 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1797,11 +1797,14 @@ static void usage(void) { exit(1); } -static int confirmWithYes(char *msg) { - if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_YES) { +static int confirmWithYes(char *msg, int force) { + /* if force is true and --cluster-yes option is on, + * do not prompt for an answer */ + if (force && + (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_YES)) { return 1; } - + printf("%s (type 'yes' to accept): ", msg); fflush(stdout); char buf[4]; @@ -4476,12 +4479,16 @@ static int clusterManagerFixSlotsCoverage(char *all_slots) { } dictReleaseIterator(iter); + /* we want explicit manual confirmation from users for all the fix cases */ + int force = 0; + /* Handle case "1": keys in no node. */ if (listLength(none) > 0) { printf("The following uncovered slots have no keys " "across the cluster:\n"); clusterManagerPrintSlotsList(none); - if (confirmWithYes("Fix these slots by covering with a random node?")){ + if (confirmWithYes("Fix these slots by covering with a random node?", + force)) { listIter li; listNode *ln; listRewind(none, &li); @@ -4507,7 +4514,8 @@ static int clusterManagerFixSlotsCoverage(char *all_slots) { if (listLength(single) > 0) { printf("The following uncovered slots have keys in just one node:\n"); clusterManagerPrintSlotsList(single); - if (confirmWithYes("Fix these slots by covering with those nodes?")){ + if (confirmWithYes("Fix these slots by covering with those nodes?", + force)) { listIter li; listNode *ln; listRewind(single, &li); @@ -4539,7 +4547,7 @@ static int clusterManagerFixSlotsCoverage(char *all_slots) { printf("The following uncovered slots have keys in multiple nodes:\n"); clusterManagerPrintSlotsList(multi); if (confirmWithYes("Fix these slots by moving keys " - "into a single node?")) { + "into a single node?", force)) { listIter li; listNode *ln; listRewind(multi, &li); @@ -5502,7 +5510,8 @@ assign_replicas: } clusterManagerOptimizeAntiAffinity(ip_nodes, ip_count); clusterManagerShowNodes(); - if (confirmWithYes("Can I set the above configuration?")) { + int force = 1; + if (confirmWithYes("Can I set the above configuration?", force)) { listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; From 1d84b89fb6d80e1ce12719c9e2761a68851169c4 Mon Sep 17 00:00:00 2001 From: Madelyn Olson Date: Thu, 14 May 2020 11:07:51 -0700 Subject: [PATCH 07/95] Converge hash validation for adding and removing --- src/acl.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/src/acl.c b/src/acl.c index 3194feb5b..bcca116bb 100644 --- a/src/acl.c +++ b/src/acl.c @@ -166,6 +166,25 @@ sds 
ACLHashPassword(unsigned char *cleartext, size_t len) { return sdsnewlen(hex,HASH_PASSWORD_LEN); } +/* Given a hash and the hash length, returns C_OK if it is a valid password + * hash, or C_ERR otherwise. */ +int ACLCheckPasswordHash(unsigned char *hash, int hashlen) { + if (hashlen != HASH_PASSWORD_LEN) { + return C_ERR; + } + + /* Password hashes can only be characters that represent + * hexadecimal values, which are numbers and lowercase + * characters 'a' through 'f'. */ + for(int i = 0; i < HASH_PASSWORD_LEN; i++) { + char c = hash[i]; + if ((c < 'a' || c > 'f') && (c < '0' || c > '9')) { + return C_ERR; + } + } + return C_OK; +} + /* ============================================================================= * Low level ACL API * ==========================================================================*/ @@ -753,22 +772,10 @@ int ACLSetUser(user *u, const char *op, ssize_t oplen) { if (op[0] == '>') { newpass = ACLHashPassword((unsigned char*)op+1,oplen-1); } else { - if (oplen != HASH_PASSWORD_LEN + 1) { + if (ACLCheckPasswordHash((unsigned char*)op+1,oplen-1) == C_ERR) { errno = EBADMSG; return C_ERR; } - - /* Password hashes can only be characters that represent - * hexadecimal values, which are numbers and lowercase - * characters 'a' through 'f'. - */ - for(int i = 1; i < HASH_PASSWORD_LEN + 1; i++) { - char c = op[i]; - if ((c < 'a' || c > 'f') && (c < '0' || c > '9')) { - errno = EBADMSG; - return C_ERR; - } - } newpass = sdsnewlen(op+1,oplen-1); } @@ -784,7 +791,7 @@ int ACLSetUser(user *u, const char *op, ssize_t oplen) { if (op[0] == '<') { delpass = ACLHashPassword((unsigned char*)op+1,oplen-1); } else { - if (oplen != HASH_PASSWORD_LEN + 1) { + if (ACLCheckPasswordHash((unsigned char*)op+1,oplen-1) == C_ERR) { errno = EBADMSG; return C_ERR; } From e113af744819ce973fd9805d72305a81f07f8023 Mon Sep 17 00:00:00 2001 From: ShooterIT Date: Thu, 7 May 2020 11:04:08 +0800 Subject: [PATCH 08/95] Use dictSize to get the size of dict in dict.c --- src/dict.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dict.c b/src/dict.c index 13c185253..45aab66f9 100644 --- a/src/dict.c +++ b/src/dict.c @@ -478,7 +478,7 @@ dictEntry *dictFind(dict *d, const void *key) dictEntry *he; uint64_t h, idx, table; - if (d->ht[0].used + d->ht[1].used == 0) return NULL; /* dict is empty */ + if (dictSize(d) == 0) return NULL; /* dict is empty */ if (dictIsRehashing(d)) _dictRehashStep(d); h = dictHashKey(d, key); for (table = 0; table <= 1; table++) { @@ -1044,7 +1044,7 @@ dictEntry **dictFindEntryRefByPtrAndHash(dict *d, const void *oldptr, uint64_t h dictEntry *he, **heref; unsigned long idx, table; - if (d->ht[0].used + d->ht[1].used == 0) return NULL; /* dict is empty */ + if (dictSize(d) == 0) return NULL; /* dict is empty */ for (table = 0; table <= 1; table++) { idx = hash & d->ht[table].sizemask; heref = &d->ht[table].table[idx]; From 71036d4cdbab9fc47aa33cce5bbfad5f5b53e395 Mon Sep 17 00:00:00 2001 From: WuYunlong Date: Mon, 11 May 2020 13:13:51 +0800 Subject: [PATCH 09/95] Add a test to prove current tcl cluster client can not handle keys with hash tag. 
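For background, below is a minimal C sketch (illustration only, not part of this patch) of the hash-tag rule from the Redis Cluster specification that a cluster client has to follow; crc16 stands for the cluster CRC16 function Redis already ships, and hash_slot_sketch is a made-up name for the example.

    #include <stdint.h>

    uint16_t crc16(const char *buf, int len);   /* cluster CRC16, assumed available */

    /* If the key contains a "{...}" section with non-empty content, only that
     * content is hashed, so "foo{tag}" and "bar{tag}" map to the same slot. */
    static unsigned int hash_slot_sketch(const char *key, int keylen) {
        int s, e;
        for (s = 0; s < keylen; s++)
            if (key[s] == '{') break;
        if (s == keylen)                         /* no '{': hash the whole key */
            return crc16(key, keylen) & 16383;
        for (e = s + 1; e < keylen; e++)
            if (key[e] == '}') break;
        if (e == keylen || e == s + 1)           /* no '}' or an empty "{}" tag */
            return crc16(key, keylen) & 16383;
        return crc16(key + s + 1, e - s - 1) & 16383;  /* hash only the tag */
    }

The test added below only does a SET on a key like foo{tag}; the following commit teaches the tcl client the same rule.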
--- tests/cluster/tests/15-cluster-slots.tcl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/cluster/tests/15-cluster-slots.tcl b/tests/cluster/tests/15-cluster-slots.tcl index dc9938ef6..1b33c57bd 100644 --- a/tests/cluster/tests/15-cluster-slots.tcl +++ b/tests/cluster/tests/15-cluster-slots.tcl @@ -41,4 +41,10 @@ test "client do not break when cluster slot" { if { [catch {R 0 cluster slots}] } { fail "output overflow when cluster slots" } -} \ No newline at end of file +} + +test "client can handle keys with hash tag" { + set cluster [redis_cluster 127.0.0.1:[get_instance_attrib redis 0 port]] + $cluster set foo{tag} bar + $cluster close +} From 65de5a1a7d185201b3932e6afeecc22bcafb732d Mon Sep 17 00:00:00 2001 From: WuYunlong Date: Mon, 11 May 2020 13:07:05 +0800 Subject: [PATCH 10/95] Handle keys with hash tag when computing hash slot using tcl cluster client. --- tests/support/cluster.tcl | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/tests/support/cluster.tcl b/tests/support/cluster.tcl index 74587e1f7..64b079ff8 100644 --- a/tests/support/cluster.tcl +++ b/tests/support/cluster.tcl @@ -286,8 +286,29 @@ proc ::redis_cluster::crc16 {s} { # Hash a single key returning the slot it belongs to, Implemented hash # tags as described in the Redis Cluster specification. proc ::redis_cluster::hash {key} { - # TODO: Handle hash slots. - expr {[::redis_cluster::crc16 $key] & 16383} + set keylen [string length $key] + set s {} + set e {} + for {set s 0} {$s < $keylen} {incr s} { + if {[string index $key $s] eq "\{"} break + } + + if {[expr {$s == $keylen}]} { + set res [expr {[crc16 $key] & 16383}] + return $res + } + + for {set e [expr {$s+1}]} {$e < $keylen} {incr e} { + if {[string index $key $e] == "\}"} break + } + + if {$e == $keylen || $e == [expr {$s+1}]} { + set res [expr {[crc16 $key] & 16383}] + return $res + } + + set key_sub [string range $key [expr {$s+1}] [expr {$e-1}]] + return [expr {[crc16 $key_sub] & 16383}] } # Return the slot the specified keys hash to. From 3ffb3ac7eada295f0ab5cdfebb95f9e9d1ee439e Mon Sep 17 00:00:00 2001 From: hwware Date: Mon, 18 May 2020 22:10:57 -0400 Subject: [PATCH 11/95] Redis-Benchmark: avoid potentical memmory leaking --- src/redis-benchmark.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 34295147d..46c1853f7 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -303,7 +303,7 @@ fail: else fprintf(stderr, "%s\n", hostsocket); freeReplyObject(reply); redisFree(c); - zfree(cfg); + freeRedisConfig(cfg); return NULL; } static void freeRedisConfig(redisConfig *cfg) { From 2478a83ce73e79cb6276232489bd8ea37ad76363 Mon Sep 17 00:00:00 2001 From: ShooterIT Date: Mon, 18 May 2020 18:18:20 +0800 Subject: [PATCH 12/95] Redis Benchmark: generate random test data The function of generating random data is designed by antirez. See #7196. --- src/redis-benchmark.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 46c1853f7..38d4ca51b 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -1278,6 +1278,17 @@ static void updateClusterSlotsConfiguration() { pthread_mutex_unlock(&config.is_updating_slots_mutex); } +/* Generate random data for redis benchmark. See #7196. 
*/ +static void genBenchmarkRandomData(char *data, int count) { + static uint32_t state = 1234; + int i = 0; + + while (count--) { + state = (state*1103515245+12345); + data[i++] = '0'+((state>>16)&63); + } +} + /* Returns number of consumed options. */ int parseOptions(int argc, const char **argv) { int i; @@ -1632,7 +1643,7 @@ int main(int argc, const char **argv) { /* Run default benchmark suite. */ data = zmalloc(config.datasize+1); do { - memset(data,'x',config.datasize); + genBenchmarkRandomData(data, config.datasize); data[config.datasize] = '\0'; if (test_is_selected("ping_inline") || test_is_selected("ping")) From 6bb5b6d9422ee1c04e6a8c4a9b1a9a1248e2ec75 Mon Sep 17 00:00:00 2001 From: hujie Date: Tue, 19 May 2020 00:58:58 +0800 Subject: [PATCH 13/95] fix clear USER_FLAG_ALLCOMMANDS flag in acl in ACLSetUserCommandBit, when the command bit overflows, no operation is performed, so no need clear the USER_FLAG_ALLCOMMANDS flag. in ACLSetUser, when adding subcommand, we don't need to call ACLGetCommandID ahead since subcommand may be empty. --- src/acl.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/acl.c b/src/acl.c index bcca116bb..c3a724c33 100644 --- a/src/acl.c +++ b/src/acl.c @@ -375,12 +375,13 @@ int ACLUserCanExecuteFutureCommands(user *u) { * to skip the command bit explicit test. */ void ACLSetUserCommandBit(user *u, unsigned long id, int value) { uint64_t word, bit; - if (value == 0) u->flags &= ~USER_FLAG_ALLCOMMANDS; if (ACLGetCommandBitCoordinates(id,&word,&bit) == C_ERR) return; - if (value) + if (value) { u->allowed_commands[word] |= bit; - else + } else { u->allowed_commands[word] &= ~bit; + u->flags &= ~USER_FLAG_ALLCOMMANDS; + } } /* This is like ACLSetUserCommandBit(), but instead of setting the specified @@ -845,7 +846,6 @@ int ACLSetUser(user *u, const char *op, ssize_t oplen) { errno = ENOENT; return C_ERR; } - unsigned long id = ACLGetCommandID(copy); /* The subcommand cannot be empty, so things like DEBUG| * are syntax errors of course. */ @@ -858,6 +858,7 @@ int ACLSetUser(user *u, const char *op, ssize_t oplen) { /* The command should not be set right now in the command * bitmap, because adding a subcommand of a fully added * command is probably an error on the user side. */ + unsigned long id = ACLGetCommandID(copy); if (ACLGetUserCommandBit(u,id) == 1) { zfree(copy); errno = EBUSY; From 4f8e19f877b5134a503e4a2d6d47817529685443 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Sun, 17 May 2020 15:10:25 +0300 Subject: [PATCH 14/95] improve DEBUG MALLCTL to be able to write to write only fields. also support: debug mallctl-str thread.tcache.flush VOID --- src/debug.c | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/src/debug.c b/src/debug.c index 587ff7c5d..d79226bf2 100644 --- a/src/debug.c +++ b/src/debug.c @@ -311,6 +311,13 @@ void mallctl_int(client *c, robj **argv, int argc) { size_t sz = sizeof(old); while (sz > 0) { if ((ret=je_mallctl(argv[0]->ptr, &old, &sz, argc > 1? &val: NULL, argc > 1?sz: 0))) { + if (ret == EPERM && argc > 1) { + /* if this option is write only, try just writing to it. 
*/ + if (!(ret=je_mallctl(argv[0]->ptr, NULL, 0, &val, sz))) { + addReply(c, shared.ok); + return; + } + } if (ret==EINVAL) { /* size might be wrong, try a smaller one */ sz /= 2; @@ -333,17 +340,30 @@ void mallctl_int(client *c, robj **argv, int argc) { } void mallctl_string(client *c, robj **argv, int argc) { - int ret; + int rret, wret; char *old; size_t sz = sizeof(old); /* for strings, it seems we need to first get the old value, before overriding it. */ - if ((ret=je_mallctl(argv[0]->ptr, &old, &sz, NULL, 0))) { - addReplyErrorFormat(c,"%s", strerror(ret)); - return; + if ((rret=je_mallctl(argv[0]->ptr, &old, &sz, NULL, 0))) { + /* return error unless this option is write only. */ + if (!(rret == EPERM && argc > 1)) { + addReplyErrorFormat(c,"%s", strerror(rret)); + return; + } } - addReplyBulkCString(c, old); - if(argc > 1) - je_mallctl(argv[0]->ptr, NULL, 0, &argv[1]->ptr, sizeof(char*)); + if(argc > 1) { + char *val = argv[1]->ptr; + char **valref = &val; + if ((!strcmp(val,"VOID"))) + valref = NULL, sz = 0; + wret = je_mallctl(argv[0]->ptr, NULL, 0, valref, sz); + } + if (!rret) + addReplyBulkCString(c, old); + else if (wret) + addReplyErrorFormat(c,"%s", strerror(wret)); + else + addReply(c, shared.ok); } #endif From 56d63f4d7dee23f2417aff6ec4d3a58f0e605d2d Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Wed, 20 May 2020 14:08:40 +0300 Subject: [PATCH 15/95] fix a rare active defrag edge case bug leading to stagnation There's a rare case which leads to stagnation in the defragger, causing it to keep scanning the keyspace and do nothing (not moving any allocation), this happens when all the allocator slabs of a certain bin have the same % utilization, but the slab from which new allocations are made have a lower utilization. this commit fixes it by removing the current slab from the overall average utilization of the bin, and also eliminate any precision loss in the utilization calculation and move the decision about the defrag to reside inside jemalloc. and also add a test that consistently reproduce this issue. 
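To make the stagnation concrete with made-up numbers: suppose a bin whose slabs hold nregs = 32 regions has three ordinary slabs at 16 used regions each plus a current slab (slabcur) at 4. The old hint compared each candidate slab's utilization (16/32 = 0.50) with the whole-bin average (52/128 ≈ 0.41), so every candidate looked better than average and was skipped, and the defragger kept scanning without moving anything. With slabcur excluded from the average, the new in-jemalloc check becomes (nregs - free_in_slab) * curslabs <= curregs, i.e. (32 - 16) * 3 = 48 <= 48, so the slab's allocations are now moved.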
--- .../internal/jemalloc_internal_inlines_c.h | 23 +++- deps/jemalloc/src/jemalloc.c | 8 +- src/defrag.c | 13 +- tests/unit/memefficiency.tcl | 125 +++++++++++++++++- 4 files changed, 146 insertions(+), 23 deletions(-) diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 290e5cf99..2685802b8 100644 --- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -216,7 +216,7 @@ ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, } JEMALLOC_ALWAYS_INLINE int -iget_defrag_hint(tsdn_t *tsdn, void* ptr, int *bin_util, int *run_util) { +iget_defrag_hint(tsdn_t *tsdn, void* ptr) { int defrag = 0; rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -232,11 +232,22 @@ iget_defrag_hint(tsdn_t *tsdn, void* ptr, int *bin_util, int *run_util) { malloc_mutex_lock(tsdn, &bin->lock); /* don't bother moving allocations from the slab currently used for new allocations */ if (slab != bin->slabcur) { - const bin_info_t *bin_info = &bin_infos[binind]; - size_t availregs = bin_info->nregs * bin->stats.curslabs; - *bin_util = ((long long)bin->stats.curregs<<16) / availregs; - *run_util = ((long long)(bin_info->nregs - extent_nfree_get(slab))<<16) / bin_info->nregs; - defrag = 1; + int free_in_slab = extent_nfree_get(slab); + if (free_in_slab) { + const bin_info_t *bin_info = &bin_infos[binind]; + int curslabs = bin->stats.curslabs; + size_t curregs = bin->stats.curregs; + if (bin->slabcur) { + /* remove slabcur from the overall utilization */ + curregs -= bin_info->nregs - extent_nfree_get(bin->slabcur); + curslabs -= 1; + } + /* Compare the utilization ratio of the slab in question to the total average, + * to avoid precision lost and division, we do that by extrapolating the usage + * of the slab as if all slabs have the same usage. If this slab is less used + * than the average, we'll prefer to evict the data to hopefully more used ones */ + defrag = (bin_info->nregs - free_in_slab) * curslabs <= curregs; + } } malloc_mutex_unlock(tsdn, &bin->lock); } diff --git a/deps/jemalloc/src/jemalloc.c b/deps/jemalloc/src/jemalloc.c index 5b936cb48..585645a28 100644 --- a/deps/jemalloc/src/jemalloc.c +++ b/deps/jemalloc/src/jemalloc.c @@ -3326,12 +3326,10 @@ jemalloc_postfork_child(void) { /******************************************************************************/ /* Helps the application decide if a pointer is worth re-allocating in order to reduce fragmentation. - * returns 0 if the allocation is in the currently active run, - * or when it is not causing any frag issue (large or huge bin) - * returns the bin utilization and run utilization both in fixed point 16:16. + * returns 1 if the allocation should be moved, and 0 if the allocation be kept. * If the application decides to re-allocate it should use MALLOCX_TCACHE_NONE when doing so. 
*/ JEMALLOC_EXPORT int JEMALLOC_NOTHROW -get_defrag_hint(void* ptr, int *bin_util, int *run_util) { +get_defrag_hint(void* ptr) { assert(ptr != NULL); - return iget_defrag_hint(TSDN_NULL, ptr, bin_util, run_util); + return iget_defrag_hint(TSDN_NULL, ptr); } diff --git a/src/defrag.c b/src/defrag.c index e729297a5..6e5296632 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -43,7 +43,7 @@ /* this method was added to jemalloc in order to help us understand which * pointers are worthwhile moving and which aren't */ -int je_get_defrag_hint(void* ptr, int *bin_util, int *run_util); +int je_get_defrag_hint(void* ptr); /* forward declarations*/ void defragDictBucketCallback(void *privdata, dictEntry **bucketref); @@ -55,18 +55,11 @@ dictEntry* replaceSateliteDictKeyPtrAndOrDefragDictEntry(dict *d, sds oldkey, sd * when it returns a non-null value, the old pointer was already released * and should NOT be accessed. */ void* activeDefragAlloc(void *ptr) { - int bin_util, run_util; size_t size; void *newptr; - if(!je_get_defrag_hint(ptr, &bin_util, &run_util)) { - server.stat_active_defrag_misses++; - return NULL; - } - /* if this run is more utilized than the average utilization in this bin - * (or it is full), skip it. This will eventually move all the allocations - * from relatively empty runs into relatively full runs. */ - if (run_util > bin_util || run_util == 1<<16) { + if(!je_get_defrag_hint(ptr)) { server.stat_active_defrag_misses++; + size = zmalloc_size(ptr); return NULL; } /* move this allocation to a new allocation. diff --git a/tests/unit/memefficiency.tcl b/tests/unit/memefficiency.tcl index 777693fdf..390bad192 100644 --- a/tests/unit/memefficiency.tcl +++ b/tests/unit/memefficiency.tcl @@ -95,6 +95,10 @@ start_server {tags {"defrag"}} { } if {$::verbose} { puts "frag $frag" + set misses [s active_defrag_misses] + set hits [s active_defrag_hits] + puts "hits: $hits" + puts "misses: $misses" puts "max latency $max_latency" puts [r latency latest] puts [r latency history active-defrag-cycle] @@ -221,6 +225,10 @@ start_server {tags {"defrag"}} { } if {$::verbose} { puts "frag $frag" + set misses [s active_defrag_misses] + set hits [s active_defrag_hits] + puts "hits: $hits" + puts "misses: $misses" puts "max latency $max_latency" puts [r latency latest] puts [r latency history active-defrag-cycle] @@ -256,11 +264,12 @@ start_server {tags {"defrag"}} { set expected_frag 1.7 # add a mass of list nodes to two lists (allocations are interlaced) set val [string repeat A 100] ;# 5 items of 100 bytes puts us in the 640 bytes bin, which has 32 regs, so high potential for fragmentation - for {set j 0} {$j < 500000} {incr j} { + set elements 500000 + for {set j 0} {$j < $elements} {incr j} { $rd lpush biglist1 $val $rd lpush biglist2 $val } - for {set j 0} {$j < 500000} {incr j} { + for {set j 0} {$j < $elements} {incr j} { $rd read ; # Discard replies $rd read ; # Discard replies } @@ -302,6 +311,8 @@ start_server {tags {"defrag"}} { # test the the fragmentation is lower after 120 ;# serverCron only updates the info once in 100ms + set misses [s active_defrag_misses] + set hits [s active_defrag_hits] set frag [s allocator_frag_ratio] set max_latency 0 foreach event [r latency latest] { @@ -312,6 +323,8 @@ start_server {tags {"defrag"}} { } if {$::verbose} { puts "frag $frag" + puts "misses: $misses" + puts "hits: $hits" puts "max latency $max_latency" puts [r latency latest] puts [r latency history active-defrag-cycle] @@ -320,6 +333,10 @@ start_server {tags {"defrag"}} { # due to high 
fragmentation, 100hz, and active-defrag-cycle-max set to 75, # we expect max latency to be not much higher than 7.5ms but due to rare slowness threshold is set higher assert {$max_latency <= 30} + + # in extreme cases of stagnation, we see over 20m misses before the tests aborts with "defrag didn't stop", + # in normal cases we only see 100k misses out of 500k elements + assert {$misses < $elements} } # verify the data isn't corrupted or changed set newdigest [r debug digest] @@ -327,6 +344,110 @@ start_server {tags {"defrag"}} { r save ;# saving an rdb iterates over all the data / pointers r del biglist1 ;# coverage for quicklistBookmarksClear } {1} + + test "Active defrag edge case" { + # there was an edge case in defrag where all the slabs of a certain bin are exact the same + # % utilization, with the exception of the current slab from which new allocations are made + # if the current slab is lower in utilization the defragger would have ended up in stagnation, + # keept running and not move any allocation. + # this test is more consistent on a fresh server with no history + start_server {tags {"defrag"}} { + r flushdb + r config resetstat + r config set save "" ;# prevent bgsave from interfereing with save below + r config set hz 100 + r config set activedefrag no + r config set active-defrag-max-scan-fields 1000 + r config set active-defrag-threshold-lower 5 + r config set active-defrag-cycle-min 65 + r config set active-defrag-cycle-max 75 + r config set active-defrag-ignore-bytes 1mb + r config set maxmemory 0 + set expected_frag 1.3 + + r debug mallctl-str thread.tcache.flush VOID + # fill the first slab containin 32 regs of 640 bytes. + for {set j 0} {$j < 32} {incr j} { + r setrange "_$j" 600 x + r debug mallctl-str thread.tcache.flush VOID + } + + # add a mass of keys with 600 bytes values, fill the bin of 640 bytes which has 32 regs per slab. + set rd [redis_deferring_client] + set keys 640000 + for {set j 0} {$j < $keys} {incr j} { + $rd setrange $j 600 x + } + for {set j 0} {$j < $keys} {incr j} { + $rd read ; # Discard replies + } + + # create some fragmentation of 50% + set sent 0 + for {set j 0} {$j < $keys} {incr j 1} { + $rd del $j + incr sent + incr j 1 + } + for {set j 0} {$j < $sent} {incr j} { + $rd read ; # Discard replies + } + + # create higher fragmentation in the first slab + for {set j 10} {$j < 32} {incr j} { + r del "_$j" + } + + # start defrag + after 120 ;# serverCron only updates the info once in 100ms + set frag [s allocator_frag_ratio] + if {$::verbose} { + puts "frag $frag" + } + + assert {$frag >= $expected_frag} + + set digest [r debug digest] + catch {r config set activedefrag yes} e + if {![string match {DISABLED*} $e]} { + # wait for the active defrag to start working (decision once a second) + wait_for_condition 50 100 { + [s active_defrag_running] ne 0 + } else { + fail "defrag not started." + } + + # wait for the active defrag to stop working + wait_for_condition 500 100 { + [s active_defrag_running] eq 0 + } else { + after 120 ;# serverCron only updates the info once in 100ms + puts [r info memory] + puts [r info stats] + puts [r memory malloc-stats] + fail "defrag didn't stop." 
+ } + + # test the the fragmentation is lower + after 120 ;# serverCron only updates the info once in 100ms + set misses [s active_defrag_misses] + set hits [s active_defrag_hits] + set frag [s allocator_frag_ratio] + if {$::verbose} { + puts "frag $frag" + puts "hits: $hits" + puts "misses: $misses" + } + assert {$frag < 1.1} + assert {$misses < 10000000} ;# when defrag doesn't stop, we have some 30m misses, when it does, we have 2m misses + } + + # verify the data isn't corrupted or changed + set newdigest [r debug digest] + assert {$digest eq $newdigest} + r save ;# saving an rdb iterates over all the data / pointers + } + } } } } ;# run_solo From 830c674845332ffa75b9d55b0fb5b320abcc6e67 Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Thu, 21 May 2020 13:57:29 +0800 Subject: [PATCH 16/95] Tracking: flag CLIENT_TRACKING_BROKEN_REDIR when redir broken --- src/tracking.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tracking.c b/src/tracking.c index cfde26fc9..eb4113131 100644 --- a/src/tracking.c +++ b/src/tracking.c @@ -206,6 +206,7 @@ void sendTrackingMessage(client *c, char *keyname, size_t keylen, int proto) { if (c->client_tracking_redirection) { client *redir = lookupClientByID(c->client_tracking_redirection); if (!redir) { + c->flags |= CLIENT_TRACKING_BROKEN_REDIR; /* We need to signal to the original connection that we * are unable to send invalidation messages to the redirected * connection, because the client no longer exist. */ From 9d1265240f86505d70ad673ea3f16bcb66c279f9 Mon Sep 17 00:00:00 2001 From: ShooterIT Date: Thu, 21 May 2020 21:00:21 +0800 Subject: [PATCH 17/95] Fix reply bytes calculation error Fix #7275. --- src/networking.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index 0a04c0489..e6c48ac77 100644 --- a/src/networking.c +++ b/src/networking.c @@ -488,7 +488,7 @@ void trimReplyUnusedTailSpace(client *c) { /* take over the allocation's internal fragmentation (at least for * memory usage tracking) */ tail->size = zmalloc_usable(tail) - sizeof(clientReplyBlock); - c->reply_bytes += tail->size - old_size; + c->reply_bytes = c->reply_bytes + tail->size - old_size; listNodeValue(ln) = tail; } } From 26143c29362e8d81981e036bb965db20fc9756fe Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Wed, 20 May 2020 15:14:23 +0300 Subject: [PATCH 18/95] TLS: Improve tls-protocols clarity in redis.conf. --- redis.conf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/redis.conf b/redis.conf index bab7cff47..1aa760599 100644 --- a/redis.conf +++ b/redis.conf @@ -176,9 +176,10 @@ tcp-keepalive 300 # tls-cluster yes # Explicitly specify TLS versions to support. Allowed values are case insensitive -# and include "TLSv1", "TLSv1.1", "TLSv1.2", "TLSv1.3" (OpenSSL >= 1.1.1) +# and include "TLSv1", "TLSv1.1", "TLSv1.2", "TLSv1.3" (OpenSSL >= 1.1.1) or +# any combination. To enable only TLSv1.2 and TLSv1.3, use: # -# tls-protocols TLSv1.2 +# tls-protocols "TLSv1.2 TLSv1.3" # Configure allowed ciphers. See the ciphers(1ssl) manpage for more information # about the syntax of this string. 
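For illustration only (this example is not part of the patch above): a cipher string in the ciphers(1ssl) syntax mentioned here looks like, e.g.,

    tls-ciphers DEFAULT:!MEDIUM

which starts from OpenSSL's DEFAULT cipher list and removes the MEDIUM-strength group.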
From e60f63a718db5ff3f9532210820e6487bda352bc Mon Sep 17 00:00:00 2001 From: ShooterIT Date: Thu, 21 May 2020 21:45:35 +0800 Subject: [PATCH 19/95] Replace addDeferredMultiBulkLength with addReplyDeferredLen in comment --- src/networking.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/networking.c b/src/networking.c index e6c48ac77..f3362aba4 100644 --- a/src/networking.c +++ b/src/networking.c @@ -268,7 +268,7 @@ void _addReplyProtoToList(client *c, const char *s, size_t len) { clientReplyBlock *tail = ln? listNodeValue(ln): NULL; /* Note that 'tail' may be NULL even if we have a tail node, becuase when - * addDeferredMultiBulkLength() is used, it sets a dummy node to NULL just + * addReplyDeferredLen() is used, it sets a dummy node to NULL just * fo fill it later, when the size of the bulk length is set. */ /* Append to tail string when possible. */ @@ -473,7 +473,7 @@ void trimReplyUnusedTailSpace(client *c) { clientReplyBlock *tail = ln? listNodeValue(ln): NULL; /* Note that 'tail' may be NULL even if we have a tail node, becuase when - * addDeferredMultiBulkLength() is used */ + * addReplyDeferredLen() is used */ if (!tail) return; /* We only try to trim the space is relatively high (more than a 1/4 of the From 5c70dab9fc72c342bae9033d3e3da016d779e371 Mon Sep 17 00:00:00 2001 From: hwware Date: Thu, 21 May 2020 17:30:36 -0400 Subject: [PATCH 20/95] fix server crash for STRALGO command --- src/t_string.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/t_string.c b/src/t_string.c index d4eb04769..d46900a7a 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -519,7 +519,7 @@ void stralgoLCS(client *c) { != C_OK) return; if (minmatchlen < 0) minmatchlen = 0; j++; - } else if (!strcasecmp(opt,"STRINGS")) { + } else if (!strcasecmp(opt,"STRINGS") && (c->argc-j) > 2) { if (a != NULL) { addReplyError(c,"Either use STRINGS or KEYS"); return; @@ -527,7 +527,7 @@ void stralgoLCS(client *c) { a = c->argv[j+1]->ptr; b = c->argv[j+2]->ptr; j += 2; - } else if (!strcasecmp(opt,"KEYS")) { + } else if (!strcasecmp(opt,"KEYS") && (c->argc-j) > 2) { if (a != NULL) { addReplyError(c,"Either use STRINGS or KEYS"); return; From 08c70f7dcef4f0353545fe0061818890854c09c3 Mon Sep 17 00:00:00 2001 From: hwware Date: Thu, 21 May 2020 18:13:35 -0400 Subject: [PATCH 21/95] using moreargs variable --- src/t_string.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/t_string.c b/src/t_string.c index d46900a7a..5306069bf 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -519,7 +519,7 @@ void stralgoLCS(client *c) { != C_OK) return; if (minmatchlen < 0) minmatchlen = 0; j++; - } else if (!strcasecmp(opt,"STRINGS") && (c->argc-j) > 2) { + } else if (!strcasecmp(opt,"STRINGS") && moreargs > 1) { if (a != NULL) { addReplyError(c,"Either use STRINGS or KEYS"); return; @@ -527,7 +527,7 @@ void stralgoLCS(client *c) { a = c->argv[j+1]->ptr; b = c->argv[j+2]->ptr; j += 2; - } else if (!strcasecmp(opt,"KEYS") && (c->argc-j) > 2) { + } else if (!strcasecmp(opt,"KEYS") && moreargs > 1) { if (a != NULL) { addReplyError(c,"Either use STRINGS or KEYS"); return; From 5d59bbb6d93b8eaceca2934cc4e1d0b781cb9694 Mon Sep 17 00:00:00 2001 From: Qu Chen Date: Thu, 21 May 2020 18:39:38 -0700 Subject: [PATCH 22/95] Disconnect chained replicas when the replica performs PSYNC with the master always to avoid replication offset mismatch between master and chained replicas. 
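To illustrate with hypothetical numbers: the master sits at offset 1000, the last 28 bytes of which are PINGs. Those PINGs were already propagated through the intermediate replica to its sub-replica, so the sub-replica is also at 1000. When the intermediate replica loses and re-establishes its link, it asks to PSYNC from its last meaningful offset, 972, so the master replays those 28 bytes; a sub-replica that stayed connected would receive them a second time and drift to 1028, permanently ahead of the master's 1000. Disconnecting the chained replicas forces them to resync so their offsets match again.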
--- src/replication.c | 13 ++++++-- tests/integration/psync2-pingoff.tcl | 50 ++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 3 deletions(-) diff --git a/src/replication.c b/src/replication.c index b10635f25..7eb161f7d 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2023,12 +2023,19 @@ int slaveTryPartialResynchronization(connection *conn, int read_reply) { * new one. */ memcpy(server.replid,new,sizeof(server.replid)); memcpy(server.cached_master->replid,new,sizeof(server.replid)); - - /* Disconnect all the sub-slaves: they need to be notified. */ - disconnectSlaves(); } } + /* Disconnect all the sub-replicas: they need to be notified always because + * in case the master has last replicated some non-meaningful commands + * (e.g. PINGs), the primary replica will request the PSYNC offset for the + * last known meaningful command. This means the master will again replicate + * the non-meaningful commands. If the sub-replicas still remains connected, + * they will receive those commands a second time and increment the master + * replication offset to be higher than the master's offset forever. + */ + disconnectSlaves(); + /* Setup the replication to continue. */ sdsfree(reply); replicationResurrectCachedMaster(conn); diff --git a/tests/integration/psync2-pingoff.tcl b/tests/integration/psync2-pingoff.tcl index 420747d21..2c2303141 100644 --- a/tests/integration/psync2-pingoff.tcl +++ b/tests/integration/psync2-pingoff.tcl @@ -60,3 +60,53 @@ start_server {} { } } }} + + +start_server {tags {"psync2"}} { +start_server {} { +start_server {} { + + for {set j 0} {$j < 3} {incr j} { + set R($j) [srv [expr 0-$j] client] + set R_host($j) [srv [expr 0-$j] host] + set R_port($j) [srv [expr 0-$j] port] + $R($j) CONFIG SET repl-ping-replica-period 1 + } + + test "Chained replicas disconnect when replica re-connect with the same master" { + # Add a second replica as a chained replica of the current replica + $R(1) replicaof $R_host(0) $R_port(0) + $R(2) replicaof $R_host(1) $R_port(1) + wait_for_condition 50 1000 { + [status $R(2) master_link_status] == "up" + } else { + fail "Chained replica not replicating from its master" + } + + # Do a write on the master, and wait for 3 seconds for the master to + # send some PINGs to its replica + $R(0) INCR counter2 + after 2000 + set sync_partial_master [status $R(0) sync_partial_ok] + set sync_partial_replica [status $R(1) sync_partial_ok] + $R(0) CONFIG SET repl-ping-replica-period 100 + + # Disconnect the master's direct replica + $R(0) client kill type replica + wait_for_condition 50 1000 { + [status $R(1) master_link_status] == "up" && + [status $R(2) master_link_status] == "up" && + [status $R(0) sync_partial_ok] == $sync_partial_master + 1 && + [status $R(1) sync_partial_ok] == $sync_partial_replica + 1 + } else { + fail "Disconnected replica failed to PSYNC with master" + } + + # Verify that the replica and its replica's meaningful and real + # offsets match with the master + assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset] + assert_equal [status $R(0) master_repl_offset] [status $R(2) master_repl_offset] + assert_equal [status $R(0) master_repl_meaningful_offset] [status $R(1) master_repl_meaningful_offset] + assert_equal [status $R(0) master_repl_meaningful_offset] [status $R(2) master_repl_meaningful_offset] + } +}}} From 8802cbbcded7e65131e27e50e7e4bc2655bd9fc8 Mon Sep 17 00:00:00 2001 From: Madelyn Olson Date: Thu, 21 May 2020 15:20:59 -0700 Subject: [PATCH 23/95] EAGAIN for tls during 
diskless load --- src/replication.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/replication.c b/src/replication.c index 7eb161f7d..7e981873f 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1526,6 +1526,10 @@ void readSyncBulkPayload(connection *conn) { nread = connRead(conn,buf,readlen); if (nread <= 0) { + if (connGetState(conn) == CONN_STATE_CONNECTED) { + /* equivalent to EAGAIN */ + return; + } serverLog(LL_WARNING,"I/O error trying to sync with MASTER: %s", (nread == -1) ? strerror(errno) : "connection lost"); cancelReplicationHandshake(); From ed3fd6c524b3d6267f624096971cb73f4e911a51 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 22 May 2020 15:59:02 +0200 Subject: [PATCH 24/95] Fix #7306 less aggressively. Citing from the issue: btw I suggest we change this fix to something else: * We revert the fix. * We add a call that disconnects chained replicas in the place where we trim the replica (that is a master i this case) offset. This way we can avoid disconnections when there is no trimming of the backlog. Note that we now want to disconnect replicas asynchronously in disconnectSlaves(), because it's in general safer now that we can call it from freeClient(). Otherwise for instance the command: CLIENT KILL TYPE master May crash: clientCommand() starts running the linked of of clients, looking for clients to kill. However it finds the master, kills it calling freeClient(), but this in turn calls replicationCacheMaster() that may also call disconnectSlaves() now. So the linked list iterator of the clientCommand() will no longer be valid. --- src/networking.c | 7 +++++-- src/replication.c | 39 ++++++++++++++++++++++++--------------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/src/networking.c b/src/networking.c index f3362aba4..a1948ce60 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1039,9 +1039,12 @@ static void freeClientArgv(client *c) { * when we resync with our own master and want to force all our slaves to * resync with us as well. */ void disconnectSlaves(void) { - while (listLength(server.slaves)) { + listIter li; + listNode *ln; + listRewind(server.slaves,&li); + while((ln = listNext(&li))) { listNode *ln = listFirst(server.slaves); - freeClient((client*)ln->value); + freeClientAsync((client*)ln->value); } } diff --git a/src/replication.c b/src/replication.c index 7e981873f..603ef4895 100644 --- a/src/replication.c +++ b/src/replication.c @@ -39,7 +39,7 @@ #include #include -long long adjustMeaningfulReplOffset(); +long long adjustMeaningfulReplOffset(int *adjusted); void replicationDiscardCachedMaster(void); void replicationResurrectCachedMaster(connection *conn); void replicationSendAck(void); @@ -2027,19 +2027,12 @@ int slaveTryPartialResynchronization(connection *conn, int read_reply) { * new one. */ memcpy(server.replid,new,sizeof(server.replid)); memcpy(server.cached_master->replid,new,sizeof(server.replid)); + + /* Disconnect all the sub-slaves: they need to be notified. */ + disconnectSlaves(); } } - /* Disconnect all the sub-replicas: they need to be notified always because - * in case the master has last replicated some non-meaningful commands - * (e.g. PINGs), the primary replica will request the PSYNC offset for the - * last known meaningful command. This means the master will again replicate - * the non-meaningful commands. 
If the sub-replicas still remains connected, - * they will receive those commands a second time and increment the master - * replication offset to be higher than the master's offset forever. - */ - disconnectSlaves(); - /* Setup the replication to continue. */ sdsfree(reply); replicationResurrectCachedMaster(conn); @@ -2708,7 +2701,8 @@ void replicationCacheMaster(client *c) { /* Adjust reploff and read_reploff to the last meaningful offset we * executed. This is the offset the replica will use for future PSYNC. */ - server.master->reploff = adjustMeaningfulReplOffset(); + int offset_adjusted; + server.master->reploff = adjustMeaningfulReplOffset(&offset_adjusted); server.master->read_reploff = server.master->reploff; if (c->flags & CLIENT_MULTI) discardTransaction(c); listEmpty(c->reply); @@ -2731,6 +2725,13 @@ void replicationCacheMaster(client *c) { * so make sure to adjust the replication state. This function will * also set server.master to NULL. */ replicationHandleMasterDisconnection(); + + /* If we trimmed this replica backlog, we need to disconnect our chained + * replicas (if any), otherwise they may have the PINGs we removed + * from the stream and their offset would no longer match: upon + * disconnection they will also trim the final PINGs and will be able + * to incrementally sync without issues. */ + if (offset_adjusted) disconnectSlaves(); } /* If the "meaningful" offset, that is the offset without the final PINGs @@ -2740,8 +2741,13 @@ void replicationCacheMaster(client *c) { * offset because of the PINGs and will not be able to incrementally * PSYNC with the new master. * This function trims the replication backlog when needed, and returns - * the offset to be used for future partial sync. */ -long long adjustMeaningfulReplOffset() { + * the offset to be used for future partial sync. + * + * If the integer 'adjusted' was passed by reference, it is set to 1 + * if the function call actually modified the offset and the replication + * backlog, otherwise it is set to 0. It can be NULL if the caller is + * not interested in getting this info. */ +long long adjustMeaningfulReplOffset(int *adjusted) { if (server.master_repl_offset > server.master_repl_meaningful_offset) { long long delta = server.master_repl_offset - server.master_repl_meaningful_offset; @@ -2761,6 +2767,9 @@ long long adjustMeaningfulReplOffset() { (server.repl_backlog_idx + (server.repl_backlog_size - delta)) % server.repl_backlog_size; } + if (adjusted) *adjusted = 1; + } else { + if (adjusted) *adjusted = 0; } return server.master_repl_offset; } @@ -2784,7 +2793,7 @@ void replicationCacheMasterUsingMyself(void) { * by replicationCreateMasterClient(). We'll later set the created * master as server.cached_master, so the replica will use such * offset for PSYNC. */ - server.master_initial_offset = adjustMeaningfulReplOffset(); + server.master_initial_offset = adjustMeaningfulReplOffset(NULL); /* The master client we create can be set to any DBID, because * the new master will start its replication stream with SELECT. */ From fc18f9a7983536c5afbfc8f351af5d74b77cc5f5 Mon Sep 17 00:00:00 2001 From: ShooterIT Date: Tue, 14 Apr 2020 23:56:34 +0800 Subject: [PATCH 25/95] Implements sendfile for redis. 
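As background (illustration only, not part of this patch): on Linux the system call being wrapped has the prototype ssize_t sendfile(int out_fd, int in_fd, off_t *offset, size_t count), and a simple blocking-socket loop around it looks like the sketch below; send_whole_file and its minimal error handling are invented for the example.

    #include <sys/sendfile.h>
    #include <sys/stat.h>
    #include <unistd.h>
    #include <errno.h>

    /* Push an entire open file to a connected (blocking) socket without
     * copying the data through a user-space buffer. sendfile() advances
     * 'offset' by the number of bytes it transferred. */
    static int send_whole_file(int sock_fd, int file_fd) {
        struct stat st;
        off_t offset = 0;
        if (fstat(file_fd, &st) == -1) return -1;
        while (offset < st.st_size) {
            ssize_t n = sendfile(sock_fd, file_fd, &offset, st.st_size - offset);
            if (n == -1) {
                if (errno == EINTR) continue;  /* interrupted: retry */
                return -1;                     /* EAGAIN would mean: wait for writability */
            }
            if (n == 0) break;                 /* nothing left to send */
        }
        return 0;
    }

The patch below adds the same kind of call behind a redis_sendfile() wrapper so the existing event-driven sendBulkToSlave() path can use it where available.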
--- src/config.h | 6 ++++++ src/replication.c | 51 +++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/src/config.h b/src/config.h index 0fcc42972..23de5d3ad 100644 --- a/src/config.h +++ b/src/config.h @@ -133,6 +133,12 @@ void setproctitle(const char *fmt, ...); /* Byte ordering detection */ #include /* This will likely define BYTE_ORDER */ +/* Define redis_sendfile. */ +#if defined(__linux__) || (defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_5)) +#define HAVE_SENDFILE 1 +ssize_t redis_sendfile(int out_fd, int in_fd, off_t offset, size_t count); +#endif + #ifndef BYTE_ORDER #if (BSD >= 199103) # include diff --git a/src/replication.c b/src/replication.c index 603ef4895..77422344a 100644 --- a/src/replication.c +++ b/src/replication.c @@ -973,10 +973,41 @@ void removeRDBUsedToSyncReplicas(void) { } } +#if HAVE_SENDFILE +/* Implements redis_sendfile to transfer data between file descriptors and + * avoid transferring data to and from user space. + * + * The function prototype is just like sendfile(2) on Linux. in_fd is a file + * descriptor opened for reading and out_fd is a descriptor opened for writing. + * offset specifies where to start reading data from in_fd. count is the number + * of bytes to copy between the file descriptors. + * + * The return value is the number of bytes written to out_fd, if the transfer + * was successful. On error, -1 is returned, and errno is set appropriately. */ +ssize_t redis_sendfile(int out_fd, int in_fd, off_t offset, size_t count) { +#if defined(__linux__) + #include + return sendfile(out_fd, in_fd, &offset, count); + +#elif defined(__APPLE__) + off_t len = count; + /* Notice that it may return -1 and errno is set to EAGAIN even if some + * bytes have been sent successfully and the len argument is set correctly + * when using a socket marked for non-blocking I/O. */ + if (sendfile(in_fd, out_fd, offset, &len, NULL, 0) == -1 && + errno != EAGAIN) return -1; + else + return (ssize_t)len; + +#endif + errno = ENOSYS; + return -1; +} +#endif + void sendBulkToSlave(connection *conn) { client *slave = connGetPrivateData(conn); - char buf[PROTO_IOBUF_LEN]; - ssize_t nwritten, buflen; + ssize_t nwritten; /* Before sending the RDB file, we send the preamble as configured by the * replication process. Currently the preamble is just the bulk count of @@ -1002,6 +1033,21 @@ void sendBulkToSlave(connection *conn) { } /* If the preamble was already transferred, send the RDB bulk data. */ +#if HAVE_SENDFILE + if ((nwritten = redis_sendfile(conn->fd,slave->repldbfd, + slave->repldboff,PROTO_IOBUF_LEN)) == -1) + { + if (errno != EAGAIN) { + serverLog(LL_WARNING,"Sendfile error sending DB to replica: %s", + strerror(errno)); + freeClient(slave); + } + return; + } +#else + ssize_t buflen; + char buf[PROTO_IOBUF_LEN]; + lseek(slave->repldbfd,slave->repldboff,SEEK_SET); buflen = read(slave->repldbfd,buf,PROTO_IOBUF_LEN); if (buflen <= 0) { @@ -1018,6 +1064,7 @@ void sendBulkToSlave(connection *conn) { } return; } +#endif slave->repldboff += nwritten; server.stat_net_output_bytes += nwritten; if (slave->repldboff == slave->repldbsize) { From 8a4e01f2bc6f2aaf6a4aa721ddb265813675d8d0 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 22 May 2020 19:29:09 +0200 Subject: [PATCH 26/95] Make disconnectSlaves() synchronous in the base case. 
Otherwise we run into that: Backtrace: src/redis-server 127.0.0.1:21322(logStackTrace+0x45)[0x479035] src/redis-server 127.0.0.1:21322(sigsegvHandler+0xb9)[0x4797f9] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7fd373c5e390] src/redis-server 127.0.0.1:21322(_serverAssert+0x6a)[0x47660a] src/redis-server 127.0.0.1:21322(freeReplicationBacklog+0x42)[0x451282] src/redis-server 127.0.0.1:21322[0x4552d4] src/redis-server 127.0.0.1:21322[0x4c5593] src/redis-server 127.0.0.1:21322(aeProcessEvents+0x2e6)[0x42e786] src/redis-server 127.0.0.1:21322(aeMain+0x1d)[0x42eb0d] src/redis-server 127.0.0.1:21322(main+0x4c5)[0x42b145] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7fd3738a3830] src/redis-server 127.0.0.1:21322(_start+0x29)[0x42b409] Since we disconnect all the replicas and free the replication backlog in certain replication paths, and the code that will free the replication backlog expects that no replica is connected. However we still need to free the replicas asynchronously in certain cases, as documented in the top comment of disconnectSlaves(). --- src/networking.c | 17 ++++++++++++++--- src/replication.c | 10 +++++----- src/server.h | 2 +- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/networking.c b/src/networking.c index a1948ce60..364654642 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1037,14 +1037,25 @@ static void freeClientArgv(client *c) { /* Close all the slaves connections. This is useful in chained replication * when we resync with our own master and want to force all our slaves to - * resync with us as well. */ -void disconnectSlaves(void) { + * resync with us as well. + * + * If 'async' is non-zero we free the clients asynchronously. This is needed + * when we call this function from a context where in the chain of the + * callers somebody is iterating the list of clients. For instance when + * CLIENT KILL TYPE master is called, caching the master client may + * adjust the meaningful offset of replication, and in turn call + * discionectSlaves(). Since CLIENT KILL iterates the clients this is + * not safe. */ +void disconnectSlaves(int async) { listIter li; listNode *ln; listRewind(server.slaves,&li); while((ln = listNext(&li))) { listNode *ln = listFirst(server.slaves); - freeClientAsync((client*)ln->value); + if (async) + freeClientAsync((client*)ln->value); + else + freeClient((client*)ln->value); } } diff --git a/src/replication.c b/src/replication.c index 77422344a..e1c65d48b 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2076,7 +2076,7 @@ int slaveTryPartialResynchronization(connection *conn, int read_reply) { memcpy(server.cached_master->replid,new,sizeof(server.replid)); /* Disconnect all the sub-slaves: they need to be notified. */ - disconnectSlaves(); + disconnectSlaves(0); } } @@ -2349,7 +2349,7 @@ void syncWithMaster(connection *conn) { * as well, if we have any sub-slaves. The master may transfer us an * entirely different data set and we have no way to incrementally feed * our slaves after that. */ - disconnectSlaves(); /* Force our slaves to resync with us as well. */ + disconnectSlaves(0); /* Force our slaves to resync with us as well. */ freeReplicationBacklog(); /* Don't allow our chained slaves to PSYNC. */ /* Fall back to SYNC if needed. Otherwise psync_result == PSYNC_FULLRESYNC @@ -2496,7 +2496,7 @@ void replicationSetMaster(char *ip, int port) { /* Force our slaves to resync with us as well. They may hopefully be able * to partially resync with us, but we can notify the replid change. 
*/ - disconnectSlaves(); + disconnectSlaves(0); cancelReplicationHandshake(); /* Before destroying our master state, create a cached master using * our own parameters, to later PSYNC with the new master. */ @@ -2543,7 +2543,7 @@ void replicationUnsetMaster(void) { * of the replication ID change (see shiftReplicationId() call). However * the slaves will be able to partially resync with us, so it will be * a very fast reconnection. */ - disconnectSlaves(); + disconnectSlaves(0); server.repl_state = REPL_STATE_NONE; /* We need to make sure the new master will start the replication stream @@ -2778,7 +2778,7 @@ void replicationCacheMaster(client *c) { * from the stream and their offset would no longer match: upon * disconnection they will also trim the final PINGs and will be able * to incrementally sync without issues. */ - if (offset_adjusted) disconnectSlaves(); + if (offset_adjusted) disconnectSlaves(1); } /* If the "meaningful" offset, that is the offset without the final PINGs diff --git a/src/server.h b/src/server.h index f835bf5e9..2d17d69c8 100644 --- a/src/server.h +++ b/src/server.h @@ -1660,7 +1660,7 @@ int getClientType(client *c); int getClientTypeByName(char *name); char *getClientTypeName(int class); void flushSlavesOutputBuffers(void); -void disconnectSlaves(void); +void disconnectSlaves(int async); int listenToPort(int port, int *fds, int *count); void pauseClients(mstime_t duration); int clientsArePaused(void); From 1f5163f454e570335070696758605e87f6b47a0c Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Sun, 24 May 2020 08:00:12 +0300 Subject: [PATCH 27/95] add CI for 32bit build --- .github/workflows/ci.yml | 18 ++++++++++++++++++ .github/workflows/daily.yml | 16 ++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 551fb2d91..439e3f3df 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,6 +3,7 @@ name: CI on: [push, pull_request] jobs: + test-ubuntu-latest: runs-on: ubuntu-latest steps: @@ -29,3 +30,20 @@ jobs: - uses: actions/checkout@v1 - name: make run: make + + biuld-32bit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + - name: make + run: | + sudo apt-get update && sudo apt-get install libc6-dev-i386 + make 32bit + + build-libc-malloc: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + - name: make + run: make MALLOC=libc + diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index b6a9abb68..828268c70 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -5,6 +5,7 @@ on: - cron: '0 7 * * *' jobs: + test-jemalloc: runs-on: ubuntu-latest timeout-minutes: 1200 @@ -33,6 +34,21 @@ jobs: - name: module api test run: ./runtest-moduleapi --verbose + test-32bit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + - name: make + run: | + sudo apt-get update && sudo apt-get install libc6-dev-i386 + make 32bit + - name: test + run: | + sudo apt-get install tcl8.5 + ./runtest --accurate --verbose + - name: module api test + run: ./runtest-moduleapi --verbose + test-valgrind: runs-on: ubuntu-latest timeout-minutes: 14400 From d089cc8963f8a4673ae77967ccb684f707f057ae Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Mon, 25 May 2020 11:17:51 +0800 Subject: [PATCH 28/95] PSYNC2: second_replid_offset should be real meaningful offset After adjustMeaningfulReplOffset(), all the other related variable should be updated, including server.second_replid_offset. 
Or the old version redis like 5.0 may receive wrong data from replication stream, cause redis 5.0 can sync with redis 6.0, but doesn't know meaningful offset. --- src/replication.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/replication.c b/src/replication.c index e1c65d48b..d45a15783 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2531,14 +2531,14 @@ void replicationUnsetMaster(void) { sdsfree(server.masterhost); server.masterhost = NULL; + if (server.master) freeClient(server.master); + replicationDiscardCachedMaster(); + cancelReplicationHandshake(); /* When a slave is turned into a master, the current replication ID * (that was inherited from the master at synchronization time) is * used as secondary ID up to the current offset, and a new replication * ID is created to continue with a new replication history. */ shiftReplicationId(); - if (server.master) freeClient(server.master); - replicationDiscardCachedMaster(); - cancelReplicationHandshake(); /* Disconnecting all the slaves is required: we need to inform slaves * of the replication ID change (see shiftReplicationId() call). However * the slaves will be able to partially resync with us, so it will be From 7a32a8485ef02958e708060a87e85a1e7bda214a Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 25 May 2020 11:47:38 +0200 Subject: [PATCH 29/95] Clarify what is happening in PR #7320. --- src/replication.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/replication.c b/src/replication.c index d45a15783..3ebe451a2 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2537,7 +2537,11 @@ void replicationUnsetMaster(void) { /* When a slave is turned into a master, the current replication ID * (that was inherited from the master at synchronization time) is * used as secondary ID up to the current offset, and a new replication - * ID is created to continue with a new replication history. */ + * ID is created to continue with a new replication history. + * + * NOTE: this function MUST be called after we call + * freeClient(server.master), since there we adjust the replication + * offset trimming the final PINGs. See Github issue #7320. */ shiftReplicationId(); /* Disconnecting all the slaves is required: we need to inform slaves * of the replication ID change (see shiftReplicationId() call). However From d9164a07b53b7d693c80a3dd2eeb9930d590d91c Mon Sep 17 00:00:00 2001 From: John Sully Date: Tue, 26 May 2020 01:39:54 -0400 Subject: [PATCH 30/95] Test TLS as part of CI Former-commit-id: 561a1f8d8a2ad5a048acbc3a7b17360ce114dec0 --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b9bebf2e2..44d429f78 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,14 +7,14 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 - - name: make + - name: make BUILD_TLS=yes run: | sudo apt-get -y install uuid-dev libcurl4-openssl-dev make -j2 - name: test run: | sudo apt-get -y install tcl8.5 - ./runtest --clients 2 --verbose + ./runtest --clients 2 --verbose --tls - name: cluster-test run: | ./runtest-cluster From 2411e4e33faa3f56cb663f48febcad6756562311 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 25 May 2020 18:37:05 +0200 Subject: [PATCH 31/95] Test: PSYNC2 test can now show server logs. 
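The diff below adds a show_cluster_status helper to the PSYNC2 test: besides dumping the replication counters of every instance, it loads the five server log files and prints the replication-related lines interleaved by their timestamps, which is essentially a k-way merge keyed on the log date. A minimal C sketch of that merge idea (purely illustrative, not part of Redis or of the test suite; it assumes every line begins with a lexicographically sortable timestamp and that there are at most 16 sources):

    #include <stdio.h>
    #include <string.h>

    /* Print the lines of 'n' log sources as a single stream, always emitting
     * the pending line that compares smallest (i.e. the oldest timestamp). */
    void merge_logs(const char **lines[], const size_t counts[], size_t n) {
        size_t pos[16] = {0};                        /* per-source read cursor */
        if (n > sizeof(pos)/sizeof(pos[0])) return;  /* sketch handles <= 16 */
        for (;;) {
            int best = -1;
            for (size_t j = 0; j < n; j++) {
                if (pos[j] == counts[j]) continue;   /* source exhausted */
                if (best < 0 ||
                    strcmp(lines[j][pos[j]], lines[best][pos[best]]) < 0)
                    best = (int)j;
            }
            if (best < 0) break;                     /* all sources consumed */
            printf("[%d] %s\n", best, lines[best][pos[best]++]);
        }
    }

The Tcl code in the diff does the same thing with a regexp-extracted date per line, repeatedly emitting and dropping the head of the list whose date compares smallest.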
--- tests/integration/psync2.tcl | 113 +++++++++++++++++++++++++++-------- 1 file changed, 88 insertions(+), 25 deletions(-) diff --git a/tests/integration/psync2.tcl b/tests/integration/psync2.tcl index d6b2d19d0..29a880f99 100644 --- a/tests/integration/psync2.tcl +++ b/tests/integration/psync2.tcl @@ -1,3 +1,75 @@ + +proc show_cluster_status {} { + uplevel 1 { + # The following is the regexp we use to match the log line + # time info. Logs are in the following form: + # + # 11296:M 25 May 2020 17:37:14.652 # Server initialized + set log_regexp {^[0-9]+:[A-Z] [0-9]+ [A-z]+ [0-9]+ ([0-9:.]+) .*} + set repl_regexp {(master|repl|sync|backlog|meaningful|offset)} + + puts "Master ID is $master_id" + for {set j 0} {$j < 5} {incr j} { + puts "$j: sync_full: [status $R($j) sync_full]" + puts "$j: id1 : [status $R($j) master_replid]:[status $R($j) master_repl_offset]" + puts "$j: id2 : [status $R($j) master_replid2]:[status $R($j) second_repl_offset]" + puts "$j: backlog : firstbyte=[status $R($j) repl_backlog_first_byte_offset] len=[status $R($j) repl_backlog_histlen]" + puts "$j: x var is : [$R($j) GET x]" + puts "---" + } + + # Show the replication logs of every instance, interleaving + # them by the log date. + # + # First: load the lines as lists for each instance. + array set log {} + for {set j 0} {$j < 5} {incr j} { + set fd [open $R_log($j)] + while {[gets $fd l] >= 0} { + if {[regexp $log_regexp $l] && + [regexp -nocase $repl_regexp $l]} { + lappend log($j) $l + } + } + close $fd + } + + # To interleave the lines, at every step consume the element of + # the list with the lowest time and remove it. Do it until + # all the lists are empty. + # + # regexp {^[0-9]+:[A-Z] [0-9]+ [A-z]+ [0-9]+ ([0-9:.]+) .*} $l - logdate + while 1 { + # Find the log with smallest time. + set empty 0 + set best 0 + set bestdate {} + for {set j 0} {$j < 5} {incr j} { + if {[llength $log($j)] == 0} { + incr empty + continue + } + regexp $log_regexp [lindex $log($j) 0] - date + if {$bestdate eq {}} { + set best $j + set bestdate $date + } else { + if {[string compare $bestdate $date] > 0} { + set best $j + set bestdate $date + } + } + } + if {$empty == 5} break ; # Our exit condition: no more logs + + # Emit the one with the smallest time (that is the first + # event in the time line). 
+ puts "\[$best port $R_port($best)\] [lindex $log($best) 0]" + set log($best) [lrange $log($best) 1 end] + } + } +} + start_server {tags {"psync2"}} { start_server {} { start_server {} { @@ -28,6 +100,7 @@ start_server {} { set R($j) [srv [expr 0-$j] client] set R_host($j) [srv [expr 0-$j] host] set R_port($j) [srv [expr 0-$j] port] + set R_log($j) [srv [expr 0-$j] stdout] if {$debug_msg} {puts "Log file: [srv [expr 0-$j] stdout]"} } @@ -74,6 +147,7 @@ start_server {} { [status $R([expr {($master_id+3)%5}]) master_link_status] == "up" && [status $R([expr {($master_id+4)%5}]) master_link_status] == "up" } else { + show_cluster_status fail "Replica not reconnecting" } @@ -85,6 +159,7 @@ start_server {} { wait_for_condition 50 1000 { [$R($j) get x] == $counter_value } else { + show_cluster_status fail "Instance #$j x variable is inconsistent" } } @@ -120,6 +195,7 @@ start_server {} { wait_for_condition 50 1000 { [$R($j) get x] == $counter_value } else { + show_cluster_status fail "Instance #$j x variable is inconsistent" } } @@ -134,26 +210,12 @@ start_server {} { [status $R(3) master_repl_offset] >= $masteroff && [status $R(4) master_repl_offset] >= $masteroff } else { - puts "Master ID is $master_id" - for {set j 0} {$j < 5} {incr j} { - puts "$j: sync_full: [status $R($j) sync_full]" - puts "$j: id1 : [status $R($j) master_replid]:[status $R($j) master_repl_offset]" - puts "$j: id2 : [status $R($j) master_replid2]:[status $R($j) second_repl_offset]" - puts "$j: backlog : firstbyte=[status $R($j) repl_backlog_first_byte_offset] len=[status $R($j) repl_backlog_histlen]" - puts "---" - } + show_cluster_status fail "Replicas offsets didn't catch up with the master after too long time." } if {$debug_msg} { - puts "Master ID is $master_id" - for {set j 0} {$j < 5} {incr j} { - puts "$j: sync_full: [status $R($j) sync_full]" - puts "$j: id1 : [status $R($j) master_replid]:[status $R($j) master_repl_offset]" - puts "$j: id2 : [status $R($j) master_replid2]:[status $R($j) second_repl_offset]" - puts "$j: backlog : firstbyte=[status $R($j) repl_backlog_first_byte_offset] len=[status $R($j) repl_backlog_histlen]" - puts "---" - } + show_cluster_status } test "PSYNC2: total sum of full synchronizations is exactly 4" { @@ -161,7 +223,10 @@ start_server {} { for {set j 0} {$j < 5} {incr j} { incr sum [status $R($j) sync_full] } - assert {$sum == 4} + if {$sum != 4} { + show_cluster_status + assert {$sum == 4} + } } # In absence of pings, are the instances really able to have @@ -174,14 +239,7 @@ start_server {} { [status $R($master_id) master_repl_offset] == [status $R(3) master_repl_offset] && [status $R($master_id) master_repl_offset] == [status $R(4) master_repl_offset] } else { - puts "Master ID is $master_id" - for {set j 0} {$j < 5} {incr j} { - puts "$j: sync_full: [status $R($j) sync_full]" - puts "$j: id1 : [status $R($j) master_replid]:[status $R($j) master_repl_offset]" - puts "$j: id2 : [status $R($j) master_replid2]:[status $R($j) second_repl_offset]" - puts "$j: backlog : firstbyte=[status $R($j) repl_backlog_first_byte_offset] len=[status $R($j) repl_backlog_histlen]" - puts "---" - } + show_cluster_status fail "Replicas and master offsets were unable to match *exactly*." 
} $R($master_id) config set repl-ping-replica-period 10 @@ -210,6 +268,7 @@ start_server {} { [status $R([expr {($master_id+3)%5}]) master_link_status] == "up" && [status $R([expr {($master_id+4)%5}]) master_link_status] == "up" } else { + show_cluster_status fail "Replica not reconnecting" } } @@ -233,6 +292,7 @@ start_server {} { puts "prev sync_partial_ok: $sync_partial" puts "prev sync_partial_err: $sync_partial_err" puts [$R($master_id) info stats] + show_cluster_status fail "Replica didn't partial sync" } set new_sync_count [status $R($master_id) sync_full] @@ -254,6 +314,7 @@ start_server {} { wait_for_condition 50 1000 { [$R($master_id) debug digest] == [$R($slave_id) debug digest] } else { + show_cluster_status fail "Replica not reconnecting" } @@ -288,6 +349,7 @@ start_server {} { wait_for_condition 50 1000 { [status $R($master_id) connected_slaves] == 4 } else { + show_cluster_status fail "Replica not reconnecting" } set new_sync_count [status $R($master_id) sync_full] @@ -298,6 +360,7 @@ start_server {} { wait_for_condition 50 1000 { [$R($master_id) debug digest] == [$R($slave_id) debug digest] } else { + show_cluster_status fail "Debug digest mismatch between master and replica in post-restart handshake" } } From fee0c7630495c5bc1717e9b36c52244a8c14c4c2 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 26 May 2020 23:55:18 +0200 Subject: [PATCH 32/95] Replication: log backlog creation event. --- src/replication.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/replication.c b/src/replication.c index 3ebe451a2..07dc50edb 100644 --- a/src/replication.c +++ b/src/replication.c @@ -748,6 +748,9 @@ void syncCommand(client *c) { changeReplicationId(); clearReplicationId2(); createReplicationBacklog(); + serverLog(LL_NOTICE,"Replication backlog created, my new " + "replication IDs are '%s' and '%s'", + server.replid, server.replid2); } /* CASE 1: BGSAVE is in progress, with disk target. */ From 0705a29959c91c5d9c0352953ac2dc8eecd5aaf0 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Tue, 26 May 2020 13:50:59 +0300 Subject: [PATCH 33/95] avoid using sendfile if tls-replication is enabled this obviously broke the tests, but went unnoticed so far since tls wasn't often tested. --- src/replication.c | 65 ++++++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/src/replication.c b/src/replication.c index 07dc50edb..2b21b02d8 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1035,39 +1035,46 @@ void sendBulkToSlave(connection *conn) { } } - /* If the preamble was already transferred, send the RDB bulk data. */ + /* If the preamble was already transferred, send the RDB bulk data. + * try to use sendfile system call if supported, unless tls is enabled. + * fallback to normal read+write otherwise. 
*/ + nwritten = 0; #if HAVE_SENDFILE - if ((nwritten = redis_sendfile(conn->fd,slave->repldbfd, - slave->repldboff,PROTO_IOBUF_LEN)) == -1) - { - if (errno != EAGAIN) { - serverLog(LL_WARNING,"Sendfile error sending DB to replica: %s", - strerror(errno)); - freeClient(slave); + if (!server.tls_replication) { + if ((nwritten = redis_sendfile(conn->fd,slave->repldbfd, + slave->repldboff,PROTO_IOBUF_LEN)) == -1) + { + if (errno != EAGAIN) { + serverLog(LL_WARNING,"Sendfile error sending DB to replica: %s", + strerror(errno)); + freeClient(slave); + } + return; } - return; - } -#else - ssize_t buflen; - char buf[PROTO_IOBUF_LEN]; - - lseek(slave->repldbfd,slave->repldboff,SEEK_SET); - buflen = read(slave->repldbfd,buf,PROTO_IOBUF_LEN); - if (buflen <= 0) { - serverLog(LL_WARNING,"Read error sending DB to replica: %s", - (buflen == 0) ? "premature EOF" : strerror(errno)); - freeClient(slave); - return; - } - if ((nwritten = connWrite(conn,buf,buflen)) == -1) { - if (connGetState(conn) != CONN_STATE_CONNECTED) { - serverLog(LL_WARNING,"Write error sending DB to replica: %s", - connGetLastError(conn)); - freeClient(slave); - } - return; } #endif + if (!nwritten) { + ssize_t buflen; + char buf[PROTO_IOBUF_LEN]; + + lseek(slave->repldbfd,slave->repldboff,SEEK_SET); + buflen = read(slave->repldbfd,buf,PROTO_IOBUF_LEN); + if (buflen <= 0) { + serverLog(LL_WARNING,"Read error sending DB to replica: %s", + (buflen == 0) ? "premature EOF" : strerror(errno)); + freeClient(slave); + return; + } + if ((nwritten = connWrite(conn,buf,buflen)) == -1) { + if (connGetState(conn) != CONN_STATE_CONNECTED) { + serverLog(LL_WARNING,"Write error sending DB to replica: %s", + connGetLastError(conn)); + freeClient(slave); + } + return; + } + } + slave->repldboff += nwritten; server.stat_net_output_bytes += nwritten; if (slave->repldboff == slave->repldbsize) { From abb9dcd975f97601ab9dd372ae779cec28d77dd4 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Mon, 25 May 2020 17:25:23 +0300 Subject: [PATCH 34/95] daily CI test with tls --- .github/workflows/daily.yml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index 828268c70..c22d49895 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -49,6 +49,21 @@ jobs: - name: module api test run: ./runtest-moduleapi --verbose + test-tls: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + - name: make + run: | + make BUILD_TLS=yes + - name: test + run: | + sudo apt-get install tcl8.5 tcl-tls + ./utils/gen-test-certs.sh + ./runtest --accurate --verbose --tls + - name: module api test + run: ./runtest-moduleapi --verbose --tls + test-valgrind: runs-on: ubuntu-latest timeout-minutes: 14400 From 7e55485b21e71d474426cc92ff504ac8d90f1aa4 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 27 May 2020 11:45:49 +0200 Subject: [PATCH 35/95] Set a protocol error if master use the inline protocol. We want to react a bit more aggressively if we sense that the master is sending us some corrupted stream. By setting the protocol error we both ensure that the replica will disconnect, and avoid caching the master so that a full SYNC will be required. This is protective against replication bugs. 
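The patch below hardens the replica's side of that link. On a healthy replication stream the master sends only RESP multibulk commands ("*<argc>\r\n..."), plus bare newlines used as keep-alives, so a non-empty inline command arriving from the master is treated as evidence of corruption and turned into a protocol error, which also avoids caching the master so that a full SYNC is required afterwards. A rough standalone sketch of that rule, for illustration only (the helper name and the is_master flag are assumptions; the real check lives inside processInlineBuffer(), as the diff shows):

    #include <stddef.h>

    /* A non-empty query that reached the inline-command parser on the link
     * from our master indicates protocol desynchronization: masters are
     * expected to send only multibulk ("*...") requests and bare newline
     * keep-alives, while ordinary clients may legitimately go inline. */
    static int master_inline_is_desync(size_t inline_len, int is_master) {
        if (!is_master) return 0;   /* regular clients can use inline commands */
        return inline_len != 0;     /* empty line == keep-alive, accepted */
    }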
---
 src/networking.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/src/networking.c b/src/networking.c
index 364654642..bb682db4c 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -1553,6 +1553,19 @@ int processInlineBuffer(client *c) {
     if (querylen == 0 && getClientType(c) == CLIENT_TYPE_SLAVE)
         c->repl_ack_time = server.unixtime;
+    /* Masters should never send us inline protocol to run actual
+     * commands. If this happens, it is likely due to a bug in Redis where
+     * we got some desynchronization in the protocol, for example
+     * because of a PSYNC gone bad.
+     *
+     * However there is an exception: masters may send us just a newline
+     * to keep the connection active. */
+    if (querylen != 0 && c->flags & CLIENT_MASTER) {
+        serverLog(LL_WARNING,"WARNING: Receiving inline protocol from master, master stream corruption? Closing the master connection and discarding the cached master.");
+        setProtocolError("Master using the inline protocol. Desync?",c);
+        return C_ERR;
+    }
+
     /* Move querybuffer position to the next query in the buffer. */
     c->qb_pos += querylen+linefeed_chars;
@@ -1576,7 +1589,7 @@ int processInlineBuffer(client *c) {
  * CLIENT_PROTOCOL_ERROR. */
 #define PROTO_DUMP_LEN 128
 static void setProtocolError(const char *errstr, client *c) {
-    if (server.verbosity <= LL_VERBOSE) {
+    if (server.verbosity <= LL_VERBOSE || c->flags & CLIENT_MASTER) {
         sds client = catClientInfoString(sdsempty(),c);
         /* Sample some protocol to give an idea about what was inside. */
@@ -1595,7 +1608,9 @@ static void setProtocolError(const char *errstr, client *c) {
     }
     /* Log all the client and protocol info. */
-    serverLog(LL_VERBOSE,
+    int loglevel = (c->flags & CLIENT_MASTER) ? LL_WARNING :
+                                                LL_VERBOSE;
+    serverLog(loglevel,
         "Protocol error (%s) from client: %s. %s", errstr, client, buf);
     sdsfree(client);
 }

From 911c579b68689bae06ed84ce097ddbe2e0c8efd4 Mon Sep 17 00:00:00 2001
From: antirez
Date: Wed, 27 May 2020 12:06:33 +0200
Subject: [PATCH 36/95] Remove the meaningful offset feature.

After a closer look, the Redis core developers all believe that this was
too fragile, caused many bugs that we didn't expect and that were very
hard to track. Better to find an alternative solution that is simpler.
---
 src/networking.c  | 23 ++-------------
 src/replication.c | 73 ++++------------------------------------------
 src/server.c      |  4 ---
 src/server.h      |  3 +-
 4 files changed, 10 insertions(+), 93 deletions(-)

diff --git a/src/networking.c b/src/networking.c
index bb682db4c..7a5f1d7b9 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -1037,25 +1037,14 @@ static void freeClientArgv(client *c) {
 /* Close all the slaves connections. This is useful in chained replication
  * when we resync with our own master and want to force all our slaves to
- * resync with us as well.
*/ +void disconnectSlaves(void) { listIter li; listNode *ln; listRewind(server.slaves,&li); while((ln = listNext(&li))) { listNode *ln = listFirst(server.slaves); - if (async) - freeClientAsync((client*)ln->value); - else - freeClient((client*)ln->value); + freeClient((client*)ln->value); } } @@ -1769,7 +1758,6 @@ int processMultibulkBuffer(client *c) { * 2. In the case of master clients, the replication offset is updated. * 3. Propagate commands we got from our master to replicas down the line. */ void commandProcessed(client *c) { - int cmd_is_ping = c->cmd && c->cmd->proc == pingCommand; long long prev_offset = c->reploff; if (c->flags & CLIENT_MASTER && !(c->flags & CLIENT_MULTI)) { /* Update the applied replication offset of our master. */ @@ -1794,16 +1782,11 @@ void commandProcessed(client *c) { * sub-replicas and to the replication backlog. */ if (c->flags & CLIENT_MASTER) { long long applied = c->reploff - prev_offset; - long long prev_master_repl_meaningful_offset = server.master_repl_meaningful_offset; if (applied) { replicationFeedSlavesFromMasterStream(server.slaves, c->pending_querybuf, applied); sdsrange(c->pending_querybuf,applied,-1); } - /* The server.master_repl_meaningful_offset variable represents - * the offset of the replication stream without the pending PINGs. */ - if (cmd_is_ping) - server.master_repl_meaningful_offset = prev_master_repl_meaningful_offset; } } diff --git a/src/replication.c b/src/replication.c index 2b21b02d8..0484ec8a5 100644 --- a/src/replication.c +++ b/src/replication.c @@ -39,7 +39,6 @@ #include #include -long long adjustMeaningfulReplOffset(int *adjusted); void replicationDiscardCachedMaster(void); void replicationResurrectCachedMaster(connection *conn); void replicationSendAck(void); @@ -163,7 +162,6 @@ void feedReplicationBacklog(void *ptr, size_t len) { unsigned char *p = ptr; server.master_repl_offset += len; - server.master_repl_meaningful_offset = server.master_repl_offset; /* This is a circular buffer, so write as much data we can at every * iteration and rewind the "idx" index if we reach the limit. */ @@ -1831,7 +1829,6 @@ void readSyncBulkPayload(connection *conn) { * we are starting a new history. */ memcpy(server.replid,server.master->replid,sizeof(server.replid)); server.master_repl_offset = server.master->reploff; - server.master_repl_meaningful_offset = server.master->reploff; clearReplicationId2(); /* Let's create the replication backlog if needed. Slaves need to @@ -2086,7 +2083,7 @@ int slaveTryPartialResynchronization(connection *conn, int read_reply) { memcpy(server.cached_master->replid,new,sizeof(server.replid)); /* Disconnect all the sub-slaves: they need to be notified. */ - disconnectSlaves(0); + disconnectSlaves(); } } @@ -2359,7 +2356,7 @@ void syncWithMaster(connection *conn) { * as well, if we have any sub-slaves. The master may transfer us an * entirely different data set and we have no way to incrementally feed * our slaves after that. */ - disconnectSlaves(0); /* Force our slaves to resync with us as well. */ + disconnectSlaves(); /* Force our slaves to resync with us as well. */ freeReplicationBacklog(); /* Don't allow our chained slaves to PSYNC. */ /* Fall back to SYNC if needed. Otherwise psync_result == PSYNC_FULLRESYNC @@ -2506,7 +2503,7 @@ void replicationSetMaster(char *ip, int port) { /* Force our slaves to resync with us as well. They may hopefully be able * to partially resync with us, but we can notify the replid change. 
*/ - disconnectSlaves(0); + disconnectSlaves(); cancelReplicationHandshake(); /* Before destroying our master state, create a cached master using * our own parameters, to later PSYNC with the new master. */ @@ -2557,7 +2554,7 @@ void replicationUnsetMaster(void) { * of the replication ID change (see shiftReplicationId() call). However * the slaves will be able to partially resync with us, so it will be * a very fast reconnection. */ - disconnectSlaves(0); + disconnectSlaves(); server.repl_state = REPL_STATE_NONE; /* We need to make sure the new master will start the replication stream @@ -2760,10 +2757,7 @@ void replicationCacheMaster(client *c) { sdsclear(server.master->querybuf); sdsclear(server.master->pending_querybuf); - /* Adjust reploff and read_reploff to the last meaningful offset we - * executed. This is the offset the replica will use for future PSYNC. */ - int offset_adjusted; - server.master->reploff = adjustMeaningfulReplOffset(&offset_adjusted); + server.master->reploff = server.master_repl_offset; server.master->read_reploff = server.master->reploff; if (c->flags & CLIENT_MULTI) discardTransaction(c); listEmpty(c->reply); @@ -2786,53 +2780,6 @@ void replicationCacheMaster(client *c) { * so make sure to adjust the replication state. This function will * also set server.master to NULL. */ replicationHandleMasterDisconnection(); - - /* If we trimmed this replica backlog, we need to disconnect our chained - * replicas (if any), otherwise they may have the PINGs we removed - * from the stream and their offset would no longer match: upon - * disconnection they will also trim the final PINGs and will be able - * to incrementally sync without issues. */ - if (offset_adjusted) disconnectSlaves(1); -} - -/* If the "meaningful" offset, that is the offset without the final PINGs - * in the stream, is different than the last offset, use it instead: - * often when the master is no longer reachable, replicas will never - * receive the PINGs, however the master will end with an incremented - * offset because of the PINGs and will not be able to incrementally - * PSYNC with the new master. - * This function trims the replication backlog when needed, and returns - * the offset to be used for future partial sync. - * - * If the integer 'adjusted' was passed by reference, it is set to 1 - * if the function call actually modified the offset and the replication - * backlog, otherwise it is set to 0. It can be NULL if the caller is - * not interested in getting this info. 
*/ -long long adjustMeaningfulReplOffset(int *adjusted) { - if (server.master_repl_offset > server.master_repl_meaningful_offset) { - long long delta = server.master_repl_offset - - server.master_repl_meaningful_offset; - serverLog(LL_NOTICE, - "Using the meaningful offset %lld instead of %lld to exclude " - "the final PINGs (%lld bytes difference)", - server.master_repl_meaningful_offset, - server.master_repl_offset, - delta); - server.master_repl_offset = server.master_repl_meaningful_offset; - if (server.repl_backlog_histlen <= delta) { - server.repl_backlog_histlen = 0; - server.repl_backlog_idx = 0; - } else { - server.repl_backlog_histlen -= delta; - server.repl_backlog_idx = - (server.repl_backlog_idx + (server.repl_backlog_size - delta)) % - server.repl_backlog_size; - } - if (adjusted) *adjusted = 1; - } else { - if (adjusted) *adjusted = 0; - } - return server.master_repl_offset; } /* This function is called when a master is turend into a slave, in order to @@ -2854,7 +2801,7 @@ void replicationCacheMasterUsingMyself(void) { * by replicationCreateMasterClient(). We'll later set the created * master as server.cached_master, so the replica will use such * offset for PSYNC. */ - server.master_initial_offset = adjustMeaningfulReplOffset(NULL); + server.master_initial_offset = server.master_repl_offset; /* The master client we create can be set to any DBID, because * the new master will start its replication stream with SELECT. */ @@ -3246,18 +3193,10 @@ void replicationCron(void) { clientsArePaused(); if (!manual_failover_in_progress) { - long long before_ping = server.master_repl_meaningful_offset; ping_argv[0] = createStringObject("PING",4); replicationFeedSlaves(server.slaves, server.slaveseldb, ping_argv, 1); decrRefCount(ping_argv[0]); - /* The server.master_repl_meaningful_offset variable represents - * the offset of the replication stream without the pending PINGs. - * This is useful to set the right replication offset for PSYNC - * when the master is turned into a replica. Otherwise pending - * PINGs may not allow it to perform an incremental sync with the - * new master. */ - server.master_repl_meaningful_offset = before_ping; } } diff --git a/src/server.c b/src/server.c index 5bc4666ee..b7a6a928f 100644 --- a/src/server.c +++ b/src/server.c @@ -2394,7 +2394,6 @@ void initServerConfig(void) { server.repl_syncio_timeout = CONFIG_REPL_SYNCIO_TIMEOUT; server.repl_down_since = 0; /* Never connected, repl is down since EVER. */ server.master_repl_offset = 0; - server.master_repl_meaningful_offset = 0; /* Replication partial resync backlog */ server.repl_backlog = NULL; @@ -4471,7 +4470,6 @@ sds genRedisInfoString(const char *section) { "master_replid:%s\r\n" "master_replid2:%s\r\n" "master_repl_offset:%lld\r\n" - "master_repl_meaningful_offset:%lld\r\n" "second_repl_offset:%lld\r\n" "repl_backlog_active:%d\r\n" "repl_backlog_size:%lld\r\n" @@ -4480,7 +4478,6 @@ sds genRedisInfoString(const char *section) { server.replid, server.replid2, server.master_repl_offset, - server.master_repl_meaningful_offset, server.second_replid_offset, server.repl_backlog != NULL, server.repl_backlog_size, @@ -4858,7 +4855,6 @@ void loadDataFromDisk(void) { { memcpy(server.replid,rsi.repl_id,sizeof(server.replid)); server.master_repl_offset = rsi.repl_offset; - server.master_repl_meaningful_offset = rsi.repl_offset; /* If we are a slave, create a cached master from this * information, in order to allow partial resynchronizations * with masters. 
*/ diff --git a/src/server.h b/src/server.h index 2d17d69c8..0c0b4d052 100644 --- a/src/server.h +++ b/src/server.h @@ -1261,7 +1261,6 @@ struct redisServer { char replid[CONFIG_RUN_ID_SIZE+1]; /* My current replication ID. */ char replid2[CONFIG_RUN_ID_SIZE+1]; /* replid inherited from master*/ long long master_repl_offset; /* My current replication offset */ - long long master_repl_meaningful_offset; /* Offset minus latest PINGs. */ long long second_replid_offset; /* Accept offsets up to this for replid2. */ int slaveseldb; /* Last SELECTed DB in replication output */ int repl_ping_slave_period; /* Master pings the slave every N seconds */ @@ -1660,7 +1659,7 @@ int getClientType(client *c); int getClientTypeByName(char *name); char *getClientTypeName(int class); void flushSlavesOutputBuffers(void); -void disconnectSlaves(int async); +void disconnectSlaves(void); int listenToPort(int port, int *fds, int *count); void pauseClients(mstime_t duration); int clientsArePaused(void); From 24a0f7bf55ba9b0f3f28ed0ef54927bddc1a1574 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 27 May 2020 12:47:34 +0200 Subject: [PATCH 37/95] Remove the PSYNC2 meaningful offset test. --- tests/integration/psync2-pingoff.tcl | 112 --------------------------- tests/test_helper.tcl | 1 - 2 files changed, 113 deletions(-) delete mode 100644 tests/integration/psync2-pingoff.tcl diff --git a/tests/integration/psync2-pingoff.tcl b/tests/integration/psync2-pingoff.tcl deleted file mode 100644 index 2c2303141..000000000 --- a/tests/integration/psync2-pingoff.tcl +++ /dev/null @@ -1,112 +0,0 @@ -# Test the meaningful offset implementation to make sure masters -# are able to PSYNC with replicas even if the replication stream -# has pending PINGs at the end. - -start_server {tags {"psync2"}} { -start_server {} { - # Config - set debug_msg 0 ; # Enable additional debug messages - - for {set j 0} {$j < 2} {incr j} { - set R($j) [srv [expr 0-$j] client] - set R_host($j) [srv [expr 0-$j] host] - set R_port($j) [srv [expr 0-$j] port] - $R($j) CONFIG SET repl-ping-replica-period 1 - if {$debug_msg} {puts "Log file: [srv [expr 0-$j] stdout]"} - } - - # Setup replication - test "PSYNC2 meaningful offset: setup" { - $R(1) replicaof $R_host(0) $R_port(0) - $R(0) set foo bar - wait_for_condition 50 1000 { - [status $R(1) master_link_status] == "up" && - [$R(0) dbsize] == 1 && [$R(1) dbsize] == 1 - } else { - fail "Replicas not replicating from master" - } - } - - test "PSYNC2 meaningful offset: write and wait replication" { - $R(0) INCR counter - $R(0) INCR counter - $R(0) INCR counter - wait_for_condition 50 1000 { - [$R(0) GET counter] eq [$R(1) GET counter] - } else { - fail "Master and replica don't agree about counter" - } - } - - # In this test we'll make sure the replica will get stuck, but with - # an active connection: this way the master will continue to send PINGs - # every second (we modified the PING period earlier) - test "PSYNC2 meaningful offset: pause replica and promote it" { - $R(1) MULTI - $R(1) DEBUG SLEEP 5 - $R(1) SLAVEOF NO ONE - $R(1) EXEC - $R(1) ping ; # Wait for it to return back available - } - - test "Make the old master a replica of the new one and check conditions" { - set sync_partial [status $R(1) sync_partial_ok] - assert {$sync_partial == 0} - $R(0) REPLICAOF $R_host(1) $R_port(1) - wait_for_condition 50 1000 { - [status $R(1) sync_partial_ok] == 1 - } else { - fail "The new master was not able to partial sync" - } - } -}} - - -start_server {tags {"psync2"}} { -start_server {} { -start_server {} { 
- - for {set j 0} {$j < 3} {incr j} { - set R($j) [srv [expr 0-$j] client] - set R_host($j) [srv [expr 0-$j] host] - set R_port($j) [srv [expr 0-$j] port] - $R($j) CONFIG SET repl-ping-replica-period 1 - } - - test "Chained replicas disconnect when replica re-connect with the same master" { - # Add a second replica as a chained replica of the current replica - $R(1) replicaof $R_host(0) $R_port(0) - $R(2) replicaof $R_host(1) $R_port(1) - wait_for_condition 50 1000 { - [status $R(2) master_link_status] == "up" - } else { - fail "Chained replica not replicating from its master" - } - - # Do a write on the master, and wait for 3 seconds for the master to - # send some PINGs to its replica - $R(0) INCR counter2 - after 2000 - set sync_partial_master [status $R(0) sync_partial_ok] - set sync_partial_replica [status $R(1) sync_partial_ok] - $R(0) CONFIG SET repl-ping-replica-period 100 - - # Disconnect the master's direct replica - $R(0) client kill type replica - wait_for_condition 50 1000 { - [status $R(1) master_link_status] == "up" && - [status $R(2) master_link_status] == "up" && - [status $R(0) sync_partial_ok] == $sync_partial_master + 1 && - [status $R(1) sync_partial_ok] == $sync_partial_replica + 1 - } else { - fail "Disconnected replica failed to PSYNC with master" - } - - # Verify that the replica and its replica's meaningful and real - # offsets match with the master - assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset] - assert_equal [status $R(0) master_repl_offset] [status $R(2) master_repl_offset] - assert_equal [status $R(0) master_repl_meaningful_offset] [status $R(1) master_repl_meaningful_offset] - assert_equal [status $R(0) master_repl_meaningful_offset] [status $R(2) master_repl_meaningful_offset] - } -}}} diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index de0a64728..05e45b999 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -47,7 +47,6 @@ set ::all_tests { integration/logging integration/psync2 integration/psync2-reg - integration/psync2-pingoff unit/pubsub unit/slowlog unit/scripting From 0163e4e495b332191ffbcbb1a66d35cbcbdaed05 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 27 May 2020 12:50:02 +0200 Subject: [PATCH 38/95] Another meaningful offset test removed. 
--- tests/integration/psync2.tcl | 100 ----------------------------------- 1 file changed, 100 deletions(-) diff --git a/tests/integration/psync2.tcl b/tests/integration/psync2.tcl index 29a880f99..7c50f620e 100644 --- a/tests/integration/psync2.tcl +++ b/tests/integration/psync2.tcl @@ -370,103 +370,3 @@ start_server {} { } }}}}} - -start_server {tags {"psync2"}} { -start_server {} { -start_server {} { -start_server {} { -start_server {} { - test {pings at the end of replication stream are ignored for psync} { - set master [srv -4 client] - set master_host [srv -4 host] - set master_port [srv -4 port] - set replica1 [srv -3 client] - set replica2 [srv -2 client] - set replica3 [srv -1 client] - set replica4 [srv -0 client] - - $replica1 replicaof $master_host $master_port - $replica2 replicaof $master_host $master_port - $replica3 replicaof $master_host $master_port - $replica4 replicaof $master_host $master_port - wait_for_condition 50 1000 { - [status $master connected_slaves] == 4 - } else { - fail "replicas didn't connect" - } - - $master incr x - wait_for_condition 50 1000 { - [$replica1 get x] == 1 && [$replica2 get x] == 1 && - [$replica3 get x] == 1 && [$replica4 get x] == 1 - } else { - fail "replicas didn't get incr" - } - - # disconnect replica1 and replica2 - # and wait for the master to send a ping to replica3 and replica4 - $replica1 replicaof no one - $replica2 replicaof 127.0.0.1 1 ;# we can't promote it to master since that will cycle the replication id - $master config set repl-ping-replica-period 1 - after 1500 - - # make everyone sync from the replica1 that didn't get the last ping from the old master - # replica4 will keep syncing from the old master which now syncs from replica1 - # and replica2 will re-connect to the old master (which went back in time) - set new_master_host [srv -3 host] - set new_master_port [srv -3 port] - $replica3 replicaof $new_master_host $new_master_port - $master replicaof $new_master_host $new_master_port - $replica2 replicaof $master_host $master_port - wait_for_condition 50 1000 { - [status $replica2 master_link_status] == "up" && - [status $replica3 master_link_status] == "up" && - [status $replica4 master_link_status] == "up" && - [status $master master_link_status] == "up" - } else { - fail "replicas didn't connect" - } - - # make sure replication is still alive and kicking - $replica1 incr x - wait_for_condition 50 1000 { - [$replica2 get x] == 2 && - [$replica3 get x] == 2 && - [$replica4 get x] == 2 && - [$master get x] == 2 - } else { - fail "replicas didn't get incr" - } - - # make sure there are full syncs other than the initial ones - assert_equal [status $master sync_full] 4 - assert_equal [status $replica1 sync_full] 0 - assert_equal [status $replica2 sync_full] 0 - assert_equal [status $replica3 sync_full] 0 - assert_equal [status $replica4 sync_full] 0 - - # force psync - $master client kill type master - $replica2 client kill type master - $replica3 client kill type master - $replica4 client kill type master - - # make sure replication is still alive and kicking - $replica1 incr x - wait_for_condition 50 1000 { - [$replica2 get x] == 3 && - [$replica3 get x] == 3 && - [$replica4 get x] == 3 && - [$master get x] == 3 - } else { - fail "replicas didn't get incr" - } - - # make sure there are full syncs other than the initial ones - assert_equal [status $master sync_full] 4 - assert_equal [status $replica1 sync_full] 0 - assert_equal [status $replica2 sync_full] 0 - assert_equal [status $replica3 sync_full] 0 - assert_equal 
[status $replica4 sync_full] 0 -} -}}}}} From 14d99c183f8e4e571a93514e42f28743f9645f85 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 27 May 2020 17:08:51 +0200 Subject: [PATCH 39/95] Drop useless line from replicationCacheMaster(). --- src/replication.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/replication.c b/src/replication.c index 0484ec8a5..110f0e82d 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2756,8 +2756,6 @@ void replicationCacheMaster(client *c) { * pending outputs to the master. */ sdsclear(server.master->querybuf); sdsclear(server.master->pending_querybuf); - - server.master->reploff = server.master_repl_offset; server.master->read_reploff = server.master->reploff; if (c->flags & CLIENT_MULTI) discardTransaction(c); listEmpty(c->reply); From 84117d13b74220c0da485f05fed72f9d9769d712 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 28 May 2020 10:08:16 +0200 Subject: [PATCH 40/95] Replication: showLatestBacklog() refactored out. --- src/networking.c | 26 +------------------------- src/replication.c | 34 ++++++++++++++++++++++++++++++++++ src/server.h | 1 + 3 files changed, 36 insertions(+), 25 deletions(-) diff --git a/src/networking.c b/src/networking.c index 7a5f1d7b9..8d3e057b7 100644 --- a/src/networking.c +++ b/src/networking.c @@ -396,31 +396,7 @@ void addReplyErrorLength(client *c, const char *s, size_t len) { if (ctype == CLIENT_TYPE_MASTER && server.repl_backlog && server.repl_backlog_histlen > 0) { - long long dumplen = 256; - if (server.repl_backlog_histlen < dumplen) - dumplen = server.repl_backlog_histlen; - - /* Identify the first byte to dump. */ - long long idx = - (server.repl_backlog_idx + (server.repl_backlog_size - dumplen)) % - server.repl_backlog_size; - - /* Scan the circular buffer to collect 'dumplen' bytes. */ - sds dump = sdsempty(); - while(dumplen) { - long long thislen = - ((server.repl_backlog_size - idx) < dumplen) ? - (server.repl_backlog_size - idx) : dumplen; - - dump = sdscatrepr(dump,server.repl_backlog+idx,thislen); - dumplen -= thislen; - idx = 0; - } - - /* Finally log such bytes: this is vital debugging info to - * understand what happened. */ - serverLog(LL_WARNING,"Latest backlog is: '%s'", dump); - sdsfree(dump); + showLatestBacklog(); } server.stat_unexpected_error_replies++; } diff --git a/src/replication.c b/src/replication.c index 110f0e82d..063a8705e 100644 --- a/src/replication.c +++ b/src/replication.c @@ -307,6 +307,40 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) { } } +/* This is a debugging function that gets called when we detect something + * wrong with the replication protocol: the goal is to peek into the + * replication backlog and show a few final bytes to make simpler to + * guess what kind of bug it could be. */ +void showLatestBacklog(void) { + if (server.repl_backlog == NULL) return; + + long long dumplen = 256; + if (server.repl_backlog_histlen < dumplen) + dumplen = server.repl_backlog_histlen; + + /* Identify the first byte to dump. */ + long long idx = + (server.repl_backlog_idx + (server.repl_backlog_size - dumplen)) % + server.repl_backlog_size; + + /* Scan the circular buffer to collect 'dumplen' bytes. */ + sds dump = sdsempty(); + while(dumplen) { + long long thislen = + ((server.repl_backlog_size - idx) < dumplen) ? 
+ (server.repl_backlog_size - idx) : dumplen; + + dump = sdscatrepr(dump,server.repl_backlog+idx,thislen); + dumplen -= thislen; + idx = 0; + } + + /* Finally log such bytes: this is vital debugging info to + * understand what happened. */ + serverLog(LL_WARNING,"Latest backlog is: '%s'", dump); + sdsfree(dump); +} + /* This function is used in order to proxy what we receive from our master * to our sub-slaves. */ #include diff --git a/src/server.h b/src/server.h index 0c0b4d052..a08585292 100644 --- a/src/server.h +++ b/src/server.h @@ -1810,6 +1810,7 @@ void clearReplicationId2(void); void chopReplicationBacklog(void); void replicationCacheMasterUsingMyself(void); void feedReplicationBacklog(void *ptr, size_t len); +void showLatestBacklog(void); void rdbPipeReadHandler(struct aeEventLoop *eventLoop, int fd, void *clientData, int mask); void rdbPipeWriteHandlerConnRemoved(struct connection *conn); From 98e6f2cd5b320bbc066ce6336d44cd471b207c04 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Thu, 28 May 2020 08:21:24 +0300 Subject: [PATCH 41/95] revive meaningful offset tests --- tests/integration/psync2-pingoff.tcl | 212 +++++++++++++++++++++++++++ tests/test_helper.tcl | 1 + 2 files changed, 213 insertions(+) create mode 100644 tests/integration/psync2-pingoff.tcl diff --git a/tests/integration/psync2-pingoff.tcl b/tests/integration/psync2-pingoff.tcl new file mode 100644 index 000000000..5d8459a92 --- /dev/null +++ b/tests/integration/psync2-pingoff.tcl @@ -0,0 +1,212 @@ +# Test the meaningful offset implementation to make sure masters +# are able to PSYNC with replicas even if the replication stream +# has pending PINGs at the end. + +start_server {tags {"psync2"}} { +start_server {} { + # Config + set debug_msg 0 ; # Enable additional debug messages + + for {set j 0} {$j < 2} {incr j} { + set R($j) [srv [expr 0-$j] client] + set R_host($j) [srv [expr 0-$j] host] + set R_port($j) [srv [expr 0-$j] port] + $R($j) CONFIG SET repl-ping-replica-period 1 + if {$debug_msg} {puts "Log file: [srv [expr 0-$j] stdout]"} + } + + # Setup replication + test "PSYNC2 meaningful offset: setup" { + $R(1) replicaof $R_host(0) $R_port(0) + $R(0) set foo bar + wait_for_condition 50 1000 { + [status $R(1) master_link_status] == "up" && + [$R(0) dbsize] == 1 && [$R(1) dbsize] == 1 + } else { + fail "Replicas not replicating from master" + } + } + + test "PSYNC2 meaningful offset: write and wait replication" { + $R(0) INCR counter + $R(0) INCR counter + $R(0) INCR counter + wait_for_condition 50 1000 { + [$R(0) GET counter] eq [$R(1) GET counter] + } else { + fail "Master and replica don't agree about counter" + } + } + + # In this test we'll make sure the replica will get stuck, but with + # an active connection: this way the master will continue to send PINGs + # every second (we modified the PING period earlier) + test "PSYNC2 meaningful offset: pause replica and promote it" { + $R(1) MULTI + $R(1) DEBUG SLEEP 5 + $R(1) SLAVEOF NO ONE + $R(1) EXEC + $R(1) ping ; # Wait for it to return back available + } + + test "Make the old master a replica of the new one and check conditions" { + set sync_partial [status $R(1) sync_partial_ok] + assert {$sync_partial == 0} + $R(0) REPLICAOF $R_host(1) $R_port(1) + wait_for_condition 50 1000 { + [status $R(1) sync_partial_ok] == 1 + } else { + fail "The new master was not able to partial sync" + } + } +}} + + +start_server {tags {"psync2"}} { +start_server {} { +start_server {} { +start_server {} { +start_server {} { + test {pings at the end of replication stream are 
ignored for psync} { + set master [srv -4 client] + set master_host [srv -4 host] + set master_port [srv -4 port] + set replica1 [srv -3 client] + set replica2 [srv -2 client] + set replica3 [srv -1 client] + set replica4 [srv -0 client] + + $replica1 replicaof $master_host $master_port + $replica2 replicaof $master_host $master_port + $replica3 replicaof $master_host $master_port + $replica4 replicaof $master_host $master_port + wait_for_condition 50 1000 { + [status $master connected_slaves] == 4 + } else { + fail "replicas didn't connect" + } + + $master incr x + wait_for_condition 50 1000 { + [$replica1 get x] == 1 && [$replica2 get x] == 1 && + [$replica3 get x] == 1 && [$replica4 get x] == 1 + } else { + fail "replicas didn't get incr" + } + + # disconnect replica1 and replica2 + # and wait for the master to send a ping to replica3 and replica4 + $replica1 replicaof no one + $replica2 replicaof 127.0.0.1 1 ;# we can't promote it to master since that will cycle the replication id + $master config set repl-ping-replica-period 1 + after 1500 + + # make everyone sync from the replica1 that didn't get the last ping from the old master + # replica4 will keep syncing from the old master which now syncs from replica1 + # and replica2 will re-connect to the old master (which went back in time) + set new_master_host [srv -3 host] + set new_master_port [srv -3 port] + $replica3 replicaof $new_master_host $new_master_port + $master replicaof $new_master_host $new_master_port + $replica2 replicaof $master_host $master_port + wait_for_condition 50 1000 { + [status $replica2 master_link_status] == "up" && + [status $replica3 master_link_status] == "up" && + [status $replica4 master_link_status] == "up" && + [status $master master_link_status] == "up" + } else { + fail "replicas didn't connect" + } + + # make sure replication is still alive and kicking + $replica1 incr x + wait_for_condition 50 1000 { + [$replica2 get x] == 2 && + [$replica3 get x] == 2 && + [$replica4 get x] == 2 && + [$master get x] == 2 + } else { + fail "replicas didn't get incr" + } + + # make sure there are full syncs other than the initial ones + assert_equal [status $master sync_full] 4 + assert_equal [status $replica1 sync_full] 0 + assert_equal [status $replica2 sync_full] 0 + assert_equal [status $replica3 sync_full] 0 + assert_equal [status $replica4 sync_full] 0 + + # force psync + $master client kill type master + $replica2 client kill type master + $replica3 client kill type master + $replica4 client kill type master + + # make sure replication is still alive and kicking + $replica1 incr x + wait_for_condition 50 1000 { + [$replica2 get x] == 3 && + [$replica3 get x] == 3 && + [$replica4 get x] == 3 && + [$master get x] == 3 + } else { + fail "replicas didn't get incr" + } + + # make sure there are full syncs other than the initial ones + assert_equal [status $master sync_full] 4 + assert_equal [status $replica1 sync_full] 0 + assert_equal [status $replica2 sync_full] 0 + assert_equal [status $replica3 sync_full] 0 + assert_equal [status $replica4 sync_full] 0 +} +}}}}} + +start_server {tags {"psync2"}} { +start_server {} { +start_server {} { + + for {set j 0} {$j < 3} {incr j} { + set R($j) [srv [expr 0-$j] client] + set R_host($j) [srv [expr 0-$j] host] + set R_port($j) [srv [expr 0-$j] port] + $R($j) CONFIG SET repl-ping-replica-period 1 + } + + test "Chained replicas disconnect when replica re-connect with the same master" { + # Add a second replica as a chained replica of the current replica + $R(1) replicaof 
$R_host(0) $R_port(0) + $R(2) replicaof $R_host(1) $R_port(1) + wait_for_condition 50 1000 { + [status $R(2) master_link_status] == "up" + } else { + fail "Chained replica not replicating from its master" + } + + # Do a write on the master, and wait for 3 seconds for the master to + # send some PINGs to its replica + $R(0) INCR counter2 + after 2000 + set sync_partial_master [status $R(0) sync_partial_ok] + set sync_partial_replica [status $R(1) sync_partial_ok] + $R(0) CONFIG SET repl-ping-replica-period 100 + + # Disconnect the master's direct replica + $R(0) client kill type replica + wait_for_condition 50 1000 { + [status $R(1) master_link_status] == "up" && + [status $R(2) master_link_status] == "up" && + [status $R(0) sync_partial_ok] == $sync_partial_master + 1 && + [status $R(1) sync_partial_ok] == $sync_partial_replica + 1 + } else { + fail "Disconnected replica failed to PSYNC with master" + } + + # Verify that the replica and its replica's meaningful and real + # offsets match with the master + assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset] + assert_equal [status $R(0) master_repl_offset] [status $R(2) master_repl_offset] + assert_equal [status $R(0) master_repl_meaningful_offset] [status $R(1) master_repl_meaningful_offset] + assert_equal [status $R(0) master_repl_meaningful_offset] [status $R(2) master_repl_meaningful_offset] + } +}}} diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index 05e45b999..de0a64728 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -47,6 +47,7 @@ set ::all_tests { integration/logging integration/psync2 integration/psync2-reg + integration/psync2-pingoff unit/pubsub unit/slowlog unit/scripting From 01039e59648782958ae42fa9e1f28e620ece01f0 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Thu, 28 May 2020 09:10:51 +0300 Subject: [PATCH 42/95] adjust revived meaningful offset tests these tests create several edge cases that are otherwise uncovered (at least not consistently) by the test suite, so although they're no longer testing what they were meant to test, it's still a good idea to keep them in hope that they'll expose some issue in the future. --- tests/integration/psync2-pingoff.tcl | 59 ++++++++++++++++++---------- 1 file changed, 39 insertions(+), 20 deletions(-) diff --git a/tests/integration/psync2-pingoff.tcl b/tests/integration/psync2-pingoff.tcl index 5d8459a92..5a9a46d16 100644 --- a/tests/integration/psync2-pingoff.tcl +++ b/tests/integration/psync2-pingoff.tcl @@ -1,6 +1,9 @@ -# Test the meaningful offset implementation to make sure masters -# are able to PSYNC with replicas even if the replication stream -# has pending PINGs at the end. +# These tests were added together with the meaningful offset implementation +# in redis 6.0.0, which was later abandoned in 6.0.4, they used to test that +# servers are able to PSYNC with replicas even if the replication stream has +# PINGs at the end which present in one sever and missing on another. +# We keep these tests just because they reproduce edge cases in the replication +# logic in hope they'll be able to spot some problem in the future. 
start_server {tags {"psync2"}} { start_server {} { @@ -16,7 +19,7 @@ start_server {} { } # Setup replication - test "PSYNC2 meaningful offset: setup" { + test "PSYNC2 pingoff: setup" { $R(1) replicaof $R_host(0) $R_port(0) $R(0) set foo bar wait_for_condition 50 1000 { @@ -27,7 +30,7 @@ start_server {} { } } - test "PSYNC2 meaningful offset: write and wait replication" { + test "PSYNC2 pingoff: write and wait replication" { $R(0) INCR counter $R(0) INCR counter $R(0) INCR counter @@ -41,7 +44,7 @@ start_server {} { # In this test we'll make sure the replica will get stuck, but with # an active connection: this way the master will continue to send PINGs # every second (we modified the PING period earlier) - test "PSYNC2 meaningful offset: pause replica and promote it" { + test "PSYNC2 pingoff: pause replica and promote it" { $R(1) MULTI $R(1) DEBUG SLEEP 5 $R(1) SLAVEOF NO ONE @@ -50,14 +53,22 @@ start_server {} { } test "Make the old master a replica of the new one and check conditions" { - set sync_partial [status $R(1) sync_partial_ok] - assert {$sync_partial == 0} + assert_equal [status $R(1) sync_full] 0 $R(0) REPLICAOF $R_host(1) $R_port(1) wait_for_condition 50 1000 { - [status $R(1) sync_partial_ok] == 1 + [status $R(1) sync_full] == 1 } else { - fail "The new master was not able to partial sync" + fail "The new master was not able to sync" } + + # make sure replication is still alive and kicking + $R(1) incr x + wait_for_condition 50 1000 { + [$R(0) get x] == 1 + } else { + fail "replica didn't get incr" + } + assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset] } }} @@ -67,7 +78,7 @@ start_server {} { start_server {} { start_server {} { start_server {} { - test {pings at the end of replication stream are ignored for psync} { + test {test various edge cases of repl topology changes with missing pings at the end} { set master [srv -4 client] set master_host [srv -4 host] set master_port [srv -4 port] @@ -129,9 +140,9 @@ start_server {} { fail "replicas didn't get incr" } - # make sure there are full syncs other than the initial ones - assert_equal [status $master sync_full] 4 - assert_equal [status $replica1 sync_full] 0 + # make sure we have the right amount of full syncs + assert_equal [status $master sync_full] 6 + assert_equal [status $replica1 sync_full] 2 assert_equal [status $replica2 sync_full] 0 assert_equal [status $replica3 sync_full] 0 assert_equal [status $replica4 sync_full] 0 @@ -153,9 +164,9 @@ start_server {} { fail "replicas didn't get incr" } - # make sure there are full syncs other than the initial ones - assert_equal [status $master sync_full] 4 - assert_equal [status $replica1 sync_full] 0 + # make sure we have the right amount of full syncs + assert_equal [status $master sync_full] 6 + assert_equal [status $replica1 sync_full] 2 assert_equal [status $replica2 sync_full] 0 assert_equal [status $replica3 sync_full] 0 assert_equal [status $replica4 sync_full] 0 @@ -197,7 +208,7 @@ start_server {} { [status $R(1) master_link_status] == "up" && [status $R(2) master_link_status] == "up" && [status $R(0) sync_partial_ok] == $sync_partial_master + 1 && - [status $R(1) sync_partial_ok] == $sync_partial_replica + 1 + [status $R(1) sync_partial_ok] == $sync_partial_replica } else { fail "Disconnected replica failed to PSYNC with master" } @@ -206,7 +217,15 @@ start_server {} { # offsets match with the master assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset] assert_equal [status $R(0) master_repl_offset] [status 
$R(2) master_repl_offset] - assert_equal [status $R(0) master_repl_meaningful_offset] [status $R(1) master_repl_meaningful_offset] - assert_equal [status $R(0) master_repl_meaningful_offset] [status $R(2) master_repl_meaningful_offset] + + # make sure replication is still alive and kicking + $R(0) incr counter2 + wait_for_condition 50 1000 { + [$R(1) get counter2] == 2 && [$R(2) get counter2] == 2 + } else { + fail "replicas didn't get incr" + } + assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset] + assert_equal [status $R(0) master_repl_offset] [status $R(2) master_repl_offset] } }}} From 31bd96355729c698c48d2de64965c3e29d271f54 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Wed, 27 May 2020 18:09:09 +0300 Subject: [PATCH 43/95] 32bit CI needs to build modules correctly --- .github/workflows/daily.yml | 4 +++- tests/modules/Makefile | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index c22d49895..acc4dd33a 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -47,7 +47,9 @@ jobs: sudo apt-get install tcl8.5 ./runtest --accurate --verbose - name: module api test - run: ./runtest-moduleapi --verbose + run: | + make -C tests/modules 32bit # the script below doesn't have an argument, we must build manually ahead of time + ./runtest-moduleapi --verbose test-tls: runs-on: ubuntu-latest diff --git a/tests/modules/Makefile b/tests/modules/Makefile index 363231a87..39b8e6efa 100644 --- a/tests/modules/Makefile +++ b/tests/modules/Makefile @@ -28,11 +28,14 @@ TEST_MODULES = \ all: $(TEST_MODULES) +32bit: + $(MAKE) CFLAGS="-m32" LDFLAGS="-melf_i386" + %.xo: %.c ../../src/redismodule.h $(CC) -I../../src $(CFLAGS) $(SHOBJ_CFLAGS) -fPIC -c $< -o $@ %.so: %.xo - $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lc + $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LDFLAGS) $(LIBS) -lc .PHONY: clean From 4653d796f03ecf6160b95b17c5378086160035ee Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Tue, 26 May 2020 11:00:48 +0300 Subject: [PATCH 44/95] tests: each test client work on a distinct port range apparently when running tests in parallel (the default of --clients 16), there's a chance for two tests to use the same port. specifically, one test might shut down a master and still have the replica up, and then another test will re-use the port number of the master for another master, and then that replica will connect to the master of the other test. this can cause a master to count too many full syncs and fail a test if we run the tests with --single integration/psync2 --loop --stop see Problem 2 in #7314 --- tests/instances.tcl | 3 ++- tests/support/server.tcl | 21 ++++++++++----------- tests/support/util.tcl | 8 ++++---- tests/test_helper.tcl | 30 +++++++++++++++++++++--------- tests/unit/other.tcl | 4 ++-- 5 files changed, 39 insertions(+), 27 deletions(-) diff --git a/tests/instances.tcl b/tests/instances.tcl index 0a0cbab12..3a4fadca0 100644 --- a/tests/instances.tcl +++ b/tests/instances.tcl @@ -25,6 +25,7 @@ set ::sentinel_instances {} set ::redis_instances {} set ::sentinel_base_port 20000 set ::redis_base_port 30000 +set ::redis_port_count 1024 set ::pids {} ; # We kill everything at exit set ::dirs {} ; # We remove all the temp dirs at exit set ::run_matching {} ; # If non empty, only tests matching pattern are run. @@ -57,7 +58,7 @@ proc exec_instance {type cfgfile} { # Spawn a redis or sentinel instance, depending on 'type'.
proc spawn_instance {type base_port count {conf {}}} { for {set j 0} {$j < $count} {incr j} { - set port [find_available_port $base_port] + set port [find_available_port $base_port $::redis_port_count] incr base_port puts "Starting $type #$j at port $port" diff --git a/tests/support/server.tcl b/tests/support/server.tcl index d086366dc..146ebc72c 100644 --- a/tests/support/server.tcl +++ b/tests/support/server.tcl @@ -214,14 +214,14 @@ proc start_server {options {code undefined}} { dict set config dir [tmpdir server] # start every server on a different port - set ::port [find_available_port [expr {$::port+1}]] + set port [find_available_port $::baseport $::portcount] if {$::tls} { dict set config "port" 0 - dict set config "tls-port" $::port + dict set config "tls-port" $port dict set config "tls-cluster" "yes" dict set config "tls-replication" "yes" } else { - dict set config port $::port + dict set config port $port } set unixsocket [file normalize [format "%s/%s" [dict get $config "dir"] "socket"]] @@ -243,10 +243,10 @@ proc start_server {options {code undefined}} { set server_started 0 while {$server_started == 0} { if {$::verbose} { - puts -nonewline "=== ($tags) Starting server ${::host}:${::port} " + puts -nonewline "=== ($tags) Starting server ${::host}:${port} " } - send_data_packet $::test_server_fd "server-spawning" "port $::port" + send_data_packet $::test_server_fd "server-spawning" "port $port" if {$::valgrind} { set pid [exec valgrind --track-origins=yes --suppressions=src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full src/redis-server $config_file > $stdout 2> $stderr &] @@ -291,19 +291,19 @@ proc start_server {options {code undefined}} { # for availability. Other test clients may grab the port before we # are able to do it for example. if {$port_busy} { - puts "Port $::port was already busy, trying another port..." - set ::port [find_available_port [expr {$::port+1}]] + puts "Port $port was already busy, trying another port..." + set port [find_available_port $::baseport $::portcount] if {$::tls} { - dict set config "tls-port" $::port + dict set config "tls-port" $port } else { - dict set config port $::port + dict set config port $port } create_server_config_file $config_file $config continue; # Try again } if {$code ne "undefined"} { - set serverisup [server_is_up $::host $::port $retrynum] + set serverisup [server_is_up $::host $port $retrynum] } else { set serverisup 1 } @@ -324,7 +324,6 @@ proc start_server {options {code undefined}} { # setup properties to be able to initialize a client object set port_param [expr $::tls ? {"tls-port"} : {"port"}] set host $::host - set port $::port if {[dict exists $config bind]} { set host [dict get $config bind] } if {[dict exists $config $port_param]} { set port [dict get $config $port_param] } diff --git a/tests/support/util.tcl b/tests/support/util.tcl index 7ecf5b79c..a5ded67a3 100644 --- a/tests/support/util.tcl +++ b/tests/support/util.tcl @@ -344,8 +344,8 @@ proc roundFloat f { format "%.10g" $f } -proc find_available_port start { - for {set j $start} {$j < $start+1024} {incr j} { +proc find_available_port {start count} { + for {set j $start} {$j < $start+$count} {incr j} { if {[catch {set fd1 [socket 127.0.0.1 $j]}] && [catch {set fd2 [socket 127.0.0.1 [expr $j+10000]]}]} { return $j @@ -356,8 +356,8 @@ proc find_available_port start { } } } - if {$j == $start+1024} { - error "Can't find a non busy port in the $start-[expr {$start+1023}] range." 
+ if {$j == $start+$count} { + error "Can't find a non busy port in the $start-[expr {$start+$count-1}] range." } } diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index de0a64728..7fc0e6f00 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -70,7 +70,9 @@ set ::all_tests { set ::next_test 0 set ::host 127.0.0.1 -set ::port 21111 +set ::port 6379; # port for external server +set ::baseport 21111; # initial port for spawned redis servers +set ::portcount 8000; # we don't wanna use more than 10000 to avoid collision with cluster bus ports set ::traceleaks 0 set ::valgrind 0 set ::tls 0 @@ -228,26 +230,26 @@ proc test_server_main {} { set tclsh [info nameofexecutable] # Open a listening socket, trying different ports in order to find a # non busy one. - set port [find_available_port 11111] + set clientport [find_available_port 11111 32] if {!$::quiet} { - puts "Starting test server at port $port" + puts "Starting test server at port $clientport" } - socket -server accept_test_clients -myaddr 127.0.0.1 $port + socket -server accept_test_clients -myaddr 127.0.0.1 $clientport # Start the client instances set ::clients_pids {} if {$::external} { set p [exec $tclsh [info script] {*}$::argv \ - --client $port --port $::port &] + --client $clientport &] lappend ::clients_pids $p } else { - set start_port [expr {$::port+100}] + set start_port $::baseport + set port_count [expr {$::portcount / $::numclients}] for {set j 0} {$j < $::numclients} {incr j} { - set start_port [find_available_port $start_port] set p [exec $tclsh [info script] {*}$::argv \ - --client $port --port $start_port &] + --client $clientport --baseport $start_port --portcount $port_count &] lappend ::clients_pids $p - incr start_port 10 + incr start_port $port_count } } @@ -510,6 +512,10 @@ proc print_help_screen {} { "--loop Execute the specified set of tests forever." "--wait-server Wait after server is started (so that you can attach a debugger)." "--tls Run tests in TLS mode." + "--host Run tests against an external host." + "--port TCP port to use against external host." + "--baseport Initial port number for spawned redis servers." + "--portcount Port range for spawned redis servers." "--help Print this help screen." } "\n"] } @@ -560,6 +566,12 @@ for {set j 0} {$j < [llength $argv]} {incr j} { } elseif {$opt eq {--port}} { set ::port $arg incr j + } elseif {$opt eq {--baseport}} { + set ::baseport $arg + incr j + } elseif {$opt eq {--portcount}} { + set ::portcount $arg + incr j } elseif {$opt eq {--accurate}} { set ::accurate 1 } elseif {$opt eq {--force-failure}} { diff --git a/tests/unit/other.tcl b/tests/unit/other.tcl index 7720c055a..20a32e795 100644 --- a/tests/unit/other.tcl +++ b/tests/unit/other.tcl @@ -167,9 +167,9 @@ start_server {tags {"other"}} { tags {protocol} { test {PIPELINING stresser (also a regression for the old epoll bug)} { if {$::tls} { - set fd2 [::tls::socket $::host $::port] + set fd2 [::tls::socket [srv host] [srv port]] } else { - set fd2 [socket $::host $::port] + set fd2 [socket [srv host] [srv port]] } fconfigure $fd2 -encoding binary -translation binary puts -nonewline $fd2 "SELECT 9\r\n" From 571b03021a341752aaf65177b02b0a11d4912ad6 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Wed, 27 May 2020 16:12:30 +0300 Subject: [PATCH 45/95] tests: find_available_port start search from next port i.e. don't start the search from scratch hitting the used ones again. 
this will also reduce the likelihood of collisions (if there are any left) by increasing the time until we re-use a port we did use in the past. --- tests/support/util.tcl | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/tests/support/util.tcl b/tests/support/util.tcl index a5ded67a3..8bec95374 100644 --- a/tests/support/util.tcl +++ b/tests/support/util.tcl @@ -344,21 +344,26 @@ proc roundFloat f { format "%.10g" $f } +set ::last_port_attempted 0 proc find_available_port {start count} { - for {set j $start} {$j < $start+$count} {incr j} { - if {[catch {set fd1 [socket 127.0.0.1 $j]}] && - [catch {set fd2 [socket 127.0.0.1 [expr $j+10000]]}]} { - return $j + set port [expr $::last_port_attempted + 1] + for {set attempts 0} {$attempts < $count} {incr attempts} { + if {$port < $start || $port >= $start+$count} { + set port $start + } + if {[catch {set fd1 [socket 127.0.0.1 $port]}] && + [catch {set fd2 [socket 127.0.0.1 [expr $port+10000]]}]} { + set ::last_port_attempted $port + return $port } else { catch { close $fd1 close $fd2 } } + incr port } - if {$j == $start+$count} { - error "Can't find a non busy port in the $start-[expr {$start+$count-1}] range." - } + error "Can't find a non busy port in the $start-[expr {$start+$count-1}] range." } # Test if TERM looks like to support colors From 40578433c777226c229e8994d693ea941293f8e2 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 28 May 2020 10:22:22 +0200 Subject: [PATCH 46/95] Test: add the tracking unit as default. --- tests/test_helper.tcl | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index 7fc0e6f00..fba54acb5 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -65,6 +65,7 @@ set ::all_tests { unit/wait unit/pendingquerybuf unit/tls + unit/tracking } # Index to the next test to run in the ::all_tests list. set ::next_test 0 From 41bb699867dffa075d5baf6848278a3aa0c0ddb2 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 28 May 2020 10:47:27 +0200 Subject: [PATCH 47/95] Test: take PSYNC2 test master timeout high during switch. This will likely avoid false positives due to trailing pings. --- tests/integration/psync2.tcl | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/psync2.tcl b/tests/integration/psync2.tcl index 7c50f620e..3f636463a 100644 --- a/tests/integration/psync2.tcl +++ b/tests/integration/psync2.tcl @@ -242,7 +242,6 @@ start_server {} { show_cluster_status fail "Replicas and master offsets were unable to match *exactly*." } - $R($master_id) config set repl-ping-replica-period 10 # Limit anyway the maximum number of cycles. This is useful when the # test is skipped via --only option of the test suite. In that case From c512f64428c09f673f928146bab9b3c161dbdcce Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 28 May 2020 11:57:52 +0200 Subject: [PATCH 48/95] Redis 6.0.4. --- 00-RELEASENOTES | 277 ++++++++++++++++++++++++++++++++++++++++++++++++ src/version.h | 2 +- 2 files changed, 278 insertions(+), 1 deletion(-) diff --git a/00-RELEASENOTES b/00-RELEASENOTES index 7c5a75412..7ce88c556 100644 --- a/00-RELEASENOTES +++ b/00-RELEASENOTES @@ -11,6 +11,283 @@ CRITICAL: There is a critical bug affecting MOST USERS. Upgrade ASAP. SECURITY: There are security fixes in the release. 
-------------------------------------------------------------------------------- +================================================================================ +Redis 6.0.4 Released Thu May 28 11:36:45 CEST 2020 +================================================================================ + +Upgrade urgency CRITICAL: this release fixes a severe replication bug. + +Redis 6.0.4 fixes a critical replication bug caused by a new feature introduced +in Redis 6. The feature, called "meaningful offset" and strongly wanted by +myself (antirez) was an improvement that avoided that masters were no longer +able, during a failover where they were demoted to replicas, to partially +synchronize with the new master. In short the feature was able to avoid full +synchronizations with RDB. How did it work? By trimming the replication backlog +of the final "PING" commands the master was sending in the replication channel: +this way the replication offset would no longer go "after" the one of the +promoted replica, allowing the master to just continue in the same replication +history, receiving only a small data difference. + +However after the introduction of the feature we (the Redis core team) quickly +understood there was something wrong: the apparently harmless feature had +many bugs, and the last bug we discovered, after a joined effort of multiple +people, we were not even able to fully understand after fixing it. Enough was +enough, we decided that the complexity cost of this feature was too high. +So Redis 6.0.4 removes the feature entirely, and fixes the data corruption that +it was able to cause. + +However there are two facts to take in mind. + +Fact 1: Setups using chained replication, that means that certain replicas +are replicating from other replicas, up to Redis 6.0.3 can experience data +corruption. For chained replication we mean that: + + +--------+ +---------+ +-------------+ + | master |--------->| replica |-------->| sub-replica | + +--------+ +---------+ +-------------+ + + +People using chained replication SHOULD UPGRADE ASAP away from Redis 6.0.0, +6.0.1, 6.0.2 or 6.0.3 to Redis 6.0.4. + +To be clear, people NOT using this setup, but having just replicas attached +directly to the master, SHOULD NOT BE in danger of any problem. But we +are no longer confident on 6.0.x replication implementation complexities +so we suggest to upgrade to 6.0.4 to everybody using an older 6.0.3 release. +We just so far didn't find any bug that affects Redis 6.0.3 that does not +involve chained replication. + +People starting with Redis 6.0.4 are fine. People with Redis 5 are fine. +People upgrading from Redis 5 to Redis 6.0.4 are fine. +TLDR: The problem is with users of 6.0.0, 6.0.1, 6.0.2, 6.0.3. + +Fact 2: Upgrading from Redis 6.0.x to Redis 6.0.4, IF AND ONLY IF you +use chained replication, requires some extra care: + +1. Once you attach your new Redis 6.0.4 instance as a replica of the current + Redis 6.0.x master, you should wait for the first full synchronization, + then you should promote it right away, if your setup involves chained + replication. Don't give it the time to do a new partial synchronization + in the case the link between the master and the replica will break in + the mean time. + +2. As an additional care, you may want to set the replication ping period + to a very large value (for instance 1000000) using the following command: + + CONFIG SET repl-ping-replica-period 1000000 + + Note that if you do "1" with care, "2" is not needed.
+ However if you do it, make sure to later restore it to its default: + + CONFIG SET repl-ping-replica-period 10 + +So this is the main change in Redis 6. Later we'll find a different way in +order to achieve what we wanted to achieve with the Meaningful Offset feature, +but without the same complexity. + +Other changes in this release: + +* PSYNC2 tests improved. +* Fix a rare active defrag edge case bug leading to stagnation +* Fix Redis 6 asserting at startup in 32 bit systems. +* Redis 6 32 bit is now added back to our testing environments. +* Fix server crash for STRALGO command, +* Implement sendfile for RDB transfer. +* TLS fixes. +* Make replication more resistant by disconnecting the master if we + detect a protocol error. Basically we no longer accept inline protocol + from the master. +* Other improvements in the tests. + +Regards, +antirez + +This is the full list of commits: + +antirez in commit 59cd4c9f6: + Test: take PSYNC2 test master timeout high during switch. + 1 file changed, 1 deletion(-) + +antirez in commit 6c1bb7b19: + Test: add the tracking unit as default. + 1 file changed, 1 insertion(+) + +Oran Agra in commit 1aee695e5: + tests: find_available_port start search from next port + 1 file changed, 12 insertions(+), 7 deletions(-) + +Oran Agra in commit a2ae46352: + tests: each test client work on a distinct port range + 5 files changed, 39 insertions(+), 27 deletions(-) + +Oran Agra in commit 86e562d69: + 32bit CI needs to build modules correctly + 2 files changed, 7 insertions(+), 2 deletions(-) + +Oran Agra in commit ab2984b1e: + adjust revived meaningful offset tests + 1 file changed, 39 insertions(+), 20 deletions(-) + +Oran Agra in commit 1ff5a222d: + revive meaningful offset tests + 2 files changed, 213 insertions(+) + +antirez in commit cc549b46a: + Replication: showLatestBacklog() refactored out. + 3 files changed, 36 insertions(+), 25 deletions(-) + +antirez in commit 377dd0515: + Drop useless line from replicationCacheMaster(). + 1 file changed, 2 deletions(-) + +antirez in commit 3f8d113f1: + Another meaningful offset test removed. + 1 file changed, 100 deletions(-) + +antirez in commit d4541349d: + Remove the PSYNC2 meaningful offset test. + 2 files changed, 113 deletions(-) + +antirez in commit 2112a5702: + Remove the meaningful offset feature. + 4 files changed, 10 insertions(+), 93 deletions(-) + +antirez in commit d2eb6e0b4: + Set a protocol error if master use the inline protocol. + 1 file changed, 17 insertions(+), 2 deletions(-) + +Oran Agra in commit 9c1df3b76: + daily CI test with tls + 1 file changed, 15 insertions(+) + +Oran Agra in commit 115ed1911: + avoid using sendfile if tls-replication is enabled + 1 file changed, 34 insertions(+), 27 deletions(-) + +antirez in commit 11c748aac: + Replication: log backlog creation event. + 1 file changed, 3 insertions(+) + +antirez in commit 8f1013722: + Test: PSYNC2 test can now show server logs. + 1 file changed, 88 insertions(+), 25 deletions(-) + +antirez in commit 2e591fc4a: + Clarify what is happening in PR #7320. + 1 file changed, 5 insertions(+), 1 deletion(-) + +zhaozhao.zz in commit cbb51fb8f: + PSYNC2: second_replid_offset should be real meaningful offset + 1 file changed, 3 insertions(+), 3 deletions(-) + +Oran Agra in commit e0fc88b4d: + add CI for 32bit build + 2 files changed, 34 insertions(+) + +antirez in commit e3f864b5f: + Make disconnectSlaves() synchronous in the base case. 
+ 3 files changed, 20 insertions(+), 9 deletions(-) + +ShooterIT in commit 8af1e513f: + Implements sendfile for redis. + 2 files changed, 55 insertions(+), 2 deletions(-) + +antirez in commit 3c21418cd: + Fix #7306 less aggressively. + 2 files changed, 29 insertions(+), 17 deletions(-) + +Madelyn Olson in commit e201f83ce: + EAGAIN for tls during diskless load + 1 file changed, 4 insertions(+) + +Qu Chen in commit 58fc456cb: + Disconnect chained replicas when the replica performs PSYNC with the master always to avoid replication offset mismatch between master and chained replicas. + 2 files changed, 60 insertions(+), 3 deletions(-) + +hwware in commit 3febc5c29: + using moreargs variable + 1 file changed, 2 insertions(+), 2 deletions(-) + +hwware in commit 8d6738559: + fix server crash for STRALGO command + 1 file changed, 2 insertions(+), 2 deletions(-) + +ShooterIT in commit 7a35eec54: + Replace addDeferredMultiBulkLength with addReplyDeferredLen in comment + 1 file changed, 2 insertions(+), 2 deletions(-) + +Yossi Gottlieb in commit f93e1417b: + TLS: Improve tls-protocols clarity in redis.conf. + 1 file changed, 3 insertions(+), 2 deletions(-) + +ShooterIT in commit d0c9e4454: + Fix reply bytes calculation error + 1 file changed, 1 insertion(+), 1 deletion(-) + +zhaozhao.zz in commit 1cde6a060: + Tracking: flag CLIENT_TRACKING_BROKEN_REDIR when redir broken + 1 file changed, 1 insertion(+) + +Oran Agra in commit 436be3498: + fix a rare active defrag edge case bug leading to stagnation + 4 files changed, 146 insertions(+), 23 deletions(-) + +Oran Agra in commit f9d2ffdc5: + improve DEBUG MALLCTL to be able to write to write only fields. + 1 file changed, 27 insertions(+), 7 deletions(-) + +hujie in commit d7968ee92: + fix clear USER_FLAG_ALLCOMMANDS flag in acl + 1 file changed, 5 insertions(+), 4 deletions(-) + +ShooterIT in commit a902e6b25: + Redis Benchmark: generate random test data + 1 file changed, 12 insertions(+), 1 deletion(-) + +hwware in commit 9564ed7c3: + Redis-Benchmark: avoid potentical memmory leaking + 1 file changed, 1 insertion(+), 1 deletion(-) + +WuYunlong in commit 2e4182743: + Handle keys with hash tag when computing hash slot using tcl cluster client. + 1 file changed, 23 insertions(+), 2 deletions(-) + +WuYunlong in commit eb2c8b2c6: + Add a test to prove current tcl cluster client can not handle keys with hash tag. + 1 file changed, 7 insertions(+), 1 deletion(-) + +ShooterIT in commit 928e6976b: + Use dictSize to get the size of dict in dict.c + 1 file changed, 2 insertions(+), 2 deletions(-) + +Madelyn Olson in commit cdcf5af5a: + Converge hash validation for adding and removing + 1 file changed, 21 insertions(+), 14 deletions(-) + +Benjamin Sergeant in commit e8b09d220: + do not handle --cluster-yes for cluster fix mode + 1 file changed, 16 insertions(+), 7 deletions(-) + +Benjamin Sergeant in commit 57b4fb0d8: + fix typo ... + 1 file changed, 1 insertion(+), 1 deletion(-) + +Benjamin Sergeant in commit 29f25e411: + Redis-cli 6.0.1 `--cluster-yes` doesn't work (fix #7246) + 1 file changed, 5 insertions(+), 1 deletion(-) + +Oran Agra in commit 00d8b92b8: + fix valgrind test failure in replication test + 1 file changed, 1 insertion(+), 1 deletion(-) + +Oran Agra in commit 5e17e6276: + add regression test for the race in #7205 + 1 file changed, 52 insertions(+) + +antirez in commit 96e7c011e: + Improve the PSYNC2 test reliability. 
+ 1 file changed, 33 insertions(+), 15 deletions(-) + ================================================================================ Redis 6.0.3 Released Sat May 16 18:10:21 CEST 2020 ================================================================================ diff --git a/src/version.h b/src/version.h index 8ae7b0cb3..8efc3c5aa 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define REDIS_VERSION "6.0.3" +#define REDIS_VERSION "6.0.4" From 71fbeb65b62b7799b1d39c39e56af8891a19b568 Mon Sep 17 00:00:00 2001 From: John Sully Date: Thu, 28 May 2020 23:13:15 -0400 Subject: [PATCH 49/95] fix TLS test failure Former-commit-id: 57ca6facc3038e005656912b6378cb7fbe04f55a --- src/server.cpp | 2 ++ src/tls.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/server.cpp b/src/server.cpp index cfd739d24..4e84db6ba 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -2197,7 +2197,9 @@ void beforeSleep(struct aeEventLoop *eventLoop) { handleBlockedClientsTimeout(); /* Handle TLS pending data. (must be done before flushAppendOnlyFile) */ + aeReleaseLock(); tlsProcessPendingData(); + aeAcquireLock(); /* If tls still has pending unread data don't sleep at all. */ aeSetDontWait(eventLoop, tlsHasPendingData()); diff --git a/src/tls.cpp b/src/tls.cpp index d297695cc..79037c71b 100644 --- a/src/tls.cpp +++ b/src/tls.cpp @@ -458,7 +458,6 @@ void updateSSLEvent(tls_connection *conn) { void tlsHandleEvent(tls_connection *conn, int mask) { int ret; - serverAssert(!GlobalLocksAcquired()); serverAssert(conn->el == serverTL->el); TLSCONN_DEBUG("tlsEventHandler(): fd=%d, state=%d, mask=%d, r=%d, w=%d, flags=%d", @@ -910,6 +909,7 @@ int tlsHasPendingData() { int tlsProcessPendingData() { listIter li; listNode *ln; + serverAssert(!GlobalLocksAcquired()); int processed = listLength(pending_list); listRewind(pending_list,&li); From f4f9e83ead3e55b91b6fca46d1254adf58e1971b Mon Sep 17 00:00:00 2001 From: John Sully Date: Thu, 28 May 2020 23:15:18 -0400 Subject: [PATCH 50/95] Fix TLS tests in CI Former-commit-id: f95578ac0268b3f29139fc715bba0b27b92767c3 --- .github/workflows/ci.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 44d429f78..8d5e78c3b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,10 +11,13 @@ jobs: run: | sudo apt-get -y install uuid-dev libcurl4-openssl-dev make -j2 - - name: test + - name: test-tls run: | - sudo apt-get -y install tcl8.5 + sudo apt-get -y install tcl8.5 tcl-tls + ./utils/gen-test-certs.sh ./runtest --clients 2 --verbose --tls + - name: test + ./runtest --clients 2 --verbose - name: cluster-test run: | ./runtest-cluster From 8bb3ac40f121ed5e67282935965b4a06db5ff4cd Mon Sep 17 00:00:00 2001 From: John Sully Date: Thu, 28 May 2020 23:18:02 -0400 Subject: [PATCH 51/95] CI yml syntax error Former-commit-id: 3196717ceee0bde8a5e30b25dd99bd7f3776c6ab --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8d5e78c3b..03947c2a2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,6 +17,7 @@ jobs: ./utils/gen-test-certs.sh ./runtest --clients 2 --verbose --tls - name: test + run: | ./runtest --clients 2 --verbose - name: cluster-test run: | From 34d3bac791171e8ee33e8206fef23dee93957bde Mon Sep 17 00:00:00 2001 From: John Sully Date: Thu, 28 May 2020 23:49:13 -0400 Subject: [PATCH 52/95] gencert its own step Former-commit-id: 
6dcbcacb104814e6a710a4ffe101668a573ebd4f --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 03947c2a2..32fe8c771 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,10 +11,11 @@ jobs: run: | sudo apt-get -y install uuid-dev libcurl4-openssl-dev make -j2 + - name: gen-cert + run: ./utils/gen-test-certs.sh - name: test-tls run: | sudo apt-get -y install tcl8.5 tcl-tls - ./utils/gen-test-certs.sh ./runtest --clients 2 --verbose --tls - name: test run: | From a768c260dc0b9b08fbe2ffb14f200f8e517ae196 Mon Sep 17 00:00:00 2001 From: John Sully Date: Fri, 29 May 2020 00:16:06 -0400 Subject: [PATCH 53/95] Fix TLS tests Former-commit-id: f0d8d1680f594cc72ccd863eb74b1071368f3052 --- .github/workflows/ci.yml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 32fe8c771..2854b28cc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,22 +7,19 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 - - name: make BUILD_TLS=yes + - name: make run: | sudo apt-get -y install uuid-dev libcurl4-openssl-dev - make -j2 + make BUILD_TLS=yes -j2 - name: gen-cert run: ./utils/gen-test-certs.sh - name: test-tls run: | sudo apt-get -y install tcl8.5 tcl-tls ./runtest --clients 2 --verbose --tls - - name: test - run: | - ./runtest --clients 2 --verbose - name: cluster-test run: | - ./runtest-cluster + ./runtest-cluster --tls - name: sentinel test run: | ./runtest-sentinel From f79cc0011ae5d64013be88a5714f038de3cad4c5 Mon Sep 17 00:00:00 2001 From: John Sully Date: Fri, 29 May 2020 01:01:41 -0400 Subject: [PATCH 54/95] Fix CI Former-commit-id: 6a902b29e16bebdb2da1d0a33ea5a170fc9949a0 --- .github/workflows/ci.yml | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d00735988..ee2235e2a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,7 +52,7 @@ jobs: - name: make -j2 run: | sudo apt-get -y install uuid-dev libcurl4-openssl-dev - make + make -j2 build-macos-latest: runs-on: macos-latest @@ -61,19 +61,12 @@ jobs: - name: make run: make -j2 - biuld-32bit: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v1 - - name: make - run: | - sudo apt-get update && sudo apt-get install libc6-dev-i386 - make 32bit - build-libc-malloc: runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 - name: make - run: make MALLOC=libc + run: | + sudo apt-get -y install uuid-dev libcurl4-openssl-dev + make MALLOC=libc -j2 From 688dceb3a898178f01d9289ff79f77c1a360157c Mon Sep 17 00:00:00 2001 From: John Sully Date: Fri, 29 May 2020 01:44:52 -0400 Subject: [PATCH 55/95] Fix test issue with TLS Former-commit-id: 81b240f81d1c52fd331c4e0e89659913380229c4 --- tests/integration/aof.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/aof.tcl b/tests/integration/aof.tcl index 192c7a84b..d1222b0e4 100644 --- a/tests/integration/aof.tcl +++ b/tests/integration/aof.tcl @@ -315,7 +315,7 @@ tags {"aof"} { } test "AOF+PEXPIREMEMBERAT: set should have 3 values" { - set client [redis [dict get $srv host] [dict get $srv port]] + set client [redis [dict get $srv host] [dict get $srv port] 0 $::tls] wait_for_condition 50 100 { [catch {$client ping} e] == 0 } else { From 2e0c684324a6e8f26e246ab639357562a381af4f Mon Sep 17 00:00:00 2001 From: John Sully Date: Fri, 
29 May 2020 01:58:15 -0400 Subject: [PATCH 56/95] active replica tests on slow computers Former-commit-id: c9920849dd6d6d0f6ecfe0d1002cb0edd7f7bfa9 --- tests/integration/replication-active.tcl | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/replication-active.tcl b/tests/integration/replication-active.tcl index b1d1c5217..0fca9a829 100644 --- a/tests/integration/replication-active.tcl +++ b/tests/integration/replication-active.tcl @@ -36,6 +36,7 @@ start_server {tags {"active-repl"} overrides {active-replica yes}} { test {Active replicas propogate} { $master set testkey foo + after 500 wait_for_condition 50 500 { [string match *foo* [$slave get testkey]] } else { From f18ba6624240f49d186242fcb9f07e26bf3cc689 Mon Sep 17 00:00:00 2001 From: John Sully Date: Fri, 29 May 2020 03:40:52 -0400 Subject: [PATCH 57/95] Cluster crash Former-commit-id: f25c405ad2a8004b79a816072cb011c1dc2407d5 --- src/cluster.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cluster.cpp b/src/cluster.cpp index 6d6f2be2e..1ebaed262 100644 --- a/src/cluster.cpp +++ b/src/cluster.cpp @@ -2235,6 +2235,8 @@ void clusterWriteHandler(connection *conn) { void clusterLinkConnectHandler(connection *conn) { clusterLink *link = (clusterLink*)connGetPrivateData(conn); clusterNode *node = link->node; + if (node == nullptr) + return; // we're about to be freed /* Check if connection succeeded */ if (connGetState(conn) != CONN_STATE_CONNECTED) { From 68bf5a04a0db494ef1114813bd4c163f4e501f47 Mon Sep 17 00:00:00 2001 From: John Sully Date: Fri, 29 May 2020 17:44:55 -0400 Subject: [PATCH 58/95] Unify beforeSleep handling Former-commit-id: 1cb48c7bf6a7e91e728a677902a7bfc64fe80dd6 --- src/aof.cpp | 1 + src/networking.cpp | 37 +++++++++++++++---------------------- src/server.cpp | 16 +++++++++------- src/server.h | 1 + tests/test_helper.tcl | 6 +++++- 5 files changed, 31 insertions(+), 30 deletions(-) diff --git a/src/aof.cpp b/src/aof.cpp index e320f6cfd..9b33caf39 100644 --- a/src/aof.cpp +++ b/src/aof.cpp @@ -751,6 +751,7 @@ struct client *createAOFClient(void) { c->bufpos = 0; c->flags = 0; c->fPendingAsyncWrite = FALSE; + c->fPendingAsyncWriteHandler = FALSE; c->btype = BLOCKED_NONE; /* We set the fake client as a replica waiting for the synchronization * so that Redis will not try to send replies to this client. */ diff --git a/src/networking.cpp b/src/networking.cpp index 8211c8d88..ac248d41f 100644 --- a/src/networking.cpp +++ b/src/networking.cpp @@ -135,6 +135,7 @@ client *createClient(connection *conn, int iel) { c->sentlenAsync = 0; c->flags = 0; c->fPendingAsyncWrite = FALSE; + c->fPendingAsyncWriteHandler = FALSE; c->ctime = c->lastinteraction = g_pserver->unixtime; /* If the default user does not require authentication, the user is * directly authenticated. */ @@ -1850,29 +1851,21 @@ void ProcessPendingAsyncWrites() std::atomic_thread_fence(std::memory_order_seq_cst); - if (c->casyncOpsPending == 0) + if (FCorrectThread(c)) { - if (FCorrectThread(c)) - { - prepareClientToWrite(c, false); // queue an event - } - else - { - // We need to start the write on the client's thread - if (aePostFunction(g_pserver->rgthreadvar[c->iel].el, [c]{ - // Install a write handler. 
Don't do the actual write here since we don't want - // to duplicate the throttling and safety mechanisms of the normal write code - std::lock_guardlock)> lock(c->lock); - serverAssert(c->casyncOpsPending > 0); - c->casyncOpsPending--; - connSetWriteHandler(c->conn, sendReplyToClient, true); - }, false) == AE_ERR - ) - { - // Posting the function failed - continue; // We can retry later in the cron - } - ++c->casyncOpsPending; // race is handled by the client lock in the lambda + prepareClientToWrite(c, false); // queue an event + } + else + { + if (!c->fPendingAsyncWriteHandler) { + c->fPendingAsyncWriteHandler = true; + bool fResult = c->postFunction([](client *c) { + c->fPendingAsyncWriteHandler = false; + connSetWriteHandler(c->conn, sendReplyToClient, true); + }); + + if (!fResult) + c->fPendingAsyncWriteHandler = false; // if we failed to set the handler then prevent this from never being reset } } } diff --git a/src/server.cpp b/src/server.cpp index 8b2b03b43..92bff8cd7 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -1907,8 +1907,6 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { { processUnblockedClients(IDX_EVENT_LOOP_MAIN); } - - ProcessPendingAsyncWrites(); // This is really a bug, but for now catch any laggards that didn't clean up /* Software watchdog: deliver the SIGALRM that will reach the signal * handler if we don't return here fast enough. */ @@ -2143,6 +2141,9 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { &ei); + /* CRON functions may trigger async writes, so do this last */ + ProcessPendingAsyncWrites(); + g_pserver->cronloops++; return 1000/g_pserver->hz; } @@ -2192,6 +2193,7 @@ extern int ProcessingEventsWhileBlocked; * call some other low-risk functions. */ void beforeSleep(struct aeEventLoop *eventLoop) { UNUSED(eventLoop); + int iel = ielFromEventLoop(eventLoop); /* Handle precise timeouts of blocked clients. */ handleBlockedClientsTimeout(); @@ -2225,9 +2227,9 @@ void beforeSleep(struct aeEventLoop *eventLoop) { if (moduleCount()) moduleHandleBlockedClients(ielFromEventLoop(eventLoop)); /* Try to process pending commands for clients that were just unblocked. */ - if (listLength(g_pserver->rgthreadvar[IDX_EVENT_LOOP_MAIN].unblocked_clients)) + if (listLength(g_pserver->rgthreadvar[iel].unblocked_clients)) { - processUnblockedClients(IDX_EVENT_LOOP_MAIN); + processUnblockedClients(iel); } /* Send all the slaves an ACK request if at least one client blocked @@ -2258,11 +2260,11 @@ void beforeSleep(struct aeEventLoop *eventLoop) { /* Handle writes with pending output buffers. */ int aof_state = g_pserver->aof_state; aeReleaseLock(); - handleClientsWithPendingWrites(IDX_EVENT_LOOP_MAIN, aof_state); + handleClientsWithPendingWrites(iel, aof_state); aeAcquireLock(); /* Close clients that need to be closed asynchronous */ - freeClientsInAsyncFreeQueue(IDX_EVENT_LOOP_MAIN); + freeClientsInAsyncFreeQueue(iel); /* Before we are going to sleep, let the threads access the dataset by * releasing the GIL. Redis main thread will not touch anything at this @@ -2958,7 +2960,7 @@ static void initServerThread(struct redisServerThreadVars *pvar, int fMain) pvar->tlsfd_count = 0; pvar->cclients = 0; pvar->el = aeCreateEventLoop(g_pserver->maxclients+CONFIG_FDSET_INCR); - aeSetBeforeSleepProc(pvar->el, fMain ? beforeSleep : beforeSleepLite, fMain ? 
0 : AE_SLEEP_THREADSAFE); + aeSetBeforeSleepProc(pvar->el, beforeSleep, 0); aeSetAfterSleepProc(pvar->el, afterSleep, AE_SLEEP_THREADSAFE); pvar->current_client = nullptr; pvar->clients_paused = 0; diff --git a/src/server.h b/src/server.h index 29bc3ffdd..712ffacb4 100644 --- a/src/server.h +++ b/src/server.h @@ -1268,6 +1268,7 @@ typedef struct client { std::atomic flags; /* Client flags: CLIENT_* macros. */ int casyncOpsPending; int fPendingAsyncWrite; /* NOTE: Not a flag because it is written to outside of the client lock (locked by the global lock instead) */ + int fPendingAsyncWriteHandler; int authenticated; /* Needed when the default user requires auth. */ int replstate; /* Replication state if this is a replica. */ int repl_put_online_on_ack; /* Install replica write handler on ACK. */ diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index 724149d94..85b89f711 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -428,6 +428,7 @@ proc signal_idle_client fd { lappend ::active_clients $fd incr ::next_test if {$::loop && $::next_test == [llength $::all_tests]} { + incr ::loop -1 set ::next_test 0 } } elseif {[llength $::run_solo_tests] != 0 && [llength $::active_clients] == 0} { @@ -612,7 +613,10 @@ for {set j 0} {$j < [llength $argv]} {incr j} { } elseif {$opt eq {--stop}} { set ::stop_on_failure 1 } elseif {$opt eq {--loop}} { - set ::loop 1 + set ::loop 1000 + } elseif {$opt eq {--loopn}} { + set ::loop [expr $arg - 1] + incr j } elseif {$opt eq {--timeout}} { set ::timeout $arg incr j From 5316f656d912849d5f7cda6b8600129f0638030b Mon Sep 17 00:00:00 2001 From: John Sully Date: Fri, 29 May 2020 20:27:45 -0400 Subject: [PATCH 59/95] sendFile blocks too long for use with active replication Former-commit-id: aad6a7ce159a3679633020dc407a2068129bbd49 --- src/replication.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/replication.cpp b/src/replication.cpp index f4196eb07..58b080d53 100644 --- a/src/replication.cpp +++ b/src/replication.cpp @@ -1386,13 +1386,15 @@ void sendBulkToSlave(connection *conn) { * fallback to normal read+write otherwise. */ nwritten = 0; #if HAVE_SENDFILE - if (!g_pserver->tls_replication) { + if (!g_pserver->tls_replication && !g_pserver->fActiveReplica) { // sendfile blocks too long for active replication if ((nwritten = redis_sendfile(conn->fd,replica->repldbfd, replica->repldboff,PROTO_IOBUF_LEN)) == -1) { if (errno != EAGAIN) { serverLog(LL_WARNING,"Sendfile error sending DB to replica: %s", strerror(errno)); + ul.unlock(); + aeLock.arm(nullptr); freeClient(replica); } return; @@ -1408,6 +1410,8 @@ void sendBulkToSlave(connection *conn) { if (buflen <= 0) { serverLog(LL_WARNING,"Read error sending DB to replica: %s", (buflen == 0) ? 
"premature EOF" : strerror(errno)); + ul.unlock(); + aeLock.arm(nullptr); freeClient(replica); return; } @@ -1415,6 +1419,8 @@ void sendBulkToSlave(connection *conn) { if (connGetState(conn) != CONN_STATE_CONNECTED) { serverLog(LL_WARNING,"Write error sending DB to replica: %s", connGetLastError(conn)); + ul.unlock(); + aeLock.arm(nullptr); freeClient(replica); } return; From be6f1df429bae32b614685068c1737f7b78343e5 Mon Sep 17 00:00:00 2001 From: Ben Schermel Date: Fri, 29 May 2020 22:23:48 -0400 Subject: [PATCH 60/95] centos build fix Former-commit-id: f17c492a633ea2be99567dfe2f8a59c0b0136d86 --- src/tls.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tls.cpp b/src/tls.cpp index 79037c71b..0f59fb3bd 100644 --- a/src/tls.cpp +++ b/src/tls.cpp @@ -129,7 +129,7 @@ static void initCryptoLocks(void) { return; } nlocks = CRYPTO_num_locks(); - openssl_locks = zmalloc(sizeof(*openssl_locks) * nlocks); + openssl_locks = (pthread_mutex_t*)zmalloc(sizeof(*openssl_locks) * nlocks); for (i = 0; i < nlocks; i++) { pthread_mutex_init(openssl_locks + i, NULL); } From bbff81e8917cf9edf70185b51aa4f08a77c17b81 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 30 May 2020 00:06:15 -0400 Subject: [PATCH 61/95] Fix memory test failures with multithreading enabled Former-commit-id: 58035404227a9ef1c3bd92623a333c915d50eab6 --- tests/unit/maxmemory.tcl | 6 +++++- tests/unit/memefficiency.tcl | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/unit/maxmemory.tcl b/tests/unit/maxmemory.tcl index 0f64ddc18..3dd706173 100644 --- a/tests/unit/maxmemory.tcl +++ b/tests/unit/maxmemory.tcl @@ -144,7 +144,11 @@ start_server {tags {"maxmemory"}} { } proc test_slave_buffers {test_name cmd_count payload_len limit_memory pipeline} { - start_server {tags {"maxmemory"}} { + # This is a single thread only test because there is a race in getMaxMemoryState + # between zmalloc_used_memory and getting the client outbut buffer memory usage. + # The cure would be worse than the disease, we do not want lock every replica + # simultaneously just to get more accurate memory usage. + start_server {tags {"maxmemory"} overrides { server-threads 1 }} { start_server {} { set slave_pid [s process_id] test "$test_name" { diff --git a/tests/unit/memefficiency.tcl b/tests/unit/memefficiency.tcl index 390bad192..0654898ee 100644 --- a/tests/unit/memefficiency.tcl +++ b/tests/unit/memefficiency.tcl @@ -37,7 +37,7 @@ start_server {tags {"memefficiency"}} { } run_solo {defrag} { -start_server {tags {"defrag"}} { +start_server {tags {"defrag"} overrides {server-threads 1} } { if {[string match {*jemalloc*} [s mem_allocator]]} { test "Active defrag" { r config set save "" ;# prevent bgsave from interfereing with save below @@ -351,7 +351,7 @@ start_server {tags {"defrag"}} { # if the current slab is lower in utilization the defragger would have ended up in stagnation, # keept running and not move any allocation. 
# this test is more consistent on a fresh server with no history - start_server {tags {"defrag"}} { + start_server {tags {"defrag"} overrides {server-threads 1}} { r flushdb r config resetstat r config set save "" ;# prevent bgsave from interfereing with save below From f37ee5bd4db3dc8951278c3e2842679da9fe4af4 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 30 May 2020 01:14:53 -0400 Subject: [PATCH 62/95] TLS requires we explicitly marshal connected sockets when using them on a different thread Former-commit-id: 341a8caef959883ca6fc71f81f8a3fed747b2341 --- src/cluster.cpp | 1 + src/tls.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/cluster.cpp b/src/cluster.cpp index 1ebaed262..15c030162 100644 --- a/src/cluster.cpp +++ b/src/cluster.cpp @@ -5272,6 +5272,7 @@ try_again: zfree(ov); zfree(kv); return; /* error sent to the client by migrateGetSocket() */ } + connMarshalThread(cs->conn); rioInitWithBuffer(&cmd,sdsempty()); diff --git a/src/tls.cpp b/src/tls.cpp index 0f59fb3bd..4284a27bf 100644 --- a/src/tls.cpp +++ b/src/tls.cpp @@ -349,6 +349,7 @@ connection *connCreateTLS(void) { void connTLSMarshalThread(connection *c) { tls_connection *conn = (tls_connection*)c; + serverAssert(conn->pending_list_node == nullptr); conn->el = serverTL->el; } From 70008842391ba239c85271777e08ea20bee0c9de Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 30 May 2020 01:21:50 -0400 Subject: [PATCH 63/95] Update daily CI job Former-commit-id: 2c659acc7dae6263e2766ed8e6492144d5848071 --- .github/workflows/daily.yml | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index 8b3d81dfc..e238d3054 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -38,30 +38,14 @@ jobs: - name: module api test run: ./runtest-moduleapi --verbose - test-32bit: + test: runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 - name: make run: | - sudo apt-get update && sudo apt-get install libc6-dev-i386 - make 32bit - - name: test - run: | - sudo apt-get install tcl8.5 - ./runtest --accurate --verbose - - name: module api test - run: | - make -C tests/modules 32bit # the script below doesn't have an argument, we must build manually ahead of time - ./runtest-moduleapi --verbose - - test-tls: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v1 - - name: make - run: | - make BUILD_TLS=yes + sudo apt-get -y install uuid-dev libcurl4-openssl-dev + make -j2 - name: test run: | sudo apt-get install tcl8.5 tcl-tls From 76b5b55da97e1fe3968915eb1fa1bb86759254a6 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 30 May 2020 01:47:26 -0400 Subject: [PATCH 64/95] Add endurance CI tests Former-commit-id: 3063a028b31ca87173e873e9842f9fbe71d05ba7 --- .github/workflows/endurance.yml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 .github/workflows/endurance.yml diff --git a/.github/workflows/endurance.yml b/.github/workflows/endurance.yml new file mode 100644 index 000000000..361c46cb8 --- /dev/null +++ b/.github/workflows/endurance.yml @@ -0,0 +1,22 @@ +name: Endurance + +on: + schedule: + - cron: '*/60 * * * *' + +jobs: + + test: + runs-on: quadcore + timeout-minutes: 60 + steps: + - uses: actions/checkout@v1 + - name: make + run: | + sudo apt-get -y install uuid-dev libcurl4-openssl-dev + make -j8 + - name: 'test 20x' + run: | + sudo apt-get install tcl8.5 + ./runtest --loopn 20 + From d3b89c69ad6e56dd7e9097cbf487d06e721f878a Mon Sep 17 00:00:00 2001 From: John 
Sully Date: Sat, 30 May 2020 01:53:21 -0400 Subject: [PATCH 65/95] Fix cron syntax Former-commit-id: ca755e490500578b60165e48ab2c5d734994f1d1 --- .github/workflows/endurance.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/endurance.yml b/.github/workflows/endurance.yml index 361c46cb8..f02f81827 100644 --- a/.github/workflows/endurance.yml +++ b/.github/workflows/endurance.yml @@ -2,7 +2,7 @@ name: Endurance on: schedule: - - cron: '*/60 * * * *' + - cron: '0 * * * *' jobs: From aedbb558ab248c1d1a8c79de8c333b69b8fb47ed Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 30 May 2020 02:07:30 -0400 Subject: [PATCH 66/95] Reduce clients for endurance test Former-commit-id: 031044c7ea503b2d05d9851da80448480986a8d1 --- .github/workflows/endurance.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/endurance.yml b/.github/workflows/endurance.yml index f02f81827..b688c4f1b 100644 --- a/.github/workflows/endurance.yml +++ b/.github/workflows/endurance.yml @@ -6,7 +6,7 @@ on: jobs: - test: + test-endurance: runs-on: quadcore timeout-minutes: 60 steps: @@ -18,5 +18,5 @@ jobs: - name: 'test 20x' run: | sudo apt-get install tcl8.5 - ./runtest --loopn 20 + ./runtest --loopn 20 --clients 10 From cc49720fd5d967d50f130ee8978faa84cf108b2d Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 30 May 2020 02:10:00 -0400 Subject: [PATCH 67/95] Also test with multiple threads Former-commit-id: a10a0df09f103f74953ae127b64c2e891f73191e --- .github/workflows/endurance.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/endurance.yml b/.github/workflows/endurance.yml index b688c4f1b..a76724ce0 100644 --- a/.github/workflows/endurance.yml +++ b/.github/workflows/endurance.yml @@ -15,8 +15,11 @@ jobs: run: | sudo apt-get -y install uuid-dev libcurl4-openssl-dev make -j8 - - name: 'test 20x' + - name: test-multithread run: | sudo apt-get install tcl8.5 + ./runtest --loopn 3 --config server-threads 3 --clients 2 + - name: 'test 20x' + run: | ./runtest --loopn 20 --clients 10 From e48e8511ecf78014fa3ad6cdf36abec352745be5 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 30 May 2020 02:34:37 -0400 Subject: [PATCH 68/95] Only CI ARM daily Former-commit-id: a5325061f21b3d93e151fa86dce7447a6f5e03cb --- .github/workflows/ci.yml | 17 ----------------- .github/workflows/daily.yml | 16 ++++++++++++++++ deps/depot_tools | 1 - 3 files changed, 16 insertions(+), 18 deletions(-) delete mode 160000 deps/depot_tools diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ee2235e2a..cd8196756 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,24 +27,7 @@ jobs: - name: module tests run: | ./runtest-moduleapi - - test-ubuntu-arm: - runs-on: [self-hosted, linux, arm] - steps: - - uses: actions/checkout@v1 - - name: make - run: | - sudo apt-get -y install uuid-dev libcurl4-openssl-dev - make -j4 - - name: test - run: | - sudo apt-get -y install tcl8.5 - ./runtest --clients 2 --verbose - - name: module tests - run: | - ./runtest-moduleapi - build-ubuntu-old: runs-on: ubuntu-16.04 steps: diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index e238d3054..9151c1e3a 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -54,6 +54,22 @@ jobs: - name: module api test run: ./runtest-moduleapi --verbose --tls + test-ubuntu-arm: + runs-on: [self-hosted, linux, arm] + steps: + - uses: actions/checkout@v1 + - name: make + run: | + sudo apt-get -y 
install uuid-dev libcurl4-openssl-dev + make -j4 + - name: test + run: | + sudo apt-get -y install tcl8.5 + ./runtest --clients 2 --verbose + - name: module tests + run: | + ./runtest-moduleapi + test-valgrind: runs-on: ubuntu-latest timeout-minutes: 14400 diff --git a/deps/depot_tools b/deps/depot_tools deleted file mode 160000 index aaf566999..000000000 --- a/deps/depot_tools +++ /dev/null @@ -1 +0,0 @@ -Subproject commit aaf566999558aa8ead38811228cd539a6e6e2fda From 73db61ea86e3aa0af548c496e8294cd74f1d49b7 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 30 May 2020 02:40:44 -0400 Subject: [PATCH 69/95] Endurance tests need to be faster, reduce loops Former-commit-id: 1803b1587b4ca28457263eec849b66b6ab9de630 --- .github/workflows/endurance.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/endurance.yml b/.github/workflows/endurance.yml index a76724ce0..39f03e13e 100644 --- a/.github/workflows/endurance.yml +++ b/.github/workflows/endurance.yml @@ -19,7 +19,7 @@ jobs: run: | sudo apt-get install tcl8.5 ./runtest --loopn 3 --config server-threads 3 --clients 2 - - name: 'test 20x' + - name: 'test 10x' run: | - ./runtest --loopn 20 --clients 10 + ./runtest --loopn 10 --clients 10 From 8fec593defba10aee9af6989ea70aed33758e1cc Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 30 May 2020 03:13:04 -0400 Subject: [PATCH 70/95] More clients for endurance Former-commit-id: 8f44f0937b8ea2d43ad71861a949dfe8d187f15d --- .github/workflows/endurance.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/endurance.yml b/.github/workflows/endurance.yml index 39f03e13e..78a8302de 100644 --- a/.github/workflows/endurance.yml +++ b/.github/workflows/endurance.yml @@ -18,7 +18,7 @@ jobs: - name: test-multithread run: | sudo apt-get install tcl8.5 - ./runtest --loopn 3 --config server-threads 3 --clients 2 + ./runtest --loopn 3 --config server-threads 3 --clients 4 - name: 'test 10x' run: | ./runtest --loopn 10 --clients 10 From fcf183e4b5c836f34758d3c48848584d59df9b3f Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 30 May 2020 03:27:37 -0400 Subject: [PATCH 71/95] daily CI config fix Former-commit-id: 460c5ed2b336cf7beba55704e12d8e1eb32baea2 --- .github/workflows/daily.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index 9151c1e3a..3c10236b4 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -45,13 +45,15 @@ jobs: - name: make run: | sudo apt-get -y install uuid-dev libcurl4-openssl-dev - make -j2 - - name: test + make BUILD_TLS=yes -j2 + - name: "test (tls)" run: | sudo apt-get install tcl8.5 tcl-tls ./utils/gen-test-certs.sh ./runtest --accurate --verbose --tls - - name: module api test + - name: test + run: ./runtest --accurate --verbose + - name: module api test (tls) run: ./runtest-moduleapi --verbose --tls test-ubuntu-arm: From baadab90e7b5d5bca5f6ab58f8bf32196569d020 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 30 May 2020 15:13:28 -0400 Subject: [PATCH 72/95] We only want one master connection in progress at a time, ensure that if the connection fails we try a different master Former-commit-id: 8a0441c14475dc54616337270e092068acaa274a --- src/replication.cpp | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/replication.cpp b/src/replication.cpp index 58b080d53..103529eaf 100644 --- a/src/replication.cpp +++ b/src/replication.cpp @@ -3747,6 +3747,18 
@@ void replicationCron(void) { listIter liMaster; listNode *lnMaster; listRewind(g_pserver->masters, &liMaster); + + bool fInMasterConnection = false; + while ((lnMaster = listNext(&liMaster)) && !fInMasterConnection) + { + redisMaster *mi = (redisMaster*)listNodeValue(lnMaster); + if (mi->repl_state != REPL_STATE_NONE && mi->repl_state != REPL_STATE_CONNECTED && mi->repl_state != REPL_STATE_CONNECT) { + fInMasterConnection = true; + } + } + + bool fConnectionStarted = false; + listRewind(g_pserver->masters, &liMaster); while ((lnMaster = listNext(&liMaster))) { redisMaster *mi = (redisMaster*)listNodeValue(lnMaster); @@ -3785,12 +3797,14 @@ void replicationCron(void) { } /* Check if we should connect to a MASTER */ - if (mi->repl_state == REPL_STATE_CONNECT) { + if (mi->repl_state == REPL_STATE_CONNECT && !fInMasterConnection) { serverLog(LL_NOTICE,"Connecting to MASTER %s:%d", mi->masterhost, mi->masterport); if (connectWithMaster(mi) == C_OK) { serverLog(LL_NOTICE,"MASTER <-> REPLICA sync started"); } + fInMasterConnection = true; + fConnectionStarted = true; } /* Send ACK to master from time to time. @@ -3801,6 +3815,11 @@ void replicationCron(void) { replicationSendAck(mi); } + if (fConnectionStarted) { + // If we cancel this handshake we want the next attempt to be a different master + listRotateHeadToTail(g_pserver->masters); + } + /* If we have attached slaves, PING them from time to time. * So slaves can implement an explicit timeout to masters, and will * be able to detect a link disconnection even if the TCP connection From 003dfea7f903899f0447c6801eaffff7ae526fb2 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 30 May 2020 15:31:54 -0400 Subject: [PATCH 73/95] Ensure endurance run can complete in an hour Former-commit-id: 63c6735ffcc96a855e1d106f38c70a7c9018ed20 --- .github/workflows/endurance.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/endurance.yml b/.github/workflows/endurance.yml index 78a8302de..71a67d2ee 100644 --- a/.github/workflows/endurance.yml +++ b/.github/workflows/endurance.yml @@ -15,11 +15,8 @@ jobs: run: | sudo apt-get -y install uuid-dev libcurl4-openssl-dev make -j8 - - name: test-multithread + - name: test-multithread (5X) run: | sudo apt-get install tcl8.5 - ./runtest --loopn 3 --config server-threads 3 --clients 4 - - name: 'test 10x' - run: | - ./runtest --loopn 10 --clients 10 + ./runtest --loopn 5 --config server-threads 3 --clients 4 From d344d9b0b92c6120787e837462f2b0d91bb8ff94 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 30 May 2020 18:22:27 -0400 Subject: [PATCH 74/95] Auto tune lock for high CPU tension scenarios Former-commit-id: 8edbae2e04538f82a146a6c2b459a6dfcacf99b2 --- src/fastlock.cpp | 27 ++++++++++++++++++++++++++- src/fastlock.h | 1 + src/fastlock_x64.asm | 11 +++++++++-- src/redis-benchmark.cpp | 2 ++ src/redis-cli.c | 2 ++ src/server.cpp | 5 +++++ 6 files changed, 45 insertions(+), 3 deletions(-) diff --git a/src/fastlock.cpp b/src/fastlock.cpp index 28092828f..d8fea0b6c 100644 --- a/src/fastlock.cpp +++ b/src/fastlock.cpp @@ -40,6 +40,7 @@ #include #ifdef __linux__ #include +#include #endif #include #include @@ -69,6 +70,8 @@ __attribute__((weak)) void logStackTrace(ucontext_t *) {} #endif extern int g_fInCrash; +extern int g_fTestMode; +int g_fHighCpuPressure = false; /**************************************************** * @@ -332,6 +335,7 @@ extern "C" void fastlock_lock(struct fastlock *lock) unsigned mask = (1U << (myticket % 32)); unsigned cloops = 0; ticket ticketT; 
+ unsigned loopLimit = g_fHighCpuPressure ? 0x10000 : 0x100000; for (;;) { @@ -344,7 +348,7 @@ extern "C" void fastlock_lock(struct fastlock *lock) #elif defined(__aarch64__) __asm__ __volatile__ ("yield"); #endif - if ((++cloops % 0x100000) == 0) + if ((++cloops % loopLimit) == 0) { fastlock_sleep(lock, tid, ticketT.u, mask); } @@ -461,3 +465,24 @@ void fastlock_lock_recursive(struct fastlock *lock, int nesting) fastlock_lock(lock); lock->m_depth = nesting; } + +void fastlock_auto_adjust_waits() +{ +#ifdef __linux__ + struct sysinfo sysinf; + auto fHighPressurePrev = g_fHighCpuPressure; + memset(&sysinf, 0, sizeof sysinf); + if (!sysinfo(&sysinf)) { + auto avgCoreLoad = sysinf.loads[0] / get_nprocs(); + g_fHighCpuPressure = (avgCoreLoad > ((1 << SI_LOAD_SHIFT) * 0.9)); + if (g_fHighCpuPressure) + serverLog(!fHighPressurePrev ? 3 /*LL_WARNING*/ : 1 /* LL_VERBOSE */, "NOTICE: Detuning locks due to high load per core: %.2f%%", avgCoreLoad / (double)(1 << SI_LOAD_SHIFT)*100.0); + } + + if (!g_fHighCpuPressure && fHighPressurePrev) { + serverLog(3 /*LL_WARNING*/, "NOTICE: CPU pressure reduced"); + } +#else + g_fHighCpuPressure = g_fTestMode; +#endif +} \ No newline at end of file diff --git a/src/fastlock.h b/src/fastlock.h index cdf8fe454..e4ab1874f 100644 --- a/src/fastlock.h +++ b/src/fastlock.h @@ -15,6 +15,7 @@ void fastlock_unlock(struct fastlock *lock); void fastlock_free(struct fastlock *lock); int fastlock_unlock_recursive(struct fastlock *lock); void fastlock_lock_recursive(struct fastlock *lock, int nesting); +void fastlock_auto_adjust_waits(); uint64_t fastlock_getlongwaitcount(); // this is a global value diff --git a/src/fastlock_x64.asm b/src/fastlock_x64.asm index bcea0e095..35a11f2ba 100644 --- a/src/fastlock_x64.asm +++ b/src/fastlock_x64.asm @@ -3,6 +3,7 @@ .extern gettid .extern fastlock_sleep +.extern g_fHighCpuPressure # This is the first use of assembly in this codebase, a valid question is WHY? # The spinlock we implement here is performance critical, and simply put GCC @@ -33,6 +34,13 @@ fastlock_lock: cmp [rdi], esi # Is the TID we got back the owner of the lock? je .LLocked # Don't spin in that case + mov r9d, 0x1000 # 1000h is set so we overflow on the 1024*1024'th iteration (like the C code) + mov eax, [rip+g_fHighCpuPressure] + test eax, eax + jz .LNoTestMode + mov r9d, 0x10000 +.LNoTestMode: + xor eax, eax # eliminate partial register dependency inc eax # we want to add one lock xadd [rdi+66], ax # do the xadd, ax contains the value before the addition @@ -45,8 +53,7 @@ fastlock_lock: cmp dx, ax # is our ticket up? je .LLocked # leave the loop pause - add ecx, 0x1000 # Have we been waiting a long time? (oflow if we have) - # 1000h is set so we overflow on the 1024*1024'th iteration (like the C code) + add ecx, r9d # Have we been waiting a long time? (oflow if we have) jnc .LLoop # If so, give up our timeslice to someone who's doing real work # Like the compiler, you're probably thinking: "Hey! 
I should take these pushs out of the loop" # But the compiler doesn't know that we rarely hit this, and when we do we know the lock is diff --git a/src/redis-benchmark.cpp b/src/redis-benchmark.cpp index 5ee6b8dba..e3935cae6 100644 --- a/src/redis-benchmark.cpp +++ b/src/redis-benchmark.cpp @@ -66,6 +66,8 @@ struct benchmarkThread; struct clusterNode; struct redisConfig; +int g_fTestMode = false; + static struct config { aeEventLoop *el; const char *hostip; diff --git a/src/redis-cli.c b/src/redis-cli.c index 142608a3a..d818fa4e9 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -69,6 +69,8 @@ redisContext *context; struct config config; +int g_fTestMode = 0; + /* User preferences. */ static struct pref { int hints; diff --git a/src/server.cpp b/src/server.cpp index 92bff8cd7..3cd5a8cee 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -2110,6 +2110,11 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { migrateCloseTimedoutSockets(); } + /* Tune the fastlock to CPU load */ + run_with_period(30000) { + fastlock_auto_adjust_waits(); + } + /* Resize tracking keys table if needed. This is also done at every * command execution, but we want to be sure that if the last command * executed changes the value via CONFIG SET, the server will perform From b58db1f7b92e240f9e689e01f018255f61b33d15 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 30 May 2020 20:03:04 -0400 Subject: [PATCH 75/95] Update endurance.yml Former-commit-id: 4a9742e696a4ebed2338d806e4e1dd758f9c5453 --- .github/workflows/endurance.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/endurance.yml b/.github/workflows/endurance.yml index 71a67d2ee..2be46b194 100644 --- a/.github/workflows/endurance.yml +++ b/.github/workflows/endurance.yml @@ -18,5 +18,5 @@ jobs: - name: test-multithread (5X) run: | sudo apt-get install tcl8.5 - ./runtest --loopn 5 --config server-threads 3 --clients 4 + ./runtest --loopn 5 --config server-threads 3 --clients 3 From 412df47945ee0a169ad3118450aac58271e5fa09 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 30 May 2020 22:16:56 -0400 Subject: [PATCH 76/95] Endurance tests need more cores Former-commit-id: dcd297eb5bad0be0bf9f5b34c6fb9ec7486cbe65 --- .github/workflows/endurance.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/endurance.yml b/.github/workflows/endurance.yml index 2be46b194..326d1203b 100644 --- a/.github/workflows/endurance.yml +++ b/.github/workflows/endurance.yml @@ -7,7 +7,7 @@ on: jobs: test-endurance: - runs-on: quadcore + runs-on: octocore timeout-minutes: 60 steps: - uses: actions/checkout@v1 From e00b2a63863222a74b89474d12e698f75ec21e63 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 31 May 2020 03:43:06 -0400 Subject: [PATCH 77/95] Update endurance.yml Former-commit-id: 3e6a5ac9cfb108c9d1da4b97b0aaffb9ed92ce88 --- .github/workflows/endurance.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/endurance.yml b/.github/workflows/endurance.yml index 326d1203b..0b03abc5d 100644 --- a/.github/workflows/endurance.yml +++ b/.github/workflows/endurance.yml @@ -17,6 +17,6 @@ jobs: make -j8 - name: test-multithread (5X) run: | - sudo apt-get install tcl8.5 + sudo apt-get install -y tcl8.5 ./runtest --loopn 5 --config server-threads 3 --clients 3 From 4a110615f3af1d2f198f56349791f4da11f0cafd Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 31 May 2020 14:18:12 -0400 Subject: [PATCH 78/95] Update endurance.yml Former-commit-id: 
37033179feb9d92d6124b08e662353959eb4f19e --- .github/workflows/endurance.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/endurance.yml b/.github/workflows/endurance.yml index 0b03abc5d..de9b6083c 100644 --- a/.github/workflows/endurance.yml +++ b/.github/workflows/endurance.yml @@ -18,5 +18,5 @@ jobs: - name: test-multithread (5X) run: | sudo apt-get install -y tcl8.5 - ./runtest --loopn 5 --config server-threads 3 --clients 3 + ./runtest --loopn 5 --config server-threads 3 --clients 5 From 891977d9b7346840e2e6a6bf7dc4b34e13f7c206 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 31 May 2020 16:02:57 -0400 Subject: [PATCH 79/95] Fix race in futex_sleep Former-commit-id: 73300c57005a49bafbfc44db9c40ba7d1d4eedce --- src/fastlock.cpp | 16 ++++++++++++---- src/fastlock_x64.asm | 2 +- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/fastlock.cpp b/src/fastlock.cpp index d8fea0b6c..60eb653bf 100644 --- a/src/fastlock.cpp +++ b/src/fastlock.cpp @@ -293,12 +293,21 @@ uint64_t fastlock_getlongwaitcount() return rval; } -extern "C" void fastlock_sleep(fastlock *lock, pid_t pid, unsigned wake, unsigned mask) +extern "C" void fastlock_sleep(fastlock *lock, pid_t pid, unsigned wake, unsigned myticket) { #ifdef __linux__ g_dlock.registerwait(lock, pid); + unsigned mask = (1U << (myticket % 32)); __atomic_fetch_or(&lock->futex, mask, __ATOMIC_ACQUIRE); - futex(&lock->m_ticket.u, FUTEX_WAIT_BITSET_PRIVATE, wake, nullptr, mask); + + // double check the lock wasn't release between the last check and us setting the futex mask + uint32_t u; + __atomic_load(&lock->m_ticket.u, &u, __ATOMIC_ACQUIRE); + if ((u & 0xffff) != myticket) + { + futex(&lock->m_ticket.u, FUTEX_WAIT_BITSET_PRIVATE, wake, nullptr, mask); + } + __atomic_fetch_and(&lock->futex, ~mask, __ATOMIC_RELEASE); g_dlock.clearwait(lock, pid); #endif @@ -332,7 +341,6 @@ extern "C" void fastlock_lock(struct fastlock *lock) int tid = gettid(); unsigned myticket = __atomic_fetch_add(&lock->m_ticket.m_avail, 1, __ATOMIC_RELEASE); - unsigned mask = (1U << (myticket % 32)); unsigned cloops = 0; ticket ticketT; unsigned loopLimit = g_fHighCpuPressure ? 
0x10000 : 0x100000; @@ -350,7 +358,7 @@ extern "C" void fastlock_lock(struct fastlock *lock) #endif if ((++cloops % loopLimit) == 0) { - fastlock_sleep(lock, tid, ticketT.u, mask); + fastlock_sleep(lock, tid, ticketT.u, myticket); } } diff --git a/src/fastlock_x64.asm b/src/fastlock_x64.asm index 35a11f2ba..26c302434 100644 --- a/src/fastlock_x64.asm +++ b/src/fastlock_x64.asm @@ -69,7 +69,7 @@ fastlock_lock: # rdi ARG1 futex (already in rdi) # rsi ARG2 tid (already in esi) # rdx ARG3 ticketT.u (already in edx) - bts ecx, eax # rcx ARG4 mask + mov ecx, eax # rcx ARG4 myticket call fastlock_sleep # cleanup and continue pop rax From 7eb05632411836f24f5b57af5a9fefc324117932 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 31 May 2020 16:03:23 -0400 Subject: [PATCH 80/95] Active replicas referencing eachother should connect one at a time Former-commit-id: c0c033a0c175eebdf2173e6e4e59e792d2fe4285 --- src/replication.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/replication.cpp b/src/replication.cpp index 103529eaf..2f8a74837 100644 --- a/src/replication.cpp +++ b/src/replication.cpp @@ -1121,6 +1121,25 @@ void processReplconfUuid(client *c, robj *arg) if (uuid_parse(remoteUUID, c->uuid) != 0) goto LError; + listIter liMi; + listNode *lnMi; + listRewind(g_pserver->masters, &liMi); + + // Enforce a fair ordering for connection, if they attempt to connect before us close them out + // This must be consistent so that both make the same decision of who should proceed first + while ((lnMi = listNext(&liMi))) { + redisMaster *mi = (redisMaster*)listNodeValue(lnMi); + if (mi->repl_state == REPL_STATE_CONNECTED) + continue; + if (FSameUuidNoNil(mi->master_uuid, c->uuid)) { + // Decide based on UUID so both clients make the same decision of which host loses + // otherwise we may entere a loop where neither client can proceed + if (memcmp(mi->master_uuid, c->uuid, UUID_BINARY_LEN) < 0) { + freeClientAsync(c); + } + } + } + char szServerUUID[36 + 2]; // 1 for the '+', another for '\0' szServerUUID[0] = '+'; uuid_unparse(cserver.uuid, szServerUUID+1); From 8919c770fe67c161fb9329d9f716e34c7c6b650d Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 31 May 2020 17:50:02 -0400 Subject: [PATCH 81/95] Update endurance.yml Former-commit-id: 88f7739ae00d5eefb34bce741bb78f2b9aca84b6 --- .github/workflows/endurance.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/endurance.yml b/.github/workflows/endurance.yml index de9b6083c..5d88aa968 100644 --- a/.github/workflows/endurance.yml +++ b/.github/workflows/endurance.yml @@ -18,5 +18,5 @@ jobs: - name: test-multithread (5X) run: | sudo apt-get install -y tcl8.5 - ./runtest --loopn 5 --config server-threads 3 --clients 5 + ./runtest --loopn 3 --config server-threads 3 --clients 5 From 9ab9a7684dab96a47056c3bacbd957ddf49dab88 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 31 May 2020 21:26:21 -0400 Subject: [PATCH 82/95] PUBSUB test reliability: A client race was erroneously failing tests (test only issue) Former-commit-id: 5147f0153ad1efb827a1709c10cd0f58e6ae65d8 --- tests/unit/pubsub.tcl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/unit/pubsub.tcl b/tests/unit/pubsub.tcl index 9c7a43bf0..6c991ac97 100644 --- a/tests/unit/pubsub.tcl +++ b/tests/unit/pubsub.tcl @@ -107,6 +107,8 @@ start_server {tags {"pubsub"}} { set rd1 [redis_deferring_client] assert_equal {1 2 3} [subscribe $rd1 {chan1 chan2 chan3}] unsubscribe $rd1 + # Wait for a response to the unsub + 
__consume_subscribe_messages $rd1 unsubscribe {chan1 chan2 chan3} assert_equal 0 [r publish chan1 hello] assert_equal 0 [r publish chan2 hello] assert_equal 0 [r publish chan3 hello] @@ -180,6 +182,8 @@ start_server {tags {"pubsub"}} { set rd1 [redis_deferring_client] assert_equal {1 2 3} [psubscribe $rd1 {chan1.* chan2.* chan3.*}] punsubscribe $rd1 + # Wait for a response to the unsub + __consume_subscribe_messages $rd1 punsubscribe {chan1.* chan2.* chan3.*} assert_equal 0 [r publish chan1.hi hello] assert_equal 0 [r publish chan2.hi hello] assert_equal 0 [r publish chan3.hi hello] From dd45ade779bbbb681126a9e843ac6479f00c0349 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 31 May 2020 21:26:39 -0400 Subject: [PATCH 83/95] --loop should run forever Former-commit-id: 9b1e636a6654a4ba3b629b5ad1097caf513ca000 --- tests/test_helper.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index 85b89f711..625ef9602 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -613,7 +613,7 @@ for {set j 0} {$j < [llength $argv]} {incr j} { } elseif {$opt eq {--stop}} { set ::stop_on_failure 1 } elseif {$opt eq {--loop}} { - set ::loop 1000 + set ::loop -1 } elseif {$opt eq {--loopn}} { set ::loop [expr $arg - 1] incr j From de0540d34b390a73f7549df9fbe8db602203975e Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 31 May 2020 21:39:50 -0400 Subject: [PATCH 84/95] Fix client race in test (test only issue) Former-commit-id: f25aebf2698509a132ebf599374b245efb51e365 --- tests/unit/type/list.tcl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/unit/type/list.tcl b/tests/unit/type/list.tcl index 676896a75..9349d8bdf 100644 --- a/tests/unit/type/list.tcl +++ b/tests/unit/type/list.tcl @@ -1,7 +1,10 @@ +# Setting server-threads to 2 is really single threaded because test mode is enabled (no client allocated to thread 1) +# We do this because of the large numbers of nonblocking clients in this tests and the client races that causes start_server { tags {"list"} overrides { "list-max-ziplist-size" 5 + "server-threads 2" } } { source "tests/unit/type/list-common.tcl" From 4b317392be25c25a449cb51b1be76585e4c33c2e Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 31 May 2020 22:50:30 -0400 Subject: [PATCH 85/95] Don't start multimaster tests until all nodes are connected Former-commit-id: 202b97eff76501e736a2f0969607e3297e9703a4 --- src/server.cpp | 9 ++++++++ tests/integration/replication-multimaster.tcl | 21 +++++++++++++++---- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/server.cpp b/src/server.cpp index 3cd5a8cee..bd1170780 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -4640,8 +4640,17 @@ sds genRedisInfoString(const char *section) { listIter li; listNode *ln; listRewind(g_pserver->masters, &li); + bool fAllUp = true; + while ((ln = listNext(&li))) { + redisMaster *mi = (redisMaster*)listNodeValue(ln); + fAllUp = fAllUp && mi->repl_state == REPL_STATE_CONNECTED; + } + + sdscatprintf(info, "all_master_link_status:%s\r\n", + fAllUp ? 
"up" : "down"); int cmasters = 0; + listRewind(g_pserver->masters, &li); while ((ln = listNext(&li))) { long long slave_repl_offset = 1; diff --git a/tests/integration/replication-multimaster.tcl b/tests/integration/replication-multimaster.tcl index ff7cebac5..86e2ff92e 100644 --- a/tests/integration/replication-multimaster.tcl +++ b/tests/integration/replication-multimaster.tcl @@ -31,13 +31,21 @@ start_server {overrides {hz 500 active-replica yes multi-master yes}} { $R(3) replicaof $R_host(2) $R_port(2) } - after 2000 + test "$topology all nodes up" { + for {set j 0} {$j < 4} {incr j} { + wait_for_condition 50 100 { + [string match {*all_master_link_status:up*} [$R($j) info replication]] + } else { + fail "Multimaster group didn't connect up in a reasonable period of time" + } + } + } test "$topology replicates to all nodes" { $R(0) set testkey foo after 500 for {set n 0} {$n < 4} {incr n} { - wait_for_condition 50 1000 { + wait_for_condition 50 100 { [$R($n) get testkey] == "foo" } else { fail "Failed to replicate to $n" @@ -48,12 +56,17 @@ start_server {overrides {hz 500 active-replica yes multi-master yes}} { test "$topology replicates only once" { $R(0) set testkey 1 after 500 + #wait_for_condition 50 100 { + # [$R(1) get testkey] == 1 && [$R(2) get testkey] == 1 + #} else { + # fail "Set failed to replicate" + #} $R(1) incr testkey after 500 $R(2) incr testkey after 500 for {set n 0} {$n < 4} {incr n} { - wait_for_condition 50 1000 { + wait_for_condition 100 100 { [$R($n) get testkey] == 3 } else { fail "node $n did not replicate" @@ -69,7 +82,7 @@ start_server {overrides {hz 500 active-replica yes multi-master yes}} { $R(0) incr testkey $R(0) exec for {set n 0} {$n < 4} {incr n} { - wait_for_condition 50 1000 { + wait_for_condition 50 100 { [$R($n) get testkey] == 3 } else { fail "node $n failed to replicate" From 9b9c54b4ce6a81c602c5bced54350ead3fe1b40a Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 31 May 2020 22:51:02 -0400 Subject: [PATCH 86/95] Do another iteration for endurance Former-commit-id: ac3f8d1ac7af62b01816156b8578f4cbc48709e1 --- .github/workflows/endurance.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/endurance.yml b/.github/workflows/endurance.yml index 5d88aa968..368ef9515 100644 --- a/.github/workflows/endurance.yml +++ b/.github/workflows/endurance.yml @@ -18,5 +18,5 @@ jobs: - name: test-multithread (5X) run: | sudo apt-get install -y tcl8.5 - ./runtest --loopn 3 --config server-threads 3 --clients 5 + ./runtest --loopn 4 --config server-threads 3 --clients 5 From b30fa046f4077cbe2136f8a346945739146bceb4 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 31 May 2020 23:22:10 -0400 Subject: [PATCH 87/95] we must always respect the output of sdscat, also change the string so its not a substring of other config params Former-commit-id: e50313b20718bc8df0a53c11b0960e4bcb2177d4 --- src/server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server.cpp b/src/server.cpp index bd1170780..4fa6ce725 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -4646,7 +4646,7 @@ sds genRedisInfoString(const char *section) { fAllUp = fAllUp && mi->repl_state == REPL_STATE_CONNECTED; } - sdscatprintf(info, "all_master_link_status:%s\r\n", + info = sdscatprintf(info, "master_global_link_status:%s\r\n", fAllUp ? 
"up" : "down"); int cmasters = 0; From 08fca5ef31a1c7cdc8ee8f9c6ebfe0907ceffad3 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 31 May 2020 23:22:25 -0400 Subject: [PATCH 88/95] sendfile has high latency in some scenarios, don't use it Former-commit-id: 1eb0e3c1c604e71c54423f1d11b8c709c847a516 --- src/config.h | 6 --- src/replication.cpp | 50 ------------------- tests/integration/replication-multimaster.tcl | 2 +- 3 files changed, 1 insertion(+), 57 deletions(-) diff --git a/src/config.h b/src/config.h index 08b9101cb..022cb0033 100644 --- a/src/config.h +++ b/src/config.h @@ -139,12 +139,6 @@ void setproctitle(const char *fmt, ...); /* Byte ordering detection */ #include /* This will likely define BYTE_ORDER */ -/* Define redis_sendfile. */ -#if defined(__linux__) || (defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_5)) -#define HAVE_SENDFILE 1 -ssize_t redis_sendfile(int out_fd, int in_fd, off_t offset, size_t count); -#endif - #ifndef BYTE_ORDER #if (BSD >= 199103) # include diff --git a/src/replication.cpp b/src/replication.cpp index 2f8a74837..67a550cf8 100644 --- a/src/replication.cpp +++ b/src/replication.cpp @@ -1334,40 +1334,6 @@ void removeRDBUsedToSyncReplicas(void) { } } -#if HAVE_SENDFILE -/* Implements redis_sendfile to transfer data between file descriptors and - * avoid transferring data to and from user space. - * - * The function prototype is just like sendfile(2) on Linux. in_fd is a file - * descriptor opened for reading and out_fd is a descriptor opened for writing. - * offset specifies where to start reading data from in_fd. count is the number - * of bytes to copy between the file descriptors. - * - * The return value is the number of bytes written to out_fd, if the transfer - * was successful. On error, -1 is returned, and errno is set appropriately. */ -#if defined(__linux__) -#include -#endif -ssize_t redis_sendfile(int out_fd, int in_fd, off_t offset, size_t count) { -#if defined(__linux__) - return sendfile(out_fd, in_fd, &offset, count); - -#elif defined(__APPLE__) - off_t len = count; - /* Notice that it may return -1 and errno is set to EAGAIN even if some - * bytes have been sent successfully and the len argument is set correctly - * when using a socket marked for non-blocking I/O. */ - if (sendfile(in_fd, out_fd, offset, &len, NULL, 0) == -1 && - errno != EAGAIN) return -1; - else - return (ssize_t)len; - -#endif - errno = ENOSYS; - return -1; -} -#endif - void sendBulkToSlave(connection *conn) { client *replica = (client*)connGetPrivateData(conn); serverAssert(FCorrectThread(replica)); @@ -1404,22 +1370,6 @@ void sendBulkToSlave(connection *conn) { * try to use sendfile system call if supported, unless tls is enabled. * fallback to normal read+write otherwise. 
*/ nwritten = 0; -#if HAVE_SENDFILE - if (!g_pserver->tls_replication && !g_pserver->fActiveReplica) { // sendfile blocks too long for active replication - if ((nwritten = redis_sendfile(conn->fd,replica->repldbfd, - replica->repldboff,PROTO_IOBUF_LEN)) == -1) - { - if (errno != EAGAIN) { - serverLog(LL_WARNING,"Sendfile error sending DB to replica: %s", - strerror(errno)); - ul.unlock(); - aeLock.arm(nullptr); - freeClient(replica); - } - return; - } - } -#endif if (!nwritten) { ssize_t buflen; char buf[PROTO_IOBUF_LEN]; diff --git a/tests/integration/replication-multimaster.tcl b/tests/integration/replication-multimaster.tcl index 86e2ff92e..858cff26a 100644 --- a/tests/integration/replication-multimaster.tcl +++ b/tests/integration/replication-multimaster.tcl @@ -34,7 +34,7 @@ start_server {overrides {hz 500 active-replica yes multi-master yes}} { test "$topology all nodes up" { for {set j 0} {$j < 4} {incr j} { wait_for_condition 50 100 { - [string match {*all_master_link_status:up*} [$R($j) info replication]] + [string match {*master_global_link_status:up*} [$R($j) info replication]] } else { fail "Multimaster group didn't connect up in a reasonable period of time" } From 0a9a32e5d72d766d353ad3ea9081fb9466064d35 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 31 May 2020 23:46:12 -0400 Subject: [PATCH 89/95] Fix module multithreaded test failures Former-commit-id: 1ef35cf466ea944c56974b3795d7d6b5e89f5a3d --- src/ae.cpp | 15 ++++++++++++--- src/ae.h | 2 +- src/module.cpp | 11 ++++++++--- src/networking.cpp | 2 +- 4 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/ae.cpp b/src/ae.cpp index 5eba26b8c..ab5eeb163 100644 --- a/src/ae.cpp +++ b/src/ae.cpp @@ -122,6 +122,7 @@ struct aeCommand AE_ASYNC_OP op; int fd; int mask; + bool fLock = true; union { aePostFunctionProc *proc; aeFileProc *fproc; @@ -166,7 +167,9 @@ void aeProcessCmd(aeEventLoop *eventLoop, int fd, void *, int ) case AE_ASYNC_OP::PostFunction: { - std::unique_lock ulock(g_lock); + std::unique_lock ulock(g_lock, std::defer_lock); + if (cmd.fLock) + ulock.lock(); ((aePostFunctionProc*)cmd.proc)(cmd.clientData); break; } @@ -176,7 +179,9 @@ void aeProcessCmd(aeEventLoop *eventLoop, int fd, void *, int ) if (cmd.pctl != nullptr) cmd.pctl->mutexcv.lock(); - std::unique_lock ulock(g_lock); + std::unique_lock ulock(g_lock, std::defer_lock); + if (cmd.fLock) + ulock.lock(); (*cmd.pfn)(); if (cmd.pctl != nullptr) @@ -236,6 +241,7 @@ int aeCreateRemoteFileEvent(aeEventLoop *eventLoop, int fd, int mask, cmd.fproc = proc; cmd.clientData = clientData; cmd.pctl = nullptr; + cmd.fLock = true; if (fSynchronous) { cmd.pctl = new (MALLOC_LOCAL) aeCommandControl(); @@ -272,13 +278,14 @@ int aePostFunction(aeEventLoop *eventLoop, aePostFunctionProc *proc, void *arg) cmd.op = AE_ASYNC_OP::PostFunction; cmd.proc = proc; cmd.clientData = arg; + cmd.fLock = true; auto size = write(eventLoop->fdCmdWrite, &cmd, sizeof(cmd)); if (size != sizeof(cmd)) return AE_ERR; return AE_OK; } -int aePostFunction(aeEventLoop *eventLoop, std::function fn, bool fSynchronous) +int aePostFunction(aeEventLoop *eventLoop, std::function fn, bool fSynchronous, bool fLock) { if (eventLoop == g_eventLoopThisThread) { @@ -290,6 +297,7 @@ int aePostFunction(aeEventLoop *eventLoop, std::function fn, bool fSynch cmd.op = AE_ASYNC_OP::PostCppFunction; cmd.pfn = new (MALLOC_LOCAL) std::function(fn); cmd.pctl = nullptr; + cmd.fLock = fLock; if (fSynchronous) { cmd.pctl = new (MALLOC_LOCAL) aeCommandControl(); @@ -449,6 +457,7 @@ void 
aeDeleteFileEventAsync(aeEventLoop *eventLoop, int fd, int mask) cmd.op = AE_ASYNC_OP::DeleteFileEvent; cmd.fd = fd; cmd.mask = mask; + cmd.fLock = true; auto cb = write(eventLoop->fdCmdWrite, &cmd, sizeof(cmd)); AE_ASSERT(cb == sizeof(cmd)); } diff --git a/src/ae.h b/src/ae.h index 156c219ef..3f1ddbf06 100644 --- a/src/ae.h +++ b/src/ae.h @@ -135,7 +135,7 @@ aeEventLoop *aeCreateEventLoop(int setsize); int aePostFunction(aeEventLoop *eventLoop, aePostFunctionProc *proc, void *arg); #ifdef __cplusplus } // EXTERN C -int aePostFunction(aeEventLoop *eventLoop, std::function fn, bool fSynchronous = false); +int aePostFunction(aeEventLoop *eventLoop, std::function fn, bool fSynchronous = false, bool fLock = true); extern "C" { #endif void aeDeleteEventLoop(aeEventLoop *eventLoop); diff --git a/src/module.cpp b/src/module.cpp index b4427630c..859236e72 100644 --- a/src/module.cpp +++ b/src/module.cpp @@ -5439,7 +5439,9 @@ RedisModuleTimerID RM_CreateTimer(RedisModuleCtx *ctx, mstime_t period, RedisMod if (memcmp(ri.key,&key,sizeof(key)) == 0) { /* This is the first key, we need to re-install the timer according * to the just added event. */ - aeDeleteTimeEvent(g_pserver->rgthreadvar[IDX_EVENT_LOOP_MAIN].el,aeTimer); + aePostFunction(g_pserver->rgthreadvar[IDX_EVENT_LOOP_MAIN].el, [&]{ + aeDeleteTimeEvent(g_pserver->rgthreadvar[IDX_EVENT_LOOP_MAIN].el,aeTimer); + }, true /* synchronous */, false /* fLock */); aeTimer = -1; } raxStop(&ri); @@ -5447,8 +5449,11 @@ RedisModuleTimerID RM_CreateTimer(RedisModuleCtx *ctx, mstime_t period, RedisMod /* If we have no main timer (the old one was invalidated, or this is the * first module timer we have), install one. */ - if (aeTimer == -1) - aeTimer = aeCreateTimeEvent(g_pserver->rgthreadvar[IDX_EVENT_LOOP_MAIN].el,period,moduleTimerHandler,NULL,NULL); + if (aeTimer == -1) { + aePostFunction(g_pserver->rgthreadvar[IDX_EVENT_LOOP_MAIN].el, [&]{ + aeTimer = aeCreateTimeEvent(g_pserver->rgthreadvar[IDX_EVENT_LOOP_MAIN].el,period,moduleTimerHandler,NULL,NULL); + }, true /* synchronous */, false /* fLock */); + } return key; } diff --git a/src/networking.cpp b/src/networking.cpp index ac248d41f..619db0786 100644 --- a/src/networking.cpp +++ b/src/networking.cpp @@ -2822,7 +2822,7 @@ NULL if (target && target->flags & CLIENT_BLOCKED) { std::unique_lock ul(target->lock); if (unblock_error) - addReplyError(target, + addReplyErrorAsync(target, "-UNBLOCKED client unblocked via CLIENT UNBLOCK"); else replyToBlockedClientTimedOut(target); From 02d602b3fd043bb3c90c07eb862f708aa5ce1bcd Mon Sep 17 00:00:00 2001 From: John Sully Date: Mon, 1 Jun 2020 00:25:31 -0400 Subject: [PATCH 90/95] Update endurance.yml Former-commit-id: d6be93537ebd31f1c6c1dc63c5d3c299b49efec9 --- .github/workflows/endurance.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/endurance.yml b/.github/workflows/endurance.yml index 368ef9515..24c5f540d 100644 --- a/.github/workflows/endurance.yml +++ b/.github/workflows/endurance.yml @@ -2,7 +2,7 @@ name: Endurance on: schedule: - - cron: '0 * * * *' + - cron: '30 * * * *' jobs: @@ -18,5 +18,5 @@ jobs: - name: test-multithread (5X) run: | sudo apt-get install -y tcl8.5 - ./runtest --loopn 4 --config server-threads 3 --clients 5 + ./runtest --loopn 5 --config server-threads 3 --clients 5 From 9e87395c346ab816cdf3be3bba2983c90e54e858 Mon Sep 17 00:00:00 2001 From: John Sully Date: Mon, 1 Jun 2020 15:33:05 -0400 Subject: [PATCH 91/95] Fix for issue #187 we need to properly handle the case where a key with a subkey 
expirey itself expires during load Former-commit-id: e6a9a6b428b91b6108df24ae6285ea9b582b7b23 --- src/rdb.cpp | 18 +++++++++++++++++- tests/integration/replication.tcl | 7 ------- tests/support/util.tcl | 7 +++++++ tests/unit/expire.tcl | 17 +++++++++++++++++ 4 files changed, 41 insertions(+), 8 deletions(-) diff --git a/src/rdb.cpp b/src/rdb.cpp index 97f8aebc0..9523d61e7 100644 --- a/src/rdb.cpp +++ b/src/rdb.cpp @@ -2163,6 +2163,7 @@ int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) { uint64_t mvcc_tstamp = OBJ_MVCC_INVALID; robj *subexpireKey = nullptr; sds key = nullptr; + bool fLastKeyExpired = false; rdb->update_cksum = rdbLoadProgressCallback; rdb->max_processing_chunk = g_pserver->loading_process_events_interval_bytes; @@ -2295,11 +2296,22 @@ int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) { static_assert(sizeof(unsigned long long) == sizeof(uint64_t), "Ensure long long is 64-bits"); mvcc_tstamp = strtoull(szFromObj(auxval), nullptr, 10); } else if (!strcasecmp(szFromObj(auxkey), "keydb-subexpire-key")) { + if (subexpireKey != nullptr) { + serverLog(LL_WARNING, "Corrupt subexpire entry in RDB skipping. key: %s subkey: %s", key != nullptr ? key : "(null)", subexpireKey != nullptr ? szFromObj(subexpireKey) : "(null)"); + decrRefCount(subexpireKey); + subexpireKey = nullptr; + } subexpireKey = auxval; incrRefCount(subexpireKey); } else if (!strcasecmp(szFromObj(auxkey), "keydb-subexpire-when")) { if (key == nullptr || subexpireKey == nullptr) { - serverLog(LL_WARNING, "Corrupt subexpire entry in RDB skipping. key: %s subkey: %s", key != nullptr ? key : "(null)", subexpireKey != nullptr ? szFromObj(subexpireKey) : "(null)"); + if (!fLastKeyExpired) { // This is not an error if we just expired the key associated with this subexpire + serverLog(LL_WARNING, "Corrupt subexpire entry in RDB skipping. key: %s subkey: %s", key != nullptr ? key : "(null)", subexpireKey != nullptr ? 
szFromObj(subexpireKey) : "(null)"); + } + if (subexpireKey) { + decrRefCount(subexpireKey); + subexpireKey = nullptr; + } } else { redisObject keyobj; @@ -2404,6 +2416,8 @@ int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) { rsi->mi->staleKeyMap->operator[](db - g_pserver->db).push_back(objKeyDup); decrRefCount(objKeyDup); } + serverLog(LL_WARNING, "Loaded expired key"); + fLastKeyExpired = true; sdsfree(key); key = nullptr; decrRefCount(val); @@ -2411,6 +2425,8 @@ int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) { } else { /* Add the new object in the hash table */ int fInserted = dbMerge(db, &keyobj, val, (rsi && rsi->fForceSetKey) || (rdbflags & RDBFLAGS_ALLOW_DUP)); // Note: dbMerge will incrRef + serverLog(LL_WARNING, "Loaded: %s", key); + fLastKeyExpired = false; if (fInserted) { diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl index 2433517af..b67f5428c 100644 --- a/tests/integration/replication.tcl +++ b/tests/integration/replication.tcl @@ -1,10 +1,3 @@ -proc log_file_matches {log pattern} { - set fp [open $log r] - set content [read $fp] - close $fp - string match $pattern $content -} - start_server {tags {"repl"} overrides {hz 100}} { set slave [srv 0 client] set slave_host [srv 0 host] diff --git a/tests/support/util.tcl b/tests/support/util.tcl index 402b28a98..1f7a9fb8d 100644 --- a/tests/support/util.tcl +++ b/tests/support/util.tcl @@ -1,3 +1,10 @@ +proc log_file_matches {log pattern} { + set fp [open $log r] + set content [read $fp] + close $fp + string match $pattern $content +} + proc randstring {min max {type binary}} { set len [expr {$min+int(rand()*($max-$min+1))}] set output {} diff --git a/tests/unit/expire.tcl b/tests/unit/expire.tcl index ae4dad08b..67b2db4c2 100644 --- a/tests/unit/expire.tcl +++ b/tests/unit/expire.tcl @@ -272,6 +272,23 @@ start_server {tags {"expire"}} { r expiremember testkey foo 10000 r save r debug reload + if {[log_file_matches [srv 0 stdout] "*Corrupt subexpire*"]} { + fail "Server reported corrupt subexpire" + } assert [expr [r ttl testkey foo] > 0] } + + test {Load subkey for an expired key works} { + # Note test inherits keys from previous tests, we want more traffic in the RDB + r multi + r sadd testset val1 + r expiremember testset val1 300 + r pexpire testset 1 + r debug reload + r exec + set logerr [log_file_matches [srv 0 stdout] "*Corrupt subexpire*"] + if {$logerr} { + fail "Server reported corrupt subexpire" + } + } } From 92de178bfea70f41f61ca928fa7efe2ffc4f5b69 Mon Sep 17 00:00:00 2001 From: John Sully Date: Mon, 1 Jun 2020 16:01:26 -0400 Subject: [PATCH 92/95] PSYNC test reliability improvements (test only issue) Former-commit-id: 50fd4fa7e62f3996f15f6a8c4dcd892022f111ec --- tests/integration/psync2.tcl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/psync2.tcl b/tests/integration/psync2.tcl index daa9f8666..df4edf7af 100644 --- a/tests/integration/psync2.tcl +++ b/tests/integration/psync2.tcl @@ -25,10 +25,12 @@ proc show_cluster_status {} { array set log {} for {set j 0} {$j < 5} {incr j} { set fd [open $R_log($j)] - while {[gets $fd l] >= 0} { + set found 0 + while {[gets $fd l] >= 0 || !$found} { if {[regexp $log_regexp $l] && [regexp -nocase $repl_regexp $l]} { lappend log($j) $l + set found 1 } } close $fd From 4820142896bf8e31f6a7392fb690e090c3f7881e Mon Sep 17 00:00:00 2001 From: John Sully Date: Mon, 1 Jun 2020 16:13:58 -0400 Subject: [PATCH 93/95] PSYNC test shouldn't wait forever Former-commit-id: 
130613e16636923296a8d5b2c4bc623e62fef2f5 --- tests/integration/psync2.tcl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/psync2.tcl b/tests/integration/psync2.tcl index df4edf7af..4dfa7cddc 100644 --- a/tests/integration/psync2.tcl +++ b/tests/integration/psync2.tcl @@ -26,12 +26,14 @@ proc show_cluster_status {} { for {set j 0} {$j < 5} {incr j} { set fd [open $R_log($j)] set found 0 - while {[gets $fd l] >= 0 || !$found} { + set tries 0 + while {([gets $fd l] >= 0 || !$found) && $tries < 1000} { if {[regexp $log_regexp $l] && [regexp -nocase $repl_regexp $l]} { lappend log($j) $l set found 1 } + incr $tries } close $fd } From b2a885c98f698a433de4e58f44a5a42074edb4f3 Mon Sep 17 00:00:00 2001 From: John Sully Date: Mon, 1 Jun 2020 16:34:05 -0400 Subject: [PATCH 94/95] Remove debug logs that shouldn't have been checked in Former-commit-id: 31f58311e3de7441d81dd37bd4396be3b64efec5 --- src/rdb.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/rdb.cpp b/src/rdb.cpp index 9523d61e7..ea82cdbd8 100644 --- a/src/rdb.cpp +++ b/src/rdb.cpp @@ -2416,7 +2416,6 @@ int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) { rsi->mi->staleKeyMap->operator[](db - g_pserver->db).push_back(objKeyDup); decrRefCount(objKeyDup); } - serverLog(LL_WARNING, "Loaded expired key"); fLastKeyExpired = true; sdsfree(key); key = nullptr; @@ -2425,7 +2424,6 @@ int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) { } else { /* Add the new object in the hash table */ int fInserted = dbMerge(db, &keyobj, val, (rsi && rsi->fForceSetKey) || (rdbflags & RDBFLAGS_ALLOW_DUP)); // Note: dbMerge will incrRef - serverLog(LL_WARNING, "Loaded: %s", key); fLastKeyExpired = false; if (fInserted) From 419720d5ef4f871f4cd8eeaeb4495eab0e8897d0 Mon Sep 17 00:00:00 2001 From: John Sully Date: Mon, 1 Jun 2020 16:50:30 -0400 Subject: [PATCH 95/95] maxmem tests should be run solo for higher reliability Former-commit-id: 95f46e60779fca715244745ced7b2ab04bbf9e3a --- tests/unit/maxmemory.tcl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unit/maxmemory.tcl b/tests/unit/maxmemory.tcl index 3dd706173..e12fedc91 100644 --- a/tests/unit/maxmemory.tcl +++ b/tests/unit/maxmemory.tcl @@ -1,3 +1,4 @@ +run_solo {maxmemory} { start_server {tags {"maxmemory"}} { test "Without maxmemory small integers are shared" { r config set maxmemory 0 @@ -244,4 +245,4 @@ test_slave_buffers {slave buffer are counted correctly} 1000000 10 0 1 # test that slave buffer don't induce eviction # test again with fewer (and bigger) commands without pipeline, but with eviction test_slave_buffers "replica buffer don't induce eviction" 100000 100 1 0 - +} ;# run_solo
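
A note on the load-based lock detuning introduced in PATCH 74/95: sysinfo() reports load averages as fixed-point values scaled by 1 << SI_LOAD_SHIFT, so the raw loads[0] figure has to be rescaled before it can be compared against the 0.9 (90%) per-core threshold used by fastlock_auto_adjust_waits(). The short standalone sketch below is illustrative only (it assumes Linux/glibc and is not part of the patches themselves); it performs the same conversion the patch uses when deciding whether to shrink the spin limit.

    // Standalone sketch (Linux/glibc assumed): convert the sysinfo() 1-minute
    // load average, a fixed-point value scaled by 1 << SI_LOAD_SHIFT, into the
    // per-core percentage that the lock auto-tuning compares against 90%.
    #include <cstdio>
    #include <sys/sysinfo.h>

    int main() {
        struct sysinfo si;
        if (sysinfo(&si) != 0) return 1;          // query kernel load statistics
        double perCore = si.loads[0] / (double)(1 << SI_LOAD_SHIFT) / get_nprocs();
        std::printf("1-minute load per core: %.2f%%\n", perCore * 100.0);
        // Above roughly 90%, the patch flags high CPU pressure and shrinks the
        // spin limit (0x100000 -> 0x10000 iterations) before sleeping on the futex.
        return 0;
    }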