From 8945067544706951fd60007ad8cba8e8941b970d Mon Sep 17 00:00:00 2001 From: weimeng Date: Sun, 28 Aug 2022 16:37:26 +0800 Subject: [PATCH] bugfix:del keys in slot replicate to replica, and trigger other invalidations (#11084) Bugfix: with the scenario if we force assigned a slot to other master, old master will lose the slot ownership, then old master will call the function delKeysInSlot() to delete all keys which in the slot. These delete operations should replicate to replicas, avoid the data divergence issue in master and replicas. Additionally, in this case, we now call: * signalModifiedKey (to invalidate WATCH) * moduleNotifyKeyspaceEvent (key space notification for modules) * dirty++ (to signal that the persistence file may be outdated) Co-authored-by: weimeng Co-authored-by: Madelyn Olson --- src/cluster.c | 5 +++ tests/test_helper.tcl | 1 + tests/unit/cluster/slot-ownership.tcl | 61 +++++++++++++++++++++++++++ 3 files changed, 67 insertions(+) create mode 100644 tests/unit/cluster/slot-ownership.tcl diff --git a/src/cluster.c b/src/cluster.c index 95c9359bc..210b66e0f 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -7077,8 +7077,13 @@ unsigned int delKeysInSlot(unsigned int hashslot) { de = dictEntryNextInSlot(de); robj *key = createStringObject(sdskey, sdslen(sdskey)); dbDelete(&server.db[0], key); + propagateDeletion(&server.db[0], key, server.lazyfree_lazy_server_del); + propagatePendingCommands(); + signalModifiedKey(NULL, &server.db[0], key); + moduleNotifyKeyspaceEvent(NOTIFY_GENERIC, "del", key, server.db[0].id); decrRefCount(key); j++; + server.dirty++; } return j; } diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index 27c817e08..44a4f2e15 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -100,6 +100,7 @@ set ::all_tests { unit/cluster/scripting unit/cluster/hostnames unit/cluster/multi-slot-operations + unit/cluster/slot-ownership } # Index to the next test to run in the ::all_tests list. set ::next_test 0 diff --git a/tests/unit/cluster/slot-ownership.tcl b/tests/unit/cluster/slot-ownership.tcl new file mode 100644 index 000000000..0f3e3cc4f --- /dev/null +++ b/tests/unit/cluster/slot-ownership.tcl @@ -0,0 +1,61 @@ +start_cluster 2 2 {tags {external:skip cluster}} { + + test "Verify that slot ownership transfer through gossip propagates deletes to replicas" { + assert {[s -2 role] eq {slave}} + wait_for_condition 1000 50 { + [s -2 master_link_status] eq {up} + } else { + fail "Instance #2 master link status is not up" + } + + assert {[s -3 role] eq {slave}} + wait_for_condition 1000 50 { + [s -3 master_link_status] eq {up} + } else { + fail "Instance #3 master link status is not up" + } + + # Set a single key that will be used to test deletion + set key "FOO" + R 0 SET $key TEST + set key_slot [R 0 cluster keyslot $key] + set slot_keys_num [R 0 cluster countkeysinslot $key_slot] + assert {$slot_keys_num > 0} + + # Wait for replica to have the key + R 2 readonly + wait_for_condition 1000 50 { + [R 2 exists $key] eq "1" + } else { + fail "Test key was not replicated" + } + + assert_equal [R 2 cluster countkeysinslot $key_slot] $slot_keys_num + + # Assert other shards in cluster doesn't have the key + assert_equal [R 1 cluster countkeysinslot $key_slot] "0" + assert_equal [R 3 cluster countkeysinslot $key_slot] "0" + + set nodeid [R 1 cluster myid] + + R 1 cluster bumpepoch + # Move $key_slot to node 1 + assert_equal [R 1 cluster setslot $key_slot node $nodeid] "OK" + + wait_for_cluster_propagation + + # src master will delete keys in the slot + wait_for_condition 50 100 { + [R 0 cluster countkeysinslot $key_slot] eq 0 + } else { + fail "master 'countkeysinslot $key_slot' did not eq 0" + } + + # src replica will delete keys in the slot + wait_for_condition 50 100 { + [R 2 cluster countkeysinslot $key_slot] eq 0 + } else { + fail "replica 'countkeysinslot $key_slot' did not eq 0" + } + } +}