From d72ba06dd0519fd0bf578cca2a2f5c457629dc6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Viktor=20S=C3=B6derqvist?= Date: Fri, 24 May 2024 17:58:03 +0200 Subject: [PATCH] Make cluster replicas return ASK and TRYAGAIN (#495) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After READONLY, make a cluster replica behave as its primary regarding returning ASK redirects and TRYAGAIN. Without this patch, a client reading from a replica cannot tell if a key doesn't exist or if it has already been migrated to another shard as part of an ongoing slot migration. Therefore, without an ASK redirect in this situation, offloading reads to cluster replicas wasn't reliable. Note: The target of a redirect is always a primary. If a client wants to continue reading from a replica after following a redirect, it needs to figure out the replicas of that new primary using CLUSTER SHARDS or similar. This is related to #21 and has been made possible by the introduction of Replication of Slot Migration States in #445. ---- Release notes: During cluster slot migration, replicas are able to return -ASK redirects and -TRYAGAIN. --------- Signed-off-by: Viktor Söderqvist --- src/cluster.c | 14 +++++--- tests/unit/cluster/slot-migration.tcl | 48 +++++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index d30d7e19b..71d1cc912 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -1048,10 +1048,12 @@ getNodeByQuery(client *c, struct serverCommand *cmd, robj **argv, int argc, int * can safely serve the request, otherwise we return a TRYAGAIN * error). To do so we set the importing/migrating state and * increment a counter for every missing key. */ - if (n == myself && getMigratingSlotDest(slot) != NULL) { - migrating_slot = 1; - } else if (getImportingSlotSource(slot) != NULL) { - importing_slot = 1; + if (clusterNodeIsMaster(myself) || c->flags & CLIENT_READONLY) { + if (n == clusterNodeGetMaster(myself) && getMigratingSlotDest(slot) != NULL) { + migrating_slot = 1; + } else if (getImportingSlotSource(slot) != NULL) { + importing_slot = 1; + } } } else { /* If it is not the first key/channel, make sure it is exactly @@ -1120,7 +1122,9 @@ getNodeByQuery(client *c, struct serverCommand *cmd, robj **argv, int argc, int /* MIGRATE always works in the context of the local node if the slot * is open (migrating or importing state). We need to be able to freely * move keys among instances in this case. */ - if ((migrating_slot || importing_slot) && cmd->proc == migrateCommand) return myself; + if ((migrating_slot || importing_slot) && cmd->proc == migrateCommand && clusterNodeIsMaster(myself)) { + return myself; + } /* If we don't have all the keys and we are migrating the slot, send * an ASK redirection or TRYAGAIN. */ diff --git a/tests/unit/cluster/slot-migration.tcl b/tests/unit/cluster/slot-migration.tcl index d2cfa8e2c..d141ccc5e 100644 --- a/tests/unit/cluster/slot-migration.tcl +++ b/tests/unit/cluster/slot-migration.tcl @@ -71,6 +71,7 @@ start_cluster 3 3 {tags {external:skip cluster} overrides {cluster-allow-replica set R3_id [R 3 CLUSTER MYID] set R4_id [R 4 CLUSTER MYID] set R5_id [R 5 CLUSTER MYID] + R 0 SET "{aga}2" banana test "Slot migration states are replicated" { # Validate initial states @@ -139,8 +140,51 @@ start_cluster 3 3 {tags {external:skip cluster} overrides {cluster-allow-replica assert_equal [get_open_slots 3] "\[609->-$R1_id\]" assert_equal [get_open_slots 4] "\[609-<-$R0_id\]" catch {[R 3 get aga]} e - assert_equal {MOVED} [lindex [split $e] 0] - assert_equal {609} [lindex [split $e] 1] + set port0 [srv 0 port] + assert_equal "MOVED 609 127.0.0.1:$port0" $e + } + + test "Replica of migrating node returns ASK redirect after READONLY" { + # Validate initial states + assert_equal [get_open_slots 0] "\[609->-$R1_id\]" + assert_equal [get_open_slots 1] "\[609-<-$R0_id\]" + assert_equal [get_open_slots 3] "\[609->-$R1_id\]" + assert_equal [get_open_slots 4] "\[609-<-$R0_id\]" + # Read missing key in readonly replica in migrating state. + assert_equal OK [R 3 READONLY] + set port1 [srv -1 port] + catch {[R 3 get aga]} e + assert_equal "ASK 609 127.0.0.1:$port1" $e + assert_equal OK [R 3 READWRITE] + } + + test "Replica of migrating node returns TRYAGAIN after READONLY" { + # Validate initial states + assert_equal [get_open_slots 0] "\[609->-$R1_id\]" + assert_equal [get_open_slots 1] "\[609-<-$R0_id\]" + assert_equal [get_open_slots 3] "\[609->-$R1_id\]" + assert_equal [get_open_slots 4] "\[609-<-$R0_id\]" + # Read some existing and some missing keys in readonly replica in + # migrating state results in TRYAGAIN, just like its primary would do. + assert_equal OK [R 3 READONLY] + catch {[R 3 mget "{aga}1" "{aga}2"]} e + assert_match "TRYAGAIN *" $e + assert_equal OK [R 3 READWRITE] + } + + test "Replica of importing node returns TRYAGAIN after READONLY and ASKING" { + # Validate initial states + assert_equal [get_open_slots 0] "\[609->-$R1_id\]" + assert_equal [get_open_slots 1] "\[609-<-$R0_id\]" + assert_equal [get_open_slots 3] "\[609->-$R1_id\]" + assert_equal [get_open_slots 4] "\[609-<-$R0_id\]" + # A client follows an ASK redirect to a primary, but wants to read from a replica. + # The replica returns TRYAGAIN just like a primary would do for two missing keys. + assert_equal OK [R 4 READONLY] + assert_equal OK [R 4 ASKING] + catch {R 4 MGET "{aga}1" "{aga}2"} e + assert_match "TRYAGAIN *" $e + assert_equal OK [R 4 READWRITE] } test "New replica inherits migrating slot" {