From d98d1ad574076d05b83b1e3a7ed25c95377385fc Mon Sep 17 00:00:00 2001
From: GutovskyMaria <48364560+gutovsky@users.noreply.github.com>
Date: Thu, 7 Oct 2021 08:22:27 +0300
Subject: [PATCH] Hide empty and loading replicas from CLUSTER SLOTS responses (#9287)

Hide empty and loading replicas from CLUSTER SLOTS responses
---
Review notes (ignored by git-am, not part of the commit message):
 * tests: the write loop in "Replica in loading state is hidden" now uses
   the per-iteration key name ($key) instead of the literal string "key".
 * tests: "Check digest and replica state" addresses the replica through
   $replica_id instead of the hard-coded instance index 1.
 * cluster.c: only the wording of the isReplicaAvailable() header comment
   differs; the code is unchanged.

 src/cluster.c                              |  20 +++-
 tests/cluster/tests/22-replica-in-sync.tcl | 103 +++++++++++++++++++++
 2 files changed, 122 insertions(+), 1 deletion(-)
 create mode 100644 tests/cluster/tests/22-replica-in-sync.tcl

diff --git a/src/cluster.c b/src/cluster.c
index f5291c499..9d1f910b2 100644
--- a/src/cluster.c
+++ b/src/cluster.c
@@ -4417,6 +4417,24 @@ int getSlotOrReply(client *c, robj *o) {
     return (int) slot;
 }
 
+/* Returns an indication if the replica node is fully available
+ * and should be listed in CLUSTER SLOTS response.
+ * Returns 1 for available nodes, 0 for nodes that are in failed
+ * state or whose replication offset is still 0 (treated here as
+ * "initial sync not finished", so not ready to serve reads). */
+static int isReplicaAvailable(clusterNode *node) {
+    if (nodeFailed(node)) {
+        return 0;
+    }
+    long long repl_offset = node->repl_offset;
+    if (node->flags & CLUSTER_NODE_MYSELF) {
+        /* Nodes do not update their own information
+         * in the cluster node list. */
+        repl_offset = replicationGetSlaveOffset();
+    }
+    return (repl_offset != 0);
+}
+
 void addNodeReplyForClusterSlot(client *c, clusterNode *node, int start_slot, int end_slot) {
     int i, nested_elements = 3; /* slots (2) + master addr (1) */
     void *nested_replylen = addReplyDeferredLen(c);
@@ -4434,7 +4452,7 @@ void addNodeReplyForClusterSlot(client *c, clusterNode *node, int start_slot, in
     for (i = 0; i < node->numslaves; i++) {
         /* This loop is copy/pasted from clusterGenNodeDescription()
          * with modifications for per-slot node aggregation. */
-        if (nodeFailed(node->slaves[i])) continue;
+        if (!isReplicaAvailable(node->slaves[i])) continue;
         addReplyArrayLen(c, 3);
         addReplyBulkCString(c, node->slaves[i]->ip);
         /* Report slave's non-TLS port to non-TLS client in TLS cluster */
diff --git a/tests/cluster/tests/22-replica-in-sync.tcl b/tests/cluster/tests/22-replica-in-sync.tcl
new file mode 100644
index 000000000..c147cb068
--- /dev/null
+++ b/tests/cluster/tests/22-replica-in-sync.tcl
@@ -0,0 +1,103 @@
+source "../tests/includes/init-tests.tcl"
+
+test "Create a 1 node cluster" {
+    create_cluster 1 0
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Cluster is writable" {
+    cluster_write_test 0
+}
+
+proc is_in_slots {master_id replica} {
+    set slots [R $master_id cluster slots]
+    set found_position [string first $replica $slots]
+    set result [expr {$found_position != -1}]
+    return $result
+}
+
+proc is_replica_online {info_repl} {
+    set found_position [string first "state=online" $info_repl]
+    set result [expr {$found_position != -1}]
+    return $result
+}
+
+set master_id 0
+
+test "Fill up" {
+    R $master_id debug populate 10000000 key 100
+}
+
+test "Add new node as replica" {
+    set replica_id [cluster_find_available_slave 1]
+    set master_myself [get_myself $master_id]
+    set replica_myself [get_myself $replica_id]
+    set replica [dict get $replica_myself id]
+    R $replica_id cluster replicate [dict get $master_myself id]
+}
+
+test "Check digest and replica state" {
+    R $replica_id readonly
+    wait_for_condition 1000 50 {
+        [is_in_slots $master_id $replica]
+    } else {
+        fail "New replica didn't appear in the slots"
+    }
+    wait_for_condition 1000 50 {
+        [is_replica_online [R $master_id info replication]]
+    } else {
+        fail "Replica is down for too long"
+    }
+    set replica_digest [R $replica_id debug digest]
+    assert {$replica_digest ne 0}
+}
+
+test "Replica in loading state is hidden" {
+    # Kill replica client for master and load new data to the primary
+    R $master_id multi
+    R $master_id config set repl-backlog-size 100
+    R $master_id client kill type replica
+    set num 10000
+    set value [string repeat A 1024]
+    for {set j 0} {$j < $num} {incr j} {
+        set key "{0}"
+        append key $j
+        R $master_id set $key $value
+    }
+    R $master_id exec
+
+    # Check that replica started loading
+    wait_for_condition 1000 50 {
+        [s $replica_id loading] eq 1
+    } else {
+        fail "Replica didn't enter loading state"
+    }
+    # Check that replica is not in cluster slots
+    assert {![is_in_slots $master_id $replica]}
+
+    # Wait for sync to finish
+    wait_for_condition 1000 50 {
+        [s $replica_id loading] eq 0
+    } else {
+        fail "Replica is in loading state for too long"
+    }
+
+    # Check replica is back to cluster slots
+    wait_for_condition 1000 50 {
+        [is_in_slots $master_id $replica]
+    } else {
+        fail "Replica is not back to slots"
+    }
+}
+
+test "Check disconnected replica not hidden from slots" {
+    # Disconnect replica from primary
+    R $master_id client kill type replica
+    # Check master to have no replicas
+    assert {[s $master_id connected_slaves] == 0}
+    # Check that replica is still in the cluster slots
+    assert {[is_in_slots $master_id $replica]}
+}