diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 423ea5aab..33ef7054e 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -6271,7 +6271,7 @@ void clusterCommandSetSlot(client *c) { * To mitigate this issue, the following order needs to be enforced for slot * migration finalization such that the replicas finalize the slot ownership * before the primary: - . * + * * 1. Client C issues SETSLOT n NODE B against node B. * 2. Primary B replicates `SETSLOT n NODE B` to all of its replicas (e.g., B', B''). * 3. Upon replication completion, primary B executes `SETSLOT n NODE B` and @@ -6291,17 +6291,25 @@ void clusterCommandSetSlot(client *c) { * non-replicated behavior.*/ listIter li; listNode *ln; - int legacy_replica_found = 0; + int num_eligible_replicas = 0; listRewind(server.replicas, &li); while ((ln = listNext(&li))) { client *r = ln->value; - if (r->replica_version < 0x702ff /* 7.2.255 */) { - legacy_replica_found++; - break; + + /* We think that when the command comes in, the primary only needs to + * wait for the online replicas. The admin can easily check if there + * are replicas that are down for an extended period of time. If they + * decide to move forward anyway, we should not block it. If a replica + * failed right before the replication and was not included in the + * replication, it would also be unlikely to win the election. + * + * And 0x702ff is 7.2.255; we only support new versions in this case. */ + if (r->repl_state == REPLICA_STATE_ONLINE && r->replica_version > 0x702ff) { + num_eligible_replicas++; } } - if (!legacy_replica_found) { + if (num_eligible_replicas != 0) { forceCommandPropagation(c, PROPAGATE_REPL); /* We are a primary and this is the first time we see this `SETSLOT` * command. Force-replicate the command to all of our replicas @@ -6311,7 +6319,7 @@ * 2. The repl offset target is set to the primary's current repl offset + 1. 
* There is no concern of partial replication because replicas always * ack the repl offset at the command boundary. */ - blockClientForReplicaAck(c, timeout_ms, server.primary_repl_offset + 1, myself->num_replicas, 0); + blockClientForReplicaAck(c, timeout_ms, server.primary_repl_offset + 1, num_eligible_replicas, 0); /* Mark client as pending command for execution after replication to replicas. */ c->flag.pending_command = 1; replicationRequestAckFromReplicas(); @@ -6320,7 +6328,7 @@ void clusterCommandSetSlot(client *c) { } /* Slot states have been updated on the compatible replicas (if any). - * Now exuecte the command on the primary. */ + * Now execute the command on the primary. */ if (!strcasecmp(c->argv[3]->ptr, "migrating")) { serverLog(LL_NOTICE, "Migrating slot %d to node %.40s (%s)", slot, n->name, n->human_nodename); server.cluster->migrating_slots_to[slot] = n; diff --git a/tests/unit/cluster/slot-migration.tcl b/tests/unit/cluster/slot-migration.tcl index 030404dfd..d79897196 100644 --- a/tests/unit/cluster/slot-migration.tcl +++ b/tests/unit/cluster/slot-migration.tcl @@ -435,3 +435,35 @@ start_cluster 2 0 {tags {tls:skip external:skip cluster regression} overrides {c R 0 MIGRATE 127.0.0.1 [lindex [R 1 CONFIG GET port] 1] $stream_name 0 5000 } } + +start_cluster 3 6 {tags {external:skip cluster} overrides {cluster-node-timeout 1000} } { + test "Slot migration is ok when the replicas are down" { + # Killing all replicas in primary 0. + assert_equal 2 [s 0 connected_slaves] + catch {R 3 shutdown nosave} + catch {R 6 shutdown nosave} + wait_for_condition 50 100 { + [s 0 connected_slaves] == 0 + } else { + fail "The replicas in primary 0 are still connecting" + } + + # Killing one replica in primary 1. 
+ assert_equal 2 [s -1 connected_slaves] + catch {R 4 shutdown nosave} + wait_for_condition 50 100 { + [s -1 connected_slaves] == 1 + } else { + fail "The replica in primary 1 is still connecting" + } + + # Check slot migration is ok when the replicas are down. + migrate_slot 0 1 0 + migrate_slot 0 2 1 + assert_equal {OK} [R 0 CLUSTER SETSLOT 0 NODE [R 1 CLUSTER MYID]] + assert_equal {OK} [R 0 CLUSTER SETSLOT 1 NODE [R 2 CLUSTER MYID]] + wait_for_slot_state 0 "" + wait_for_slot_state 1 "" + wait_for_slot_state 2 "" + } +}