diff --git a/redis.conf b/redis.conf
index 2663cfce6..e16fc4bbb 100644
--- a/redis.conf
+++ b/redis.conf
@@ -1456,12 +1456,21 @@ lua-time-limit 5000
 # master in your cluster.
 #
 # Default is 1 (replicas migrate only if their masters remain with at least
-# one replica). To disable migration just set it to a very large value.
+# one replica). To disable migration just set it to a very large value or
+# set cluster-allow-replica-migration to 'no'.
 # A value of 0 can be set but is useful only for debugging and dangerous
 # in production.
 #
 # cluster-migration-barrier 1
 
+# Turning off this option allows less automatic cluster configuration.
+# It disables both migration to orphaned masters and migration from masters
+# that became empty.
+#
+# Default is 'yes' (allow automatic migrations).
+#
+# cluster-allow-replica-migration yes
+
 # By default Redis Cluster nodes stop accepting queries if they detect there
 # is at least a hash slot uncovered (no available node is serving it).
 # This way if the cluster is partially down (for example a range of hash slots
diff --git a/src/cluster.c b/src/cluster.c
index 76cfb74b9..d28c7541c 100644
--- a/src/cluster.c
+++ b/src/cluster.c
@@ -1633,7 +1633,7 @@ void clusterSetNodeAsMaster(clusterNode *n) {
  * case we receive the info via an UPDATE packet. */
 void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoch, unsigned char *slots) {
     int j;
-    clusterNode *curmaster, *newmaster = NULL;
+    clusterNode *curmaster = NULL, *newmaster = NULL;
     /* The dirty slots list is a list of slots for which we lose the ownership
      * while having still keys inside. This usually happens after a failover
      * or after a manual cluster reconfiguration operated by the admin.
@@ -1644,6 +1644,12 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
     uint16_t dirty_slots[CLUSTER_SLOTS];
     int dirty_slots_count = 0;
 
+    /* We should detect if the sender is the new master of our shard.
+     * We will know this if all our slots were migrated to the sender, and
+     * the sender has no slots other than ours. */
+    int sender_slots = 0;
+    int migrated_our_slots = 0;
+
     /* Here we set curmaster to this node or the node this node
      * replicates to if it's a slave. In the for loop we are
      * interested to check if slots are taken away from curmaster. */
@@ -1656,6 +1662,8 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
 
     for (j = 0; j < CLUSTER_SLOTS; j++) {
         if (bitmapTestBit(slots,j)) {
+            sender_slots++;
+
             /* The slot is already bound to the sender of this message. */
             if (server.cluster->slots[j] == sender) continue;
 
@@ -1682,8 +1690,10 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
                 dirty_slots_count++;
             }
 
-            if (server.cluster->slots[j] == curmaster)
+            if (server.cluster->slots[j] == curmaster) {
                 newmaster = sender;
+                migrated_our_slots++;
+            }
             clusterDelSlot(j);
             clusterAddSlot(sender,j);
             clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
@@ -1706,7 +1716,9 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
      * master.
      * 2) We are a slave and our master is left without slots. We need
      *    to replicate to the new slots owner. */
-    if (newmaster && curmaster->numslots == 0) {
+    if (newmaster && curmaster->numslots == 0 &&
+        (server.cluster_allow_replica_migration ||
+         sender_slots == migrated_our_slots)) {
         serverLog(LL_WARNING,
             "Configuration change detected. Reconfiguring myself "
Reconfiguring myself " "as a replica of %.40s", sender->name); @@ -3735,7 +3747,8 @@ void clusterCron(void) { * the orphaned masters. Note that it does not make sense to try * a migration if there is no master with at least *two* working * slaves. */ - if (orphaned_masters && max_slaves >= 2 && this_slaves == max_slaves) + if (orphaned_masters && max_slaves >= 2 && this_slaves == max_slaves && + server.cluster_allow_replica_migration) clusterHandleSlaveMigration(max_slaves); } diff --git a/src/config.c b/src/config.c index a29f10e1e..0ddb343b0 100644 --- a/src/config.c +++ b/src/config.c @@ -2438,6 +2438,7 @@ standardConfig configs[] = { createBoolConfig("crash-memcheck-enabled", NULL, MODIFIABLE_CONFIG, server.memcheck_enabled, 1, NULL, NULL), createBoolConfig("use-exit-on-panic", NULL, MODIFIABLE_CONFIG, server.use_exit_on_panic, 0, NULL, NULL), createBoolConfig("disable-thp", NULL, MODIFIABLE_CONFIG, server.disable_thp, 1, NULL, NULL), + createBoolConfig("cluster-allow-replica-migration", NULL, MODIFIABLE_CONFIG, server.cluster_allow_replica_migration, 1, NULL, NULL), createBoolConfig("replica-announced", NULL, MODIFIABLE_CONFIG, server.replica_announced, 1, NULL, NULL), /* String Configs */ diff --git a/src/server.h b/src/server.h index 58d2c9418..98f8ea05f 100644 --- a/src/server.h +++ b/src/server.h @@ -1543,6 +1543,7 @@ struct redisServer { char *cluster_configfile; /* Cluster auto-generated config file name. */ struct clusterState *cluster; /* State of the cluster */ int cluster_migration_barrier; /* Cluster replicas migration barrier. */ + int cluster_allow_replica_migration; /* Automatic replica migrations to orphaned masters and from empty masters */ int cluster_slave_validity_factor; /* Slave max data age for failover. */ int cluster_require_full_coverage; /* If true, put the cluster down if there is at least an uncovered slot.*/ diff --git a/tests/cluster/tests/12-replica-migration-2.tcl b/tests/cluster/tests/12-replica-migration-2.tcl index dd18a979a..ea80d81d3 100644 --- a/tests/cluster/tests/12-replica-migration-2.tcl +++ b/tests/cluster/tests/12-replica-migration-2.tcl @@ -29,6 +29,12 @@ test "Each master should have at least two replicas attached" { } } +test "Set allow-replica-migration yes" { + foreach_redis_id id { + R $id CONFIG SET cluster-allow-replica-migration yes + } +} + set master0_id [dict get [get_myself 0] id] test "Resharding all the master #0 slots away from it" { set output [exec \ diff --git a/tests/cluster/tests/12.1-replica-migration-3.tcl b/tests/cluster/tests/12.1-replica-migration-3.tcl new file mode 100644 index 000000000..46a9f79e3 --- /dev/null +++ b/tests/cluster/tests/12.1-replica-migration-3.tcl @@ -0,0 +1,71 @@ +# Replica migration test #2. +# +# Check that if 'cluster-allow-replica-migration' is set to 'no', slaves do not +# migrate when master becomes empty. + +source "../tests/includes/init-tests.tcl" + +# Create a cluster with 5 master and 15 slaves, to make sure there are no +# empty masters and make rebalancing simpler to handle during the test. 
+test "Create a 5 nodes cluster" { + create_cluster 5 15 +} + +test "Cluster is up" { + assert_cluster_state ok +} + +test "Each master should have at least two replicas attached" { + foreach_redis_id id { + if {$id < 5} { + wait_for_condition 1000 50 { + [llength [lindex [R 0 role] 2]] >= 2 + } else { + fail "Master #$id does not have 2 slaves as expected" + } + } + } +} + +test "Set allow-replica-migration no" { + foreach_redis_id id { + R $id CONFIG SET cluster-allow-replica-migration no + } +} + +set master0_id [dict get [get_myself 0] id] +test "Resharding all the master #0 slots away from it" { + set output [exec \ + ../../../src/redis-cli --cluster rebalance \ + 127.0.0.1:[get_instance_attrib redis 0 port] \ + {*}[rediscli_tls_config "../../../tests"] \ + --cluster-weight ${master0_id}=0 >@ stdout ] +} + +test "Wait cluster to be stable" { + wait_for_condition 1000 50 { + [catch {exec ../../../src/redis-cli --cluster \ + check 127.0.0.1:[get_instance_attrib redis 0 port] \ + {*}[rediscli_tls_config "../../../tests"] \ + }] == 0 + } else { + fail "Cluster doesn't stabilize" + } +} + +test "Master #0 stil should have its replicas" { + assert { [llength [lindex [R 0 role] 2]] >= 2 } +} + +test "Each master should have at least two replicas attached" { + foreach_redis_id id { + if {$id < 5} { + wait_for_condition 1000 50 { + [llength [lindex [R 0 role] 2]] >= 2 + } else { + fail "Master #$id does not have 2 slaves as expected" + } + } + } +} +