Add cluster-allow-replica-migration option. (#5285)

Previously (and by default after commit) when master loose its last slot (due to migration, for example), its replicas will migrate to new last slot holder. There are cases where this is not desired: * Consolidation that results with removed nodes (including the replica, eventually). * Manually configured cluster topologies, which the admin wishes to preserve. Needlessly migrating a replica triggers a full synchronization and can have a negative impact, so we prefer to be able to avoid it where possible. This commit adds 'cluster-allow-replica-migration' configuration option that is enabled by default to preserve existed behavior. When disabled, replicas will not be auto-migrated. Fixes #4896 Co-authored-by: Oran Agra <oran@redislabs.com>
2021-04-04 09:43:24 +03:00 · 2021-04-04 09:43:24 +03:00 · 1cab962098
commit 1cab962098
parent 1eb85249e7
6 changed files with 106 additions and 5 deletions
--- a/redis.conf
+++ b/redis.conf
@ -1456,12 +1456,21 @@ lua-time-limit 5000
 # master in your cluster.
 #
 # Default is 1 (replicas migrate only if their masters remain with at least
-# one replica). To disable migration just set it to a very large value.
+# one replica). To disable migration just set it to a very large value or
+# set cluster-allow-replica-migration to 'no'.
 # A value of 0 can be set but is useful only for debugging and dangerous
 # in production.
 #
 # cluster-migration-barrier 1

+# Turning off this option allows to use less automatic cluster configuration.
+# It both disables migration to orphaned masters and migration from masters
+# that became empty.
+#
+# Default is 'yes' (allow automatic migrations).
+#
+# cluster-allow-replica-migration yes
+
 # By default Redis Cluster nodes stop accepting queries if they detect there
 # is at least a hash slot uncovered (no available node is serving it).
 # This way if the cluster is partially down (for example a range of hash slots
--- a/src/cluster.c
+++ b/src/cluster.c
@ -1633,7 +1633,7 @@ void clusterSetNodeAsMaster(clusterNode *n) {
 * case we receive the info via an UPDATE packet. */
 void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoch, unsigned char *slots) {
    int j;
-    clusterNode *curmaster, *newmaster = NULL;
+    clusterNode *curmaster = NULL, *newmaster = NULL;
    /* The dirty slots list is a list of slots for which we lose the ownership
     * while having still keys inside. This usually happens after a failover
     * or after a manual cluster reconfiguration operated by the admin.
@ -1644,6 +1644,12 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
    uint16_t dirty_slots[CLUSTER_SLOTS];
    int dirty_slots_count = 0;

+    /* We should detect if sender is new master of our shard.
+     * We will know it if all our slots were migrated to sender, and sender
+     * has no slots except ours */
+    int sender_slots = 0;
+    int migrated_our_slots = 0;
+
    /* Here we set curmaster to this node or the node this node
     * replicates to if it's a slave. In the for loop we are
     * interested to check if slots are taken away from curmaster. */
@ -1656,6 +1662,8 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc

    for (j = 0; j < CLUSTER_SLOTS; j++) {
        if (bitmapTestBit(slots,j)) {
+            sender_slots++;
+
            /* The slot is already bound to the sender of this message. */
            if (server.cluster->slots[j] == sender) continue;

@ -1682,8 +1690,10 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
                    dirty_slots_count++;
                }

-                if (server.cluster->slots[j] == curmaster)
+                if (server.cluster->slots[j] == curmaster) {
                    newmaster = sender;
+                    migrated_our_slots++;
+                }
                clusterDelSlot(j);
                clusterAddSlot(sender,j);
                clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
@ -1706,7 +1716,9 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
     *    master.
     * 2) We are a slave and our master is left without slots. We need
     *    to replicate to the new slots owner. */
-    if (newmaster && curmaster->numslots == 0) {
+    if (newmaster && curmaster->numslots == 0 &&
+            (server.cluster_allow_replica_migration ||
+             sender_slots == migrated_our_slots)) {
        serverLog(LL_WARNING,
            "Configuration change detected. Reconfiguring myself "
            "as a replica of %.40s", sender->name);
@ -3735,7 +3747,8 @@ void clusterCron(void) {
         * the orphaned masters. Note that it does not make sense to try
         * a migration if there is no master with at least *two* working
         * slaves. */
-        if (orphaned_masters && max_slaves >= 2 && this_slaves == max_slaves)
+        if (orphaned_masters && max_slaves >= 2 && this_slaves == max_slaves &&
+		server.cluster_allow_replica_migration)
            clusterHandleSlaveMigration(max_slaves);
    }

--- a/src/config.c
+++ b/src/config.c
@ -2438,6 +2438,7 @@ standardConfig configs[] = {
    createBoolConfig("crash-memcheck-enabled", NULL, MODIFIABLE_CONFIG, server.memcheck_enabled, 1, NULL, NULL),
    createBoolConfig("use-exit-on-panic", NULL, MODIFIABLE_CONFIG, server.use_exit_on_panic, 0, NULL, NULL),
    createBoolConfig("disable-thp", NULL, MODIFIABLE_CONFIG, server.disable_thp, 1, NULL, NULL),
+    createBoolConfig("cluster-allow-replica-migration", NULL, MODIFIABLE_CONFIG, server.cluster_allow_replica_migration, 1, NULL, NULL),
    createBoolConfig("replica-announced", NULL, MODIFIABLE_CONFIG, server.replica_announced, 1, NULL, NULL),

    /* String Configs */
--- a/src/server.h
+++ b/src/server.h
@ -1543,6 +1543,7 @@ struct redisServer {
    char *cluster_configfile; /* Cluster auto-generated config file name. */
    struct clusterState *cluster;  /* State of the cluster */
    int cluster_migration_barrier; /* Cluster replicas migration barrier. */
+    int cluster_allow_replica_migration; /* Automatic replica migrations to orphaned masters and from empty masters */
    int cluster_slave_validity_factor; /* Slave max data age for failover. */
    int cluster_require_full_coverage; /* If true, put the cluster down if
                                          there is at least an uncovered slot.*/
--- a/tests/cluster/tests/12-replica-migration-2.tcl
+++ b/tests/cluster/tests/12-replica-migration-2.tcl
@ -29,6 +29,12 @@ test "Each master should have at least two replicas attached" {
    }
 }

+test "Set allow-replica-migration yes" {
+    foreach_redis_id id {
+        R $id CONFIG SET cluster-allow-replica-migration yes
+    }
+}
+
 set master0_id [dict get [get_myself 0] id]
 test "Resharding all the master #0 slots away from it" {
    set output [exec \
--- a/tests/cluster/tests/12.1-replica-migration-3.tcl
+++ b/tests/cluster/tests/12.1-replica-migration-3.tcl
@ -0,0 +1,71 @@
+# Replica migration test #2.
+#
+# Check that if 'cluster-allow-replica-migration' is set to 'no', slaves do not
+# migrate when master becomes empty.
+
+source "../tests/includes/init-tests.tcl"
+
+# Create a cluster with 5 master and 15 slaves, to make sure there are no
+# empty masters and make rebalancing simpler to handle during the test.
+test "Create a 5 nodes cluster" {
+    create_cluster 5 15
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Each master should have at least two replicas attached" {
+    foreach_redis_id id {
+        if {$id < 5} {
+            wait_for_condition 1000 50 {
+                [llength [lindex [R 0 role] 2]] >= 2
+            } else {
+                fail "Master #$id does not have 2 slaves as expected"
+            }
+        }
+    }
+}
+
+test "Set allow-replica-migration no" {
+    foreach_redis_id id {
+        R $id CONFIG SET cluster-allow-replica-migration no
+    }
+}
+
+set master0_id [dict get [get_myself 0] id]
+test "Resharding all the master #0 slots away from it" {
+    set output [exec \
+        ../../../src/redis-cli --cluster rebalance \
+        127.0.0.1:[get_instance_attrib redis 0 port] \
+        {*}[rediscli_tls_config "../../../tests"] \
+        --cluster-weight ${master0_id}=0 >@ stdout ]
+}
+
+test "Wait cluster to be stable" {
+    wait_for_condition 1000 50 {
+        [catch {exec ../../../src/redis-cli --cluster \
+            check 127.0.0.1:[get_instance_attrib redis 0 port] \
+            {*}[rediscli_tls_config "../../../tests"] \
+            }] == 0
+    } else {
+        fail "Cluster doesn't stabilize"
+    }
+}
+
+test "Master #0 stil should have its replicas" {
+    assert { [llength [lindex [R 0 role] 2]] >= 2 }
+}
+
+test "Each master should have at least two replicas attached" {
+    foreach_redis_id id {
+        if {$id < 5} {
+            wait_for_condition 1000 50 {
+                [llength [lindex [R 0 role] 2]] >= 2
+            } else {
+                fail "Master #$id does not have 2 slaves as expected"
+            }
+        }
+    }
+}
+