From 1cab962098a61adc06b1455b93d65e315b7f64e4 Mon Sep 17 00:00:00 2001
From: Sokolov Yura <funny.falcon@gmail.com>
Date: Sun, 4 Apr 2021 09:43:24 +0300
Subject: [PATCH] Add cluster-allow-replica-migration option. (#5285)

Previously (and by default after commit) when master loose its last slot
(due to migration, for example), its replicas will migrate to new last slot
holder.

There are cases where this is not desired:
* Consolidation that results with removed nodes (including the replica, eventually).
* Manually configured cluster topologies, which the admin wishes to preserve.

Needlessly migrating a replica triggers a full synchronization and can have a negative impact, so
we prefer to be able to avoid it where possible.

This commit adds 'cluster-allow-replica-migration' configuration option that is
enabled by default to preserve existed behavior. When disabled, replicas will
not be auto-migrated.

Fixes #4896

Co-authored-by: Oran Agra <oran@redislabs.com>
---
 redis.conf                                    | 11 ++-
 src/cluster.c                                 | 21 ++++--
 src/config.c                                  |  1 +
 src/server.h                                  |  1 +
 .../cluster/tests/12-replica-migration-2.tcl  |  6 ++
 .../tests/12.1-replica-migration-3.tcl        | 71 +++++++++++++++++++
 6 files changed, 106 insertions(+), 5 deletions(-)
 create mode 100644 tests/cluster/tests/12.1-replica-migration-3.tcl

diff --git a/redis.conf b/redis.conf
index 2663cfce6..e16fc4bbb 100644
--- a/redis.conf
+++ b/redis.conf
@@ -1456,12 +1456,21 @@ lua-time-limit 5000
 # master in your cluster.
 #
 # Default is 1 (replicas migrate only if their masters remain with at least
-# one replica). To disable migration just set it to a very large value.
+# one replica). To disable migration just set it to a very large value or
+# set cluster-allow-replica-migration to 'no'.
 # A value of 0 can be set but is useful only for debugging and dangerous
 # in production.
 #
 # cluster-migration-barrier 1
 
+# Turning off this option allows to use less automatic cluster configuration.
+# It both disables migration to orphaned masters and migration from masters
+# that became empty.
+#
+# Default is 'yes' (allow automatic migrations).
+#
+# cluster-allow-replica-migration yes
+
 # By default Redis Cluster nodes stop accepting queries if they detect there
 # is at least a hash slot uncovered (no available node is serving it).
 # This way if the cluster is partially down (for example a range of hash slots
diff --git a/src/cluster.c b/src/cluster.c
index 76cfb74b9..d28c7541c 100644
--- a/src/cluster.c
+++ b/src/cluster.c
@@ -1633,7 +1633,7 @@ void clusterSetNodeAsMaster(clusterNode *n) {
  * case we receive the info via an UPDATE packet. */
 void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoch, unsigned char *slots) {
     int j;
-    clusterNode *curmaster, *newmaster = NULL;
+    clusterNode *curmaster = NULL, *newmaster = NULL;
     /* The dirty slots list is a list of slots for which we lose the ownership
      * while having still keys inside. This usually happens after a failover
      * or after a manual cluster reconfiguration operated by the admin.
@@ -1644,6 +1644,12 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
     uint16_t dirty_slots[CLUSTER_SLOTS];
     int dirty_slots_count = 0;
 
+    /* We should detect if sender is new master of our shard.
+     * We will know it if all our slots were migrated to sender, and sender
+     * has no slots except ours */
+    int sender_slots = 0;
+    int migrated_our_slots = 0;
+
     /* Here we set curmaster to this node or the node this node
      * replicates to if it's a slave. In the for loop we are
      * interested to check if slots are taken away from curmaster. */
@@ -1656,6 +1662,8 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
 
     for (j = 0; j < CLUSTER_SLOTS; j++) {
         if (bitmapTestBit(slots,j)) {
+            sender_slots++;
+
             /* The slot is already bound to the sender of this message. */
             if (server.cluster->slots[j] == sender) continue;
 
@@ -1682,8 +1690,10 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
                     dirty_slots_count++;
                 }
 
-                if (server.cluster->slots[j] == curmaster)
+                if (server.cluster->slots[j] == curmaster) {
                     newmaster = sender;
+                    migrated_our_slots++;
+                }
                 clusterDelSlot(j);
                 clusterAddSlot(sender,j);
                 clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
@@ -1706,7 +1716,9 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
      *    master.
      * 2) We are a slave and our master is left without slots. We need
      *    to replicate to the new slots owner. */
-    if (newmaster && curmaster->numslots == 0) {
+    if (newmaster && curmaster->numslots == 0 &&
+            (server.cluster_allow_replica_migration ||
+             sender_slots == migrated_our_slots)) {
         serverLog(LL_WARNING,
             "Configuration change detected. Reconfiguring myself "
             "as a replica of %.40s", sender->name);
@@ -3735,7 +3747,8 @@ void clusterCron(void) {
          * the orphaned masters. Note that it does not make sense to try
          * a migration if there is no master with at least *two* working
          * slaves. */
-        if (orphaned_masters && max_slaves >= 2 && this_slaves == max_slaves)
+        if (orphaned_masters && max_slaves >= 2 && this_slaves == max_slaves &&
+		server.cluster_allow_replica_migration)
             clusterHandleSlaveMigration(max_slaves);
     }
 
diff --git a/src/config.c b/src/config.c
index a29f10e1e..0ddb343b0 100644
--- a/src/config.c
+++ b/src/config.c
@@ -2438,6 +2438,7 @@ standardConfig configs[] = {
     createBoolConfig("crash-memcheck-enabled", NULL, MODIFIABLE_CONFIG, server.memcheck_enabled, 1, NULL, NULL),
     createBoolConfig("use-exit-on-panic", NULL, MODIFIABLE_CONFIG, server.use_exit_on_panic, 0, NULL, NULL),
     createBoolConfig("disable-thp", NULL, MODIFIABLE_CONFIG, server.disable_thp, 1, NULL, NULL),
+    createBoolConfig("cluster-allow-replica-migration", NULL, MODIFIABLE_CONFIG, server.cluster_allow_replica_migration, 1, NULL, NULL),
     createBoolConfig("replica-announced", NULL, MODIFIABLE_CONFIG, server.replica_announced, 1, NULL, NULL),
 
     /* String Configs */
diff --git a/src/server.h b/src/server.h
index 58d2c9418..98f8ea05f 100644
--- a/src/server.h
+++ b/src/server.h
@@ -1543,6 +1543,7 @@ struct redisServer {
     char *cluster_configfile; /* Cluster auto-generated config file name. */
     struct clusterState *cluster;  /* State of the cluster */
     int cluster_migration_barrier; /* Cluster replicas migration barrier. */
+    int cluster_allow_replica_migration; /* Automatic replica migrations to orphaned masters and from empty masters */
     int cluster_slave_validity_factor; /* Slave max data age for failover. */
     int cluster_require_full_coverage; /* If true, put the cluster down if
                                           there is at least an uncovered slot.*/
diff --git a/tests/cluster/tests/12-replica-migration-2.tcl b/tests/cluster/tests/12-replica-migration-2.tcl
index dd18a979a..ea80d81d3 100644
--- a/tests/cluster/tests/12-replica-migration-2.tcl
+++ b/tests/cluster/tests/12-replica-migration-2.tcl
@@ -29,6 +29,12 @@ test "Each master should have at least two replicas attached" {
     }
 }
 
+test "Set allow-replica-migration yes" {
+    foreach_redis_id id {
+        R $id CONFIG SET cluster-allow-replica-migration yes
+    }
+}
+
 set master0_id [dict get [get_myself 0] id]
 test "Resharding all the master #0 slots away from it" {
     set output [exec \
diff --git a/tests/cluster/tests/12.1-replica-migration-3.tcl b/tests/cluster/tests/12.1-replica-migration-3.tcl
new file mode 100644
index 000000000..46a9f79e3
--- /dev/null
+++ b/tests/cluster/tests/12.1-replica-migration-3.tcl
@@ -0,0 +1,71 @@
+# Replica migration test #2.
+#
+# Check that if 'cluster-allow-replica-migration' is set to 'no', slaves do not
+# migrate when master becomes empty.
+
+source "../tests/includes/init-tests.tcl"
+
+# Create a cluster with 5 master and 15 slaves, to make sure there are no
+# empty masters and make rebalancing simpler to handle during the test.
+test "Create a 5 nodes cluster" {
+    create_cluster 5 15
+}
+
+test "Cluster is up" {
+    assert_cluster_state ok
+}
+
+test "Each master should have at least two replicas attached" {
+    foreach_redis_id id {
+        if {$id < 5} {
+            wait_for_condition 1000 50 {
+                [llength [lindex [R 0 role] 2]] >= 2
+            } else {
+                fail "Master #$id does not have 2 slaves as expected"
+            }
+        }
+    }
+}
+
+test "Set allow-replica-migration no" {
+    foreach_redis_id id {
+        R $id CONFIG SET cluster-allow-replica-migration no
+    }
+}
+
+set master0_id [dict get [get_myself 0] id]
+test "Resharding all the master #0 slots away from it" {
+    set output [exec \
+        ../../../src/redis-cli --cluster rebalance \
+        127.0.0.1:[get_instance_attrib redis 0 port] \
+        {*}[rediscli_tls_config "../../../tests"] \
+        --cluster-weight ${master0_id}=0 >@ stdout ]
+}
+
+test "Wait cluster to be stable" {
+    wait_for_condition 1000 50 {
+        [catch {exec ../../../src/redis-cli --cluster \
+            check 127.0.0.1:[get_instance_attrib redis 0 port] \
+            {*}[rediscli_tls_config "../../../tests"] \
+            }] == 0
+    } else {
+        fail "Cluster doesn't stabilize"
+    }
+}
+
+test "Master #0 stil should have its replicas" {
+    assert { [llength [lindex [R 0 role] 2]] >= 2 }
+}
+
+test "Each master should have at least two replicas attached" {
+    foreach_redis_id id {
+        if {$id < 5} {
+            wait_for_condition 1000 50 {
+                [llength [lindex [R 0 role] 2]] >= 2
+            } else {
+                fail "Master #$id does not have 2 slaves as expected"
+            }
+        }
+    }
+}
+