From 9b51949abe3616148437a914eab49c1a6a53c599 Mon Sep 17 00:00:00 2001 From: Binbin Date: Fri, 6 Sep 2024 13:19:50 +0800 Subject: [PATCH] Fix missing replication link re-connection when primary's IP/port is updated in `clusterProcessGossipSection` (#965) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `clusterProcessGossipSection` currently doesn't trigger a check and call `replicationSetPrimary` when `myself`'s primary node’s IP/port is updated. This fix ensures that after every node address update, `replicationSetPrimary` is called if the updated node is `myself`'s primary. This prevents missed updates and ensures that replicas reconnect properly to maintain their replication link with the primary. --- src/cluster_legacy.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 21bdd0991..beba1c2b5 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -2275,6 +2275,23 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) { node->tls_port = msg_tls_port; node->cport = ntohs(g->cport); node->flags &= ~CLUSTER_NODE_NOADDR; + + serverLog(LL_NOTICE, "Address updated for node %.40s (%s), now %s:%d", node->name, node->human_nodename, + node->ip, getNodeDefaultClientPort(node)); + + /* Check if this is our primary and we have to change the + * replication target as well. + * + * This is needed in case the check in nodeUpdateAddressIfNeeded + * failed due to a race condition. For example, if the replica just + * received a packet from another node that contains new address + * about the primary, we will update primary node address in here, + * when the replica receive the packet from the primary, the check + * in nodeUpdateAddressIfNeeded will fail since the address has been + * updated correctly, and we will not have the opportunity to call + * replicationSetPrimary and update the primary host. */ + if (nodeIsReplica(myself) && myself->replicaof == node) + replicationSetPrimary(node->ip, getNodeDefaultReplicationPort(node), 0); } } else if (!node) { /* If it's not in NOADDR state and we don't have it, we