From 45ae39af04bd2533c3ce5eebd7101b3baaedfa9e Mon Sep 17 00:00:00 2001 From: Binbin Date: Fri, 6 Sep 2024 13:19:50 +0800 Subject: [PATCH] Fix missing replication link re-connection when primary's IP/port is updated in `clusterProcessGossipSection` (#965) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `clusterProcessGossipSection` currently doesn't trigger a check and call `replicationSetPrimary` when `myself`'s primary node’s IP/port is updated. This fix ensures that after every node address update, `replicationSetPrimary` is called if the updated node is `myself`'s primary. This prevents missed updates and ensures that replicas reconnect properly to maintain their replication link with the primary. Signed-off-by: Madelyn Olson --- src/cluster.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/cluster.c b/src/cluster.c index cebe4b9c8..207828450 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -2219,6 +2219,23 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) { node->tls_port = msg_tls_port; node->cport = ntohs(g->cport); node->flags &= ~CLUSTER_NODE_NOADDR; + + serverLog(LL_NOTICE,"Address updated for node %.40s (%s), now %s:%d", + node->name, node->human_nodename, node->ip, getNodeDefaultClientPort(node)); + + /* Check if this is our primary and we have to change the + * replication target as well. + * + * This is needed in case the check in nodeUpdateAddressIfNeeded + * failed due to a race condition. For example, if the replica just + * received a packet from another node that contains the new address + * of the primary, we will update the primary node's address here. + * When the replica later receives the packet from the primary, the check + * in nodeUpdateAddressIfNeeded will fail since the address has been + * updated correctly, and we will not have the opportunity to call + * replicationSetMaster and update the primary host. 
*/ + if (nodeIsSlave(myself) && myself->slaveof == node) + replicationSetMaster(node->ip, getNodeDefaultReplicationPort(node)); } } else { /* If it's not in NOADDR state and we don't have it, we