From 5a2f4a1e94dac4c74717bea2e3ba2a4697c3ddb6 Mon Sep 17 00:00:00 2001 From: Binbin <binloveplay1314@qq.com> Date: Sun, 7 Jan 2024 12:24:41 +0800 Subject: [PATCH] Use shard-id of the master if the replica does not support shard-id (#12805) If there are nodes in the cluster that do not support shard-id, they will not gossip a shard-id. From the perspective of nodes that support shard-id, the shard-id recorded for such a node is meaningless (since shard-id is randomly generated when we create a node). Nodes that support shard-id will save the shard-id information in nodes.conf. If the node is restarted according to nodes.conf, the server will report a corrupted cluster config file error, because auxShardIdSetter will reject configurations with inconsistent master-replica shard-ids. A cluster-wide consensus for the node's shard_id is not necessary. The key is maintaining consistency of the shard_id on each individual 7.2 node. As the cluster progressively upgrades to version 7.2, we can expect the shard_ids across all nodes to naturally converge and align. In this PR, when processing the gossip, if the sender is a replica and does not support shard-id, set its shard_id to the shard_id of its master. (cherry picked from commit 4cae66f5e803c527c4e6141c06b94670162eca2c) --- src/cluster.c | 20 +++++++++++++++++++- src/cluster.h | 1 + 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/cluster.c b/src/cluster.c index a752ad1bf..e4b2d0f53 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -2675,11 +2675,24 @@ void clusterProcessPingExtensions(clusterMsg *hdr, clusterLink *link) { /* We know this will be valid since we validated it ahead of time */ ext = getNextPingExt(ext); } + /* If the node did not send us a hostname extension, assume * they don't have an announced hostname. Otherwise, we'll * set it now. 
*/ updateAnnouncedHostname(sender, ext_hostname); updateAnnouncedHumanNodename(sender, ext_humannodename); + + /* If the node did not send us a shard-id extension, it means the sender + * does not support it (old version), node->shard_id is randomly generated. + * A cluster-wide consensus for the node's shard_id is not necessary. + * The key is maintaining consistency of the shard_id on each individual 7.2 node. + * As the cluster progressively upgrades to version 7.2, we can expect the shard_ids + * across all nodes to naturally converge and align. + * + * If sender is a replica, set the shard_id to the shard_id of its master. + * Otherwise, we'll set it now. */ + if (ext_shardid == NULL) ext_shardid = clusterNodeGetMaster(sender)->shard_id; + updateShardId(sender, ext_shardid); } @@ -5710,7 +5723,7 @@ void addShardReplyForClusterShards(client *c, list *nodes) { addReplyBulkCString(c, "slots"); /* Use slot_info_pairs from the primary only */ - while (n->slaveof != NULL) n = n->slaveof; + n = clusterNodeGetMaster(n); if (n->slot_info_pairs != NULL) { serverAssert((n->slot_info_pairs_count % 2) == 0); @@ -7644,6 +7657,11 @@ unsigned int countKeysInSlot(unsigned int hashslot) { return (*server.db->slots_to_keys).by_slot[hashslot].count; } +clusterNode *clusterNodeGetMaster(clusterNode *node) { + while (node->slaveof != NULL) node = node->slaveof; + return node; +} + /* ----------------------------------------------------------------------------- * Operation(s) on channel rax tree. 
* -------------------------------------------------------------------------- */ diff --git a/src/cluster.h b/src/cluster.h index 21c9c4499..db05ebcab 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -413,6 +413,7 @@ void clusterInit(void); void clusterInitListeners(void); void clusterCron(void); void clusterBeforeSleep(void); +clusterNode *clusterNodeGetMaster(clusterNode *node); clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, int *ask); int verifyClusterNodeId(const char *name, int length); clusterNode *clusterLookupNode(const char *name, int length);