diff --git a/src/cluster.c b/src/cluster.c index 1be4de2c9..a9b1b9a64 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -43,6 +43,12 @@ #include #include +/* Only primaries that own slots have voting rights. + * Returns 1 if the node has voting rights, otherwise returns 0. */ +static inline int clusterNodeIsVotingPrimary(clusterNode *n) { + return (n->flags & CLUSTER_NODE_MASTER) && n->numslots; +} + /* A global reference to myself is handy to make code more clear. * Myself always points to server.cluster->myself, that is, the clusterNode * that represents this node. */ @@ -1961,8 +1967,8 @@ void markNodeAsFailingIfNeeded(clusterNode *node) { if (nodeFailed(node)) return; /* Already FAILing. */ failures = clusterNodeFailureReportsCount(node); - /* Also count myself as a voter if I'm a master. */ - if (nodeIsMaster(myself)) failures++; + /* Also count myself as a voter if I'm a voting primary. */ + if (clusterNodeIsVotingPrimary(myself)) failures++; if (failures < needed_quorum) return; /* No weak agreement from masters. */ serverLog(LL_NOTICE, @@ -2005,7 +2011,7 @@ void clearNodeFailureIfNeeded(clusterNode *node) { * 1) The FAIL state is old enough. * 2) It is yet serving slots from our point of view (not failed over). * Apparently no one is going to fix these slots, clear the FAIL flag. */ - if (nodeIsMaster(node) && node->numslots > 0 && + if (clusterNodeIsVotingPrimary(node) && (now - node->fail_time) > (server.cluster_node_timeout * CLUSTER_FAIL_UNDO_TIME_MULT)) { @@ -2154,8 +2160,8 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) { node = clusterLookupNode(g->nodename, CLUSTER_NAMELEN); if (node) { /* We already know this node. - Handle failure reports, only when the sender is a master. */ - if (sender && nodeIsMaster(sender) && node != myself) { + Handle failure reports, only when the sender is a voting primary. */ + if (sender && clusterNodeIsVotingPrimary(sender) && node != myself) { if (flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) { if (clusterNodeAddFailureReport(node,sender)) { serverLog(LL_VERBOSE, @@ -3198,10 +3204,10 @@ int clusterProcessPacket(clusterLink *link) { clusterSendFailoverAuthIfNeeded(sender,hdr); } else if (type == CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK) { if (!sender) return 1; /* We don't know that node. */ - /* We consider this vote only if the sender is a master serving + /* We consider this vote only if the sender is a primary serving * a non zero number of slots, and its currentEpoch is greater or * equal to epoch where this node started the election. */ - if (nodeIsMaster(sender) && sender->numslots > 0 && + if (clusterNodeIsVotingPrimary(sender) && senderCurrentEpoch >= server.cluster->failover_auth_epoch) { server.cluster->failover_auth_count++; @@ -4803,7 +4809,7 @@ void clusterCron(void) { node->flags |= CLUSTER_NODE_PFAIL; update_state = 1; } - if (nodeIsMaster(myself) && server.cluster->size == 1) { + if (clusterNodeIsVotingPrimary(myself) && server.cluster->size == 1) { markNodeAsFailingIfNeeded(node); } else { serverLog(LL_DEBUG,"*** NODE %.40s possibly failing", node->name); @@ -5081,7 +5087,7 @@ void clusterUpdateState(void) { while((de = dictNext(di)) != NULL) { clusterNode *node = dictGetVal(de); - if (nodeIsMaster(node) && node->numslots) { + if (clusterNodeIsVotingPrimary(node)) { server.cluster->size++; if ((node->flags & (CLUSTER_NODE_FAIL|CLUSTER_NODE_PFAIL)) == 0) reachable_masters++; diff --git a/tests/unit/cluster/failure-marking.tcl b/tests/unit/cluster/failure-marking.tcl index c4746c826..cfed7fff0 100644 --- a/tests/unit/cluster/failure-marking.tcl +++ b/tests/unit/cluster/failure-marking.tcl @@ -16,6 +16,8 @@ start_cluster 1 1 {tags {external:skip cluster}} { pause_process $replica1_pid wait_node_marked_fail 0 $replica1_instance_id + + resume_process $replica1_pid } } @@ -49,5 +51,71 @@ start_cluster 2 1 {tags {external:skip cluster}} { resume_process $primary2_pid wait_node_marked_fail 0 $replica1_instance_id + + resume_process $replica1_pid } } + +set old_singledb $::singledb +set ::singledb 1 + +tags {external:skip tls:skip cluster} { + set base_conf [list cluster-enabled yes cluster-ping-interval 100 cluster-node-timeout 3000 save ""] + start_multiple_servers 5 [list overrides $base_conf] { + test "Only primary with slots has the right to mark a node as failed" { + set primary_host [srv 0 host] + set primary_port [srv 0 port] + set primary_pid [srv 0 pid] + set primary_id [R 0 CLUSTER MYID] + set replica_id [R 1 CLUSTER MYID] + set replica_pid [srv -1 pid] + + # Meet others nodes. + R 1 CLUSTER MEET $primary_host $primary_port + R 2 CLUSTER MEET $primary_host $primary_port + R 3 CLUSTER MEET $primary_host $primary_port + R 4 CLUSTER MEET $primary_host $primary_port + + # Build a single primary cluster. + cluster_allocate_slots 1 1 + wait_for_cluster_propagation + R 1 CLUSTER REPLICATE $primary_id + wait_for_cluster_propagation + wait_for_cluster_state "ok" + + # Pause the primary, marking the primary as pfail. + pause_process $primary_pid + wait_node_marked_pfail 1 $primary_id + wait_node_marked_pfail 2 $primary_id + wait_node_marked_pfail 3 $primary_id + wait_node_marked_pfail 4 $primary_id + + # Pause the replica, marking the replica as pfail. + pause_process $replica_pid + wait_node_marked_pfail 2 $replica_id + wait_node_marked_pfail 3 $replica_id + wait_node_marked_pfail 4 $replica_id + + # Resume the primary, marking the replica as fail. + resume_process $primary_pid + wait_node_marked_fail 0 $replica_id + wait_node_marked_fail 2 $replica_id + wait_node_marked_fail 3 $replica_id + wait_node_marked_fail 4 $replica_id + + # Check if we got the right failure reports. + wait_for_condition 1000 50 { + [R 0 CLUSTER COUNT-FAILURE-REPORTS $replica_id] == 0 && + [R 2 CLUSTER COUNT-FAILURE-REPORTS $replica_id] == 1 && + [R 3 CLUSTER COUNT-FAILURE-REPORTS $replica_id] == 1 && + [R 4 CLUSTER COUNT-FAILURE-REPORTS $replica_id] == 1 + } else { + fail "Cluster COUNT-FAILURE-REPORTS is not right." + } + + resume_process $replica_pid + } + } +} + +set ::singledb $old_singledb