
Add `meet_sent` field in `clusterNode` indicating the last time we sent a MEET packet. Use this field to only (re-)send a MEET packet once every handshake timeout period when detecting a node without an inbound link. When receiving multiple MEET packets on the same link while the node is in handshake state, instead of dropping the packet, we now simply prevent the creation of a new node. This way we still process the MEET packet's gossip and reply with a PONG as any other packets. Improve some logging messages to include `human_nodename`. Add `nodeExceedsHandshakeTimeout()` function. This is a follow-up to this previous PR: https://github.com/valkey-io/valkey/pull/1307 And a partial fix to the crash described in: https://github.com/valkey-io/valkey/pull/1436 --------- Signed-off-by: Pierre Turin <pieturin@amazon.com>
279 lines
12 KiB
Tcl
279 lines
12 KiB
Tcl
# make sure the test infra won't use SELECT
|
|
set old_singledb $::singledb
|
|
set ::singledb 1
|
|
|
|
tags {tls:skip external:skip cluster} {
|
|
set CLUSTER_PACKET_TYPE_PING 0
|
|
set CLUSTER_PACKET_TYPE_PONG 1
|
|
set CLUSTER_PACKET_TYPE_MEET 2
|
|
set CLUSTER_PACKET_TYPE_NONE -1
|
|
set CLUSTER_PACKET_TYPE_ALL -2
|
|
|
|
set base_conf [list cluster-enabled yes]
|
|
start_multiple_servers 2 [list overrides $base_conf] {
|
|
test "Cluster nodes are reachable" {
|
|
for {set id 0} {$id < [llength $::servers]} {incr id} {
|
|
# Every node should be reachable.
|
|
wait_for_condition 1000 50 {
|
|
([catch {R $id ping} ping_reply] == 0) &&
|
|
($ping_reply eq {PONG})
|
|
} else {
|
|
catch {R $id ping} err
|
|
fail "Node #$id keeps replying '$err' to PING."
|
|
}
|
|
}
|
|
}
|
|
|
|
test "Before slots allocation, all nodes report cluster failure" {
|
|
wait_for_cluster_state fail
|
|
}
|
|
|
|
test "Cluster nodes haven't met each other" {
|
|
assert {[llength [get_cluster_nodes 1]] == 1}
|
|
assert {[llength [get_cluster_nodes 0]] == 1}
|
|
}
|
|
|
|
test "Allocate slots" {
|
|
cluster_allocate_slots 2 0
|
|
}
|
|
|
|
test "MEET is reliable when target drops the initial MEETs" {
|
|
# Make 0 drop the initial MEET messages due to link failure
|
|
R 0 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_MEET
|
|
R 0 DEBUG CLOSE-CLUSTER-LINK-ON-PACKET-DROP 1
|
|
|
|
R 1 CLUSTER MEET 127.0.0.1 [srv 0 port]
|
|
|
|
# Wait for at least a few MEETs to be sent so that we are sure that 0 is
|
|
# dropping them.
|
|
wait_for_condition 1000 50 {
|
|
[CI 0 cluster_stats_messages_meet_received] >= 3
|
|
} else {
|
|
fail "Cluster node 1 never sent multiple MEETs to 0"
|
|
}
|
|
|
|
# Make sure the nodes still don't know about each other
|
|
# Using a wait condition here as an assert can be flaky - especially
|
|
# when cluster nodes is processed when the link is established to send MEET.
|
|
wait_for_condition 1000 50 {
|
|
[llength [get_cluster_nodes 1 connected]] == 1
|
|
} else {
|
|
fail "Node 1 recognizes node 0 even though node 0 drops MEETs from node 1"
|
|
}
|
|
assert {[llength [get_cluster_nodes 0 connected]] == 1}
|
|
|
|
R 0 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_NONE
|
|
|
|
# If the MEET is reliable, both a and b will turn to cluster state ok
|
|
wait_for_condition 1000 50 {
|
|
[CI 1 cluster_state] eq {ok} && [CI 0 cluster_state] eq {ok} &&
|
|
[CI 0 cluster_stats_messages_meet_received] >= 4 &&
|
|
[CI 1 cluster_stats_messages_meet_sent] == [CI 0 cluster_stats_messages_meet_received]
|
|
} else {
|
|
fail "Unexpected cluster state: node 1 cluster_state:[CI 1 cluster_state], node 0 cluster_state: [CI 0 cluster_state]"
|
|
}
|
|
}
|
|
} ;# stop servers
|
|
} ;# tags
|
|
|
|
set ::singledb $old_singledb
|
|
|
|
proc cluster_get_first_node_in_handshake id {
|
|
set nodes [get_cluster_nodes $id]
|
|
foreach n $nodes {
|
|
if {[cluster_has_flag $n handshake]} {
|
|
return [dict get $n id]
|
|
}
|
|
}
|
|
return {}
|
|
}
|
|
|
|
proc cluster_nodes_all_know_each_other {num_nodes} {
|
|
# Collect node IDs dynamically
|
|
set node_ids {}
|
|
for {set i 0} {$i < $num_nodes} {incr i} {
|
|
lappend node_ids [dict get [get_myself $i] id]
|
|
}
|
|
|
|
# Check if all nodes know each other
|
|
foreach node_id $node_ids {
|
|
foreach check_node_id $node_ids {
|
|
for {set node_index 0} {$node_index < $num_nodes} {incr node_index} {
|
|
if {[cluster_get_node_by_id $node_index $check_node_id] == {}} {
|
|
return 0
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
# Verify cluster link counts for each node
|
|
set expected_links [expr {2 * ($num_nodes - 1)}]
|
|
for {set i 0} {$i < $num_nodes} {incr i} {
|
|
if {[llength [R $i CLUSTER LINKS]] != $expected_links} {
|
|
return 0
|
|
}
|
|
}
|
|
|
|
return 1
|
|
}
|
|
|
|
start_cluster 2 0 {tags {external:skip cluster} overrides {cluster-node-timeout 4000 cluster-replica-no-failover yes}} {
|
|
set CLUSTER_PACKET_TYPE_PING 0
|
|
set CLUSTER_PACKET_TYPE_PONG 1
|
|
set CLUSTER_PACKET_TYPE_MEET 2
|
|
set CLUSTER_PACKET_TYPE_NONE -1
|
|
set CLUSTER_PACKET_TYPE_ALL -2
|
|
|
|
test "Handshake eventually succeeds after node handshake timeout on both sides with inconsistent view of the cluster" {
|
|
set cluster_port [find_available_port $::baseport $::portcount]
|
|
start_server [list overrides [list cluster-enabled yes cluster-node-timeout 4000 cluster-port $cluster_port]] {
|
|
# In this test we will trigger a handshake timeout on both sides of the handshake.
|
|
# Node 1 and 2 already know each other, then we make node 1 meet node 0:
|
|
#
|
|
# Node 1 -- MEET -> Node 0 [Node 0 might learn about Node 2 from the gossip section of the msg]
|
|
# Node 1 <- PONG -- Node 0 [we drop this message, so Node 1 will eventually mark the handshake as timed out]
|
|
# Node 1 <- PING -- Node 0 [we drop this message, so Node 1 will never send a PONG and Node 0 will eventually mark the handshake as timed out]
|
|
#
|
|
# After the handshake is timed out, we allow all cluster bus messages to go through.
|
|
# Eventually Node 0 should send a MEET packet to the other nodes to complete the handshake.
|
|
|
|
set node0_id [dict get [get_myself 0] id]
|
|
set node1_id [dict get [get_myself 1] id]
|
|
set node2_id [dict get [get_myself 2] id]
|
|
|
|
# Drop all cluster bus messages
|
|
R 1 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_ALL
|
|
# Drop MEET cluster bus messages, so that Node 0 cannot start a handshake with Node 2.
|
|
R 2 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_MEET
|
|
|
|
R 1 CLUSTER MEET [srv 0 host] [srv 0 port] $cluster_port
|
|
|
|
# Wait for Node 0 to be in handshake
|
|
wait_for_condition 10 400 {
|
|
[cluster_get_first_node_in_handshake 0] != {}
|
|
} else {
|
|
fail "Node 0 never entered handshake state"
|
|
}
|
|
|
|
# We want Node 0 to learn about Node 2 through the gossip section of the MEET message
|
|
set meet_retry 0
|
|
while {[cluster_get_node_by_id 0 $node2_id] eq {}} {
|
|
if {$meet_retry == 10} {
|
|
error "assertion: Retried to meet Node 0 too many times"
|
|
}
|
|
# If Node 0 doesn't know about Node 1 & 2, it means Node 1 did not gossip about node 2 in its MEET message.
|
|
# So we kill the outbound link from Node 1 to Node 0, to force a reconnect and a re-send of the MEET message.
|
|
after 100
|
|
# Since we are in handshake, we use a randomly generated ID we have to find
|
|
R 1 DEBUG CLUSTERLINK KILL ALL [cluster_get_first_node_in_handshake 1]
|
|
incr meet_retry 1
|
|
}
|
|
|
|
# Wait for Node 1's handshake to timeout
|
|
wait_for_condition 50 100 {
|
|
[cluster_get_first_node_in_handshake 1] eq {}
|
|
} else {
|
|
fail "Node 1 never exited handshake state"
|
|
}
|
|
|
|
# Wait for Node 0's handshake to timeout
|
|
wait_for_condition 50 100 {
|
|
[cluster_get_first_node_in_handshake 0] eq {}
|
|
} else {
|
|
fail "Node 0 never exited handshake state"
|
|
}
|
|
|
|
# At this point Node 0 knows Node 2 through the gossip, but Node 1 & 2 don't know Node 0.
|
|
wait_for_condition 50 100 {
|
|
[cluster_get_node_by_id 0 $node2_id] != {} &&
|
|
[cluster_get_node_by_id 1 $node0_id] eq {} &&
|
|
[cluster_get_node_by_id 2 $node0_id] eq {}
|
|
} else {
|
|
fail "Unexpected CLUSTER NODES output, nodes 1 & 2 should not know node 0."
|
|
}
|
|
|
|
# Allow all messages to go through again
|
|
R 1 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_NONE
|
|
R 2 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_NONE
|
|
|
|
# Now Node 0 will send a MEET packet to Node 1 & 2 since it has an outbound link to these nodes but no inbound link.
|
|
# Handshake should now complete successfully.
|
|
wait_for_condition 50 200 {
|
|
[cluster_nodes_all_know_each_other 3]
|
|
} else {
|
|
fail "Unexpected CLUSTER NODES output, all nodes should know each other."
|
|
}
|
|
} ;# stop Node 0
|
|
} ;# test
|
|
} ;# stop cluster
|
|
|
|
start_cluster 2 0 {tags {external:skip cluster} overrides {cluster-node-timeout 4000 cluster-replica-no-failover yes}} {
|
|
set CLUSTER_PACKET_TYPE_PING 0
|
|
set CLUSTER_PACKET_TYPE_PONG 1
|
|
set CLUSTER_PACKET_TYPE_MEET 2
|
|
set CLUSTER_PACKET_TYPE_NONE -1
|
|
set CLUSTER_PACKET_TYPE_ALL -2
|
|
|
|
test "Handshake eventually succeeds after node handshake timeout on one side with inconsistent view of the cluster" {
|
|
set cluster_port [find_available_port $::baseport $::portcount]
|
|
start_server [list overrides [list cluster-enabled yes cluster-node-timeout 4000 cluster-port $cluster_port]] {
|
|
# In this test we will trigger a handshake timeout on one side of the handshake.
|
|
# Node 1 and 2 already know each other, then we make node 0 meet node 1:
|
|
#
|
|
# Node 0 -- MEET -> Node 1
|
|
# Node 0 <- PONG -- Node 1
|
|
# Node 0 <- PING -- Node 1 [Node 0 will mark the handshake as successful]
|
|
# Node 0 -- PONG -> Node 1 [we drop this message, so node 1 will eventually mark the handshake as timed out]
|
|
#
|
|
# After the handshake is timed out, we allow all cluster bus messages to go through.
|
|
# Eventually Node 0 should send a MEET packet to the other nodes to complete the handshake.
|
|
|
|
set node0_id [dict get [get_myself 0] id]
|
|
set node1_id [dict get [get_myself 1] id]
|
|
set node2_id [dict get [get_myself 2] id]
|
|
|
|
# Drop PONG messages
|
|
R 1 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_PONG
|
|
# Drop MEET cluster bus messages, so that Node 0 cannot start a handshake with Node 2.
|
|
R 2 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_MEET
|
|
|
|
# Node 0 meets node 1
|
|
R 0 CLUSTER MEET [srv -1 host] [srv -1 port]
|
|
|
|
# Wait for node 0 to know about the other nodes in the cluster
|
|
wait_for_condition 50 100 {
|
|
[cluster_get_node_by_id 0 $node1_id] != {}
|
|
} else {
|
|
fail "Node 0 never learned about node 1"
|
|
}
|
|
# At this point, node 0 knows about node 1 and might know node 2 if node 1 gossiped about it.
|
|
wait_for_condition 50 100 {
|
|
[cluster_get_first_node_in_handshake 0] eq {}
|
|
} else {
|
|
fail "Node 1 never exited handshake state"
|
|
}
|
|
# At this point, from node 0 point of view, the handshake with node 1 succeeded.
|
|
|
|
wait_for_condition 50 100 {
|
|
[cluster_get_first_node_in_handshake 1] eq {}
|
|
} else {
|
|
fail "Node 1 never exited handshake state"
|
|
}
|
|
assert {[cluster_get_node_by_id 1 $node0_id] eq {}}
|
|
# At this point, from node 1 point of view, the handshake with node 0 timed out.
|
|
|
|
# Allow all messages
|
|
R 1 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_NONE
|
|
R 2 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_NONE
|
|
|
|
# Now Node 0 will send a MEET packet to Node 1 & 2 since it has an outbound link to these nodes but no inblound link.
|
|
# Handshake should now complete successfully.
|
|
wait_for_condition 50 200 {
|
|
[cluster_nodes_all_know_each_other 3]
|
|
} else {
|
|
fail "Unexpected CLUSTER NODES output, all nodes should know each other."
|
|
}
|
|
} ;# stop Node 0
|
|
} ;# test
|
|
} ;# stop cluster
|