
Tests in cluster-multiple-meets were flaky, as reported by @madolson:

* https://github.com/valkey-io/valkey/actions/runs/9688455588/job/26776953320
* https://github.com/valkey-io/valkey/actions/runs/9688455588/job/26776953585

I wasn't able to reproduce this locally, but I suspect the flakiness comes from the fact that a node is reported as "connected" as long as there is an outgoing link, and an outgoing link is created before the MEET is sent out.

Signed-off-by: Sankar <1890648+srgsanky@users.noreply.github.com>
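In short, node 1 opens an outgoing link to node 0 just to deliver the MEET, so a one-shot check of node 1's connected-node count can race with that short-lived link. Below is a minimal sketch of the polling pattern the test relies on instead, using the suite's `wait_for_condition`, `get_cluster_nodes`, and `fail` helpers (the same ones used in the file below); the commented-out assert only illustrates the racy form, not the exact previous code.

# Racy form (illustrative): a single snapshot may still observe the transient
# outgoing link that node 1 created only to deliver the MEET packet.
# assert {[llength [get_cluster_nodes 1 connected]] == 1}

# Robust form: poll (up to 1000 attempts, 50 ms apart) until only node 1
# itself is reported as connected, i.e. the transient link is gone.
wait_for_condition 1000 50 {
    [llength [get_cluster_nodes 1 connected]] == 1
} else {
    fail "Node 1 recognizes node 0 even though it drops PONGs from node 0"
}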
# make sure the test infra won't use SELECT
set old_singledb $::singledb
set ::singledb 1

tags {tls:skip external:skip cluster} {
    set base_conf [list cluster-enabled yes]
    start_multiple_servers 2 [list overrides $base_conf] {

        test "Cluster nodes are reachable" {
            for {set id 0} {$id < [llength $::servers]} {incr id} {
                # Every node should be reachable.
                wait_for_condition 1000 50 {
                    ([catch {R $id ping} ping_reply] == 0) &&
                    ($ping_reply eq {PONG})
                } else {
                    catch {R $id ping} err
                    fail "Node #$id keeps replying '$err' to PING."
                }
            }
        }

        test "Before slots allocation, all nodes report cluster failure" {
            wait_for_cluster_state fail
        }

        set CLUSTER_PACKET_TYPE_PONG 1
        set CLUSTER_PACKET_TYPE_NONE -1

        test "Cluster nodes haven't met each other" {
            assert {[llength [get_cluster_nodes 1]] == 1}
            assert {[llength [get_cluster_nodes 0]] == 1}
        }

        test "Allocate slots" {
            cluster_allocate_slots 2 0 ;# primaries replicas
        }

        test "Multiple MEETs from Node 1 to Node 0 should work" {
            # Make node 1 drop the PONG responses to MEET.
            R 1 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_PONG
            # It is important to close the connection on drop, otherwise a subsequent MEET won't be sent.
            R 1 DEBUG CLOSE-CLUSTER-LINK-ON-PACKET-DROP 1

            R 1 CLUSTER MEET 127.0.0.1 [srv 0 port]

            # Wait for at least a few MEETs to be sent, so that we are sure that node 1 is dropping the responses to MEET.
            wait_for_condition 1000 50 {
                [CI 0 cluster_stats_messages_meet_received] > 1 &&
                [CI 1 cluster_state] eq {fail} && [CI 0 cluster_state] eq {ok}
            } else {
                fail "Cluster node 1 never sent multiple MEETs to 0"
            }

            # Node 0 will be connected to node 1, but node 1 won't see that node 0 is connected.
            # Using a wait condition here, as an assert can be flaky - especially
            # when CLUSTER NODES is processed while the link used to send the MEET is being established.
            wait_for_condition 1000 50 {
                [llength [get_cluster_nodes 1 connected]] == 1
            } else {
                fail "Node 1 recognizes node 0 even though it drops PONGs from node 0"
            }
            assert {[llength [get_cluster_nodes 0 connected]] == 2}

            # Drop incoming and outgoing links from/to node 1.
            R 0 DEBUG CLUSTERLINK KILL ALL [R 1 CLUSTER MYID]

            # Wait for node 0 to know about node 1 again after node 1 sends another MEET.
            wait_for_condition 1000 50 {
                [llength [get_cluster_nodes 0 connected]] == 2
            } else {
                fail "Cluster node 1 never sent another MEET to 0 after its links were killed"
            }

            # Undo the packet drop.
            R 1 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_NONE
            R 1 DEBUG CLOSE-CLUSTER-LINK-ON-PACKET-DROP 0

            # Both node 0 and node 1 will turn to cluster state ok.
            wait_for_condition 1000 50 {
                [CI 1 cluster_state] eq {ok} && [CI 0 cluster_state] eq {ok} &&
                [CI 1 cluster_stats_messages_meet_sent] == [CI 0 cluster_stats_messages_meet_received]
            } else {
                fail "1 cluster_state:[CI 1 cluster_state], 0 cluster_state: [CI 0 cluster_state]"
            }
        }
    } ;# stop servers
} ;# tags

set ::singledb $old_singledb