
When there is a link failure while an ongoing MEET request is sent the sending node stops sending anymore MEET and starts sending PINGs. Since every node responds to PINGs from unknown nodes with a PONG, the receiving node never adds the sending node. But the sending node adds the receiving node when it sees a PONG. This can lead to asymmetry in cluster membership. This changes makes the sender keep sending MEET until it sees a PONG, avoiding the asymmetry. --------- Signed-off-by: Sankar <1890648+srgsanky@users.noreply.github.com> Signed-off-by: Ping Xie <pingxie@google.com>
84 lines
3.2 KiB
Tcl
84 lines
3.2 KiB
Tcl
# make sure the test infra won't use SELECT
|
|
set old_singledb $::singledb
|
|
set ::singledb 1
|
|
|
|
tags {tls:skip external:skip cluster} {
|
|
set base_conf [list cluster-enabled yes]
|
|
start_multiple_servers 2 [list overrides $base_conf] {
|
|
test "Cluster nodes are reachable" {
|
|
for {set id 0} {$id < [llength $::servers]} {incr id} {
|
|
# Every node should be reachable.
|
|
wait_for_condition 1000 50 {
|
|
([catch {R $id ping} ping_reply] == 0) &&
|
|
($ping_reply eq {PONG})
|
|
} else {
|
|
catch {R $id ping} err
|
|
fail "Node #$id keeps replying '$err' to PING."
|
|
}
|
|
}
|
|
}
|
|
|
|
test "Before slots allocation, all nodes report cluster failure" {
|
|
wait_for_cluster_state fail
|
|
}
|
|
|
|
set CLUSTER_PACKET_TYPE_PONG 1
|
|
set CLUSTER_PACKET_TYPE_NONE -1
|
|
|
|
test "Cluster nodes haven't met each other" {
|
|
assert {[llength [get_cluster_nodes 1]] == 1}
|
|
assert {[llength [get_cluster_nodes 0]] == 1}
|
|
}
|
|
|
|
test "Allocate slots" {
|
|
cluster_allocate_slots 2 0;# primaries replicas
|
|
}
|
|
|
|
test "Multiple MEETs from Node 1 to Node 0 should work" {
|
|
# Make 1 drop the PONG responses to MEET
|
|
R 1 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_PONG
|
|
# It is important to close the connection on drop, otherwise a subsequent MEET won't be sent
|
|
R 1 DEBUG CLOSE-CLUSTER-LINK-ON-PACKET-DROP 1
|
|
|
|
R 1 CLUSTER MEET 127.0.0.1 [srv 0 port]
|
|
|
|
# Wait for at least a few MEETs to be sent so that we are sure that 1 is dropping the response to MEET.
|
|
wait_for_condition 1000 50 {
|
|
[CI 0 cluster_stats_messages_meet_received] > 1 &&
|
|
[CI 1 cluster_state] eq {fail} && [CI 0 cluster_state] eq {ok}
|
|
} else {
|
|
fail "Cluster node 1 never sent multiple MEETs to 0"
|
|
}
|
|
|
|
# 0 will be connected to 1, but 1 won't see that 0 is connected
|
|
assert {[llength [get_cluster_nodes 1 connected]] == 1}
|
|
assert {[llength [get_cluster_nodes 0 connected]] == 2}
|
|
|
|
# Drop incoming and outgoing links from/to 1
|
|
R 0 DEBUG CLUSTERLINK KILL ALL [R 1 CLUSTER MYID]
|
|
|
|
# Wait for 0 to know about 1 again after 1 sends a MEET
|
|
wait_for_condition 1000 50 {
|
|
[llength [get_cluster_nodes 0 connected]] == 2
|
|
} else {
|
|
fail "Cluster node 1 never sent multiple MEETs to 0"
|
|
}
|
|
|
|
# Undo packet drop
|
|
R 1 DEBUG DROP-CLUSTER-PACKET-FILTER $CLUSTER_PACKET_TYPE_NONE
|
|
R 1 DEBUG CLOSE-CLUSTER-LINK-ON-PACKET-DROP 0
|
|
|
|
# Both a and b will turn to cluster state ok
|
|
wait_for_condition 1000 50 {
|
|
[CI 1 cluster_state] eq {ok} && [CI 0 cluster_state] eq {ok} &&
|
|
[CI 1 cluster_stats_messages_meet_sent] == [CI 0 cluster_stats_messages_meet_received]
|
|
} else {
|
|
fail "1 cluster_state:[CI 1 cluster_state], 0 cluster_state: [CI 0 cluster_state]"
|
|
}
|
|
}
|
|
} ;# stop servers
|
|
} ;# tags
|
|
|
|
set ::singledb $old_singledb
|
|
|