Harden init-tests for cluster tests (#11635)

Attempt to harden cluster init-tests by doing two things:
* Retry up to 3 times to join the cluster. Cluster meet is entirely idempotent, so it should stabilize if we missed a node.
* Validate that the connection is actually established, not just that the node exists in the cluster list. Nodes can exist in the handshake state but might later get dropped.
This commit is contained in:
Madelyn Olson 2022-12-22 17:37:00 -08:00 committed by GitHub
parent 9e1a00d663
commit 7379d22196
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 25 additions and 6 deletions

View File

@ -8,8 +8,9 @@
set ::cluster_master_nodes 0
set ::cluster_replica_nodes 0
# Returns a parsed CLUSTER NODES output as a list of dictionaries.
proc get_cluster_nodes id {
# Returns a parsed CLUSTER NODES output as a list of dictionaries. Optional status field
# can be specified to only return entries that match the provided status.
proc get_cluster_nodes {id {status "*"}} {
set lines [split [R $id cluster nodes] "\r\n"]
set nodes {}
foreach l $lines {
@ -28,7 +29,9 @@ proc get_cluster_nodes id {
linkstate [lindex $args 7] \
slots [lrange $args 8 end] \
]
lappend nodes $node
if {[string match $status [lindex $args 7]]} {
lappend nodes $node
}
}
return $nodes
}

View File

@ -48,7 +48,9 @@ test "Cluster nodes hard reset" {
}
}
test "Cluster Join and auto-discovery test" {
# Helper function to attempt to have each node in a cluster
# meet each other.
proc join_nodes_in_cluster {} {
# Join node 0 with 1, 1 with 2, ... and so forth.
# If auto-discovery works all nodes will know every other node
# eventually.
@ -63,11 +65,25 @@ test "Cluster Join and auto-discovery test" {
foreach_redis_id id {
wait_for_condition 1000 50 {
[llength [get_cluster_nodes $id]] == [llength $ids]
[llength [get_cluster_nodes $id connected]] == [llength $ids]
} else {
fail "Cluster failed to join into a full mesh."
return 0
}
}
return 1
}
test "Cluster Join and auto-discovery test" {
    # Nodes occasionally time out while connecting, so the join
    # procedure is given up to three chances before giving up.
    # NOTE(review): join_nodes_in_cluster invokes `fail` ahead of its
    # `return 0`; if `fail` raises an error, the retry below is never
    # reached -- confirm against the definition of `fail`.
    set attempts 3
    while {$attempts > 0} {
        if {[join_nodes_in_cluster] == 1} {
            break
        }
        incr attempts -1
    }
    # The counter only reaches zero when no attempt reported success.
    if {$attempts == 0} {
        fail "Cluster failed to form full mesh"
    }
}
test "Before slots allocation, all nodes report cluster failure" {