Harden init-tests for cluster tests (#11635)

Attempt to harden cluster init-tests by doing two things:
* Retry up to 3 times to join the cluster. Cluster meet is entirely idempotent, so it should stabilize if we missed a node.
* Validate that the connection is actually established, not just that the node exists in the cluster list. Nodes can exist in handshake state, but might later get dropped.
This commit is contained in:
Madelyn Olson 2022-12-22 17:37:00 -08:00 committed by GitHub
parent 9e1a00d663
commit 7379d22196
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 25 additions and 6 deletions

View File

@ -8,8 +8,9 @@
set ::cluster_master_nodes 0 set ::cluster_master_nodes 0
set ::cluster_replica_nodes 0 set ::cluster_replica_nodes 0
# Returns a parsed CLUSTER NODES output as a list of dictionaries. # Returns a parsed CLUSTER NODES output as a list of dictionaries. Optional status field
proc get_cluster_nodes id { # can be specified to only return entries that match the provided status.
proc get_cluster_nodes {id {status "*"}} {
set lines [split [R $id cluster nodes] "\r\n"] set lines [split [R $id cluster nodes] "\r\n"]
set nodes {} set nodes {}
foreach l $lines { foreach l $lines {
@ -28,7 +29,9 @@ proc get_cluster_nodes id {
linkstate [lindex $args 7] \ linkstate [lindex $args 7] \
slots [lrange $args 8 end] \ slots [lrange $args 8 end] \
] ]
lappend nodes $node if {[string match $status [lindex $args 7]]} {
lappend nodes $node
}
} }
return $nodes return $nodes
} }

View File

@ -48,7 +48,9 @@ test "Cluster nodes hard reset" {
} }
} }
test "Cluster Join and auto-discovery test" { # Helper function to attempt to have each node in a cluster
# meet each other.
proc join_nodes_in_cluster {} {
# Join node 0 with 1, 1 with 2, ... and so forth. # Join node 0 with 1, 1 with 2, ... and so forth.
# If auto-discovery works all nodes will know every other node # If auto-discovery works all nodes will know every other node
# eventually. # eventually.
@ -63,11 +65,25 @@ test "Cluster Join and auto-discovery test" {
foreach_redis_id id { foreach_redis_id id {
wait_for_condition 1000 50 { wait_for_condition 1000 50 {
[llength [get_cluster_nodes $id]] == [llength $ids] [llength [get_cluster_nodes $id connected]] == [llength $ids]
} else { } else {
fail "Cluster failed to join into a full mesh." return 0
} }
} }
return 1
}
test "Cluster Join and auto-discovery test" {
# Use multiple attempts since sometimes nodes time out
# while attempting to connect.
for {set attempts 3} {$attempts > 0} {incr attempts -1} {
if {[join_nodes_in_cluster] == 1} {
break
}
}
if {$attempts == 0} {
fail "Cluster failed to form full mesh"
}
} }
test "Before slots allocation, all nodes report cluster failure" { test "Before slots allocation, all nodes report cluster failure" {