From 7379d2219620d809943f32a5d26137a077bbae37 Mon Sep 17 00:00:00 2001 From: Madelyn Olson <34459052+madolson@users.noreply.github.com> Date: Thu, 22 Dec 2022 17:37:00 -0800 Subject: [PATCH] Harden init-tests for cluster tests (#11635) Attempt to harden cluster init-tests by doing two things: * Retry up to 3 times to join the cluster. Cluster meet is entirely idempotent, so it should stabilize if we missed a node. * Validate that the connection is actually established, not just that the node exists in the cluster list. Nodes can exist in handshake, but might later get dropped. --- tests/cluster/cluster.tcl | 9 ++++++--- tests/cluster/tests/includes/init-tests.tcl | 22 ++++++++++++++++++--- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/tests/cluster/cluster.tcl b/tests/cluster/cluster.tcl index e86d70324..7b5f0f951 100644 --- a/tests/cluster/cluster.tcl +++ b/tests/cluster/cluster.tcl @@ -8,8 +8,9 @@ set ::cluster_master_nodes 0 set ::cluster_replica_nodes 0 -# Returns a parsed CLUSTER NODES output as a list of dictionaries. -proc get_cluster_nodes id { +# Returns a parsed CLUSTER NODES output as a list of dictionaries. Optional status field +# can be specified to only return entries that match the provided status. +proc get_cluster_nodes {id {status "*"}} { set lines [split [R $id cluster nodes] "\r\n"] set nodes {} foreach l $lines { @@ -28,7 +29,9 @@ proc get_cluster_nodes id { linkstate [lindex $args 7] \ slots [lrange $args 8 end] \ ] - lappend nodes $node + if {[string match $status [lindex $args 7]]} { + lappend nodes $node + } } return $nodes } diff --git a/tests/cluster/tests/includes/init-tests.tcl b/tests/cluster/tests/includes/init-tests.tcl index fc5897a1a..4875a0106 100644 --- a/tests/cluster/tests/includes/init-tests.tcl +++ b/tests/cluster/tests/includes/init-tests.tcl @@ -48,7 +48,9 @@ test "Cluster nodes hard reset" { } } -test "Cluster Join and auto-discovery test" { +# Helper function to attempt to have each node in a cluster +# meet each other.
+proc join_nodes_in_cluster {} { # Join node 0 with 1, 1 with 2, ... and so forth. # If auto-discovery works all nodes will know every other node # eventually. @@ -63,11 +65,25 @@ test "Cluster Join and auto-discovery test" { foreach_redis_id id { wait_for_condition 1000 50 { - [llength [get_cluster_nodes $id]] == [llength $ids] + [llength [get_cluster_nodes $id connected]] == [llength $ids] } else { - fail "Cluster failed to join into a full mesh." + return 0 } } + return 1 +} + +test "Cluster Join and auto-discovery test" { + # Use multiple attempts since sometimes nodes time out + # while attempting to connect. + for {set attempts 3} {$attempts > 0} {incr attempts -1} { + if {[join_nodes_in_cluster] == 1} { + break + } + } + if {$attempts == 0} { + fail "Cluster failed to form full mesh" + } } test "Before slots allocation, all nodes report cluster failure" {