Binbin fdd023ff82
Migrate cluster mode tests to normal framework (#442)
We currently have two disjoint Tcl test frameworks:
1. The normal test framework, triggered by runtest, which individually
launches nodes for testing.
2. The cluster framework, triggered by runtest-cluster, which pre-allocates
N nodes and uses them for testing large configurations.

The normal Tcl test framework is much easier to exercise and is also
run automatically as part of the CI for new PRs. Since runtest-cluster
runs very slowly (it cannot be parallelized), it currently runs only in
the daily CI, so some cluster changes are not exposed in PR CI in time.

This PR migrates the cluster mode tests to the normal framework. Some
cluster tests are kept in runtest-cluster because of timing issues or
missing support; we can process them later.
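
After the migration, the moved tests run through the normal runner like
any other unit, roughly (hypothetical unit name, depending on where the
files land):

    ./runtest --single unit/cluster/cluster-mode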

Signed-off-by: Binbin <binloveplay1314@qq.com>
2024-05-09 10:14:47 +08:00

# Check the basic monitoring and failover capabilities.

# make sure the test infra won't use SELECT
set old_singledb $::singledb
set ::singledb 1
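# (Cluster mode only supports database 0, so SELECT would fail.)
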
tags {tls:skip external:skip cluster} {
set base_conf [list cluster-enabled yes]
start_multiple_servers 5 [list overrides $base_conf] {
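
# Each test file now launches its own five fresh cluster-enabled nodes;
# the R helper sends a command to the node with the given index.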
test "Cluster nodes are reachable" {
for {set id 0} {$id < [llength $::servers]} {incr id} {
# Every node should be reachable.
wait_for_condition 1000 50 {
([catch {R $id ping} ping_reply] == 0) &&
($ping_reply eq {PONG})
} else {
catch {R $id ping} err
fail "Node #$id keeps replying '$err' to PING."
}
}
}
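# join_nodes_in_cluster is the suite's helper for linking the nodes
# (presumably via CLUSTER MEET); it returns 1 once the full mesh forms.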
test "Cluster Join and auto-discovery test" {
# Use multiple attempts since sometimes nodes timeout
# while attempting to connect.
for {set attempts 3} {$attempts > 0} {incr attempts -1} {
if {[join_nodes_in_cluster] == 1} {
break
}
}
if {$attempts == 0} {
fail "Cluster failed to form full mesh"
}
}
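# A node reports cluster_state:fail until all 16384 slots are covered
# (with the default cluster-require-full-coverage yes), so before any
# allocation every node must be failing.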
test "Before slots allocation, all nodes report cluster failure" {
wait_for_cluster_state fail
}
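# Every node generates a random 40-hex-digit node ID on first startup,
# so no two nodes in the cluster should ever share one.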
test "Different nodes have different IDs" {
set ids {}
set numnodes 0
for {set id 0} {$id < [llength $::servers]} {incr id} {
incr numnodes
# Every node should just know itself.
set nodeid [dict get [cluster_get_myself $id] id]
assert {$nodeid ne {}}
lappend ids $nodeid
}
set numids [llength [lsort -unique $ids]]
assert {$numids == $numnodes}
}
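# cluster_allocate_slots 5 0 presumably divides the 16384 slots evenly
# among the 5 masters, configuring 0 of the nodes as replicas.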
test "It is possible to perform slot allocation" {
cluster_allocate_slots 5 0
}
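# Nodes that joined with colliding configEpoch values resolve the
# conflict by having one of them bump itself to a fresh epoch; the loop
# below polls for up to ~60 seconds until every node observes unique
# epochs cluster-wide.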
test "After the join, every node gets a different config epoch" {
set trynum 60
while {[incr trynum -1] != 0} {
# We check that this condition is true for *all* the nodes.
set ok 1 ; # Will be set to 0 every time a node is not ok.
for {set id 0} {$id < [llength $::servers]} {incr id} {
set epochs {}
foreach n [get_cluster_nodes $id] {
lappend epochs [dict get $n config_epoch]
}
if {[lsort $epochs] != [lsort -unique $epochs]} {
set ok 0 ; # At least one collision!
}
}
if {$ok} break
after 1000
puts -nonewline .
flush stdout
}
if {$trynum == 0} {
fail "Config epoch conflict resolution is not working."
}
}
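# With all slots allocated and epochs settled, cluster_state should
# flip from fail to ok on every node.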
test "Nodes should report cluster_state is ok now" {
wait_for_cluster_state ok
}
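# No keys have been written yet, so counting keys in slot 0 must
# return zero.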
test "Sanity for CLUSTER COUNTKEYSINSLOT" {
set reply [R 0 CLUSTER COUNTKEYSINSLOT 0]
assert {$reply eq 0}
}
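# cluster_write_test presumably writes keys hashing to a spread of
# slots through the given port (following MOVED redirects) and reads
# them back.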
test "It is possible to write and read from the cluster" {
cluster_write_test [srv 0 port]
}
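# CLUSTER RESET defaults to SOFT. A soft reset drops knowledge of other
# nodes and slot mappings but keeps the node ID and currentEpoch (a HARD
# reset would zero the epoch and generate a new ID), hence the epoch
# assertions. The FLUSHALL is needed first: RESET is refused on a master
# that still holds keys.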
test "CLUSTER RESET SOFT test" {
set last_epoch_node0 [CI 0 cluster_current_epoch]
R 0 FLUSHALL
R 0 CLUSTER RESET
assert {[CI 0 cluster_current_epoch] eq $last_epoch_node0}
set last_epoch_node1 [CI 1 cluster_current_epoch]
R 1 FLUSHALL
R 1 CLUSTER RESET SOFT
assert {[CI 1 cluster_current_epoch] eq $last_epoch_node1}
}
test "Coverage: CLUSTER HELP" {
assert_match "*CLUSTER <subcommand> *" [R 0 CLUSTER HELP]
}
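# ASKING merely sets a flag on the current connection (allowing the next
# command to target an importing slot), so it replies OK unconditionally.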
test "Coverage: ASKING" {
assert_equal {OK} [R 0 ASKING]
}
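# CLUSTER REPLICAS is the newer spelling of CLUSTER SLAVES; with no
# replicas configured, both must return an empty array.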
test "CLUSTER SLAVES and CLUSTER REPLICAS with zero replicas" {
assert_equal {} [R 0 cluster slaves [R 0 CLUSTER MYID]]
assert_equal {} [R 0 cluster replicas [R 0 CLUSTER MYID]]
}
} ;# stop servers
} ;# tags

set ::singledb $old_singledb