
We currently have two disjoint Tcl test frameworks:

1. The normal testing framework, triggered by runtest, which launches nodes individually for each test.
2. The cluster framework, triggered by runtest-cluster, which pre-allocates N nodes and reuses them to test large configurations.

The normal framework is much easier to run and is executed automatically as part of the CI for new PRs. Since runtest-cluster runs very slowly (it cannot be parallelized), it currently only runs in the daily CI, so some cluster changes are not exercised in PR CI in time. This PR migrates the Cluster mode tests to the normal framework. A few cluster tests are kept in runtest-cluster because of timing issues or features that are not yet supported; we can handle them later.

Signed-off-by: Binbin <binloveplay1314@qq.com>
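For reference, a migrated test is self-contained: start_cluster spins up the requested masters and replicas by itself, so the file runs under runtest (and therefore in PR CI) without the pre-allocated runtest-cluster nodes. A minimal sketch is shown below; the 2 masters / 2 replicas shape is only illustrative and not taken from this PR.

```tcl
# Minimal sketch of a test under the normal framework (illustrative shape).
start_cluster 2 2 {tags {external:skip cluster}} {
    test "Cluster is up" {
        wait_for_cluster_state ok
    }
}
```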
# Slave selection test
# Check the algorithm trying to pick the slave with the most complete history.
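# (During an automatic failover, replicas rank themselves by replication
# offset; the replica with the most complete history starts its election
# first and is therefore expected to win.)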

# Create a cluster with 5 masters and 10 slaves, so that we have 2
# slaves for each master.
start_cluster 5 10 {tags {external:skip cluster}} {

test "Cluster is up" {
    wait_for_cluster_state ok
}
test "The first master has actually two slaves" {
|
|
wait_for_condition 1000 50 {
|
|
[llength [lindex [R 0 role] 2]] == 2
|
|
&& [llength [R 0 cluster replicas [R 0 CLUSTER MYID]]] == 2
|
|
} else {
|
|
fail "replicas didn't connect"
|
|
}
|
|
}
|
|
|
|
test "CLUSTER SLAVES and CLUSTER REPLICAS output is consistent" {
|
|
# Because we already have command output that cover CLUSTER REPLICAS elsewhere,
|
|
# here we simply judge whether their output is consistent to cover CLUSTER SLAVES.
|
|
set myid [R 0 CLUSTER MYID]
|
|
R 0 multi
|
|
R 0 cluster slaves $myid
|
|
R 0 cluster replicas $myid
|
|
lassign [R 0 exec] res res2
|
|
assert_equal $res $res2
|
|
}
|
|
|
|
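
# For a replica, the ROLE reply is {slave <master-ip> <master-port> <state>
# <offset>}, so index 2 below is the port of the master it replicates.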
test {Slaves of #0 are instance #5 and #10 as expected} {
    set port0 [srv 0 port]
    assert {[lindex [R 5 role] 2] == $port0}
    assert {[lindex [R 10 role] 2] == $port0}
}

test "Instance #5 and #10 synced with the master" {
    wait_for_condition 1000 50 {
        [s -5 master_link_status] eq {up} &&
        [s -10 master_link_status] eq {up}
    } else {
        fail "Instance #5 or #10 master link status is not up"
    }
}
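
# A cluster-aware client that hashes each key to its slot and sends the
# command to the node serving that slot.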
set cluster [valkey_cluster 127.0.0.1:[srv 0 port]]
test "Slaves are both able to receive and acknowledge writes" {
|
|
for {set j 0} {$j < 100} {incr j} {
|
|
$cluster set $j $j
|
|
}
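    # WAIT 2 60000 returns the number of replicas that acknowledged the
    # writes within the 60 second timeout; both replicas must have acked.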
    assert {[R 0 wait 2 60000] == 2}
}

set paused_pid [srv 0 pid]
test "Write data while slave #10 is paused and can't receive it" {
    # Stop the slave with a multi/exec transaction so that the connection
    # from the master is killed as soon as the slave can process commands
    # again.
    R 10 multi
    R 10 debug sleep 10
    R 10 client kill 127.0.0.1:$port0
    R 10 deferred 1
    R 10 exec
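    # EXEC is sent in deferred mode so the test is not blocked for the
    # 10 seconds of DEBUG SLEEP; the reply is collected below with [R 10 read].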

    # Write some data the slave can't receive.
    for {set j 0} {$j < 100} {incr j} {
        $cluster set $j $j
    }

    # Prevent the master from accepting new slaves.
    # Use a large pause value since we'll stop it anyway.
    R 0 CLIENT PAUSE 60000

    # Wait for the slave to be available again.
    R 10 deferred 0
    assert {[R 10 read] eq {OK OK}}

    # Pause the master so that a reconnection will not be possible.
    pause_process $paused_pid
}
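
# Instance #10 missed the last 100 writes while it was blocked, so its
# replication offset is behind #5; #5 has the most complete history and is
# expected to win the failover election.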
test "Wait for instance #5 (and not #10) to turn into a master" {
|
|
wait_for_condition 1000 50 {
|
|
[s -5 role] eq {master}
|
|
} else {
|
|
fail "No failover detected"
|
|
}
|
|
}
|
|
|
|
test "Wait for the node #10 to return alive before ending the test" {
|
|
R 10 ping
|
|
}
|
|
|
|
test "Cluster should eventually be up again" {
|
|
for {set j 0} {$j < [llength $::servers]} {incr j} {
|
|
if {[process_is_paused $paused_pid]} continue
|
|
wait_for_condition 1000 50 {
|
|
[CI $j cluster_state] eq "ok"
|
|
} else {
|
|
fail "Cluster node $j cluster_state:[CI $j cluster_state]"
|
|
}
|
|
}
|
|
}
|
|
|
|
test "Node #10 should eventually replicate node #5" {
|
|
set port5 [srv -5 port]
|
|
wait_for_condition 1000 50 {
|
|
([lindex [R 10 role] 2] == $port5) &&
|
|
([lindex [R 10 role] 3] eq {connected})
|
|
} else {
|
|
fail "#10 didn't became slave of #5"
|
|
}
|
|
}
|
|
|
|
} ;# start_cluster
|
|
|
|

# Create a cluster with 3 masters and 15 slaves, so that we have 5
# slaves for each master.
start_cluster 3 15 {tags {external:skip cluster}} {

test "Cluster is up" {
    wait_for_cluster_state ok
}
test "The first master has actually 5 slaves" {
|
|
wait_for_condition 1000 50 {
|
|
[llength [lindex [R 0 role] 2]] == 5
|
|
} else {
|
|
fail "replicas didn't connect"
|
|
}
|
|
}
|
|
|
|

test {Slaves of #0 are instance #3, #6, #9, #12 and #15 as expected} {
    set port0 [srv 0 port]
    assert {[lindex [R 3 role] 2] == $port0}
    assert {[lindex [R 6 role] 2] == $port0}
    assert {[lindex [R 9 role] 2] == $port0}
    assert {[lindex [R 12 role] 2] == $port0}
    assert {[lindex [R 15 role] 2] == $port0}
}

test {Instance #3, #6, #9, #12 and #15 synced with the master} {
    wait_for_condition 1000 50 {
        [s -3 master_link_status] eq {up} &&
        [s -6 master_link_status] eq {up} &&
        [s -9 master_link_status] eq {up} &&
        [s -12 master_link_status] eq {up} &&
        [s -15 master_link_status] eq {up}
    } else {
        fail "Instance #3 or #6 or #9 or #12 or #15 master link status is not up"
    }
}
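
# Return true as soon as any of the given instances reports the master role.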
proc master_detected {instances} {
    foreach instance [dict keys $instances] {
        if {[s -$instance role] eq {master}} {
            return true
        }
    }

    return false
}
test "New Master down consecutively" {
|
|
set instances [dict create 0 1 3 1 6 1 9 1 12 1 15 1]
|
|
|
|
set loops [expr {[dict size $instances]-1}]
|
|
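    # On each iteration take down the current master of this shard and wait
    # for one of the remaining replicas to take over; loops is size-1 so one
    # instance is always left alive.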
    for {set i 0} {$i < $loops} {incr i} {
        set master_id -1
        foreach instance [dict keys $instances] {
            if {[s -$instance role] eq {master}} {
                set master_id $instance
                break
            }
        }

        if {$master_id eq -1} {
            fail "no master detected, #loop $i"
        }

        set instances [dict remove $instances $master_id]

        set paused_pid [srv [expr $master_id * -1] pid]
        pause_process $paused_pid
        wait_for_condition 1000 50 {
            [master_detected $instances]
        } else {
            fail "No failover detected when master $master_id fails"
        }

        for {set j 0} {$j < [llength $::servers]} {incr j} {
            # Skip paused nodes, they cannot report their cluster state.
            if {[process_is_paused [srv [expr -$j] pid]]} continue
            wait_for_condition 1000 50 {
                [CI $j cluster_state] eq "ok"
            } else {
                fail "Cluster node $j cluster_state:[CI $j cluster_state]"
            }
        }
    }
}

} ;# start_cluster