futriix/tests/unit/cluster/manual-takeover.tcl
Binbin fdd023ff82
Migrate cluster mode tests to normal framework (#442)
We currently have two disjoint TCL frameworks:
1. Normal testing framework, which is triggered by runtest and individually
launches nodes for testing.
2. Cluster framework, which is triggered by runtest-cluster and pre-allocates
N nodes, using them for testing large configurations.

The normal TCL testing framework is much more readily tested and is also
automatically run as part of the CI for new PRs. Because runtest-cluster
runs very slowly (it cannot be parallelized), it currently only runs in the
daily CI. As a result, some changes to the cluster are not exposed in PR CI
in time.

This PR migrates the cluster mode tests to the normal framework. Some cluster
tests are kept in runtest-cluster because of timing issues or features that
are not yet supported; we can process them later.

Signed-off-by: Binbin <binloveplay1314@qq.com>
2024-05-09 10:14:47 +08:00

91 lines
2.4 KiB
Tcl

# Manual takeover test
#
# Scenario exercised by this file:
#   1. Pause instances #0, #1 and #2 (a majority of the primaries).
#   2. Check that every instance that is still running reports
#      cluster_state "fail".
#   3. Run CLUSTER FAILOVER TAKEOVER on instances #5, #6 and #7.
#   4. Check that every running instance reports cluster_state "ok" again,
#      that the cluster accepts writes, and that #5, #6, #7 are masters.
#   5. Resume the paused instances and check that they become slaves.
#
# Throughout the file, instance #N is addressed as `R N` / `CI N` and as
# `srv -N` / `s -N`.
start_cluster 5 5 {tags {external:skip cluster}} {

    test "Cluster is up" {
        wait_for_cluster_state ok
    }

    test "Cluster is writable" {
        cluster_write_test [srv -1 port]
    }

    # For this test, disable replica failover until
    # all of the primaries are confirmed killed. Otherwise
    # there might be enough time to elect a replica.
    set replica_ids { 5 6 7 }
    foreach id $replica_ids {
        R $id config set cluster-replica-no-failover yes
    }

    # Remember the pids up front: they are needed to resume the processes
    # later, at a point where the instances themselves cannot be queried.
    set paused_pid [srv 0 pid]
    set paused_pid1 [srv -1 pid]
    set paused_pid2 [srv -2 pid]
    test "Killing majority of master nodes" {
        pause_process $paused_pid
        pause_process $paused_pid1
        pause_process $paused_pid2
    }

    # The primaries are paused now, so replica failover can be re-enabled.
    foreach id $replica_ids {
        R $id config set cluster-replica-no-failover no
    }

    test "Cluster should eventually be down" {
        for {set j 0} {$j < [llength $::servers]} {incr j} {
            # Skip only the instance being inspected when *it* is paused: a
            # paused process cannot answer CLUSTER INFO. Checking the three
            # fixed paused pids here instead would skip every iteration and
            # turn this test into a no-op.
            if {[process_is_paused [srv [expr {-1 * $j}] pid]]} continue
            wait_for_condition 1000 50 {
                [CI $j cluster_state] eq "fail"
            } else {
                fail "Cluster node $j cluster_state:[CI $j cluster_state]"
            }
        }
    }

    test "Use takeover to bring slaves back" {
        foreach id $replica_ids {
            R $id cluster failover takeover
        }
    }

    test "Cluster should eventually be up again" {
        for {set j 0} {$j < [llength $::servers]} {incr j} {
            # Same per-instance liveness check as in the "down" test above.
            if {[process_is_paused [srv [expr {-1 * $j}] pid]]} continue
            wait_for_condition 1000 50 {
                [CI $j cluster_state] eq "ok"
            } else {
                fail "Cluster node $j cluster_state:[CI $j cluster_state]"
            }
        }
    }

    test "Cluster is writable" {
        # Instance #4 is not one of the paused instances, so it can be used
        # as the entry point for the write test.
        cluster_write_test [srv -4 port]
    }

    test "Instance #5, #6, #7 are now masters" {
        assert {[s -5 role] eq {master}}
        assert {[s -6 role] eq {master}}
        assert {[s -7 role] eq {master}}
    }

    test "Restarting the previously killed master nodes" {
        resume_process $paused_pid
        resume_process $paused_pid1
        resume_process $paused_pid2
    }

    test "Instance #0, #1, #2 gets converted into a slaves" {
        wait_for_condition 1000 50 {
            [s 0 role] eq {slave} && [s -1 role] eq {slave} && [s -2 role] eq {slave}
        } else {
            fail "Old masters not converted into slaves"
        }
    }

} ;# start_cluster