# Slave selection test
# Check the algorithm that tries to pick the slave with the most complete
# history.

# Create a cluster with 5 masters and 10 slaves, so that we have 2
# slaves for each master.
start_cluster 5 10 {tags {external:skip cluster}} {
test "Cluster is up" {
|
|
wait_for_cluster_state ok
|
|
}
|
|
|
|
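
# ROLE on a master returns {master <repl-offset> <replica-list>}, so
# [lindex [R 0 role] 2] below is the list of connected replicas.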
test "The first master has actually two slaves" {
|
|
wait_for_condition 1000 50 {
|
|
[llength [lindex [R 0 role] 2]] == 2
|
|
&& [llength [R 0 cluster replicas [R 0 CLUSTER MYID]]] == 2
|
|
} else {
|
|
fail "replicas didn't connect"
|
|
}
|
|
}
|
|
|
|
test "CLUSTER SLAVES and CLUSTER REPLICAS output is consistent" {
|
|
# Because we already have command output that cover CLUSTER REPLICAS elsewhere,
|
|
# here we simply judge whether their output is consistent to cover CLUSTER SLAVES.
|
|
set myid [R 0 CLUSTER MYID]
|
|
R 0 multi
|
|
R 0 cluster slaves $myid
|
|
R 0 cluster replicas $myid
|
|
lassign [R 0 exec] res res2
|
|
assert_equal $res $res2
|
|
}
|
|
|
|
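
# For a replica, ROLE returns {slave <masterhost> <masterport> <state>
# <offset>}, so index 2 below is the replicating master's port.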
test {Slaves of #0 are instance #5 and #10 as expected} {
    set port0 [srv 0 port]
    assert {[lindex [R 5 role] 2] == $port0}
    assert {[lindex [R 10 role] 2] == $port0}
}
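
# s -N reads an INFO field from instance #N, the same node addressed by
# R N elsewhere in this file.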
test "Instance #5 and #10 synced with the master" {
|
|
wait_for_condition 1000 50 {
|
|
[s -5 master_link_status] eq {up} &&
|
|
[s -10 master_link_status] eq {up}
|
|
} else {
|
|
fail "Instance #5 or #10 master link status is not up"
|
|
}
|
|
}
|
|
|
|
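
# Cluster-aware client: writes issued through $cluster are routed to the
# master that owns each key's hash slot.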
set cluster [valkey_cluster 127.0.0.1:[srv 0 port]]
test "Slaves are both able to receive and acknowledge writes" {
|
|
for {set j 0} {$j < 100} {incr j} {
|
|
$cluster set $j $j
|
|
}
|
|
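    # WAIT blocks until the given number of replicas acknowledge all prior
    # writes on this node (or the timeout fires) and returns the number of
    # replicas that acked; both of master #0's replicas must ack.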
    assert {[R 0 wait 2 60000] == 2}
}

set paused_pid [srv 0 pid]
test "Write data while slave #10 is paused and can't receive it" {
|
|
# Stop the slave with a multi/exec transaction so that the master will
|
|
# be killed as soon as it can accept writes again.
|
|
R 10 multi
|
|
R 10 debug sleep 10
|
|
R 10 client kill 127.0.0.1:$port0
|
|
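    # Switch the connection to deferred mode so that EXEC does not block
    # the test for the duration of DEBUG SLEEP; its reply is read later.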
    R 10 deferred 1
    R 10 exec

    # Write some data the slave can't receive.
    for {set j 0} {$j < 100} {incr j} {
        $cluster set $j $j
    }

    # Prevent the master from accepting new slaves.
    # Use a large pause value since we'll stop the process anyway.
    R 0 CLIENT PAUSE 60000

    # Wait for the slave to become available again: the deferred EXEC
    # reply is {OK OK}, one OK each for DEBUG SLEEP and CLIENT KILL.
    R 10 deferred 0
    assert {[R 10 read] eq {OK OK}}

    # Pause the master process so that a reconnection will not be possible.
    pause_process $paused_pid
}
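
# Replica #5 has the complete replication history while #10 missed the
# last 100 writes, so the slave selection algorithm must promote #5.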
test "Wait for instance #5 (and not #10) to turn into a master" {
|
|
wait_for_condition 1000 50 {
|
|
[s -5 role] eq {master}
|
|
} else {
|
|
fail "No failover detected"
|
|
}
|
|
}
|
|
|
|
test "Wait for the node #10 to return alive before ending the test" {
|
|
R 10 ping
|
|
}
|
|
|
|
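
# Every node that is still running must converge back to cluster_state ok;
# the paused former master is skipped.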
test "Cluster should eventually be up again" {
|
|
for {set j 0} {$j < [llength $::servers]} {incr j} {
|
|
if {[process_is_paused [srv -$j pid]]} continue
|
|
wait_for_condition 1000 50 {
|
|
[CI $j cluster_state] eq "ok"
|
|
} else {
|
|
fail "Cluster node $j cluster_state:[CI $j cluster_state]"
|
|
}
|
|
}
|
|
}
|
|
|
|
test "Node #10 should eventually replicate node #5" {
|
|
set port5 [srv -5 port]
|
|
wait_for_condition 1000 50 {
|
|
([lindex [R 10 role] 2] == $port5) &&
|
|
([lindex [R 10 role] 3] eq {connected})
|
|
} else {
|
|
fail "#10 didn't became slave of #5"
|
|
}
|
|
}
|
|
|
|
} ;# start_cluster
|
|
|
|

# Create a cluster with 3 masters and 15 slaves, so that we have 5
# slaves for each master.
start_cluster 3 15 {tags {external:skip cluster}} {
test "Cluster is up" {
|
|
wait_for_cluster_state ok
|
|
}
|
|
|
|
test "The first master has actually 5 slaves" {
|
|
wait_for_condition 1000 50 {
|
|
[llength [lindex [R 0 role] 2]] == 5
|
|
} else {
|
|
fail "replicas didn't connect"
|
|
}
|
|
}
|
|
|
|

test {Slaves of #0 are instance #3, #6, #9, #12 and #15 as expected} {
    set port0 [srv 0 port]
    assert {[lindex [R 3 role] 2] == $port0}
    assert {[lindex [R 6 role] 2] == $port0}
    assert {[lindex [R 9 role] 2] == $port0}
    assert {[lindex [R 12 role] 2] == $port0}
    assert {[lindex [R 15 role] 2] == $port0}
}

test {Instance #3, #6, #9, #12 and #15 synced with the master} {
    wait_for_condition 1000 50 {
        [s -3 master_link_status] eq {up} &&
        [s -6 master_link_status] eq {up} &&
        [s -9 master_link_status] eq {up} &&
        [s -12 master_link_status] eq {up} &&
        [s -15 master_link_status] eq {up}
    } else {
        fail "Instance #3 or #6 or #9 or #12 or #15 master link status is not up"
    }
}
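
# Return true if any instance in the given dict (keyed by instance id)
# currently reports the master role.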
proc master_detected {instances} {
    foreach instance [dict keys $instances] {
        if {[s -$instance role] eq {master}} {
            return true
        }
    }

    return false
}
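
# Pause whichever instance currently holds the master role for the first
# shard, then verify that one of the remaining nodes takes over; with 6
# candidate nodes the shard can survive 5 consecutive failures.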
test "New Master down consecutively" {
|
|
set instances [dict create 0 1 3 1 6 1 9 1 12 1 15 1]
|
|
|
|
set loops [expr {[dict size $instances]-1}]
|
|
for {set i 0} {$i < $loops} {incr i} {
|
|
set master_id -1
|
|
foreach instance [dict keys $instances] {
|
|
if {[s -$instance role] eq {master}} {
|
|
set master_id $instance
|
|
break;
|
|
}
|
|
}
|
|
|
|
if {$master_id eq -1} {
|
|
fail "no master detected, #loop $i"
|
|
}
|
|
|
|
set instances [dict remove $instances $master_id]
|
|
|
|
set paused_pid [srv [expr $master_id * -1] pid]
|
|
pause_process $paused_pid
|
|
wait_for_condition 1000 50 {
|
|
[master_detected $instances]
|
|
} else {
|
|
fail "No failover detected when master $master_id fails"
|
|
}
|
|
|
|
for {set j 0} {$j < [llength $::servers]} {incr j} {
|
|
if {[process_is_paused [srv -$j pid]]} continue
|
|
wait_for_condition 1000 50 {
|
|
[CI $j cluster_state] eq "ok"
|
|
} else {
|
|
fail "Cluster node $j cluster_state:[CI $j cluster_state]"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
} ;# start_cluster
|