futriix/tests/cluster/tests/04-resharding.tcl
Jacob Murphy df5db0627f
Remove trademarked language in code comments (#223)
This includes comments used for module API documentation.

* Strategy for replacement: Regex search: `(//|/\*| \*|#).* ("|\()?(r|R)edis( |\.
  |'|\n|,|-|\)|")(?!nor the names of its contributors)(?!Ltd.)(?!Labs)(?!Contributors.)`
* Don't edit copyright comments
* Replace "Redis version X.X" -> "Redis OSS version X.X" to distinguish
from newly licensed repository
* Replace "Redis Object" -> "Object"
* Exclude markdown for now
* Don't edit Lua scripting comments referring to redis.X API
* Replace "Redis Protocol" -> "RESP"
* Replace redis-benchmark, -cli, -server, -check-aof/rdb with "valkey-"
prefix
* Most other places, I use best judgement to either remove "Redis", or
replace with "the server" or "server"

Fixes #148

---------

Signed-off-by: Jacob Murphy <jkmurphy@google.com>
Signed-off-by: Viktor Söderqvist <viktor.soderqvist@est.tech>
2024-04-09 10:24:03 +02:00

197 lines
6.5 KiB
Tcl

# Failover stress test.
# In this test a different node is killed in a loop for N
# iterations. The test checks that certain properties
# are preserved across iterations.
source "../tests/includes/init-tests.tcl"
source "../../../tests/support/cli.tcl"
test "Create a 5 nodes cluster" {
create_cluster 5 5
}
test "Cluster is up" {
assert_cluster_state ok
}
test "Enable AOF in all the instances" {
foreach_redis_id id {
R $id config set appendonly yes
# We use "appendfsync no" because it's fast but also guarantees that
# write(2) is performed before replying to client.
R $id config set appendfsync no
}
foreach_redis_id id {
wait_for_condition 1000 500 {
[RI $id aof_rewrite_in_progress] == 0 &&
[RI $id aof_enabled] == 1
} else {
fail "Failed to enable AOF on instance #$id"
}
}
}
# Return non-zero if the specified PID is about a process still in execution,
# otherwise 0 is returned.
proc process_is_running {pid} {
# PS should return with an error if PID is non existing,
# and catch will return non-zero. We want to return non-zero if
# the PID exists, so we invert the return value with expr not operator.
expr {![catch {exec ps -p $pid}]}
}
# Our resharding test performs the following actions:
#
# - N commands are sent to the cluster in the course of the test.
# - Every command selects a random key from key:0 to key:MAX-1.
# - The operation RPUSH key <randomvalue> is performed.
# - Tcl remembers into an array all the values pushed to each list.
# - After N/2 commands, the resharding process is started in background.
# - The test continues while the resharding is in progress.
# - At the end of the test, we wait for the resharding process to stop.
# - Finally the keys are checked to see if they contain the value they should.
set numkeys 50000
set numops 200000
set start_node_port [get_instance_attrib redis 0 port]
set cluster [redis_cluster 127.0.0.1:$start_node_port]
if {$::tls} {
# setup a non-TLS cluster client to the TLS cluster
set plaintext_port [get_instance_attrib redis 0 plaintext-port]
set cluster_plaintext [redis_cluster 127.0.0.1:$plaintext_port 0]
puts "Testing TLS cluster on start node 127.0.0.1:$start_node_port, plaintext port $plaintext_port"
} else {
set cluster_plaintext $cluster
puts "Testing using non-TLS cluster"
}
catch {unset content}
array set content {}
set tribpid {}
test "Cluster consistency during live resharding" {
set ele 0
for {set j 0} {$j < $numops} {incr j} {
# Trigger the resharding once we execute half the ops.
if {$tribpid ne {} &&
($j % 10000) == 0 &&
![process_is_running $tribpid]} {
set tribpid {}
}
if {$j >= $numops/2 && $tribpid eq {}} {
puts -nonewline "...Starting resharding..."
flush stdout
set target [dict get [get_myself [randomInt 5]] id]
set tribpid [lindex [exec \
../../../src/valkey-cli --cluster reshard \
127.0.0.1:[get_instance_attrib redis 0 port] \
--cluster-from all \
--cluster-to $target \
--cluster-slots 100 \
--cluster-yes \
{*}[valkeycli_tls_config "../../../tests"] \
| [info nameofexecutable] \
../tests/helpers/onlydots.tcl \
&] 0]
}
# Write random data to random list.
set listid [randomInt $numkeys]
set key "key:$listid"
incr ele
# We write both with Lua scripts and with plain commands.
# This way we are able to stress Lua -> server command invocation
# as well, that has tests to prevent Lua to write into wrong
# hash slots.
# We also use both TLS and plaintext connections.
if {$listid % 3 == 0} {
$cluster rpush $key $ele
} elseif {$listid % 3 == 1} {
$cluster_plaintext rpush $key $ele
} else {
$cluster eval {redis.call("rpush",KEYS[1],ARGV[1])} 1 $key $ele
}
lappend content($key) $ele
if {($j % 1000) == 0} {
puts -nonewline W; flush stdout
}
}
# Wait for the resharding process to end
wait_for_condition 1000 500 {
[process_is_running $tribpid] == 0
} else {
fail "Resharding is not terminating after some time."
}
}
test "Verify $numkeys keys for consistency with logical content" {
# Check that the Cluster content matches our logical content.
foreach {key value} [array get content] {
if {[$cluster lrange $key 0 -1] ne $value} {
fail "Key $key expected to hold '$value' but actual content is [$cluster lrange $key 0 -1]"
}
}
}
test "Terminate and restart all the instances" {
foreach_redis_id id {
# Stop AOF so that an initial AOFRW won't prevent the instance from terminating
R $id config set appendonly no
kill_instance redis $id
restart_instance redis $id
}
}
test "Cluster should eventually be up again" {
assert_cluster_state ok
}
test "Verify $numkeys keys after the restart" {
# Check that the Cluster content matches our logical content.
foreach {key value} [array get content] {
if {[$cluster lrange $key 0 -1] ne $value} {
fail "Key $key expected to hold '$value' but actual content is [$cluster lrange $key 0 -1]"
}
}
}
test "Disable AOF in all the instances" {
foreach_redis_id id {
R $id config set appendonly no
}
}
test "Verify slaves consistency" {
set verified_masters 0
foreach_redis_id id {
set role [R $id role]
lassign $role myrole myoffset slaves
if {$myrole eq {slave}} continue
set masterport [get_instance_attrib redis $id port]
set masterdigest [R $id debug digest]
foreach_redis_id sid {
set srole [R $sid role]
if {[lindex $srole 0] eq {master}} continue
if {[lindex $srole 2] != $masterport} continue
wait_for_condition 1000 500 {
[R $sid debug digest] eq $masterdigest
} else {
fail "Master and slave data digest are different"
}
incr verified_masters
}
}
assert {$verified_masters >= 5}
}
test "Dump sanitization was skipped for migrations" {
set verified_masters 0
foreach_redis_id id {
assert {[RI $id dump_payload_sanitizations] == 0}
}
}