futriix/tests/integration/shutdown.tcl
Shivshankar da831c0d22
rename procedure redis_deferring_client to valkey_deferring_client (#270)
Updated procedure redis_deferring_client in test environment to
valkey_deferring_client.

Signed-off-by: Shivshankar-Reddy <shiva.sheri.github@gmail.com>
2024-04-09 10:38:09 -04:00

235 lines
8.7 KiB
Tcl

# This test suite tests shutdown when there are lagging replicas connected.
# Fill up the OS socket send buffer for the replica connection of the server
# at index `idx`, so that later writes cannot reach the replica via the OS.
#
# Data is written in rounds (about 1M per round, via `populate`). After each
# round we sleep 10ms and read `mem_total_replication_buffers` from the
# server. When the replication buffer memory increases beyond 2M (often after
# writing 4M or so), we assume it's because the OS socket send buffer can't
# swallow anymore, and we stop.
#
# Arguments:
#   idx        - server index, passed through to `populate` and `s`.
#   max_rounds - optional safety cap on the number of rounds (default 1024).
#                Without a cap, a server whose replication buffer never grows
#                would make this helper loop forever.
#
# Returns an empty string once the threshold is exceeded. Raises an error if
# the threshold was not exceeded within `max_rounds` rounds.
proc fill_up_os_socket_send_buffer_for_repl {idx {max_rounds 1024}} {
    set threshold [expr {2 * 1024 * 1024}]
    for {set i 1} {$i <= $max_rounds} {incr i} {
        # Each round uses its own key prefix so earlier keys are not overwritten.
        populate 1024 junk$i: 1024 $idx
        after 10
        set buf_size [s $idx mem_total_replication_buffers]
        if {$buf_size > $threshold} {
            return
        }
    }
    error "replication buffers of server $idx did not exceed $threshold bytes after $max_rounds rounds"
}
# Run the same scenario twice: once triggering the shutdown with SIGTERM and
# once with the SHUTDOWN command sent over a deferring client.
foreach how {sigterm shutdown} {
    test "Shutting down master waits for replica to catch up ($how)" {
        start_server {overrides {save ""}} {
            start_server {overrides {save ""}} {
                # Server -1 (outer) acts as master, server 0 (inner) as replica.
                set master [srv -1 client]
                set master_host [srv -1 host]
                set master_port [srv -1 port]
                set master_pid [srv -1 pid]
                set replica [srv 0 client]
                set replica_pid [srv 0 pid]

                # Config master.
                $master config set shutdown-timeout 300; # 5min for slow CI
                $master config set repl-backlog-size 1; # small as possible
                $master config set hz 100; # cron runs every 10ms

                # Config replica.
                $replica replicaof $master_host $master_port
                wait_for_sync $replica

                # Preparation: Set k to 1 on both master and replica.
                $master set k 1
                wait_for_ofs_sync $master $replica

                # Pause the replica.
                pause_process $replica_pid

                # Fill up the OS socket send buffer for the replica connection
                # to prevent the following INCR from reaching the replica via
                # the OS.
                fill_up_os_socket_send_buffer_for_repl -1

                # Incr k and immediately shutdown master.
                $master incr k
                switch $how {
                    sigterm {
                        exec kill -SIGTERM $master_pid
                    }
                    shutdown {
                        # Deferring client: SHUTDOWN is sent without waiting
                        # for a reply, so the test can keep going.
                        set rd [valkey_deferring_client -1]
                        $rd shutdown
                    }
                }
                wait_for_condition 50 100 {
                    [s -1 shutdown_in_milliseconds] > 0
                } else {
                    fail "Master not indicating ongoing shutdown."
                }

                # Wake up replica and check if master has waited for it.
                after 20; # 2 cron intervals
                resume_process $replica_pid
                wait_for_condition 300 1000 {
                    [$replica get k] eq 2
                } else {
                    fail "Master exited before replica could catch up."
                }

                # Check shutdown log messages on master
                wait_for_log_messages -1 {"*ready to exit, bye bye*"} 0 100 500
                assert_equal 0 [count_log_message -1 "*Lagging replica*"]
                verify_log_message -1 "*1 of 1 replicas are in sync*" 0

                # Release the deferring client opened for the SHUTDOWN variant
                # so its connection is not leaked into later tests.
                if {$how eq "shutdown"} {
                    $rd close
                }
            }
        }
    } {} {repl external:skip}
}
# Scenario: the replica stays paused for longer than the master's
# shutdown-timeout, so the master gives up waiting and exits while the replica
# is still lagging behind.
test {Shutting down master waits for replica timeout} {
    start_server {overrides {save ""}} {
        start_server {overrides {save ""}} {
            # Inner server (index 0) is the replica.
            set follower [srv 0 client]
            set follower_pid [srv 0 pid]

            # Outer server (index -1) is the master.
            set primary [srv -1 client]
            set primary_pid [srv -1 pid]
            set primary_port [srv -1 port]
            set primary_host [srv -1 host]

            # Master waits at most one second for replicas on shutdown.
            $primary config set shutdown-timeout 1

            # Attach the replica and wait for the initial sync.
            $follower replicaof $primary_host $primary_port
            wait_for_sync $follower

            # Both sides start out with k == 1.
            $primary set k 1
            wait_for_ofs_sync $primary $follower

            # Freeze the replica process.
            pause_process $follower_pid

            # Saturate the OS socket send buffer of the replica connection so
            # the upcoming INCR k cannot be delivered to the replica by the OS.
            fill_up_os_socket_send_buffer_for_repl -1

            # Write one more change and ask the master to terminate right away.
            $primary incr k
            exec kill -SIGTERM $primary_pid

            wait_for_condition 50 100 {
                [s -1 shutdown_in_milliseconds] > 0
            } else {
                fail "Master not indicating ongoing shutdown."
            }

            # Wait for the master to finish exiting, then inspect its log.
            wait_for_log_messages -1 {"*ready to exit, bye bye*"} 0 100 100
            verify_log_message -1 "*Lagging replica*" 0
            verify_log_message -1 "*0 of 1 replicas are in sync*" 0

            # Unfreeze the replica; it must still hold the old value of k.
            resume_process $follower_pid
            assert_equal 1 [$follower get k]
        }
    }
} {} {repl external:skip}
# Scenario: two clients block on SHUTDOWN while the replica is paused. A very
# slow initial AOF rewrite is then started on the master, and once the replica
# is resumed both blocked SHUTDOWN calls are expected to return an error.
test "Shutting down master waits for replica then fails" {
    start_server {overrides {save ""}} {
        start_server {overrides {save ""}} {
            # Server -1 (outer) acts as master, server 0 (inner) as replica.
            set master [srv -1 client]
            set master_host [srv -1 host]
            set master_port [srv -1 port]
            set replica [srv 0 client]
            set replica_pid [srv 0 pid]

            # Config master and replica.
            $replica replicaof $master_host $master_port
            wait_for_sync $replica

            # Pause the replica and write a key on master.
            pause_process $replica_pid
            $master incr k

            # Two clients call blocking SHUTDOWN in parallel.
            set rd1 [valkey_deferring_client -1]
            set rd2 [valkey_deferring_client -1]
            $rd1 shutdown
            $rd2 shutdown

            # The deferring clients do not wait for a reply, so make sure the
            # master has registered both SHUTDOWN calls before inspecting
            # INFO; otherwise the assertions below can race with them.
            wait_for_condition 100 10 {
                [s -1 blocked_clients] == 2
            } else {
                fail "Clients not blocked on SHUTDOWN."
            }
            set info_clients [$master info clients]
            assert_match "*connected_clients:3*" $info_clients
            assert_match "*blocked_clients:2*" $info_clients

            # Start a very slow initial AOFRW, which will prevent shutdown.
            $master config set rdb-key-save-delay 30000000; # 30 seconds
            $master config set appendonly yes

            # Wake up replica, causing master to continue shutting down.
            resume_process $replica_pid

            # SHUTDOWN returns an error to both clients blocking on SHUTDOWN.
            catch { $rd1 read } e1
            catch { $rd2 read } e2
            assert_match "*Errors trying to SHUTDOWN. Check logs*" $e1
            assert_match "*Errors trying to SHUTDOWN. Check logs*" $e2
            $rd1 close
            $rd2 close

            # Check shutdown log messages on master.
            verify_log_message -1 "*1 of 1 replicas are in sync*" 0
            verify_log_message -1 "*Writing initial AOF, can't exit*" 0
            verify_log_message -1 "*Errors trying to shut down*" 0

            # Let master exit fast, without waiting for the very slow AOFRW.
            # Errors are swallowed with catch.
            catch {$master shutdown nosave force}
        }
    }
} {} {repl external:skip}
# Scenario: two clients block on SHUTDOWN while the replica is paused, then a
# third client issues SHUTDOWN ABORT. Both blocked SHUTDOWN calls are expected
# to return an error and the master log should record the manual abort.
test "Shutting down master waits for replica then aborted" {
    start_server {overrides {save ""}} {
        start_server {overrides {save ""}} {
            # Server -1 (outer) acts as master, server 0 (inner) as replica.
            set master [srv -1 client]
            set master_host [srv -1 host]
            set master_port [srv -1 port]
            set replica [srv 0 client]
            set replica_pid [srv 0 pid]

            # Config master and replica.
            $replica replicaof $master_host $master_port
            wait_for_sync $replica

            # Pause the replica and write a key on master.
            pause_process $replica_pid
            $master incr k

            # Two clients call blocking SHUTDOWN in parallel.
            set rd1 [valkey_deferring_client -1]
            set rd2 [valkey_deferring_client -1]
            $rd1 shutdown
            $rd2 shutdown

            # The deferring clients do not wait for a reply, so make sure the
            # master has registered both SHUTDOWN calls before inspecting
            # INFO; otherwise the assertions below can race with them.
            wait_for_condition 100 10 {
                [s -1 blocked_clients] == 2
            } else {
                fail "Clients not blocked on SHUTDOWN."
            }
            set info_clients [$master info clients]
            assert_match "*connected_clients:3*" $info_clients
            assert_match "*blocked_clients:2*" $info_clients

            # Abort the shutdown
            $master shutdown abort

            # Wake up the replica process that was paused above. The shutdown
            # has already been aborted at this point.
            resume_process $replica_pid

            # The aborted SHUTDOWN returns an error to both clients that were
            # blocking on it.
            catch { $rd1 read } e1
            catch { $rd2 read } e2
            assert_match "*Errors trying to SHUTDOWN. Check logs*" $e1
            assert_match "*Errors trying to SHUTDOWN. Check logs*" $e2
            $rd1 close
            $rd2 close

            # Check shutdown log messages on master.
            verify_log_message -1 "*Shutdown manually aborted*" 0
        }
    }
} {} {repl external:skip}