
Might close https://github.com/valkey-io/valkey/issues/1484. I noticed that we don't disable pause-after-fork in the last test that gets executed, so the server might get stuck in pause loops after the test ends if it attempts another psync for any reason. --------- Signed-off-by: Madelyn Olson <madelyneolson@gmail.com>
1319 lines
56 KiB
Tcl
1319 lines
56 KiB
Tcl
# Return 1 when the whole content of the file $log matches the glob
# $pattern (as interpreted by [string match]), 0 otherwise.
#
#   log     - path of the file to read
#   pattern - glob pattern applied to the entire file content, so callers
#             normally wrap the text they look for in "*...*"
#
# The channel is always closed, even when reading fails; in that case the
# original read error is re-raised to the caller.
proc log_file_matches {log pattern} {
    set fp [open $log r]
    set failed [catch {read $fp} content options]
    close $fp
    if {$failed} {
        return -options $options $content
    }
    string match $pattern $content
}

# Return the id of the first client reported by "$r client list" whose line
# contains "cmd=$cmd", or an empty string when no line matches.
#
#   r   - client command used to issue CLIENT LIST
#   cmd - command name looked up in the cmd= field of each line
#
# The lookup is a glob match on "*cmd=$cmd*", so a command name that is a
# prefix of another command's name matches that longer name as well.
proc get_client_id_by_last_cmd {r cmd} {
    set client_list [$r client list]
    set client_id ""
    set lines [split $client_list "\n"]
    foreach line $lines {
        if {[string match *cmd=$cmd* $line]} {
            # Fields are space separated "key=value" pairs; pick the id field.
            set parts [split $line " "]
            foreach part $parts {
                if {[string match id=* $part]} {
                    set client_id [lindex [split $part "="] 1]
                    return $client_id
                }
            }
        }
    }
    return $client_id
}

# Wait until the process enters a paused state.
#
#   idx - server index handed to [srv] to look up the process id
#
# Polls `ps -o state=` for that pid through wait_for_condition (50 1000)
# until the reported state starts with "T"; otherwise the test is failed
# with the last observed state.
proc wait_process_paused idx {
    set pid [srv $idx pid]
    wait_for_condition 50 1000 {
        [string match "T*" [exec ps -o state= -p $pid]]
    } else {
        fail "Process $pid didn't stop, current state is [exec ps -o state= -p $pid]"
    }
}

# Wait until the process enters a paused state, then resume the process.
#
#   idx - server index handed to [srv] to look up the process id
proc wait_and_resume_process idx {
    set pid [srv $idx pid]
    wait_process_paused $idx
    resume_process $pid
}

# Basic dual-channel sync walk-through. The outer server is the replica, the
# inner one the primary. The primary first delays its diskless BGSAVE so the
# replica can be observed in the handshake and wait_bgsave states; the delay
# is then removed and the sync is expected to complete with matching offsets.
start_server {tags {"dual-channel-replication external:skip"}} {
    set replica [srv 0 client]
    set replica_host [srv 0 host]
    set replica_port [srv 0 port]
    set replica_log [srv 0 stdout]
    $replica config set loglevel debug
    start_server {} {
        set primary [srv 0 client]
        set primary_host [srv 0 host]
        set primary_port [srv 0 port]

        # Configure the primary in order to hang waiting for the BGSAVE
        # operation, so that the replica remains in the handshake state.
        $primary config set repl-diskless-sync yes
        $primary config set repl-diskless-sync-delay 1000
        $primary config set dual-channel-replication-enabled yes
        $primary config set loglevel debug

        # Start the replication process...
        $replica config set dual-channel-replication-enabled yes
        $replica replicaof $primary_host $primary_port

        test "Test dual-channel-replication-enabled replica enters handshake" {
            wait_for_condition 50 1000 {
                [string match *handshake* [$replica role]]
            } else {
                fail "Replica does not enter handshake state"
            }
        }

        test "Test dual-channel-replication-enabled enters wait_bgsave" {
            wait_for_condition 50 1000 {
                [string match *state=wait_bgsave* [$primary info replication]]
            } else {
                fail "Replica does not enter wait_bgsave state"
            }
        }

        # Drop the delay so the pending BGSAVE can start.
        $primary config set repl-diskless-sync-delay 0

        test "Test dual-channel-replication-enabled replica is able to sync" {
            verify_replica_online $primary 0 500
            wait_for_condition 50 1000 {
                [string match *connected_slaves:1* [$primary info]]
            } else {
                fail "Replica rdb connection is still open"
            }
            set offset [status $primary master_repl_offset]
            wait_for_condition 500 100 {
                [string match "*slave0:*,offset=$offset,*" [$primary info replication]] &&
                $offset == [status $replica master_repl_offset]
            } else {
                fail "Replicas and primary offsets were unable to match."
            }
        }
    }
}

# Dual-channel sync under write load. The outer server is the replica, the
# inner one the primary. Three single-key write loads run against the primary
# while the replica syncs; the primary's used_memory is sampled during the
# sync, then the data set is compared and a follow-up sync with dual-channel
# disabled on the replica is checked.
start_server {tags {"dual-channel-replication external:skip"}} {
    set replica [srv 0 client]
    set replica_host [srv 0 host]
    set replica_port [srv 0 port]
    set replica_log [srv 0 stdout]
    start_server {} {
        set primary [srv 0 client]
        set primary_host [srv 0 host]
        set primary_port [srv 0 port]

        $primary config set rdb-key-save-delay 200
        $primary config set dual-channel-replication-enabled yes
        $primary config set repl-diskless-sync-delay 0
        $replica config set dual-channel-replication-enabled yes
        $replica config set repl-diskless-sync no

        populate 1000 primary 10000
        set load_handle1 [start_one_key_write_load $primary_host $primary_port 100 "mykey1"]
        set load_handle2 [start_one_key_write_load $primary_host $primary_port 100 "mykey2"]
        set load_handle3 [start_one_key_write_load $primary_host $primary_port 100 "mykey3"]

        # wait for load handlers to start
        wait_for_condition 50 1000 {
            ([$primary get "mykey1"] != "") &&
            ([$primary get "mykey2"] != "") &&
            ([$primary get "mykey3"] != "")
        } else {
            fail "Can't set new keys"
        }

        # Baseline taken on the primary (srv 0) before the sync starts.
        set before_used [s 0 used_memory]

        test "Primary memory usage does not increase during dual-channel-replication sync" {
            $replica replicaof $primary_host $primary_port

            # Verify used_memory stays low through all the sync
            set max_retry 500
            while {$max_retry} {
                # Verify memory
                set used_memory [s 0 used_memory]
                # Note: the exponent must be written with `**`. In Tcl `^` is
                # bitwise XOR and binds looser than `<=`, which made the
                # previous form of this check always true.
                assert {$used_memory-$before_used <= 1.5*10**6}; # ~1/3 of the space
                # Check replica state
                set primary_info [$primary info]
                set replica_info [$replica info]
                if {[string match *slave0:*state=online* $primary_info] &&
                    [string match *master_link_status:up* $replica_info]} {
                    break
                } else {
                    incr max_retry -1
                    after 10
                }
            }
            if {$max_retry == 0} {
                error "assertion:Replica not in sync after 5 seconds"
            }
        }
        stop_write_load $load_handle1
        stop_write_load $load_handle2
        stop_write_load $load_handle3

        test "Steady state after dual channel sync" {
            wait_for_condition 50 1000 {
                ([$replica get "mykey1"] eq [$primary get mykey1]) &&
                ([$replica get "mykey2"] eq [$primary get mykey2]) &&
                ([$replica get "mykey3"] eq [$primary get mykey3])
            } else {
                fail "Replica keys did not converge to the primary's values"
            }
        }

        test "Dual channel replication sync doesn't impair subsequent normal syncs" {
            $replica replicaof no one
            $replica config set dual-channel-replication-enabled no
            $primary set newkey newval

            set sync_full [s 0 sync_full]
            set sync_partial [s 0 sync_partial_ok]

            $replica replicaof $primary_host $primary_port
            verify_replica_online $primary 0 500
            # Verify replica used normal sync this time
            assert_equal [expr $sync_full + 1] [s 0 sync_full]
            assert_equal [expr $sync_partial] [s 0 sync_partial_ok]
            assert [string match *connected_slaves:1* [$primary info]]
        }
    }
}

# Toggling dual-channel-replication-enabled. The outer server is the replica,
# the inner one the primary (srv 0 inside the inner block). For each starting
# value of the config the block first runs two consecutive full syncs with the
# config flipped in between, then flips the config on one instance while a
# sync is in progress and checks that the running sync keeps its type.
start_server {tags {"dual-channel-replication external:skip"}} {
    set replica [srv 0 client]
    set replica_host [srv 0 host]
    set replica_port [srv 0 port]
    set replica_log [srv 0 stdout]
    start_server {} {
        foreach enable {yes no} {
            set primary [srv 0 client]
            set primary_host [srv 0 host]
            set primary_port [srv 0 port]

            $primary config set repl-diskless-sync yes
            # Set primary shared replication buffer size to a bit more than the size of
            # a replication buffer block.
            $primary config set client-output-buffer-limit "replica 1100k 0 0"
            $primary config set dual-channel-replication-enabled $enable
            $primary config set repl-diskless-sync-delay 0
            $replica config set dual-channel-replication-enabled $enable

            test "Toggle dual-channel-replication-enabled: $enable start" {
                populate 1000 primary 10000
                $primary config set rdb-key-save-delay 0
                set prev_sync_full [s 0 sync_full]
                set prev_sync_partial [s 0 sync_partial_ok]

                $replica replicaof $primary_host $primary_port
                verify_replica_online $primary 0 500
                wait_for_sync $replica

                set cur_sync_full [s 0 sync_full]
                set cur_sync_partial [s 0 sync_partial_ok]
                if {$enable == "yes"} {
                    # Verify that dual channel replication sync was used
                    assert {$cur_sync_full == [expr $prev_sync_full + 1]}
                    assert {$cur_sync_partial == [expr $prev_sync_partial + 1]}
                } else {
                    # Verify that normal sync was used
                    assert {[s 0 sync_full] == [expr $prev_sync_full + 1]}
                    assert {[s 0 sync_partial_ok] == $prev_sync_partial}
                }

                $replica replicaof no one
                if {$enable == "yes"} {
                    # disable dual channel sync
                    $replica config set dual-channel-replication-enabled no
                    $primary config set dual-channel-replication-enabled no
                } else {
                    $replica config set dual-channel-replication-enabled yes
                    $primary config set dual-channel-replication-enabled yes
                }

                # Force replica to full sync next time
                populate 1000 primary 10000
                set prev_sync_full [s 0 sync_full]
                set prev_sync_partial [s 0 sync_partial_ok]

                $replica replicaof $primary_host $primary_port
                verify_replica_online $primary 0 500
                wait_for_sync $replica

                set cur_sync_full [s 0 sync_full]
                set cur_sync_partial [s 0 sync_partial_ok]
                if {$enable == "yes"} {
                    # Verify that normal sync was used
                    assert {$cur_sync_full == [expr $prev_sync_full + 1]}
                    assert {$cur_sync_partial == $prev_sync_partial}
                } else {
                    # Verify that dual channel replication sync was used
                    assert {$cur_sync_full == [expr $prev_sync_full + 1]}
                    assert {$cur_sync_partial == [expr $prev_sync_partial + 1]}
                }
                $replica replicaof no one
            }

            foreach test_instance {primary replica} {
                # Restore the starting value on both sides before each run.
                $primary config set dual-channel-replication-enabled $enable
                $replica config set dual-channel-replication-enabled $enable
                test "Online toggle dual-channel-replication-enabled on $test_instance, starting with '$enable'" {
                    populate 1000 primary 10000
                    $primary config set rdb-key-save-delay 100000

                    $replica replicaof $primary_host $primary_port
                    # wait for sync to start
                    if {$enable == "yes"} {
                        wait_for_condition 500 1000 {
                            [string match "*slave*,state=wait_bgsave*,type=rdb-channel*" [$primary info replication]] &&
                            [string match "*slave*,state=bg_transfer*,type=main-channel*" [$primary info replication]] &&
                            [s 0 rdb_bgsave_in_progress] eq 1
                        } else {
                            fail "replica didn't start a dual-channel sync session in time"
                        }
                    } else {
                        wait_for_condition 500 1000 {
                            [string match "*slave*,state=wait_bgsave*,type=replica*" [$primary info replication]] &&
                            [s 0 rdb_bgsave_in_progress] eq 1
                        } else {
                            fail "replica didn't start a normal full sync session in time"
                        }
                    }
                    # Toggle config
                    set new_value "yes"
                    if {$enable == "yes"} {
                        set new_value "no"
                    }
                    set instance $primary
                    if {$test_instance == "replica"} {
                        set instance $replica
                    }
                    $instance config set dual-channel-replication-enabled $new_value
                    # Wait for at least one server cron
                    after 1000

                    if {$enable == "yes"} {
                        # Verify that dual channel replication sync is still in progress
                        assert [string match "*slave*,state=wait_bgsave*,type=rdb-channel*" [$primary info replication]]
                        assert [string match "*slave*,state=bg_transfer*,type=main-channel*" [$primary info replication]]
                        assert {[s 0 rdb_bgsave_in_progress] eq 1}
                    } else {
                        # Verify that normal sync is still in progress
                        assert [string match "*slave*,state=wait_bgsave*,type=replica*" [$primary info replication]]
                        assert {[s 0 rdb_bgsave_in_progress] eq 1}
                    }
                    $replica replicaof no one
                    # The primary is srv 0 in this nesting (srv -1 is the
                    # replica), so the bgsave state must be read from srv 0 to
                    # actually observe the primary aborting the sync.
                    wait_for_condition 500 1000 {
                        [s 0 rdb_bgsave_in_progress] eq 0
                    } else {
                        fail "Primary should abort sync"
                    }
                }
            }
        }
    }
}

# Two replicas against one primary. Server nesting, outermost first:
# replica1 (srv -2), replica2 (srv -1), primary (srv 0). Covers: both
# replicas syncing from one fork, mixed dual-channel on/off replicas, the
# replica-side buffer limit during sync, and the fallback when diskless sync
# is disabled on the primary.
start_server {tags {"dual-channel-replication external:skip"}} {
    set replica1 [srv 0 client]
    set replica1_host [srv 0 host]
    set replica1_port [srv 0 port]
    set replica1_log [srv 0 stdout]
    start_server {} {
        set replica2 [srv 0 client]
        set replica2_host [srv 0 host]
        set replica2_port [srv 0 port]
        set replica2_log [srv 0 stdout]
        start_server {} {
            set primary [srv 0 client]
            set primary_host [srv 0 host]
            set primary_port [srv 0 port]
            set loglines [count_log_lines -1]

            populate 10000 primary 10
            $primary set key1 val1

            $primary config set repl-diskless-sync yes
            $primary config set repl-diskless-sync-delay 5; # allow both replicas to ask for sync
            $primary config set dual-channel-replication-enabled yes

            $replica1 config set dual-channel-replication-enabled yes
            $replica2 config set dual-channel-replication-enabled yes
            $replica1 config set repl-diskless-sync no
            $replica2 config set repl-diskless-sync no
            $replica1 config set loglevel debug
            $replica2 config set loglevel debug

            test "dual-channel-replication with multiple replicas" {
                $replica1 replicaof $primary_host $primary_port
                $replica2 replicaof $primary_host $primary_port
                verify_replica_online $primary 0 500
                verify_replica_online $primary 1 500

                wait_for_value_to_propagate_to_replica $primary $replica1 "key1"
                wait_for_value_to_propagate_to_replica $primary $replica2 "key1"

                # Both replicas are expected to be served by a single fork.
                assert {[s 0 total_forks] eq "1" }
            }

            $replica1 replicaof no one
            $replica2 replicaof no one

            $replica1 config set dual-channel-replication-enabled yes
            $replica2 config set dual-channel-replication-enabled no

            $primary set key2 val2

            test "Test diverse replica sync: dual-channel on/off" {
                $replica1 replicaof $primary_host $primary_port
                $replica2 replicaof $primary_host $primary_port
                verify_replica_online $primary 0 500
                verify_replica_online $primary 1 500
                wait_for_value_to_propagate_to_replica $primary $replica1 "key2"
                wait_for_value_to_propagate_to_replica $primary $replica2 "key2"
                wait_for_condition 50 1000 {
                    [status $replica1 master_link_status] == "up"
                } else {
                    fail "Replica is not synced"
                }
            }

            $replica1 replicaof no one

            test "Test replica's buffer limit reached" {
                $primary config set repl-diskless-sync-delay 0
                $primary config set rdb-key-save-delay 10000
                # At this point we have about 10k keys in the db,
                # We expect that the next full sync will take 100 seconds
                # (10k keys * 10000 microseconds).
                # It will give us enough time to fill the replica buffer.
                $replica1 config set dual-channel-replication-enabled yes
                $replica1 config set client-output-buffer-limit "replica 16383 16383 0"

                $replica1 replicaof $primary_host $primary_port
                # Wait for replica to establish psync using main channel
                wait_for_condition 500 1000 {
                    [string match "*slave*,state=bg_transfer*,type=main-channel*" [$primary info replication]]
                } else {
                    fail "replica didn't start sync session in time"
                }

                populate 10000 primary 10; # set ~ 100kb
                # Wait for replica's buffer limit reached
                wait_for_condition 50 1000 {
                    [log_file_matches $replica1_log "*Replication buffer limit reached, stopping buffering*"]
                } else {
                    fail "Replica buffer should fill"
                }
                # srv -2 is replica1 in this nesting.
                assert {[s -2 replicas_replication_buffer_size] <= 16385*2}

                # Primary replication buffer should grow
                wait_for_condition 50 1000 {
                    [status $primary mem_total_replication_buffers] >= 81915
                } else {
                    fail "Primary should take the load"
                }
            }

            $replica1 replicaof no one
            $replica1 config set client-output-buffer-limit "replica 256mb 256mb 0"; # remove repl buffer limitation
            $primary config set rdb-key-save-delay 0

            wait_for_condition 500 1000 {
                [s 0 rdb_bgsave_in_progress] eq 0
            } else {
                fail "can't kill rdb child"
            }

            $primary set key3 val3

            test "dual-channel-replication fails when primary diskless disabled" {
                set cur_psync [status $primary sync_partial_ok]
                $primary config set repl-diskless-sync no

                $replica1 config set dual-channel-replication-enabled yes
                $replica1 replicaof $primary_host $primary_port

                # Wait for mitigation and resync
                wait_for_condition 50 1000 {
                    [status $replica1 master_link_status] == "up"
                } else {
                    fail "Replica is not synced"
                }
                wait_for_value_to_propagate_to_replica $primary $replica1 "key3"

                # Verify that we did not use dual-channel-replication sync
                assert {[status $primary sync_partial_ok] == $cur_psync}
            }
        }
    }
}

# Race where the replica finishes loading the RDB before psync is
# established. The outer server is the replica (srv -1 inside the inner
# block), the inner one the primary (srv 0). The primary is configured to
# pause after fork and is resumed only once the replica logged that the RDB
# was loaded.
start_server {tags {"dual-channel-replication external:skip"}} {
    set replica [srv 0 client]
    set replica_host [srv 0 host]
    set replica_port [srv 0 port]
    set replica_log [srv 0 stdout]
    start_server {} {
        set primary [srv 0 client]
        set primary_host [srv 0 host]
        set primary_port [srv 0 port]
        # Create small enough db to be loaded before replica establish psync connection
        $primary set key1 val1

        $primary config set repl-diskless-sync yes
        $primary debug pause-after-fork 1
        $primary config set dual-channel-replication-enabled yes

        $replica config set dual-channel-replication-enabled yes
        $replica config set loglevel debug

        test "Test dual-channel-replication sync- psync established after rdb load" {
            $replica replicaof $primary_host $primary_port
            wait_for_log_messages -1 {"*Done loading RDB*"} 0 2000 1
            wait_and_resume_process 0

            verify_replica_online $primary 0 500
            wait_for_condition 50 1000 {
                [status $replica master_link_status] == "up"
            } else {
                fail "Replica is not synced"
            }
            wait_for_value_to_propagate_to_replica $primary $replica "key1"
            # Confirm the occurrence of a race condition.
            wait_for_log_messages -1 {"*Dual channel replication: Psync established after rdb load*"} 0 2000 1
        }
    }
}

# Primary replication buffer growth while the replica is inactive. The outer
# server is the replica (srv -1 inside the inner block), the inner one the
# primary (srv 0). The replica is configured with pause-after-fork while three
# write loads keep feeding the primary; the primary's replication buffers are
# expected to exceed twice the configured backlog size before the replica is
# resumed.
start_server {tags {"dual-channel-replication external:skip"}} {
    set replica [srv 0 client]
    set replica_host [srv 0 host]
    set replica_port [srv 0 port]
    set replica_log [srv 0 stdout]
    start_server {} {
        set primary [srv 0 client]
        set primary_host [srv 0 host]
        set primary_port [srv 0 port]
        set backlog_size [expr {10 ** 5}]
        set loglines [count_log_lines -1]

        $primary config set repl-diskless-sync yes
        $primary config set dual-channel-replication-enabled yes
        $primary config set repl-backlog-size $backlog_size
        $primary config set loglevel debug
        $primary config set repl-diskless-sync-delay 0
        # Use a longer replication timeout when running under valgrind.
        if {$::valgrind} {
            $primary config set repl-timeout 100
            $replica config set repl-timeout 100
        } else {
            $primary config set repl-timeout 10
            $replica config set repl-timeout 10
        }
        $primary config set rdb-key-save-delay 200
        populate 10000 primary 10000

        set load_handle1 [start_one_key_write_load $primary_host $primary_port 100 "mykey1"]
        set load_handle2 [start_one_key_write_load $primary_host $primary_port 100 "mykey2"]
        set load_handle3 [start_one_key_write_load $primary_host $primary_port 100 "mykey3"]

        $replica config set dual-channel-replication-enabled yes
        $replica config set loglevel debug

        # Pause replica after primary fork
        $replica debug pause-after-fork 1

        test "dual-channel-replication: Primary COB growth with inactive replica" {
            $replica replicaof $primary_host $primary_port
            # Verify repl backlog can grow
            wait_for_condition 1000 10 {
                [s 0 mem_total_replication_buffers] > [expr {2 * $backlog_size}]
            } else {
                fail "Primary should allow backlog to grow beyond its limits during dual-channel-replication sync handshake"
            }
            wait_and_resume_process -1

            verify_replica_online $primary 0 500
            wait_for_condition 50 1000 {
                [status $replica master_link_status] == "up"
            } else {
                fail "Replica is not synced"
            }
        }

        stop_write_load $load_handle1
        stop_write_load $load_handle2
        stop_write_load $load_handle3

    }
}

# Primary tracking of the replication backlog for rdb-channel replicas.
# Server nesting, outermost first: replica1 (srv -2), replica2 (srv -1),
# primary (srv 0). replica1 syncs while the primary has no backlog yet,
# replica2 syncs afterwards; both tests wait for the matching primary log
# line and check replicas_waiting_psync drops back to 0.
start_server {tags {"dual-channel-replication external:skip"}} {
    set replica1 [srv 0 client]
    set replica1_host [srv 0 host]
    set replica1_port [srv 0 port]
    set replica1_log [srv 0 stdout]
    start_server {} {
        set replica2 [srv 0 client]
        set replica2_host [srv 0 host]
        set replica2_port [srv 0 port]
        set replica2_log [srv 0 stdout]
        start_server {} {
            set primary [srv 0 client]
            set primary_host [srv 0 host]
            set primary_port [srv 0 port]
            set backlog_size [expr {10 ** 6}]
            # loglines is only used as the starting offset for
            # wait_for_log_messages on srv 0 (the primary), so it is counted
            # on the primary's log as well.
            set loglines [count_log_lines 0]

            $primary config set repl-diskless-sync yes
            $primary config set dual-channel-replication-enabled yes
            $primary config set repl-backlog-size $backlog_size
            $primary config set loglevel debug
            $primary config set repl-timeout 10
            $primary config set rdb-key-save-delay 10
            populate 1024 primary 16

            set load_handle0 [start_write_load $primary_host $primary_port 20]

            $replica1 config set dual-channel-replication-enabled yes
            $replica2 config set dual-channel-replication-enabled yes
            $replica1 config set loglevel debug
            $replica2 config set loglevel debug
            $replica1 config set repl-timeout 10
            $replica2 config set repl-timeout 10

            # Pause replicas after the primary forks for them
            $replica1 debug pause-after-fork 1
            $replica2 debug pause-after-fork 1
            test "Test dual-channel: primary tracking replica backlog refcount - start with empty backlog" {
                $replica1 replicaof $primary_host $primary_port
                set res [wait_for_log_messages 0 {"*Add rdb replica * no repl-backlog to track*"} $loglines 2000 1]
                set res [wait_for_log_messages 0 {"*Attach replica rdb client*"} $loglines 2000 1]
                set loglines [lindex $res 1]
                # incr takes a variable name; "$loglines" would increment a
                # variable named after the current value instead.
                incr loglines
                wait_and_resume_process -2
                verify_replica_online $primary 0 700
                wait_for_condition 50 1000 {
                    [status $replica1 master_link_status] == "up"
                } else {
                    fail "Replica is not synced"
                }
                $replica1 replicaof no one
                assert [string match *replicas_waiting_psync:0* [$primary info replication]]
            }

            test "Test dual-channel: primary tracking replica backlog refcount - start with backlog" {
                $replica2 replicaof $primary_host $primary_port
                set res [wait_for_log_messages 0 {"*Add rdb replica * tracking repl-backlog tail*"} $loglines 2000 1]
                set loglines [lindex $res 1]
                incr loglines
                wait_and_resume_process -1
                verify_replica_online $primary 0 700
                wait_for_condition 50 1000 {
                    [status $replica2 master_link_status] == "up"
                } else {
                    fail "Replica is not synced"
                }
                assert [string match *replicas_waiting_psync:0* [$primary info replication]]
            }

            stop_write_load $load_handle0
        }
    }
}

# Psync established after the RDB load, within the grace period. Here the
# outer server is the primary (srv -1 inside the inner block) and the inner
# one the replica (srv 0). The primary pauses after fork and delays freeing
# the rdb client by two seconds.
start_server {tags {"dual-channel-replication external:skip"}} {
    set primary [srv 0 client]
    set primary_host [srv 0 host]
    set primary_port [srv 0 port]

    $primary config set repl-diskless-sync yes
    $primary config set dual-channel-replication-enabled yes
    $primary config set repl-backlog-size [expr {10 ** 6}]
    $primary config set loglevel debug
    $primary config set repl-timeout 10
    # generate small db
    populate 10 primary 10
    # Pause primary main process after fork
    $primary debug pause-after-fork 1
    # Give replica two second grace period before disconnection
    $primary debug delay-rdb-client-free-seconds 2

    start_server {} {
        set replica [srv 0 client]
        set replica_host [srv 0 host]
        set replica_port [srv 0 port]
        set replica_log [srv 0 stdout]
        set replica_pid [srv 0 pid]
        set loglines [count_log_lines 0]

        set load_handle0 [start_write_load $primary_host $primary_port 20]

        $replica config set dual-channel-replication-enabled yes
        $replica config set loglevel debug
        $replica config set repl-timeout 10

        test "Psync established after rdb load - within grace period" {
            # Test Sequence:
            # 1. Replica initiates synchronization via RDB channel.
            # 2. Primary's main process is suspended.
            # 3. Replica completes RDB loading and pauses before establishing PSYNC connection.
            # 4. Primary resumes operation and detects closed RDB channel.
            # 5. Replica resumes operation.
            # Expected outcome: Primary maintains RDB channel until replica establishes PSYNC connection.
            $replica replicaof $primary_host $primary_port
            wait_for_log_messages 0 {"*Done loading RDB*"} $loglines 2000 1
            pause_process $replica_pid
            wait_and_resume_process -1
            wait_for_condition 50 100 {
                [string match {*replicas_waiting_psync:1*} [$primary info replication]]
            } else {
                fail "Primary freed RDB client before psync was established"
            }
            resume_process $replica_pid

            verify_replica_online $primary 0 500
            wait_for_condition 50 100 {
                [string match {*replicas_waiting_psync:0*} [$primary info replication]]
            } else {
                fail "Primary did not free repl buf block after psync establishment"
            }
            $replica replicaof no one
        }
        stop_write_load $load_handle0
    }
}

# Psync attempted after the RDB load, beyond the grace period. The outer
# server is the primary (srv -1 inside the inner block) and the inner one the
# replica (srv 0). Same setup as the "within grace period" scenario, but the
# replica stays paused until the primary has logged that the grace period
# expired.
start_server {tags {"dual-channel-replication external:skip"}} {
    set primary [srv 0 client]
    set primary_host [srv 0 host]
    set primary_port [srv 0 port]

    $primary config set repl-diskless-sync yes
    $primary config set dual-channel-replication-enabled yes
    $primary config set repl-backlog-size [expr {10 ** 6}]
    $primary config set loglevel debug
    $primary config set repl-timeout 10
    # generate small db
    populate 10 primary 10
    # Pause primary main process after fork
    $primary debug pause-after-fork 1
    # Give replica two second grace period before disconnection
    $primary debug delay-rdb-client-free-seconds 2

    start_server {} {
        set replica [srv 0 client]
        set replica_host [srv 0 host]
        set replica_port [srv 0 port]
        set replica_log [srv 0 stdout]
        set replica_pid [srv 0 pid]
        set loglines [count_log_lines 0]

        set load_handle0 [start_write_load $primary_host $primary_port 20]

        $replica config set dual-channel-replication-enabled yes
        $replica config set loglevel debug
        $replica config set repl-timeout 10

        test "Psync established after RDB load - beyond grace period" {
            # Test Sequence:
            # 1. Replica initiates synchronization via RDB channel.
            # 2. Primary's main process is suspended.
            # 3. Replica completes RDB loading and pauses before establishing PSYNC connection.
            # 4. Primary resumes operation and detects closed RDB channel.
            # Expected outcome: Primary drops the RDB channel after grace period is done.
            $replica replicaof $primary_host $primary_port
            wait_for_log_messages 0 {"*Done loading RDB*"} $loglines 2000 1
            pause_process $replica_pid
            wait_and_resume_process -1
            wait_for_condition 50 100 {
                [string match {*replicas_waiting_psync:1*} [$primary info replication]]
            } else {
                fail "Primary should wait before freeing repl block"
            }

            # Sync should fail once the replica ask for PSYNC using main channel
            set res [wait_for_log_messages -1 {"*Replica main channel failed to establish PSYNC within the grace period*"} 0 4000 1]
            wait_for_condition 50 100 {
                [string match {*replicas_waiting_psync:0*} [$primary info replication]]
            } else {
                fail "Primary did not free waiting psync replica after grace period"
            }
            resume_process $replica_pid
        }
        stop_write_load $load_handle0
    }
}

# Primary output-buffer overrun before psync is established. The outer server
# is the primary (srv -1 inside the inner block) and the inner one the
# replica (srv 0). The replica is paused after loading the RDB so the resumed
# primary overruns the replica output buffer limit and drops it; the
# follow-up full sync is then aborted with "replicaof no one".
start_server {tags {"dual-channel-replication external:skip"}} {
    set primary [srv 0 client]
    set primary_host [srv 0 host]
    set primary_port [srv 0 port]
    set loglines [count_log_lines 0]

    $primary config set repl-diskless-sync yes
    $primary config set dual-channel-replication-enabled yes
    $primary config set client-output-buffer-limit "replica 1100k 0 0"
    $primary config set loglevel debug
    # generate small db
    populate 10 primary 10
    start_server {} {
        set replica [srv 0 client]
        set replica_host [srv 0 host]
        set replica_port [srv 0 port]
        set replica_log [srv 0 stdout]
        set replica_pid [srv 0 pid]

        set load_handle0 [start_write_load $primary_host $primary_port 60]
        set load_handle1 [start_write_load $primary_host $primary_port 60]
        set load_handle2 [start_write_load $primary_host $primary_port 60]

        $replica config set dual-channel-replication-enabled yes
        $replica config set loglevel debug
        $replica config set repl-timeout 60
        $primary config set repl-backlog-size 1mb

        test "Test dual-channel-replication primary gets cob overrun before established psync" {
            # Pause primary main process after fork
            $primary debug pause-after-fork 1
            $replica replicaof $primary_host $primary_port
            wait_for_log_messages 0 {"*Done loading RDB*"} 0 1000 10

            # At this point rdb is loaded but psync hasn't been established yet.
            # Pause the replica so the primary main process will wake up while the
            # replica is unresponsive. We expect the main process to fill the COB and disconnect the replica.
            pause_process $replica_pid
            wait_and_resume_process -1
            $primary debug pause-after-fork 0
            wait_for_log_messages -1 {"*Client * closed * for overcoming of output buffer limits.*"} $loglines 1000 10
            wait_for_condition 50 100 {
                [string match {*replicas_waiting_psync:0*} [$primary info replication]]
            } else {
                fail "Primary did not free repl buf block after sync failure"
            }
            # Full sync will be triggered after the replica is reconnected, pause primary main process after fork.
            # In this way, in the subsequent replicaof no one, we won't get the LOADING error if the replica reconnects
            # too quickly and enters the loading state.
            $primary debug pause-after-fork 1
            resume_process $replica_pid
            set res [wait_for_log_messages -1 {"*Unable to partial resync with replica * for lack of backlog*"} $loglines 2000 10]
            set loglines [lindex $res 1]
        }
        # Waiting for the primary to enter the paused state, that is, make sure that bgsave is triggered.
        wait_process_paused -1
        $replica replicaof no one
        # Resume the primary and make sure the sync is dropped.
        resume_process [srv -1 pid]
        $primary debug pause-after-fork 0
        wait_for_condition 500 1000 {
            [s -1 rdb_bgsave_in_progress] eq 0
        } else {
            fail "Primary should abort sync"
        }
        stop_write_load $load_handle0
        stop_write_load $load_handle1
        stop_write_load $load_handle2
    }
}

# Primary output-buffer overrun while the replica is loading the RDB. The
# outer server is the primary (srv -1 inside the inner block) and the inner
# one the replica (srv 0). The replica is configured with pause-after-fork
# while write loads run against the primary, which is expected to disconnect
# it for exceeding the output buffer limit; pause-after-fork is switched off
# again before the block ends.
start_server {tags {"dual-channel-replication external:skip"}} {
    set primary [srv 0 client]
    set primary_host [srv 0 host]
    set primary_port [srv 0 port]
    set loglines [count_log_lines 0]

    $primary config set repl-diskless-sync yes
    $primary config set dual-channel-replication-enabled yes
    $primary config set client-output-buffer-limit "replica 1100k 0 0"
    $primary config set loglevel debug
    start_server {} {
        set replica [srv 0 client]
        set replica_host [srv 0 host]
        set replica_port [srv 0 port]
        set replica_log [srv 0 stdout]
        set replica_pid [srv 0 pid]

        set load_handle0 [start_write_load $primary_host $primary_port 60]
        set load_handle1 [start_write_load $primary_host $primary_port 60]
        set load_handle2 [start_write_load $primary_host $primary_port 60]

        $replica config set dual-channel-replication-enabled yes
        $replica config set loglevel debug
        $replica config set repl-timeout 60
        $primary config set repl-backlog-size 1mb

        $primary debug populate 1000 primary 100000
        # Set primary with a slow rdb generation, so that we can easily intercept loading
        # 10ms per key, with 1000 keys is 10 seconds
        $primary config set rdb-key-save-delay 10000

        test "Test dual-channel-replication primary gets cob overrun during replica rdb load" {
            set cur_client_closed_count [s -1 client_output_buffer_limit_disconnections]
            $replica debug pause-after-fork 1
            $replica replicaof $primary_host $primary_port
            wait_for_condition 500 1000 {
                [s -1 client_output_buffer_limit_disconnections] > $cur_client_closed_count
            } else {
                fail "Primary should disconnect replica due to COB overrun"
            }

            wait_for_condition 50 100 {
                [string match {*replicas_waiting_psync:0*} [$primary info replication]]
            } else {
                fail "Primary did not free repl buf block after sync failure"
            }

            # Increase the delay to make sure the replica doesn't start another sync
            # after it resumes after the first one.
            $primary config set repl-diskless-sync-delay 100
            wait_and_resume_process 0
            $replica debug pause-after-fork 0
            set res [wait_for_log_messages -1 {"*Unable to partial resync with replica * for lack of backlog*"} $loglines 20000 1]
            # Take element 1 of the result, as the other call sites in this
            # file do when refreshing loglines.
            set loglines [lindex $res 1]
        }
        stop_write_load $load_handle0
        stop_write_load $load_handle1
        stop_write_load $load_handle2
    }
}

# Scenario: two replicas start a dual-channel sync against one primary.
#   1. When only one of them is shut down, the sync must carry on for the other.
#   2. When the only syncing replica is shut down, the primary must abort the
#      background save.
start_server {tags {"dual-channel-replication external:skip"}} {
    set primary [srv 0 client]
    set primary_host [srv 0 host]
    set primary_port [srv 0 port]
    set loglines [count_log_lines 0]

    $primary config set repl-diskless-sync yes
    $primary config set dual-channel-replication-enabled yes
    $primary config set loglevel debug
    # NOTE(review): the 5s delay presumably lets both replicas attach to the
    # same BGSAVE - confirm.
    $primary config set repl-diskless-sync-delay 5
    $primary config set client-output-buffer-limit "replica 0 0 0"

    # Generating RDB will cost 5s (10000 * 0.0005s)
    $primary debug populate 10000 primary 1
    $primary config set rdb-key-save-delay 500

    start_server {} {
        set replica1 [srv 0 client]
        $replica1 config set dual-channel-replication-enabled yes
        $replica1 config set loglevel debug
        start_server {} {
            set replica2 [srv 0 client]
            $replica2 config set dual-channel-replication-enabled yes
            $replica2 config set loglevel debug
            $replica2 config set repl-timeout 60

            set load_handle [start_one_key_write_load $primary_host $primary_port 100 "mykey1"]
            test "Sync should continue if not all slaves dropped" {
                $replica1 replicaof $primary_host $primary_port
                $replica2 replicaof $primary_host $primary_port

                wait_for_condition 50 1000 {
                    [status $primary rdb_bgsave_in_progress] == 1
                } else {
                    fail "Sync did not start"
                }
                # Wait for both replicas main conns to establish psync
                wait_for_condition 50 1000 {
                    [status $primary sync_partial_ok] == 2
                } else {
                    fail "Replicas main conns didn't establish psync [status $primary sync_partial_ok]"
                }
                # Drop one replica; the shutdown closes the connection, hence the catch.
                catch {$replica1 shutdown nosave}
                # The surviving replica must finish its sync with no additional
                # full or partial sync counted on the primary.
                wait_for_condition 50 2000 {
                    [status $replica2 master_link_status] == "up" &&
                    [status $primary sync_full] == 2 &&
                    ([status $primary sync_partial_ok] == 2)
                } else {
                    fail "Sync session interrupted\n
                        sync_full:[status $primary sync_full]\n
                        sync_partial_ok:[status $primary sync_partial_ok]"
                }
            }

            $replica2 replicaof no one

            # Generating RDB will cost 100s (1000000 * 0.0001s)
            $primary debug populate 1000000 primary 1
            $primary config set rdb-key-save-delay 100

            test "Primary abort sync if all slaves dropped dual-channel-replication" {
                set cur_psync [status $primary sync_partial_ok]
                $replica2 replicaof $primary_host $primary_port

                wait_for_condition 50 1000 {
                    [status $primary rdb_bgsave_in_progress] == 1
                } else {
                    fail "Sync did not start"
                }
                # Wait for the replica's main conn to establish psync
                wait_for_condition 50 1000 {
                    [status $primary sync_partial_ok] == $cur_psync + 1
                } else {
                    fail "Replicas main conns didn't establish psync [status $primary sync_partial_ok]"
                }

                # Drop the only syncing replica; the primary has nobody left to
                # serve and must stop the background save.
                catch {$replica2 shutdown nosave}
                wait_for_condition 50 1000 {
                    [status $primary rdb_bgsave_in_progress] == 0
                } else {
                    fail "Primary should abort the sync"
                }
            }
            stop_write_load $load_handle
        }
    }
}
|
|
|
|
|
|
# Scenario: during a slow dual-channel full sync, the replica's main connection
# or rdb connection is killed on the primary. The primary must abort the sync
# and release its per-sync state, and must reject a later
# "replconf set-rdb-client-id" that names the killed rdb client.
start_server {tags {"dual-channel-replication external:skip"}} {
    set primary [srv 0 client]
    set primary_host [srv 0 host]
    set primary_port [srv 0 port]
    set loglines [count_log_lines 0]

    $primary config set repl-diskless-sync yes
    $primary config set dual-channel-replication-enabled yes
    $primary config set loglevel debug
    $primary config set repl-diskless-sync-delay 0
    # Generating RDB will cost 100s (1000000 * 0.0001s)
    $primary debug populate 1000000 primary 1
    $primary config set rdb-key-save-delay 100

    start_server {} {
        set replica [srv 0 client]
        set replica_host [srv 0 host]
        set replica_port [srv 0 port]
        set replica_log [srv 0 stdout]

        set load_handle [start_write_load $primary_host $primary_port 20]

        $replica config set dual-channel-replication-enabled yes
        $replica config set loglevel debug
        $replica config set repl-timeout 10
        test "Test dual-channel-replication replica main channel disconnected" {
            $replica replicaof $primary_host $primary_port
            # Wait for sync session to start
            wait_for_condition 500 1000 {
                [string match "*slave*,state=wait_bgsave*,type=rdb-channel*" [$primary info replication]] &&
                [string match "*slave*,state=bg_transfer*,type=main-channel*" [$primary info replication]] &&
                [s -1 rdb_bgsave_in_progress] eq 1
            } else {
                fail "replica didn't start sync session in time"
            }

            $primary debug log "killing replica main connection"
            # The main channel is the client whose last command was "psync".
            set replica_main_conn_id [get_client_id_by_last_cmd $primary "psync"]
            assert {$replica_main_conn_id != ""}
            set loglines [count_log_lines -1]
            $primary config set repl-diskless-sync-delay 5; # allow catch failed sync before retry
            $primary client kill id $replica_main_conn_id
            # Wait for primary to abort the sync
            wait_for_condition 50 1000 {
                [string match {*replicas_waiting_psync:0*} [$primary info replication]]
            } else {
                fail "Primary did not free repl buf block after sync failure"
            }
            wait_for_log_messages -1 {"*Background RDB transfer error*"} $loglines 1000 10
        }

        test "Test dual channel replication slave of no one after main conn kill" {
            $replica replicaof no one
            wait_for_condition 500 1000 {
                [s -1 rdb_bgsave_in_progress] eq 0
            } else {
                fail "Primary should abort sync"
            }
        }

        test "Test dual-channel-replication replica rdb connection disconnected" {
            $primary config set repl-diskless-sync-delay 0
            $replica replicaof $primary_host $primary_port
            # Wait for sync session to start
            wait_for_condition 500 1000 {
                [string match "*slave*,state=wait_bgsave*,type=rdb-channel*" [$primary info replication]] &&
                [string match "*slave*,state=bg_transfer*,type=main-channel*" [$primary info replication]] &&
                [s -1 rdb_bgsave_in_progress] eq 1
            } else {
                fail "replica didn't start sync session in time"
            }

            # The rdb channel is the client whose last command was "sync".
            set replica_rdb_channel_id [get_client_id_by_last_cmd $primary "sync"]
            $primary debug log "killing replica rdb connection $replica_rdb_channel_id"
            assert {$replica_rdb_channel_id != ""}
            set loglines [count_log_lines -1]
            $primary config set repl-diskless-sync-delay 5; # allow catch failed sync before retry
            $primary client kill id $replica_rdb_channel_id
            # Wait for primary to abort the sync
            wait_for_log_messages -1 {"*Background RDB transfer error*"} $loglines 1000 10
        }

        test "Test dual channel replication slave of no one after rdb conn kill" {
            $replica replicaof no one
            wait_for_condition 500 1000 {
                [s -1 rdb_bgsave_in_progress] eq 0
            } else {
                fail "Primary should abort sync"
            }
        }

        test "Test dual-channel-replication primary reject set-rdb-client after client killed" {
            $primary config set repl-diskless-sync-delay 0
            # Ensure replica main channel will not handshake before rdb client is killed
            $replica debug pause-after-fork 1
            $replica replicaof $primary_host $primary_port
            # Wait for sync session to start
            wait_for_condition 500 1000 {
                [string match "*slave*,state=wait_bgsave*,type=rdb-channel*" [$primary info replication]] &&
                [s -1 rdb_bgsave_in_progress] eq 1
            } else {
                fail "replica didn't start sync session in time"
            }

            set replica_rdb_channel_id [get_client_id_by_last_cmd $primary "sync"]
            assert {$replica_rdb_channel_id != ""}
            $primary debug log "killing replica rdb connection $replica_rdb_channel_id"
            $primary config set repl-diskless-sync-delay 5; # allow catch failed sync before retry
            $primary client kill id $replica_rdb_channel_id
            # Wait until the replica has actually paused, then let it continue.
            wait_and_resume_process 0
            # Turn the pause hook off again right away. Left enabled, a later
            # sync retry by the replica could stop the process once more with
            # nobody left to resume it.
            $replica debug pause-after-fork 0
            # Wait for primary to abort the sync
            wait_for_condition 10000000 10 {
                [s -1 rdb_bgsave_in_progress] eq 0 &&
                [string match {*replicas_waiting_psync:0*} [$primary info replication]]
            } else {
                fail "Primary should abort sync"
            }
            # Verify primary reject replconf set-rdb-client-id
            set res [catch {$primary replconf set-rdb-client-id $replica_rdb_channel_id} err]
            assert [string match *ERR* $err]
        }
        stop_write_load $load_handle
    }
}
|
|
|
|
# Scenario: a second replica asks to sync while a dual-channel BGSAVE with a
# socket target is already running for the first one. The primary is expected
# to log that the newcomer waits for the next BGSAVE.
start_server {tags {"dual-channel-replication external:skip"}} {
    set primary [srv 0 client]
    set primary_host [srv 0 host]
    set primary_port [srv 0 port]
    set loglines [count_log_lines 0]

    # Primary-side settings; a diskless-sync delay of 0 means the primary does
    # not wait for other replicas before starting the sync.
    foreach {cfg_name cfg_value} {
        repl-diskless-sync                  yes
        dual-channel-replication-enabled    yes
        loglevel                            debug
        repl-diskless-sync-delay            0
    } {
        $primary config set $cfg_name $cfg_value
    }

    # 10000 keys at 10ms per key: generating the RDB takes about 100s.
    $primary debug populate 10000 primary 1
    $primary config set rdb-key-save-delay 10000

    start_server {} {
        set replica_1 [srv 0 client]
        set replica_host_1 [srv 0 host]
        set replica_port_1 [srv 0 port]
        set replica_log_1 [srv 0 stdout]

        foreach {cfg_name cfg_value} {
            dual-channel-replication-enabled    yes
            loglevel                            debug
            repl-timeout                        10
        } {
            $replica_1 config set $cfg_name $cfg_value
        }
        start_server {} {
            set replica_2 [srv 0 client]
            set replica_host_2 [srv 0 host]
            set replica_port_2 [srv 0 port]
            set replica_log_2 [srv 0 stdout]

            set load_handle [start_write_load $primary_host $primary_port 20]

            foreach {cfg_name cfg_value} {
                dual-channel-replication-enabled    yes
                loglevel                            debug
                repl-timeout                        10
            } {
                $replica_2 config set $cfg_name $cfg_value
            }
            test "Test replica unable to join dual channel replication sync after started" {
                # First replica kicks off the sync session.
                $replica_1 replicaof $primary_host $primary_port
                wait_for_condition 50 100 {
                    [s -2 rdb_bgsave_in_progress] eq 1
                } else {
                    fail "replica didn't start sync session in time1"
                }
                # Second replica arrives while the BGSAVE is already running;
                # the primary's log must show it being deferred.
                $replica_2 replicaof $primary_host $primary_port
                wait_for_log_messages -2 {"*Current BGSAVE has socket target. Waiting for next BGSAVE for SYNC*"} $loglines 100 1000
            }
            stop_write_load $load_handle
        }
    }
}
|
|
|
|
# Scenario: the replica must start a new sync attempt on its own after either
# of its two connections (rdb channel, main channel) is killed on the primary
# in the middle of a dual-channel full sync.
start_server {tags {"dual-channel-replication external:skip"}} {
    set primary [srv 0 client]
    set primary_host [srv 0 host]
    set primary_port [srv 0 port]
    set loglines [count_log_lines 0]

    foreach {cfg_name cfg_value} {
        repl-diskless-sync                  yes
        dual-channel-replication-enabled    yes
        loglevel                            debug
        repl-diskless-sync-delay            0
    } {
        $primary config set $cfg_name $cfg_value
    }

    # 100000 keys at 1ms per key: generating the RDB takes about 100 sec.
    $primary debug populate 100000 primary 1
    $primary config set rdb-key-save-delay 1000

    start_server {} {
        set replica [srv 0 client]
        set replica_host [srv 0 host]
        set replica_port [srv 0 port]
        set replica_log [srv 0 stdout]

        foreach {cfg_name cfg_value} {
            dual-channel-replication-enabled    yes
            loglevel                            debug
            repl-timeout                        10
        } {
            $replica config set $cfg_name $cfg_value
        }
        set load_handle [start_one_key_write_load $primary_host $primary_port 100 "mykey"]
        test "Replica recover rdb-connection killed" {
            $replica replicaof $primary_host $primary_port
            # Both channels must be visible on the primary and the BGSAVE running.
            wait_for_condition 500 1000 {
                [string match "*slave*,state=wait_bgsave*,type=rdb-channel*" [$primary info replication]] &&
                [string match "*slave*,state=bg_transfer*,type=main-channel*" [$primary info replication]] &&
                [s -1 rdb_bgsave_in_progress] eq 1
            } else {
                fail "replica didn't start sync session in time"
            }

            $primary debug log "killing replica rdb connection"
            # The rdb channel is the client whose last command was "sync".
            set rdb_conn_id [get_client_id_by_last_cmd $primary "sync"]
            assert {$rdb_conn_id != ""}
            set loglines [count_log_lines -1]
            # Delay the next sync so the failed one can be observed before the retry.
            $primary config set repl-diskless-sync-delay 5
            $primary client kill id $rdb_conn_id
            # The primary must abort the sync.
            wait_for_condition 50 1000 {
                [string match {*replicas_waiting_psync:0*} [$primary info replication]]
            } else {
                fail "Primary did not free repl buf block after sync failure"
            }
            $primary config set repl-diskless-sync-delay 0
            wait_for_log_messages -1 {"*Background RDB transfer error*"} $loglines 1000 10
            # A fresh sync session must show up without any test intervention.
            wait_for_condition 500 1000 {
                [string match "*slave*,state=wait_bgsave*,type=rdb-channel*" [$primary info replication]] &&
                [string match "*slave*,state=bg_transfer*,type=main-channel*" [$primary info replication]] &&
                [s -1 rdb_bgsave_in_progress] eq 1
            } else {
                fail "replica didn't retry after connection close"
            }
        }
        # Detach the replica and let the primary wind down the sync before the
        # next test starts from a clean state.
        $replica replicaof no one
        wait_for_condition 500 1000 {
            [s -1 rdb_bgsave_in_progress] eq 0
        } else {
            fail "Primary should abort sync"
        }
        test "Replica recover main-connection killed" {
            $replica replicaof $primary_host $primary_port
            # Both channels must be visible on the primary and the BGSAVE running.
            wait_for_condition 500 1000 {
                [string match "*slave*,state=wait_bgsave*,type=rdb-channel*" [$primary info replication]] &&
                [string match "*slave*,state=bg_transfer*,type=main-channel*" [$primary info replication]] &&
                [s -1 rdb_bgsave_in_progress] eq 1
            } else {
                fail "replica didn't start sync session in time"
            }
            $primary debug log "killing replica main connection"
            # The main channel is the client whose last command was "psync".
            set main_conn_id [get_client_id_by_last_cmd $primary "psync"]
            assert {$main_conn_id != ""}
            set loglines [count_log_lines -1]
            # Delay the next sync so the failed one can be observed before the retry.
            $primary config set repl-diskless-sync-delay 5
            $primary client kill id $main_conn_id
            # The primary must abort the sync.
            wait_for_condition 50 1000 {
                [string match {*replicas_waiting_psync:0*} [$primary info replication]]
            } else {
                fail "Primary did not free repl buf block after sync failure"
            }
            $primary config set repl-diskless-sync-delay 0
            wait_for_log_messages -1 {"*Background RDB transfer error*"} $loglines 1000 10
            # A fresh sync session must show up without any test intervention.
            wait_for_condition 500 1000 {
                [string match "*slave*,state=wait_bgsave*,type=rdb-channel*" [$primary info replication]] &&
                [string match "*slave*,state=bg_transfer*,type=main-channel*" [$primary info replication]] &&
                [s -1 rdb_bgsave_in_progress] eq 1
            } else {
                fail "replica didn't retry after connection close"
            }
        }
        stop_write_load $load_handle
    }
}
|
|
|
|
|
|
# Scenario: the replica's main connection is killed while the replica is
# already loading the RDB from the socket. The replica must notice the failure,
# and a following sync must bring it online.
start_server {tags {"dual-channel-replication external:skip"}} {
    set primary [srv 0 client]
    set primary_host [srv 0 host]
    set primary_port [srv 0 port]

    foreach {cfg_name cfg_value} {
        repl-diskless-sync                  yes
        dual-channel-replication-enabled    yes
        repl-diskless-sync-delay            0
    } {
        $primary config set $cfg_name $cfg_value
    }

    # Large dataset plus a key-save delay to keep the RDB generation slow.
    # NOTE(review): the original comment claims about 100 sec of generation
    # time; with a negative delay value that figure is not evident - confirm.
    $primary debug populate 1000000 primary 1
    $primary config set rdb-key-save-delay -10

    start_server {} {
        set replica [srv 0 client]
        set replica_host [srv 0 host]
        set replica_port [srv 0 port]
        set replica_log [srv 0 stdout]

        foreach {cfg_name cfg_value} {
            dual-channel-replication-enabled    yes
            loglevel                            debug
            repl-diskless-load                  flush-before-load
        } {
            $replica config set $cfg_name $cfg_value
        }

        # Under valgrind use a 10x longer repl-timeout and retry budget.
        if {$::valgrind} {
            set sync_timeout 100
            set max_tries 5000
        } else {
            set sync_timeout 10
            set max_tries 500
        }
        $primary config set repl-timeout $sync_timeout
        $replica config set repl-timeout $sync_timeout

        # See https://github.com/valkey-io/valkey/issues/1152
        test "Replica notice main-connection killed during rdb load callback" {
            set loglines [count_log_lines 0]
            $replica replicaof $primary_host $primary_port
            # Both channels must be visible on the primary and the BGSAVE running.
            wait_for_condition 500 1000 {
                [string match "*slave*,state=wait_bgsave*,type=rdb-channel*" [$primary info replication]] &&
                [string match "*slave*,state=bg_transfer*,type=main-channel*" [$primary info replication]] &&
                [s -1 rdb_bgsave_in_progress] eq 1
            } else {
                fail "replica didn't start sync session in time"
            }
            # Proceed only once the replica has begun loading the RDB.
            wait_for_log_messages 0 {"*Loading RDB produced by Valkey version*"} $loglines 1000 10
            $primary set key val
            # The main channel is the client whose last command was "psync".
            set main_conn_id [get_client_id_by_last_cmd $primary "psync"]
            # Delay the next sync so the failed one can be observed before the retry.
            $primary config set repl-diskless-sync-delay 5
            $primary debug log "killing replica main connection $main_conn_id"
            assert {$main_conn_id != ""}
            set loglines [count_log_lines 0]
            # Drop the save delay so that the next sync is able to succeed.
            $primary config set rdb-key-save-delay 0
            $primary client kill id $main_conn_id
            # The primary must abort the sync.
            wait_for_condition 50 1000 {
                [string match {*replicas_waiting_psync:0*} [$primary info replication]]
            } else {
                fail "Primary did not free repl buf block after sync failure"
            }
            # The replica must report the failed load and then come back online.
            wait_for_log_messages 0 {"*Failed trying to load the PRIMARY synchronization DB from socket*"} $loglines $max_tries 10
            verify_replica_online $primary 0 $max_tries
        }
    }
}
|