From 4aacffa32da07eb09b271c7c3dfbd58c7a2cb8d1 Mon Sep 17 00:00:00 2001 From: Binbin Date: Mon, 11 Nov 2024 21:42:34 +0800 Subject: [PATCH] Stabilize dual replication test to avoid getting LOADING error (#1288) When doing `$replica replicaof no one`, we may get a LOADING error. This is because during the test execution, the replica may reconnect very quickly, and the full sync is initiated, and the replica has entered the LOADING state. In this commit, we make sure the primary is paused after the fork, so the replica won't enter the LOADING state, and with this fix, this test seems more natural and predictable. Signed-off-by: Binbin --- .../integration/dual-channel-replication.tcl | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tests/integration/dual-channel-replication.tcl b/tests/integration/dual-channel-replication.tcl index 5302030db..05bdc130c 100644 --- a/tests/integration/dual-channel-replication.tcl +++ b/tests/integration/dual-channel-replication.tcl @@ -23,14 +23,20 @@ proc get_client_id_by_last_cmd {r cmd} { return $client_id } -# Wait until the process enters a paused state, then resume the process. -proc wait_and_resume_process idx { +# Wait until the process enters a paused state. +proc wait_process_paused idx { set pid [srv $idx pid] wait_for_condition 50 1000 { [string match "T*" [exec ps -o state= -p $pid]] } else { fail "Process $pid didn't stop, current state is [exec ps -o state= -p $pid]" } +} + +# Wait until the process enters a paused state, then resume the process. +proc wait_and_resume_process idx { + set pid [srv $idx pid] + wait_process_paused $idx resume_process $pid } @@ -790,11 +796,20 @@ start_server {tags {"dual-channel-replication external:skip"}} { } else { fail "Primary did not free repl buf block after sync failure" } + # Full sync will be triggered after the replica is reconnected, pause primary main process after fork. 
+ # In this way, in the subsequent replicaof no one, we won't get the LOADING error if the replica reconnects + # too quickly and enters the loading state. + $primary debug pause-after-fork 1 resume_process $replica_pid set res [wait_for_log_messages -1 {"*Unable to partial resync with replica * for lack of backlog*"} $loglines 2000 10] set loglines [lindex $res 1] } + # Waiting for the primary to enter the paused state, that is, make sure that bgsave is triggered. + wait_process_paused -1 $replica replicaof no one + # Resume the primary and make sure the sync is dropped. + resume_process [srv -1 pid] + $primary debug pause-after-fork 0 wait_for_condition 500 1000 { [s -1 rdb_bgsave_in_progress] eq 0 } else {