From da8f7428fae4ccffa5e07d869f65fe7413898f78 Mon Sep 17 00:00:00 2001 From: Binbin Date: Mon, 29 May 2023 18:43:26 +0800 Subject: [PATCH] Try to fix SENTINEL SIMULATE-FAILURE test by re-source init-tests before each test (#12194) This test was introduced in #12079; it works well most of the time, but occasionally fails: ``` 00:34:45> SENTINEL SIMULATE-FAILURE crash-after-election works: OK 00:34:45> SENTINEL SIMULATE-FAILURE crash-after-promotion works: FAILED: Sentinel set crash-after-promotion but did not exit ``` The root cause is unknown; it may be affected by the exit of the previous crash-after-election test. Because it doesn't really make much sense to dig deeper into it now, we re-source init-tests to get a clean environment before each test, to try to fix this. After applying this change, we found a new error: ``` 16:39:33> SENTINEL SIMULATE-FAILURE crash-after-election works: FAILED: caught an error in the test couldn't open socket: connection refused couldn't open socket: connection refused ``` My guess is that the sentinel triggers the failover and exits before the SENTINEL FAILOVER command is sent, so a new || condition was added to wait_for_condition to tolerate that case. 
--- tests/sentinel/tests/05-manual.tcl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/sentinel/tests/05-manual.tcl b/tests/sentinel/tests/05-manual.tcl index 7e050b0dc..95e8d41e4 100644 --- a/tests/sentinel/tests/05-manual.tcl +++ b/tests/sentinel/tests/05-manual.tcl @@ -63,15 +63,21 @@ test "The old master eventually gets reconfigured as a slave" { } foreach flag {crash-after-election crash-after-promotion} { + # Before each SIMULATE-FAILURE test, re-source init-tests to get a clean environment + source "../tests/includes/init-tests.tcl" + test "SENTINEL SIMULATE-FAILURE $flag works" { assert_equal {OK} [S 0 SENTINEL SIMULATE-FAILURE $flag] # Trigger a failover, failover will trigger leader election, replica promotion + # Sentinel may enter failover and exit before the command, catch it and allow it wait_for_condition 300 50 { [catch {S 0 SENTINEL FAILOVER mymaster}] == 0 + || + ([catch {S 0 SENTINEL FAILOVER mymaster} reply] == 1 && + [string match {*couldn't open socket: connection refused*} $reply]) } else { catch {S 0 SENTINEL FAILOVER mymaster} reply - puts [S 0 SENTINEL REPLICAS mymaster] fail "Sentinel manual failover did not work, got: $reply" }