Fix failing tests due to issues with wait_for_log_message (#7572)

- the test now waits for specific set of log messages rather than wait for timeout looking for just one message. - we don't wanna sample the current length of the log after an action, due to a race, we need to start the search from the line number of the last message we where waiting for. - when attempting to trigger a full sync, use multi-exec to avoid a race where the replica manages to re-connect before we completed the set of actions that should force a full sync. - fix verify_log_message which was broken and unused
2020-07-28 11:15:29 +03:00 · 2020-07-28 11:15:29 +03:00 · 109b5ccdcd
commit 109b5ccdcd
parent f31260b044
3 changed files with 38 additions and 34 deletions
--- a/tests/integration/replication.tcl
+++ b/tests/integration/replication.tcl
@ -440,30 +440,30 @@ test {diskless loading short read} {
            for {set i 0} {$i < $attempts} {incr i} {
                # wait for the replica to start reading the rdb
                # using the log file since the replica only responds to INFO once in 2mb
-                wait_for_log_message -1 "*Loading DB in memory*" $loglines 2000 1
+                set res [wait_for_log_messages -1 {"*Loading DB in memory*"} $loglines 2000 1]
+                set loglines [lindex $res 1]

                # add some additional random sleep so that we kill the master on a different place each time
-                after [expr {int(rand()*100)}]
+                after [expr {int(rand()*50)}]

                # kill the replica connection on the master
                set killed [$master client kill type replica]

-                if {[catch {
-                    set res [wait_for_log_message -1 "*Internal error in RDB*" $loglines 100 10]
-                    if {$::verbose} {
-                        puts $res
-                    }
-                }]} {
-                    puts "failed triggering short read"
+                set res [wait_for_log_messages -1 {"*Internal error in RDB*" "*Finished with success*" "*Successful partial resynchronization*"} $loglines 1000 1]
+                if {$::verbose} { puts $res }
+                set log_text [lindex $res 0]
+                set loglines [lindex $res 1]
+                if {![string match "*Internal error in RDB*" $log_text]} {
                    # force the replica to try another full sync
+                    $master multi
                    $master client kill type replica
                    $master set asdf asdf
                    # the side effect of resizing the backlog is that it is flushed (16k is the min size)
                    $master config set repl-backlog-size [expr {16384 + $i}]
+                    $master exec
                }
                # wait for loading to stop (fail)
-                set loglines [count_log_lines -1]
-                wait_for_condition 100 10 {
+                wait_for_condition 1000 1 {
                    [s -1 loading] eq 0
                } else {
                    fail "Replica didn't disconnect"
@ -545,7 +545,7 @@ start_server {tags {"repl"}} {

                    # wait for the replicas to start reading the rdb
                    # using the log file since the replica only responds to INFO once in 2mb
-                    wait_for_log_message -1 "*Loading DB in memory*" $loglines 800 10
+                    wait_for_log_messages -1 {"*Loading DB in memory*"} $loglines 800 10

                    if {$measure_time} {
                        set master_statfile "/proc/$master_pid/stat"
@ -580,13 +580,13 @@ start_server {tags {"repl"}} {

                    # make sure we got what we were aiming for, by looking for the message in the log file
                    if {$all_drop == "all"} {
-                        wait_for_log_message -2 "*Diskless rdb transfer, last replica dropped, killing fork child*" $loglines 1 1
+                        wait_for_log_messages -2 {"*Diskless rdb transfer, last replica dropped, killing fork child*"} $loglines 1 1
                    }
                    if {$all_drop == "no"} {
-                        wait_for_log_message -2 "*Diskless rdb transfer, done reading from pipe, 2 replicas still up*" $loglines 1 1
+                        wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 2 replicas still up*"} $loglines 1 1
                    }
                    if {$all_drop == "slow" || $all_drop == "fast"} {
-                        wait_for_log_message -2 "*Diskless rdb transfer, done reading from pipe, 1 replicas still up*" $loglines 1 1
+                        wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 1 replicas still up*"} $loglines 1 1
                    }

                    # make sure we don't have a busy loop going thought epoll_wait
--- a/tests/support/util.tcl
+++ b/tests/support/util.tcl
@ -106,31 +106,35 @@ proc count_log_lines {srv_idx} {

 # verify pattern exists in server's sdtout after a certain line number
 proc verify_log_message {srv_idx pattern from_line} {
-    set lines_after [count_log_lines]
-    set lines [expr $lines_after - $from_line]
-    set result [exec tail -$lines < [srv $srv_idx stdout]]
+    incr from_line
+    set result [exec tail -n +$from_line < [srv $srv_idx stdout]]
    if {![string match $pattern $result]} {
        error "assertion:expected message not found in log file: $pattern"
    }
 }

 # wait for pattern to be found in server's stdout after certain line number
-proc wait_for_log_message {srv_idx pattern from_line maxtries delay} {
+# return value is a list containing the line that matched the pattern and the line number
+proc wait_for_log_messages {srv_idx patterns from_line maxtries delay} {
    set retry $maxtries
+    set next_line [expr $from_line + 1] ;# searching form the line after
    set stdout [srv $srv_idx stdout]
    while {$retry} {
-        set result [exec tail -n +$from_line < $stdout]
+        set result [exec tail -n +$next_line < $stdout]
        set result [split $result "\n"]
        foreach line $result {
-            if {[string match $pattern $line]} {
-                return $line
+            foreach pattern $patterns {
+                if {[string match $pattern $line]} {
+                    return [list $line $next_line]
+                }
            }
+            incr next_line
        }
        incr retry -1
        after $delay
    }
    if {$retry == 0} {
-        fail "log message of '$pattern' not found in $stdout after line: $from_line"
+        fail "log message of '$patterns' not found in $stdout after line: $from_line till line: [expr $next_line -1]"
    }
 }

--- a/tests/unit/moduleapi/testrdb.tcl
+++ b/tests/unit/moduleapi/testrdb.tcl
@ -77,30 +77,30 @@ tags "modules" {
                    for {set i 0} {$i < $attempts} {incr i} {
                        # wait for the replica to start reading the rdb
                        # using the log file since the replica only responds to INFO once in 2mb
-                        wait_for_log_message -1 "*Loading DB in memory*" $loglines 2000 1
+                        set res [wait_for_log_messages -1 {"*Loading DB in memory*"} $loglines 2000 1]
+                        set loglines [lindex $res 1]

                        # add some additional random sleep so that we kill the master on a different place each time
-                        after [expr {int(rand()*100)}]
+                        after [expr {int(rand()*50)}]

                        # kill the replica connection on the master
                        set killed [$master client kill type replica]

-                        if {[catch {
-                            set res [wait_for_log_message -1 "*Internal error in RDB*" $loglines 100 10]
-                            if {$::verbose} {
-                                puts $res
-                            }
-                        }]} {
-                            puts "failed triggering short read"
+                        set res [wait_for_log_messages -1 {"*Internal error in RDB*" "*Finished with success*" "*Successful partial resynchronization*"} $loglines 1000 1]
+                        if {$::verbose} { puts $res }
+                        set log_text [lindex $res 0]
+                        set loglines [lindex $res 1]
+                        if {![string match "*Internal error in RDB*" $log_text]} {
                            # force the replica to try another full sync
+                            $master multi
                            $master client kill type replica
                            $master set asdf asdf
                            # the side effect of resizing the backlog is that it is flushed (16k is the min size)
                            $master config set repl-backlog-size [expr {16384 + $i}]
+                            $master exec
                        }
                        # wait for loading to stop (fail)
-                        set loglines [count_log_lines -1]
-                        wait_for_condition 100 10 {
+                        wait_for_condition 1000 1 {
                            [s -1 loading] eq 0
                        } else {
                            fail "Replica didn't disconnect"