Fix failing tests due to issues with wait_for_log_message (#7572)

- the test now waits for specific set of log messages rather than wait for
  timeout looking for just one message.
- we don't wanna sample the current length of the log after an action, due
  to a race, we need to start the search from the line number of the last
  message we where waiting for.
- when attempting to trigger a full sync, use multi-exec to avoid a race
  where the replica manages to re-connect before we completed the set of
  actions that should force a full sync.
- fix verify_log_message which was broken and unused
This commit is contained in:
Oran Agra 2020-07-28 11:15:29 +03:00 committed by GitHub
parent 198770751f
commit 06aaeabaea
3 changed files with 38 additions and 34 deletions

View File

@ -440,30 +440,30 @@ test {diskless loading short read} {
for {set i 0} {$i < $attempts} {incr i} { for {set i 0} {$i < $attempts} {incr i} {
# wait for the replica to start reading the rdb # wait for the replica to start reading the rdb
# using the log file since the replica only responds to INFO once in 2mb # using the log file since the replica only responds to INFO once in 2mb
wait_for_log_message -1 "*Loading DB in memory*" $loglines 2000 1 set res [wait_for_log_messages -1 {"*Loading DB in memory*"} $loglines 2000 1]
set loglines [lindex $res 1]
# add some additional random sleep so that we kill the master on a different place each time # add some additional random sleep so that we kill the master on a different place each time
after [expr {int(rand()*100)}] after [expr {int(rand()*50)}]
# kill the replica connection on the master # kill the replica connection on the master
set killed [$master client kill type replica] set killed [$master client kill type replica]
if {[catch { set res [wait_for_log_messages -1 {"*Internal error in RDB*" "*Finished with success*" "*Successful partial resynchronization*"} $loglines 1000 1]
set res [wait_for_log_message -1 "*Internal error in RDB*" $loglines 100 10] if {$::verbose} { puts $res }
if {$::verbose} { set log_text [lindex $res 0]
puts $res set loglines [lindex $res 1]
} if {![string match "*Internal error in RDB*" $log_text]} {
}]} {
puts "failed triggering short read"
# force the replica to try another full sync # force the replica to try another full sync
$master multi
$master client kill type replica $master client kill type replica
$master set asdf asdf $master set asdf asdf
# the side effect of resizing the backlog is that it is flushed (16k is the min size) # the side effect of resizing the backlog is that it is flushed (16k is the min size)
$master config set repl-backlog-size [expr {16384 + $i}] $master config set repl-backlog-size [expr {16384 + $i}]
$master exec
} }
# wait for loading to stop (fail) # wait for loading to stop (fail)
set loglines [count_log_lines -1] wait_for_condition 1000 1 {
wait_for_condition 100 10 {
[s -1 loading] eq 0 [s -1 loading] eq 0
} else { } else {
fail "Replica didn't disconnect" fail "Replica didn't disconnect"
@ -545,7 +545,7 @@ start_server {tags {"repl"}} {
# wait for the replicas to start reading the rdb # wait for the replicas to start reading the rdb
# using the log file since the replica only responds to INFO once in 2mb # using the log file since the replica only responds to INFO once in 2mb
wait_for_log_message -1 "*Loading DB in memory*" $loglines 800 10 wait_for_log_messages -1 {"*Loading DB in memory*"} $loglines 800 10
if {$measure_time} { if {$measure_time} {
set master_statfile "/proc/$master_pid/stat" set master_statfile "/proc/$master_pid/stat"
@ -580,13 +580,13 @@ start_server {tags {"repl"}} {
# make sure we got what we were aiming for, by looking for the message in the log file # make sure we got what we were aiming for, by looking for the message in the log file
if {$all_drop == "all"} { if {$all_drop == "all"} {
wait_for_log_message -2 "*Diskless rdb transfer, last replica dropped, killing fork child*" $loglines 1 1 wait_for_log_messages -2 {"*Diskless rdb transfer, last replica dropped, killing fork child*"} $loglines 1 1
} }
if {$all_drop == "no"} { if {$all_drop == "no"} {
wait_for_log_message -2 "*Diskless rdb transfer, done reading from pipe, 2 replicas still up*" $loglines 1 1 wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 2 replicas still up*"} $loglines 1 1
} }
if {$all_drop == "slow" || $all_drop == "fast"} { if {$all_drop == "slow" || $all_drop == "fast"} {
wait_for_log_message -2 "*Diskless rdb transfer, done reading from pipe, 1 replicas still up*" $loglines 1 1 wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 1 replicas still up*"} $loglines 1 1
} }
# make sure we don't have a busy loop going thought epoll_wait # make sure we don't have a busy loop going thought epoll_wait

View File

@ -106,31 +106,35 @@ proc count_log_lines {srv_idx} {
# verify pattern exists in server's sdtout after a certain line number # verify pattern exists in server's sdtout after a certain line number
proc verify_log_message {srv_idx pattern from_line} { proc verify_log_message {srv_idx pattern from_line} {
set lines_after [count_log_lines] incr from_line
set lines [expr $lines_after - $from_line] set result [exec tail -n +$from_line < [srv $srv_idx stdout]]
set result [exec tail -$lines < [srv $srv_idx stdout]]
if {![string match $pattern $result]} { if {![string match $pattern $result]} {
error "assertion:expected message not found in log file: $pattern" error "assertion:expected message not found in log file: $pattern"
} }
} }
# wait for pattern to be found in server's stdout after certain line number # wait for pattern to be found in server's stdout after certain line number
proc wait_for_log_message {srv_idx pattern from_line maxtries delay} { # return value is a list containing the line that matched the pattern and the line number
proc wait_for_log_messages {srv_idx patterns from_line maxtries delay} {
set retry $maxtries set retry $maxtries
set next_line [expr $from_line + 1] ;# searching form the line after
set stdout [srv $srv_idx stdout] set stdout [srv $srv_idx stdout]
while {$retry} { while {$retry} {
set result [exec tail -n +$from_line < $stdout] set result [exec tail -n +$next_line < $stdout]
set result [split $result "\n"] set result [split $result "\n"]
foreach line $result { foreach line $result {
if {[string match $pattern $line]} { foreach pattern $patterns {
return $line if {[string match $pattern $line]} {
return [list $line $next_line]
}
} }
incr next_line
} }
incr retry -1 incr retry -1
after $delay after $delay
} }
if {$retry == 0} { if {$retry == 0} {
fail "log message of '$pattern' not found in $stdout after line: $from_line" fail "log message of '$patterns' not found in $stdout after line: $from_line till line: [expr $next_line -1]"
} }
} }

View File

@ -77,30 +77,30 @@ tags "modules" {
for {set i 0} {$i < $attempts} {incr i} { for {set i 0} {$i < $attempts} {incr i} {
# wait for the replica to start reading the rdb # wait for the replica to start reading the rdb
# using the log file since the replica only responds to INFO once in 2mb # using the log file since the replica only responds to INFO once in 2mb
wait_for_log_message -1 "*Loading DB in memory*" $loglines 2000 1 set res [wait_for_log_messages -1 {"*Loading DB in memory*"} $loglines 2000 1]
set loglines [lindex $res 1]
# add some additional random sleep so that we kill the master on a different place each time # add some additional random sleep so that we kill the master on a different place each time
after [expr {int(rand()*100)}] after [expr {int(rand()*50)}]
# kill the replica connection on the master # kill the replica connection on the master
set killed [$master client kill type replica] set killed [$master client kill type replica]
if {[catch { set res [wait_for_log_messages -1 {"*Internal error in RDB*" "*Finished with success*" "*Successful partial resynchronization*"} $loglines 1000 1]
set res [wait_for_log_message -1 "*Internal error in RDB*" $loglines 100 10] if {$::verbose} { puts $res }
if {$::verbose} { set log_text [lindex $res 0]
puts $res set loglines [lindex $res 1]
} if {![string match "*Internal error in RDB*" $log_text]} {
}]} {
puts "failed triggering short read"
# force the replica to try another full sync # force the replica to try another full sync
$master multi
$master client kill type replica $master client kill type replica
$master set asdf asdf $master set asdf asdf
# the side effect of resizing the backlog is that it is flushed (16k is the min size) # the side effect of resizing the backlog is that it is flushed (16k is the min size)
$master config set repl-backlog-size [expr {16384 + $i}] $master config set repl-backlog-size [expr {16384 + $i}]
$master exec
} }
# wait for loading to stop (fail) # wait for loading to stop (fail)
set loglines [count_log_lines -1] wait_for_condition 1000 1 {
wait_for_condition 100 10 {
[s -1 loading] eq 0 [s -1 loading] eq 0
} else { } else {
fail "Replica didn't disconnect" fail "Replica didn't disconnect"