Fix failing tests due to issues with wait_for_log_message (#7572)

- the test now waits for specific set of log messages rather than wait for
  timeout looking for just one message.
- we don't wanna sample the current length of the log after an action, due
  to a race, we need to start the search from the line number of the last
  message we where waiting for.
- when attempting to trigger a full sync, use multi-exec to avoid a race
  where the replica manages to re-connect before we completed the set of
  actions that should force a full sync.
- fix verify_log_message which was broken and unused
This commit is contained in:
Oran Agra 2020-07-28 11:15:29 +03:00 committed by GitHub
parent f31260b044
commit 109b5ccdcd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 38 additions and 34 deletions

View File

@ -440,30 +440,30 @@ test {diskless loading short read} {
for {set i 0} {$i < $attempts} {incr i} {
# wait for the replica to start reading the rdb
# using the log file since the replica only responds to INFO once in 2mb
wait_for_log_message -1 "*Loading DB in memory*" $loglines 2000 1
set res [wait_for_log_messages -1 {"*Loading DB in memory*"} $loglines 2000 1]
set loglines [lindex $res 1]
# add some additional random sleep so that we kill the master on a different place each time
after [expr {int(rand()*100)}]
after [expr {int(rand()*50)}]
# kill the replica connection on the master
set killed [$master client kill type replica]
if {[catch {
set res [wait_for_log_message -1 "*Internal error in RDB*" $loglines 100 10]
if {$::verbose} {
puts $res
}
}]} {
puts "failed triggering short read"
set res [wait_for_log_messages -1 {"*Internal error in RDB*" "*Finished with success*" "*Successful partial resynchronization*"} $loglines 1000 1]
if {$::verbose} { puts $res }
set log_text [lindex $res 0]
set loglines [lindex $res 1]
if {![string match "*Internal error in RDB*" $log_text]} {
# force the replica to try another full sync
$master multi
$master client kill type replica
$master set asdf asdf
# the side effect of resizing the backlog is that it is flushed (16k is the min size)
$master config set repl-backlog-size [expr {16384 + $i}]
$master exec
}
# wait for loading to stop (fail)
set loglines [count_log_lines -1]
wait_for_condition 100 10 {
wait_for_condition 1000 1 {
[s -1 loading] eq 0
} else {
fail "Replica didn't disconnect"
@ -545,7 +545,7 @@ start_server {tags {"repl"}} {
# wait for the replicas to start reading the rdb
# using the log file since the replica only responds to INFO once in 2mb
wait_for_log_message -1 "*Loading DB in memory*" $loglines 800 10
wait_for_log_messages -1 {"*Loading DB in memory*"} $loglines 800 10
if {$measure_time} {
set master_statfile "/proc/$master_pid/stat"
@ -580,13 +580,13 @@ start_server {tags {"repl"}} {
# make sure we got what we were aiming for, by looking for the message in the log file
if {$all_drop == "all"} {
wait_for_log_message -2 "*Diskless rdb transfer, last replica dropped, killing fork child*" $loglines 1 1
wait_for_log_messages -2 {"*Diskless rdb transfer, last replica dropped, killing fork child*"} $loglines 1 1
}
if {$all_drop == "no"} {
wait_for_log_message -2 "*Diskless rdb transfer, done reading from pipe, 2 replicas still up*" $loglines 1 1
wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 2 replicas still up*"} $loglines 1 1
}
if {$all_drop == "slow" || $all_drop == "fast"} {
wait_for_log_message -2 "*Diskless rdb transfer, done reading from pipe, 1 replicas still up*" $loglines 1 1
wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 1 replicas still up*"} $loglines 1 1
}
# make sure we don't have a busy loop going thought epoll_wait

View File

@ -106,31 +106,35 @@ proc count_log_lines {srv_idx} {
# verify pattern exists in server's sdtout after a certain line number
proc verify_log_message {srv_idx pattern from_line} {
set lines_after [count_log_lines]
set lines [expr $lines_after - $from_line]
set result [exec tail -$lines < [srv $srv_idx stdout]]
incr from_line
set result [exec tail -n +$from_line < [srv $srv_idx stdout]]
if {![string match $pattern $result]} {
error "assertion:expected message not found in log file: $pattern"
}
}
# wait for pattern to be found in server's stdout after certain line number
proc wait_for_log_message {srv_idx pattern from_line maxtries delay} {
# return value is a list containing the line that matched the pattern and the line number
proc wait_for_log_messages {srv_idx patterns from_line maxtries delay} {
set retry $maxtries
set next_line [expr $from_line + 1] ;# searching form the line after
set stdout [srv $srv_idx stdout]
while {$retry} {
set result [exec tail -n +$from_line < $stdout]
set result [exec tail -n +$next_line < $stdout]
set result [split $result "\n"]
foreach line $result {
if {[string match $pattern $line]} {
return $line
foreach pattern $patterns {
if {[string match $pattern $line]} {
return [list $line $next_line]
}
}
incr next_line
}
incr retry -1
after $delay
}
if {$retry == 0} {
fail "log message of '$pattern' not found in $stdout after line: $from_line"
fail "log message of '$patterns' not found in $stdout after line: $from_line till line: [expr $next_line -1]"
}
}

View File

@ -77,30 +77,30 @@ tags "modules" {
for {set i 0} {$i < $attempts} {incr i} {
# wait for the replica to start reading the rdb
# using the log file since the replica only responds to INFO once in 2mb
wait_for_log_message -1 "*Loading DB in memory*" $loglines 2000 1
set res [wait_for_log_messages -1 {"*Loading DB in memory*"} $loglines 2000 1]
set loglines [lindex $res 1]
# add some additional random sleep so that we kill the master on a different place each time
after [expr {int(rand()*100)}]
after [expr {int(rand()*50)}]
# kill the replica connection on the master
set killed [$master client kill type replica]
if {[catch {
set res [wait_for_log_message -1 "*Internal error in RDB*" $loglines 100 10]
if {$::verbose} {
puts $res
}
}]} {
puts "failed triggering short read"
set res [wait_for_log_messages -1 {"*Internal error in RDB*" "*Finished with success*" "*Successful partial resynchronization*"} $loglines 1000 1]
if {$::verbose} { puts $res }
set log_text [lindex $res 0]
set loglines [lindex $res 1]
if {![string match "*Internal error in RDB*" $log_text]} {
# force the replica to try another full sync
$master multi
$master client kill type replica
$master set asdf asdf
# the side effect of resizing the backlog is that it is flushed (16k is the min size)
$master config set repl-backlog-size [expr {16384 + $i}]
$master exec
}
# wait for loading to stop (fail)
set loglines [count_log_lines -1]
wait_for_condition 100 10 {
wait_for_condition 1000 1 {
[s -1 loading] eq 0
} else {
fail "Replica didn't disconnect"