Fix failing tests due to issues with wait_for_log_message (#7572)
- the test now waits for specific set of log messages rather than wait for timeout looking for just one message. - we don't wanna sample the current length of the log after an action, due to a race, we need to start the search from the line number of the last message we where waiting for. - when attempting to trigger a full sync, use multi-exec to avoid a race where the replica manages to re-connect before we completed the set of actions that should force a full sync. - fix verify_log_message which was broken and unused
This commit is contained in:
parent
198770751f
commit
06aaeabaea
@ -440,30 +440,30 @@ test {diskless loading short read} {
|
|||||||
for {set i 0} {$i < $attempts} {incr i} {
|
for {set i 0} {$i < $attempts} {incr i} {
|
||||||
# wait for the replica to start reading the rdb
|
# wait for the replica to start reading the rdb
|
||||||
# using the log file since the replica only responds to INFO once in 2mb
|
# using the log file since the replica only responds to INFO once in 2mb
|
||||||
wait_for_log_message -1 "*Loading DB in memory*" $loglines 2000 1
|
set res [wait_for_log_messages -1 {"*Loading DB in memory*"} $loglines 2000 1]
|
||||||
|
set loglines [lindex $res 1]
|
||||||
|
|
||||||
# add some additional random sleep so that we kill the master on a different place each time
|
# add some additional random sleep so that we kill the master on a different place each time
|
||||||
after [expr {int(rand()*100)}]
|
after [expr {int(rand()*50)}]
|
||||||
|
|
||||||
# kill the replica connection on the master
|
# kill the replica connection on the master
|
||||||
set killed [$master client kill type replica]
|
set killed [$master client kill type replica]
|
||||||
|
|
||||||
if {[catch {
|
set res [wait_for_log_messages -1 {"*Internal error in RDB*" "*Finished with success*" "*Successful partial resynchronization*"} $loglines 1000 1]
|
||||||
set res [wait_for_log_message -1 "*Internal error in RDB*" $loglines 100 10]
|
if {$::verbose} { puts $res }
|
||||||
if {$::verbose} {
|
set log_text [lindex $res 0]
|
||||||
puts $res
|
set loglines [lindex $res 1]
|
||||||
}
|
if {![string match "*Internal error in RDB*" $log_text]} {
|
||||||
}]} {
|
|
||||||
puts "failed triggering short read"
|
|
||||||
# force the replica to try another full sync
|
# force the replica to try another full sync
|
||||||
|
$master multi
|
||||||
$master client kill type replica
|
$master client kill type replica
|
||||||
$master set asdf asdf
|
$master set asdf asdf
|
||||||
# the side effect of resizing the backlog is that it is flushed (16k is the min size)
|
# the side effect of resizing the backlog is that it is flushed (16k is the min size)
|
||||||
$master config set repl-backlog-size [expr {16384 + $i}]
|
$master config set repl-backlog-size [expr {16384 + $i}]
|
||||||
|
$master exec
|
||||||
}
|
}
|
||||||
# wait for loading to stop (fail)
|
# wait for loading to stop (fail)
|
||||||
set loglines [count_log_lines -1]
|
wait_for_condition 1000 1 {
|
||||||
wait_for_condition 100 10 {
|
|
||||||
[s -1 loading] eq 0
|
[s -1 loading] eq 0
|
||||||
} else {
|
} else {
|
||||||
fail "Replica didn't disconnect"
|
fail "Replica didn't disconnect"
|
||||||
@ -545,7 +545,7 @@ start_server {tags {"repl"}} {
|
|||||||
|
|
||||||
# wait for the replicas to start reading the rdb
|
# wait for the replicas to start reading the rdb
|
||||||
# using the log file since the replica only responds to INFO once in 2mb
|
# using the log file since the replica only responds to INFO once in 2mb
|
||||||
wait_for_log_message -1 "*Loading DB in memory*" $loglines 800 10
|
wait_for_log_messages -1 {"*Loading DB in memory*"} $loglines 800 10
|
||||||
|
|
||||||
if {$measure_time} {
|
if {$measure_time} {
|
||||||
set master_statfile "/proc/$master_pid/stat"
|
set master_statfile "/proc/$master_pid/stat"
|
||||||
@ -580,13 +580,13 @@ start_server {tags {"repl"}} {
|
|||||||
|
|
||||||
# make sure we got what we were aiming for, by looking for the message in the log file
|
# make sure we got what we were aiming for, by looking for the message in the log file
|
||||||
if {$all_drop == "all"} {
|
if {$all_drop == "all"} {
|
||||||
wait_for_log_message -2 "*Diskless rdb transfer, last replica dropped, killing fork child*" $loglines 1 1
|
wait_for_log_messages -2 {"*Diskless rdb transfer, last replica dropped, killing fork child*"} $loglines 1 1
|
||||||
}
|
}
|
||||||
if {$all_drop == "no"} {
|
if {$all_drop == "no"} {
|
||||||
wait_for_log_message -2 "*Diskless rdb transfer, done reading from pipe, 2 replicas still up*" $loglines 1 1
|
wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 2 replicas still up*"} $loglines 1 1
|
||||||
}
|
}
|
||||||
if {$all_drop == "slow" || $all_drop == "fast"} {
|
if {$all_drop == "slow" || $all_drop == "fast"} {
|
||||||
wait_for_log_message -2 "*Diskless rdb transfer, done reading from pipe, 1 replicas still up*" $loglines 1 1
|
wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 1 replicas still up*"} $loglines 1 1
|
||||||
}
|
}
|
||||||
|
|
||||||
# make sure we don't have a busy loop going thought epoll_wait
|
# make sure we don't have a busy loop going thought epoll_wait
|
||||||
|
@ -106,31 +106,35 @@ proc count_log_lines {srv_idx} {
|
|||||||
|
|
||||||
# verify pattern exists in server's sdtout after a certain line number
|
# verify pattern exists in server's sdtout after a certain line number
|
||||||
proc verify_log_message {srv_idx pattern from_line} {
|
proc verify_log_message {srv_idx pattern from_line} {
|
||||||
set lines_after [count_log_lines]
|
incr from_line
|
||||||
set lines [expr $lines_after - $from_line]
|
set result [exec tail -n +$from_line < [srv $srv_idx stdout]]
|
||||||
set result [exec tail -$lines < [srv $srv_idx stdout]]
|
|
||||||
if {![string match $pattern $result]} {
|
if {![string match $pattern $result]} {
|
||||||
error "assertion:expected message not found in log file: $pattern"
|
error "assertion:expected message not found in log file: $pattern"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# wait for pattern to be found in server's stdout after certain line number
|
# wait for pattern to be found in server's stdout after certain line number
|
||||||
proc wait_for_log_message {srv_idx pattern from_line maxtries delay} {
|
# return value is a list containing the line that matched the pattern and the line number
|
||||||
|
proc wait_for_log_messages {srv_idx patterns from_line maxtries delay} {
|
||||||
set retry $maxtries
|
set retry $maxtries
|
||||||
|
set next_line [expr $from_line + 1] ;# searching form the line after
|
||||||
set stdout [srv $srv_idx stdout]
|
set stdout [srv $srv_idx stdout]
|
||||||
while {$retry} {
|
while {$retry} {
|
||||||
set result [exec tail -n +$from_line < $stdout]
|
set result [exec tail -n +$next_line < $stdout]
|
||||||
set result [split $result "\n"]
|
set result [split $result "\n"]
|
||||||
foreach line $result {
|
foreach line $result {
|
||||||
if {[string match $pattern $line]} {
|
foreach pattern $patterns {
|
||||||
return $line
|
if {[string match $pattern $line]} {
|
||||||
|
return [list $line $next_line]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
incr next_line
|
||||||
}
|
}
|
||||||
incr retry -1
|
incr retry -1
|
||||||
after $delay
|
after $delay
|
||||||
}
|
}
|
||||||
if {$retry == 0} {
|
if {$retry == 0} {
|
||||||
fail "log message of '$pattern' not found in $stdout after line: $from_line"
|
fail "log message of '$patterns' not found in $stdout after line: $from_line till line: [expr $next_line -1]"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -77,30 +77,30 @@ tags "modules" {
|
|||||||
for {set i 0} {$i < $attempts} {incr i} {
|
for {set i 0} {$i < $attempts} {incr i} {
|
||||||
# wait for the replica to start reading the rdb
|
# wait for the replica to start reading the rdb
|
||||||
# using the log file since the replica only responds to INFO once in 2mb
|
# using the log file since the replica only responds to INFO once in 2mb
|
||||||
wait_for_log_message -1 "*Loading DB in memory*" $loglines 2000 1
|
set res [wait_for_log_messages -1 {"*Loading DB in memory*"} $loglines 2000 1]
|
||||||
|
set loglines [lindex $res 1]
|
||||||
|
|
||||||
# add some additional random sleep so that we kill the master on a different place each time
|
# add some additional random sleep so that we kill the master on a different place each time
|
||||||
after [expr {int(rand()*100)}]
|
after [expr {int(rand()*50)}]
|
||||||
|
|
||||||
# kill the replica connection on the master
|
# kill the replica connection on the master
|
||||||
set killed [$master client kill type replica]
|
set killed [$master client kill type replica]
|
||||||
|
|
||||||
if {[catch {
|
set res [wait_for_log_messages -1 {"*Internal error in RDB*" "*Finished with success*" "*Successful partial resynchronization*"} $loglines 1000 1]
|
||||||
set res [wait_for_log_message -1 "*Internal error in RDB*" $loglines 100 10]
|
if {$::verbose} { puts $res }
|
||||||
if {$::verbose} {
|
set log_text [lindex $res 0]
|
||||||
puts $res
|
set loglines [lindex $res 1]
|
||||||
}
|
if {![string match "*Internal error in RDB*" $log_text]} {
|
||||||
}]} {
|
|
||||||
puts "failed triggering short read"
|
|
||||||
# force the replica to try another full sync
|
# force the replica to try another full sync
|
||||||
|
$master multi
|
||||||
$master client kill type replica
|
$master client kill type replica
|
||||||
$master set asdf asdf
|
$master set asdf asdf
|
||||||
# the side effect of resizing the backlog is that it is flushed (16k is the min size)
|
# the side effect of resizing the backlog is that it is flushed (16k is the min size)
|
||||||
$master config set repl-backlog-size [expr {16384 + $i}]
|
$master config set repl-backlog-size [expr {16384 + $i}]
|
||||||
|
$master exec
|
||||||
}
|
}
|
||||||
# wait for loading to stop (fail)
|
# wait for loading to stop (fail)
|
||||||
set loglines [count_log_lines -1]
|
wait_for_condition 1000 1 {
|
||||||
wait_for_condition 100 10 {
|
|
||||||
[s -1 loading] eq 0
|
[s -1 loading] eq 0
|
||||||
} else {
|
} else {
|
||||||
fail "Replica didn't disconnect"
|
fail "Replica didn't disconnect"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user