Skip tracking clients OOM test when I/O threads are enabled (#764)
Fix feedback loop in key eviction with tracking clients when using I/O threads. Current issue: Evicting keys while tracking clients or key space-notification exist creates a feedback loop when using I/O threads: While evicting keys we send tracking async writes to I/O threads, preventing immediate release of tracking clients' COB memory consumption. Before the I/O thread finishes its write, we recheck used_memory, which now includes the tracking clients' COB and thus continue to evict more keys. **Fix:** We will skip the test for now while IO threads are active. We may consider avoiding sending writes in `processPendingWrites` to I/O threads for tracking clients when we are out of memory. --------- Signed-off-by: Uri Yagelnik <uriy@amazon.com> Signed-off-by: Madelyn Olson <madelyneolson@gmail.com> Co-authored-by: Madelyn Olson <madelyneolson@gmail.com>
This commit is contained in:
parent
0f551a2dca
commit
2e937773ec
4
.github/workflows/daily.yml
vendored
4
.github/workflows/daily.yml
vendored
@ -379,10 +379,10 @@ jobs:
|
||||
run: sudo apt-get install tcl8.6 tclx
|
||||
- name: test
|
||||
if: true && !contains(github.event.inputs.skiptests, 'valkey')
|
||||
run: ./runtest --config io-threads 2 --config events-per-io-thread 0 --accurate --verbose --tags network --dump-logs ${{github.event.inputs.test_args}}
|
||||
run: ./runtest --io-threads --accurate --verbose --tags network --dump-logs ${{github.event.inputs.test_args}}
|
||||
- name: cluster tests
|
||||
if: true && !contains(github.event.inputs.skiptests, 'cluster')
|
||||
run: ./runtest-cluster --config io-threads 2 --config events-per-io-thread 0 ${{github.event.inputs.cluster_test_args}}
|
||||
run: ./runtest-cluster --io-threads ${{github.event.inputs.cluster_test_args}}
|
||||
|
||||
test-ubuntu-reclaim-cache:
|
||||
runs-on: ubuntu-latest
|
||||
|
@ -241,6 +241,11 @@ proc tags_acceptable {tags err_return} {
|
||||
return 0
|
||||
}
|
||||
|
||||
if {$::io_threads && [lsearch $tags "io-threads:skip"] >= 0} {
|
||||
set err "Not supported in io-threads mode"
|
||||
return 0
|
||||
}
|
||||
|
||||
if {$::tcl_version < 8.6 && [lsearch $tags "ipv6"] >= 0} {
|
||||
set err "TCL version is too low and does not support this"
|
||||
return 0
|
||||
@ -502,6 +507,12 @@ proc start_server {options {code undefined}} {
|
||||
dict set config "tls-ca-cert-file" [format "%s/tests/tls/ca.crt" [pwd]]
|
||||
dict set config "loglevel" "debug"
|
||||
}
|
||||
|
||||
if {$::io_threads} {
|
||||
dict set config "io-threads" 2
|
||||
dict set config "events-per-io-thread" 0
|
||||
}
|
||||
|
||||
foreach line $data {
|
||||
if {[string length $line] > 0 && [string index $line 0] ne "#"} {
|
||||
set elements [split $line " "]
|
||||
|
@ -41,6 +41,7 @@ set ::traceleaks 0
|
||||
set ::valgrind 0
|
||||
set ::durable 0
|
||||
set ::tls 0
|
||||
set ::io_threads 0
|
||||
set ::tls_module 0
|
||||
set ::stack_logging 0
|
||||
set ::verbose 0
|
||||
@ -576,6 +577,7 @@ proc print_help_screen {} {
|
||||
"--loops <count> Execute the specified set of tests several times."
|
||||
"--wait-server Wait after server is started (so that you can attach a debugger)."
|
||||
"--dump-logs Dump server log on test failure."
|
||||
"--io-threads Run tests with IO threads."
|
||||
"--tls Run tests in TLS mode."
|
||||
"--tls-module Run tests in TLS mode with Valkey module."
|
||||
"--host <addr> Run tests against an external host."
|
||||
@ -630,6 +632,8 @@ for {set j 0} {$j < [llength $argv]} {incr j} {
|
||||
}
|
||||
} elseif {$opt eq {--quiet}} {
|
||||
set ::quiet 1
|
||||
} elseif {$opt eq {--io-threads}} {
|
||||
set ::io_threads 1
|
||||
} elseif {$opt eq {--tls} || $opt eq {--tls-module}} {
|
||||
package require tls 1.6
|
||||
set ::tls 1
|
||||
|
@ -308,51 +308,49 @@ start_server {tags {"info" "external:skip"}} {
|
||||
assert_equal "count=2" [errorstat ERR]
|
||||
}
|
||||
|
||||
# skip the following 2 tests if we are running with io-threads as the eventloop metrics are different in that case.
|
||||
if {[r config get io-threads] eq 0} {
|
||||
test {stats: eventloop metrics} {
|
||||
set info1 [r info stats]
|
||||
set cycle1 [getInfoProperty $info1 eventloop_cycles]
|
||||
set el_sum1 [getInfoProperty $info1 eventloop_duration_sum]
|
||||
set cmd_sum1 [getInfoProperty $info1 eventloop_duration_cmd_sum]
|
||||
assert_morethan $cycle1 0
|
||||
assert_morethan $el_sum1 0
|
||||
assert_morethan $cmd_sum1 0
|
||||
after 110 ;# default hz is 10, wait for a cron tick.
|
||||
set info2 [r info stats]
|
||||
set cycle2 [getInfoProperty $info2 eventloop_cycles]
|
||||
set el_sum2 [getInfoProperty $info2 eventloop_duration_sum]
|
||||
set cmd_sum2 [getInfoProperty $info2 eventloop_duration_cmd_sum]
|
||||
if {$::verbose} { puts "eventloop metrics cycle1: $cycle1, cycle2: $cycle2" }
|
||||
assert_morethan $cycle2 $cycle1
|
||||
assert_lessthan $cycle2 [expr $cycle1+10] ;# we expect 2 or 3 cycles here, but allow some tolerance
|
||||
if {$::verbose} { puts "eventloop metrics el_sum1: $el_sum1, el_sum2: $el_sum2" }
|
||||
assert_morethan $el_sum2 $el_sum1
|
||||
assert_lessthan $el_sum2 [expr $el_sum1+30000] ;# we expect roughly 100ms here, but allow some tolerance
|
||||
if {$::verbose} { puts "eventloop metrics cmd_sum1: $cmd_sum1, cmd_sum2: $cmd_sum2" }
|
||||
assert_morethan $cmd_sum2 $cmd_sum1
|
||||
assert_lessthan $cmd_sum2 [expr $cmd_sum1+15000] ;# we expect about tens of ms here, but allow some tolerance
|
||||
test {stats: eventloop metrics} {
|
||||
set info1 [r info stats]
|
||||
set cycle1 [getInfoProperty $info1 eventloop_cycles]
|
||||
set el_sum1 [getInfoProperty $info1 eventloop_duration_sum]
|
||||
set cmd_sum1 [getInfoProperty $info1 eventloop_duration_cmd_sum]
|
||||
assert_morethan $cycle1 0
|
||||
assert_morethan $el_sum1 0
|
||||
assert_morethan $cmd_sum1 0
|
||||
after 110 ;# default hz is 10, wait for a cron tick.
|
||||
set info2 [r info stats]
|
||||
set cycle2 [getInfoProperty $info2 eventloop_cycles]
|
||||
set el_sum2 [getInfoProperty $info2 eventloop_duration_sum]
|
||||
set cmd_sum2 [getInfoProperty $info2 eventloop_duration_cmd_sum]
|
||||
if {$::verbose} { puts "eventloop metrics cycle1: $cycle1, cycle2: $cycle2" }
|
||||
assert_morethan $cycle2 $cycle1
|
||||
assert_lessthan $cycle2 [expr $cycle1+10] ;# we expect 2 or 3 cycles here, but allow some tolerance
|
||||
if {$::verbose} { puts "eventloop metrics el_sum1: $el_sum1, el_sum2: $el_sum2" }
|
||||
assert_morethan $el_sum2 $el_sum1
|
||||
assert_lessthan $el_sum2 [expr $el_sum1+30000] ;# we expect roughly 100ms here, but allow some tolerance
|
||||
if {$::verbose} { puts "eventloop metrics cmd_sum1: $cmd_sum1, cmd_sum2: $cmd_sum2" }
|
||||
assert_morethan $cmd_sum2 $cmd_sum1
|
||||
assert_lessthan $cmd_sum2 [expr $cmd_sum1+15000] ;# we expect about tens of ms here, but allow some tolerance
|
||||
} {} {io-threads:skip} ; # skip with io-threads as the eventloop metrics are different in that case.
|
||||
|
||||
test {stats: instantaneous metrics} {
|
||||
r config resetstat
|
||||
set retries 0
|
||||
for {set retries 1} {$retries < 4} {incr retries} {
|
||||
after 1600 ;# hz is 10, wait for 16 cron tick so that sample array is fulfilled
|
||||
set value [s instantaneous_eventloop_cycles_per_sec]
|
||||
if {$value > 0} break
|
||||
}
|
||||
|
||||
test {stats: instantaneous metrics} {
|
||||
r config resetstat
|
||||
set retries 0
|
||||
for {set retries 1} {$retries < 4} {incr retries} {
|
||||
after 1600 ;# hz is 10, wait for 16 cron tick so that sample array is fulfilled
|
||||
set value [s instantaneous_eventloop_cycles_per_sec]
|
||||
if {$value > 0} break
|
||||
}
|
||||
|
||||
assert_lessthan $retries 4
|
||||
if {$::verbose} { puts "instantaneous metrics instantaneous_eventloop_cycles_per_sec: $value" }
|
||||
assert_morethan $value 0
|
||||
assert_lessthan $value [expr $retries*15] ;# default hz is 10
|
||||
set value [s instantaneous_eventloop_duration_usec]
|
||||
if {$::verbose} { puts "instantaneous metrics instantaneous_eventloop_duration_usec: $value" }
|
||||
assert_morethan $value 0
|
||||
assert_lessthan $value [expr $retries*22000] ;# default hz is 10, so duration < 1000 / 10, allow some tolerance
|
||||
}
|
||||
}
|
||||
|
||||
assert_lessthan $retries 4
|
||||
if {$::verbose} { puts "instantaneous metrics instantaneous_eventloop_cycles_per_sec: $value" }
|
||||
assert_morethan $value 0
|
||||
assert_lessthan $value [expr $retries*15] ;# default hz is 10
|
||||
set value [s instantaneous_eventloop_duration_usec]
|
||||
if {$::verbose} { puts "instantaneous metrics instantaneous_eventloop_duration_usec: $value" }
|
||||
assert_morethan $value 0
|
||||
assert_lessthan $value [expr $retries*22000] ;# default hz is 10, so duration < 1000 / 10, allow some tolerance
|
||||
} {} {io-threads:skip} ; # skip with io-threads as the eventloop metrics are different in that case.
|
||||
|
||||
|
||||
test {stats: debug metrics} {
|
||||
# make sure debug info is hidden
|
||||
|
@ -456,13 +456,15 @@ start_server {tags {"maxmemory external:skip"}} {
|
||||
} {4098}
|
||||
}
|
||||
|
||||
start_server {tags {"maxmemory external:skip"}} {
|
||||
# Skip the following test when running with IO threads
|
||||
# With IO threads, we asynchronously write to tracking clients.
|
||||
# This invalidates the assumption that their output buffers will be free within the same event loop.
|
||||
start_server {tags {"maxmemory external:skip io-threads:skip"}} {
|
||||
test {client tracking don't cause eviction feedback loop} {
|
||||
r config set latency-tracking no
|
||||
r config set maxmemory 0
|
||||
r config set maxmemory-policy allkeys-lru
|
||||
r config set maxmemory-eviction-tenacity 100
|
||||
|
||||
# 10 clients listening on tracking messages
|
||||
set clients {}
|
||||
for {set j 0} {$j < 10} {incr j} {
|
||||
|
@ -404,8 +404,6 @@ run_solo {defrag} {
|
||||
r save ;# saving an rdb iterates over all the data / pointers
|
||||
} {OK}
|
||||
|
||||
# Skip the following two tests if we are running with IO threads, as the IO threads allocate the command arguments in a different arena. As a result, fragmentation is not as expected.
|
||||
if {[r config get io-threads] eq 0} {
|
||||
test "Active defrag pubsub: $type" {
|
||||
r flushdb
|
||||
r config resetstat
|
||||
@ -503,8 +501,7 @@ run_solo {defrag} {
|
||||
$rd_pubsub read
|
||||
}
|
||||
$rd_pubsub close
|
||||
}
|
||||
} ;# io-threads
|
||||
} {0} {io-threads:skip} ; # skip with io-threads as the threads may allocate the command arguments in a different arena. As a result, fragmentation is not as expected.
|
||||
|
||||
if {$type eq "standalone"} { ;# skip in cluster mode
|
||||
test "Active defrag big list: $type" {
|
||||
|
Loading…
x
Reference in New Issue
Block a user