
In #11012, we changed the way command durations were computed to handle the same command being executed multiple times. In #11970, we added an assert if the duration is not properly reset, potentially indicating that a call to report statistics was missed. I found an edge case where this happens - easily reproduced by blocking a client on `XGROUPREAD` and migrating the stream's slot. This causes the engine to process the `XGROUPREAD` command twice: 1. First time, we are blocked on the stream, so we wait for unblock to come back to it a second time. In most cases, when we come back to process the command second time after unblock, we process the command normally, which includes recording the duration and then resetting it. 2. After unblocking we come back to process the command, and this is where we hit the edge case - at this point, we had already migrated the slot to another node, so we return a `MOVED` response. But when we do that, we don’t reset the duration field. Fix: also reset the duration when returning a `MOVED` response. I think this is right, because the client should redirect the command to the right node, which in turn will calculate the execution duration. Also wrote a test which reproduces this, it fails without the fix and passes with it. --------- Signed-off-by: Nitai Caro <caronita@amazon.com> Co-authored-by: Nitai Caro <caronita@amazon.com> Signed-off-by: Ping Xie <pingxie@google.com>
47 lines
1.7 KiB
Tcl
47 lines
1.7 KiB
Tcl
start_cluster 2 2 {tags {external:skip cluster}} {
|
|
test {Key lazy expires during key migration} {
|
|
R 0 DEBUG SET-ACTIVE-EXPIRE 0
|
|
|
|
set key_slot [R 0 CLUSTER KEYSLOT FOO]
|
|
R 0 set FOO BAR PX 10
|
|
set src_id [R 0 CLUSTER MYID]
|
|
set trg_id [R 1 CLUSTER MYID]
|
|
R 0 CLUSTER SETSLOT $key_slot MIGRATING $trg_id
|
|
R 1 CLUSTER SETSLOT $key_slot IMPORTING $src_id
|
|
after 11
|
|
assert_error {ASK*} {R 0 GET FOO}
|
|
R 0 ping
|
|
} {PONG}
|
|
|
|
test "Coverage: Basic cluster commands" {
|
|
assert_equal {OK} [R 0 CLUSTER saveconfig]
|
|
|
|
set id [R 0 CLUSTER MYID]
|
|
assert_equal {0} [R 0 CLUSTER count-failure-reports $id]
|
|
|
|
R 0 flushall
|
|
assert_equal {OK} [R 0 CLUSTER flushslots]
|
|
}
|
|
}
|
|
|
|
start_cluster 2 0 {tags {external:skip cluster regression} overrides {cluster-allow-replica-migration no cluster-node-timeout 1000} } {
|
|
# Issue #563 regression test
|
|
test "Client blocked on XREADGROUP while stream's slot is migrated" {
|
|
set stream_name aga
|
|
set slot 609
|
|
|
|
# Start a deferring client to simulate a blocked client on XREADGROUP
|
|
R 0 XGROUP CREATE $stream_name mygroup $ MKSTREAM
|
|
set rd [redis_deferring_client]
|
|
$rd xreadgroup GROUP mygroup consumer BLOCK 0 streams $stream_name >
|
|
wait_for_blocked_client
|
|
|
|
# Migrate the slot to the target node
|
|
R 0 CLUSTER SETSLOT $slot MIGRATING [dict get [cluster_get_myself 1] id]
|
|
R 1 CLUSTER SETSLOT $slot IMPORTING [dict get [cluster_get_myself 0] id]
|
|
|
|
# This line should cause the crash
|
|
R 0 MIGRATE 127.0.0.1 [lindex [R 1 CONFIG GET port] 1] $stream_name 0 5000
|
|
}
|
|
}
|