From 8226f39fb200a6a2d6f57d027ffc55319fb85d92 Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Sat, 5 Aug 2023 14:52:03 +0800 Subject: [PATCH] do not call handleClientsBlockedOnKeys inside yielding command (#12459) Fix the assertion when a busy script (timeout) signal ready keys (like LPUSH), and then an arbitrary client's `allow-busy` command steps into `handleClientsBlockedOnKeys` try wake up clients blocked on keys (like BLPOP). Reproduction process: 1. start a redis with aof `./redis-server --appendonly yes` 2. exec blpop `127.0.0.1:6379> blpop a 0` 3. use another client call a busy script and this script push the blocked key `127.0.0.1:6379> eval "redis.call('lpush','a','b') while(1) do end" 0` 4. user a new client call an allow-busy command like auth `127.0.0.1:6379> auth a` BTW, this issue also break the atomicity of script. This bug has been around for many years, the old versions only have the atomic problem, only 7.0/7.2 has the assertion problem. Co-authored-by: Oran Agra --- src/server.c | 2 +- tests/unit/scripting.tcl | 47 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/src/server.c b/src/server.c index 838695b88..438325f28 100644 --- a/src/server.c +++ b/src/server.c @@ -4158,7 +4158,7 @@ int processCommand(client *c) { int flags = CMD_CALL_FULL; if (client_reprocessing_command) flags |= CMD_CALL_REPROCESSING; call(c,flags); - if (listLength(server.ready_keys)) + if (listLength(server.ready_keys) && !isInsideYieldingLongCommand()) handleClientsBlockedOnKeys(); } diff --git a/tests/unit/scripting.tcl b/tests/unit/scripting.tcl index cedf2b4f7..c2f79a742 100644 --- a/tests/unit/scripting.tcl +++ b/tests/unit/scripting.tcl @@ -1119,6 +1119,53 @@ start_server {tags {"scripting"}} { r ping } {PONG} + test {Timedout scripts and unblocked command} { + # make sure a command that's allowed during BUSY doesn't trigger an unblocked command + + # enable AOF to also expose an assertion if the bug would happen + r flushall + r config set appendonly yes + + # create clients, and set one to block waiting for key 'x' + set rd [redis_deferring_client] + set rd2 [redis_deferring_client] + set r3 [redis_client] + $rd2 blpop x 0 + wait_for_blocked_clients_count 1 + + # hack: allow the script to use client list command so that we can control when it aborts + r DEBUG set-disable-deny-scripts 1 + r config set lua-time-limit 10 + run_script_on_connection $rd { + local clients + redis.call('lpush',KEYS[1],'y'); + while true do + clients = redis.call('client','list') + if string.find(clients, 'abortscript') ~= nil then break end + end + redis.call('lpush',KEYS[1],'z'); + return clients + } 1 x + + # wait for the script to be busy + after 200 + catch {r ping} e + assert_match {BUSY*} $e + + # run cause the script to abort, and run a command that could have processed + # unblocked clients (due to a bug) + $r3 hello 2 setname abortscript + + # make sure the script completed before the pop was processed + assert_equal [$rd2 read] {x z} + assert_match {*abortscript*} [$rd read] + + $rd close + $rd2 close + $r3 close + r DEBUG set-disable-deny-scripts 0 + } {OK} {external:skip needs:debug} + test {Timedout scripts that modified data can't be killed by SCRIPT KILL} { set rd [redis_deferring_client] r config set lua-time-limit 10