Implements the soft shutdown feature, which allows clients to disconnect cooperatively and so prevents disruption during shutdown

This commit is contained in:
John Sully 2022-05-24 03:04:57 +00:00
parent 15d5c3b0f9
commit 84b37edfcf
8 changed files with 193 additions and 4 deletions

View File

@ -2897,6 +2897,7 @@ standardConfig configs[] = {
createBoolConfig("multi-master-no-forward", NULL, MODIFIABLE_CONFIG, cserver.multimaster_no_forward, 0, validateMultiMasterNoForward, NULL), createBoolConfig("multi-master-no-forward", NULL, MODIFIABLE_CONFIG, cserver.multimaster_no_forward, 0, validateMultiMasterNoForward, NULL),
createBoolConfig("allow-write-during-load", NULL, MODIFIABLE_CONFIG, g_pserver->fWriteDuringActiveLoad, 0, NULL, NULL), createBoolConfig("allow-write-during-load", NULL, MODIFIABLE_CONFIG, g_pserver->fWriteDuringActiveLoad, 0, NULL, NULL),
createBoolConfig("force-backlog-disk-reserve", NULL, MODIFIABLE_CONFIG, cserver.force_backlog_disk, 0, NULL, NULL), createBoolConfig("force-backlog-disk-reserve", NULL, MODIFIABLE_CONFIG, cserver.force_backlog_disk, 0, NULL, NULL),
createBoolConfig("soft-shutdown", NULL, MODIFIABLE_CONFIG, g_pserver->config_soft_shutdown, 0, NULL, NULL),
#ifdef USE_OPENSSL #ifdef USE_OPENSSL
createIntConfig("tls-port", NULL, MODIFIABLE_CONFIG, 0, 65535, g_pserver->tls_port, 0, INTEGER_CONFIG, NULL, updateTLSPort), /* TCP port. */ createIntConfig("tls-port", NULL, MODIFIABLE_CONFIG, 0, 65535, g_pserver->tls_port, 0, INTEGER_CONFIG, NULL, updateTLSPort), /* TCP port. */

View File

@ -1462,6 +1462,11 @@ void shutdownCommand(client *c) {
flags |= SHUTDOWN_NOSAVE; flags |= SHUTDOWN_NOSAVE;
} else if (!strcasecmp(szFromObj(c->argv[1]),"save")) { } else if (!strcasecmp(szFromObj(c->argv[1]),"save")) {
flags |= SHUTDOWN_SAVE; flags |= SHUTDOWN_SAVE;
} else if (!strcasecmp(szFromObj(c->argv[1]), "soft")) {
g_pserver->soft_shutdown = true;
serverLog(LL_WARNING, "Soft Shutdown Initiated");
addReply(c, shared.ok);
return;
} else { } else {
addReplyErrorObject(c,shared.syntaxerr); addReplyErrorObject(c,shared.syntaxerr);
return; return;

View File

@ -1071,7 +1071,7 @@ struct commandHelp {
1, 1,
"2.2.0" }, "2.2.0" },
{ "SHUTDOWN", { "SHUTDOWN",
"[NOSAVE|SAVE]", "[NOSAVE|SAVE|SOFT]",
"Synchronously save the dataset to disk and then shut down the server", "Synchronously save the dataset to disk and then shut down the server",
9, 9,
"1.0.0" }, "1.0.0" },

View File

@ -1256,6 +1256,20 @@ static void acceptCommonHandler(connection *conn, int flags, char *ip, int iel)
return; return;
} }
/* Prevent new connections if we're in a soft shutdown situation */
if (g_pserver->soft_shutdown) {
const char *err = "-SHUTDOWN";
/* That's a best effort error message, don't check write errors.
* Note that for TLS connections, no handshake was done yet so nothing
* is written and the connection will just drop. */
if (connWrite(conn,err,strlen(err)) == -1) {
/* Nothing to do, Just to avoid the warning... */
}
g_pserver->stat_rejected_conn++;
connClose(conn);
return;
}
/* Limit the number of connections we take at the same time. /* Limit the number of connections we take at the same time.
* *
* Admission control will happen before a client is created and connAccept() * Admission control will happen before a client is created and connAccept()

View File

@ -779,7 +779,7 @@ struct redisCommand redisCommandTable[] = {
0,NULL,0,0,0,0,0,0}, 0,NULL,0,0,0,0,0,0},
{"shutdown",shutdownCommand,-1, {"shutdown",shutdownCommand,-1,
"admin no-script ok-loading ok-stale", "admin no-script ok-loading ok-stale noprop",
0,NULL,0,0,0,0,0,0}, 0,NULL,0,0,0,0,0,0},
{"lastsave",lastsaveCommand,1, {"lastsave",lastsaveCommand,1,
@ -2648,6 +2648,24 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
} }
} }
if (g_pserver->soft_shutdown) {
/* Loop through our clients list and see if there are any active clients */
listIter li;
listNode *ln;
listRewind(g_pserver->clients, &li);
bool fActiveClient = false;
while ((ln = listNext(&li)) && !fActiveClient) {
client *c = (client*)listNodeValue(ln);
if (c->flags & (CLIENT_MASTER | CLIENT_SLAVE | CLIENT_BLOCKED | CLIENT_MONITOR))
continue;
fActiveClient = true;
}
if (!fActiveClient) {
serverLog(LL_WARNING, "All active clients have disconnected while a soft shutdown is pending. Shutting down now.");
throw ShutdownException();
}
}
g_pserver->cronloops++; g_pserver->cronloops++;
return 1000/g_pserver->hz; return 1000/g_pserver->hz;
} }
@ -5297,6 +5315,11 @@ void pingCommand(client *c) {
return; return;
} }
if (g_pserver->soft_shutdown) {
addReplyError(c, "-SHUTDOWN PENDING");
return;
}
if (c->flags & CLIENT_PUBSUB && c->resp == 2) { if (c->flags & CLIENT_PUBSUB && c->resp == 2) {
addReply(c,shared.mbulkhdr[2]); addReply(c,shared.mbulkhdr[2]);
addReplyBulkCBuffer(c,"pong",4); addReplyBulkCBuffer(c,"pong",4);
@ -6691,7 +6714,7 @@ static void sigShutdownHandler(int sig) {
* If we receive the signal the second time, we interpret this as * If we receive the signal the second time, we interpret this as
* the user really wanting to quit ASAP without waiting to persist * the user really wanting to quit ASAP without waiting to persist
* on disk. */ * on disk. */
if (g_pserver->shutdown_asap && sig == SIGINT) { if ((g_pserver->shutdown_asap || g_pserver->soft_shutdown) && sig == SIGINT) {
serverLogFromHandler(LL_WARNING, "You insist... exiting now."); serverLogFromHandler(LL_WARNING, "You insist... exiting now.");
rdbRemoveTempFile(g_pserver->rdbThreadVars.tmpfileNum, 1); rdbRemoveTempFile(g_pserver->rdbThreadVars.tmpfileNum, 1);
g_pserver->garbageCollector.shutdown(); g_pserver->garbageCollector.shutdown();
@ -6702,7 +6725,10 @@ static void sigShutdownHandler(int sig) {
} }
serverLogFromHandler(LL_WARNING, msg); serverLogFromHandler(LL_WARNING, msg);
g_pserver->shutdown_asap = 1; if (g_pserver->config_soft_shutdown)
g_pserver->soft_shutdown = true;
else
g_pserver->shutdown_asap = 1;
} }
void setupSignalHandlers(void) { void setupSignalHandlers(void) {

View File

@ -2721,6 +2721,9 @@ struct redisServer {
long long repl_batch_offStart = -1; long long repl_batch_offStart = -1;
long long repl_batch_idxStart = -1; long long repl_batch_idxStart = -1;
int config_soft_shutdown = false;
bool soft_shutdown = false;
/* Lock Contention Ring Buffer */ /* Lock Contention Ring Buffer */
static const size_t s_lockContentionSamples = 64; static const size_t s_lockContentionSamples = 64;
uint16_t rglockSamples[s_lockContentionSamples]; uint16_t rglockSamples[s_lockContentionSamples];

View File

@ -89,6 +89,7 @@ set ::all_tests {
integration/logging integration/logging
integration/corrupt-dump integration/corrupt-dump
integration/corrupt-dump-fuzzer integration/corrupt-dump-fuzzer
unit/soft_shutdown
} }
# Index to the next test to run in the ::all_tests list. # Index to the next test to run in the ::all_tests list.
set ::next_test 0 set ::next_test 0

View File

@ -0,0 +1,139 @@
start_server {tags {"soft_shutdown"} } {
    # SHUTDOWN SOFT is expected to answer OK instead of terminating the
    # server on the spot.
    test {soft shutdown command replies} {
        assert_equal [r shutdown soft] "OK"
    }

    # Runs against the same server as the test above, i.e. with the soft
    # shutdown already requested: PING must now fail with an error reply.
    test {soft shutdown errors on ping} {
        # Invoke the command directly inside catch. Wrapping it in [] would
        # additionally try to execute a successful reply as a Tcl command,
        # replacing the real reply with an "invalid command name" error.
        catch {r ping} e
        assert_match {SHUTDOWN PENDING} $e
    }
}
start_server {tags {"soft_shutdown"} } {
    # After a soft shutdown has been requested, a fresh connection attempt
    # must not succeed.
    test {soft shutdown prevents new connections} {
        set reply [r shutdown soft]
        assert_equal $reply "OK"

        # reconnect
        set status [catch {set rd [redis_deferring_client]} err]
        if {!$::tls} {
            # Without TLS the error text is expected to mention SHUTDOWN.
            assert_match {*SHUTDOWN*} $err
        } else {
            # With TLS only the failure itself is checked, not its message.
            assert_equal $status 1
        }
    }
}
start_server {tags {"soft_shutdown"} } {
    # An already-connected client must still be able to write and read
    # while the soft shutdown is pending.
    # NOTE(review): the test title reads "prevents allows"; it presumably
    # should say just "allows". Title left unchanged here.
    test {soft shutdown prevents allows commands to execute while waiting} {
        set reply [r shutdown soft]
        assert_equal $reply "OK"

        r set test val
        set stored [r get test]
        assert_equal $stored {val}
    }
}
start_server {tags {"soft_shutdown"} } {
    # Once the requesting client has disconnected, the server is expected
    # to go away: a later connection attempt must be refused.
    test {soft shutdown shuts down after all clients exit} {
        set reply [r shutdown soft]
        assert_equal $reply "OK"

        # Close the test client's connection, then give the server half a
        # second before probing it.
        r close
        after 500

        catch {set rd [redis_deferring_client]} err
        assert_match {*refused*} $err
    }
}
start_server {tags {"soft_shutdown"} overrides {soft-shutdown yes} } {
    # With the soft-shutdown config enabled, a first SIGINT should only put
    # the server into the pending state, which PING reports as an error.
    test {soft shutdown triggered by SIGINT} {
        exec kill -SIGINT [s process_id]
        # Invoke the command directly inside catch. Wrapping it in [] would
        # additionally try to execute a successful reply as a Tcl command,
        # replacing the real reply with an "invalid command name" error.
        catch {r ping} e
        assert_match {SHUTDOWN PENDING} $e
    }

    # Runs against the same server, with the soft shutdown already pending:
    # a second SIGINT should end the process, so the next command fails
    # with an I/O error rather than an error reply.
    test {second SIGINT forces a shutdown during a soft shutdown} {
        exec kill -SIGINT [s process_id]
        catch {r ping} e
        assert_match {*I/O*} $e
    }
}
start_server {tags {"soft_shutdown"} } {
    # A connection that is only running MONITOR must not keep the server
    # alive once the regular client has left.
    test {monitor does not prevent soft shutdown} {
        set mon [redis_deferring_client]
        $mon monitor

        set reply [r shutdown soft]
        assert_equal $reply "OK"

        # Drop the regular client; only the monitor connection stays open.
        r close
        after 500

        catch {set rd [redis_deferring_client]} err
        assert_match {*refused*} $err
    }
}
start_server {tags {"soft_shutdown"} } {
    start_server {} {
        # Server slot 0 is turned into a replica of server slot -1 below.
        set replica      [srv 0 client]
        set replica_host [srv 0 host]
        set replica_port [srv 0 port]
        set replica_pid  [srv 0 pid]

        set master       [srv -1 client]
        set master_host  [srv -1 host]
        set master_port  [srv -1 port]
        set master_pid   [srv -1 pid]

        $replica replicaof $master_host $master_port
        wait_for_sync $replica

        # NOTE(review): the title reads "works for with master"; presumably
        # one of the two prepositions is a leftover. Title left unchanged.
        test {soft shutdown works for with master} {
            $master shutdown soft
        } {OK}

        # The replica itself was not asked to shut down, so it must keep
        # answering PING normally.
        test {soft shutdown on master doesn't affect replica} {
            set pong [$replica ping]
            assert_equal $pong {PONG}
        }

        test {soft shutdown on master updates ping response} {
            catch {$master ping} err
            assert_equal $err {SHUTDOWN PENDING}
        }

        # A brand new connection to the master must be turned away, even
        # though the replica link is still attached.
        test {master prevents new connections with soft shutdown} {
            set probe [redis $master_host $master_port 1 $::tls]
            set status [catch {$probe read} err]
            if {!$::tls} {
                # Without TLS the error text is expected to mention SHUTDOWN.
                assert_match {*SHUTDOWN*} $err
            } else {
                # With TLS only the failure itself is checked.
                assert_equal $status 1
            }
        }

        # After the test client disconnects, the master is expected to exit
        # despite the replica still being connected to it.
        test {master soft shutdown works after all clients disconnect} {
            $master close
            after 500
            catch {set probe [redis $master_host $master_port 1 $::tls]} err
            assert_match {*refused*} $err
        }
    }
}
start_server {tags {"soft_shutdown"} } {
    start_server {} {
        # Server slot 0 is turned into a replica of server slot -1 below.
        set replica      [srv 0 client]
        set replica_host [srv 0 host]
        set replica_port [srv 0 port]
        set replica_pid  [srv 0 pid]

        set master       [srv -1 client]
        set master_host  [srv -1 host]
        set master_port  [srv -1 port]
        set master_pid   [srv -1 pid]

        $replica replicaof $master_host $master_port
        wait_for_sync $replica

        # The replica's connection to its master must not keep the replica
        # alive: once the test client leaves, the replica should be gone.
        test {soft shutdown on replica is not blocked by master} {
            set reply [$replica shutdown soft]
            assert_equal $reply {OK}

            $replica close
            after 500

            catch {set c0 [redis $replica_host $replica_port 1 $::tls]} err
            assert_match {*refused*} $err
        }
    }
}