Merge pull request #72 from Snapchat/soft_shutdown

Implements the soft shutdown feature
This commit is contained in:
John Sully 2022-05-25 15:13:54 -04:00 committed by GitHub Enterprise
commit 9024d1320e
8 changed files with 196 additions and 4 deletions

View File

@ -2897,6 +2897,7 @@ standardConfig configs[] = {
createBoolConfig("multi-master-no-forward", NULL, MODIFIABLE_CONFIG, cserver.multimaster_no_forward, 0, validateMultiMasterNoForward, NULL),
createBoolConfig("allow-write-during-load", NULL, MODIFIABLE_CONFIG, g_pserver->fWriteDuringActiveLoad, 0, NULL, NULL),
createBoolConfig("force-backlog-disk-reserve", NULL, MODIFIABLE_CONFIG, cserver.force_backlog_disk, 0, NULL, NULL),
createBoolConfig("soft-shutdown", NULL, MODIFIABLE_CONFIG, g_pserver->config_soft_shutdown, 0, NULL, NULL),
#ifdef USE_OPENSSL
createIntConfig("tls-port", NULL, MODIFIABLE_CONFIG, 0, 65535, g_pserver->tls_port, 0, INTEGER_CONFIG, NULL, updateTLSPort), /* TCP port. */

View File

@ -1462,6 +1462,11 @@ void shutdownCommand(client *c) {
flags |= SHUTDOWN_NOSAVE;
} else if (!strcasecmp(szFromObj(c->argv[1]),"save")) {
flags |= SHUTDOWN_SAVE;
} else if (!strcasecmp(szFromObj(c->argv[1]), "soft")) {
g_pserver->soft_shutdown = true;
serverLog(LL_WARNING, "Soft Shutdown Initiated");
addReply(c, shared.ok);
return;
} else {
addReplyErrorObject(c,shared.syntaxerr);
return;

View File

@ -1071,7 +1071,7 @@ struct commandHelp {
1,
"2.2.0" },
{ "SHUTDOWN",
"[NOSAVE|SAVE]",
"[NOSAVE|SAVE|SOFT]",
"Synchronously save the dataset to disk and then shut down the server",
9,
"1.0.0" },

View File

@ -1256,6 +1256,20 @@ static void acceptCommonHandler(connection *conn, int flags, char *ip, int iel)
return;
}
/* Prevent new connections if we're in a soft shutdown situation. The
 * connection is counted as rejected and closed right away. */
if (g_pserver->soft_shutdown) {
    /* Error replies have to be CRLF terminated. Without the terminator a
     * protocol parser only sees a truncated reply followed by EOF instead
     * of this error. */
    const char *err = "-SHUTDOWN\r\n";

    /* That's a best effort error message, don't check write errors.
     * Note that for TLS connections, no handshake was done yet so nothing
     * is written and the connection will just drop. */
    if (connWrite(conn,err,strlen(err)) == -1) {
        /* Nothing to do, Just to avoid the warning... */
    }
    g_pserver->stat_rejected_conn++;
    connClose(conn);
    return;
}
/* Limit the number of connections we take at the same time.
*
* Admission control will happen before a client is created and connAccept()

View File

@ -779,7 +779,7 @@ struct redisCommand redisCommandTable[] = {
0,NULL,0,0,0,0,0,0},
{"shutdown",shutdownCommand,-1,
"admin no-script ok-loading ok-stale",
"admin no-script ok-loading ok-stale noprop",
0,NULL,0,0,0,0,0,0},
{"lastsave",lastsaveCommand,1,
@ -2648,6 +2648,26 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
}
}
if (g_pserver->soft_shutdown) {
    /* A pending soft shutdown completes once every remaining client carries
     * one of the CLIENT_IGNORE_SOFT_SHUTDOWN flags. Walk the client list and
     * stop at the first client that still counts as active. */
    bool fActiveClient = false;
    listIter iter;
    listNode *node;

    listRewind(g_pserver->clients, &iter);
    while ((node = listNext(&iter)) != NULL) {
        client *pclient = (client*)listNodeValue(node);
        if (!(pclient->flags & CLIENT_IGNORE_SOFT_SHUTDOWN)) {
            fActiveClient = true;
            break;
        }
    }

    /* Only attempt the shutdown when nobody is left; if the preparation
     * fails we simply retry on a later cron tick. */
    if (!fActiveClient && prepareForShutdown(SHUTDOWN_NOFLAGS) == C_OK) {
        serverLog(LL_WARNING, "All active clients have disconnected while a soft shutdown is pending. Shutting down now.");
        throw ShutdownException();
    }
}
g_pserver->cronloops++;
return 1000/g_pserver->hz;
}
@ -5297,6 +5317,11 @@ void pingCommand(client *c) {
return;
}
/* While a soft shutdown is pending, answer PING with an error instead of
 * the usual reply. Clients carrying any CLIENT_IGNORE_SOFT_SHUTDOWN flag
 * skip this check and continue to the normal reply path below. */
if (g_pserver->soft_shutdown && !(c->flags & CLIENT_IGNORE_SOFT_SHUTDOWN)) {
addReplyError(c, "-SHUTDOWN PENDING");
return;
}
if (c->flags & CLIENT_PUBSUB && c->resp == 2) {
addReply(c,shared.mbulkhdr[2]);
addReplyBulkCBuffer(c,"pong",4);
@ -6691,7 +6716,7 @@ static void sigShutdownHandler(int sig) {
* If we receive the signal the second time, we interpret this as
* the user really wanting to quit ASAP without waiting to persist
* on disk. */
if (g_pserver->shutdown_asap && sig == SIGINT) {
if ((g_pserver->shutdown_asap || g_pserver->soft_shutdown) && sig == SIGINT) {
serverLogFromHandler(LL_WARNING, "You insist... exiting now.");
rdbRemoveTempFile(g_pserver->rdbThreadVars.tmpfileNum, 1);
g_pserver->garbageCollector.shutdown();
@ -6702,7 +6727,10 @@ static void sigShutdownHandler(int sig) {
}
serverLogFromHandler(LL_WARNING, msg);
g_pserver->shutdown_asap = 1;
if (g_pserver->config_soft_shutdown)
g_pserver->soft_shutdown = true;
else
g_pserver->shutdown_asap = 1;
}
void setupSignalHandlers(void) {

View File

@ -508,6 +508,7 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT];
#define CLIENT_PREVENT_AOF_PROP (1<<19) /* Don't propagate to AOF. */
#define CLIENT_PREVENT_REPL_PROP (1<<20) /* Don't propagate to slaves. */
#define CLIENT_PREVENT_PROP (CLIENT_PREVENT_AOF_PROP|CLIENT_PREVENT_REPL_PROP)
/* Mask of client flags that are ignored by a soft shutdown: a client with
 * any of these flags is not counted as active when serverCron decides
 * whether the shutdown may proceed, and it does not receive the
 * soft-shutdown error reply on PING. */
#define CLIENT_IGNORE_SOFT_SHUTDOWN (CLIENT_MASTER | CLIENT_SLAVE | CLIENT_BLOCKED | CLIENT_MONITOR)
#define CLIENT_PENDING_WRITE (1<<21) /* Client has output to send but a write
handler is yet not installed. */
#define CLIENT_REPLY_OFF (1<<22) /* Don't send replies to client. */
@ -2721,6 +2722,9 @@ struct redisServer {
long long repl_batch_offStart = -1;
long long repl_batch_idxStart = -1;
/* Value of the "soft-shutdown" config option. When set, the shutdown
 * signal handler requests a soft shutdown instead of setting
 * shutdown_asap. */
int config_soft_shutdown = false;
/* True once a soft shutdown is pending (set by SHUTDOWN SOFT or by the
 * shutdown signal handler). */
bool soft_shutdown = false;
/* Lock Contention Ring Buffer */
static const size_t s_lockContentionSamples = 64;
uint16_t rglockSamples[s_lockContentionSamples];

View File

@ -89,6 +89,7 @@ set ::all_tests {
integration/logging
integration/corrupt-dump
integration/corrupt-dump-fuzzer
unit/soft_shutdown
}
# Index to the next test to run in the ::all_tests list.
set ::next_test 0

View File

@ -0,0 +1,139 @@
start_server {tags {"soft_shutdown"} } {
    # SHUTDOWN SOFT is acknowledged with OK and the connection stays usable.
    test {soft shutdown command replies} {
        assert_equal [r shutdown soft] "OK"
    }

    # While the shutdown is pending, PING is answered with an error.
    test {soft shutdown errors on ping} {
        # Run the command directly inside catch. Wrapping it in square
        # brackets would evaluate the reply itself as a command whenever
        # PING unexpectedly succeeded, hiding the real reply.
        catch {r ping} e
        assert_match {SHUTDOWN PENDING} $e
    }
}
start_server {tags {"soft_shutdown"} } {
    test {soft shutdown prevents new connections} {
        assert_equal [r shutdown soft] {OK}

        # reconnect
        set failed [catch {set conn [redis_deferring_client]} msg]

        # Over plain TCP the rejection text is visible in the error; with
        # TLS all we can check is that connecting failed.
        if {!$::tls} {
            assert_match {*SHUTDOWN*} $msg
        } else {
            assert_equal $failed 1
        }
    }
}
start_server {tags {"soft_shutdown"} } {
    # An already connected client can still write and read while the soft
    # shutdown is pending.
    test {soft shutdown allows commands to execute while waiting} {
        assert_equal [r shutdown soft] {OK}

        r set test val
        assert_equal [r get test] "val"
    }
}
start_server {tags {"soft_shutdown"} } {
    test {soft shutdown shuts down after all clients exit} {
        assert_equal [r shutdown soft] {OK}

        # Drop the only client, then give the server some time to notice
        # and exit before trying to connect again.
        r close
        after 500

        catch {set conn [redis_deferring_client]} msg
        assert_match {*refused*} $msg
    }
}
start_server {tags {"soft_shutdown"} overrides {soft-shutdown yes} } {
    # With soft-shutdown enabled, the first SIGINT only starts a soft
    # shutdown, which shows up as an error reply to PING.
    test {soft shutdown triggered by SIGINT} {
        exec kill -SIGINT [s process_id]
        # Run the command directly inside catch. Wrapping it in square
        # brackets would evaluate the reply itself as a command whenever
        # PING unexpectedly succeeded.
        catch {r ping} e
        assert_match {SHUTDOWN PENDING} $e
    }

    # A second SIGINT while the soft shutdown is pending makes the server
    # exit right away, so the next command fails with an I/O error.
    test {second SIGINT forces a shutdown during a soft shutdown} {
        exec kill -SIGINT [s process_id]
        catch {r ping} e
        assert_match {*I/O*} $e
    }
}
start_server {tags {"soft_shutdown"} } {
    test {monitor does not prevent soft shutdown} {
        # Keep a MONITOR connection open for the whole test.
        set mon [redis_deferring_client]
        $mon monitor

        assert_equal [r shutdown soft] {OK}

        # Close the regular client; the monitor connection alone should not
        # keep the server alive.
        r close
        after 500

        catch {set conn [redis_deferring_client]} msg
        assert_match {*refused*} $msg
    }
}
start_server {tags {"soft_shutdown"} } {
    start_server {} {
        # node_0 is the inner server and becomes a replica of node_1, the
        # outer server. The soft shutdown is issued on the master.
        set node_0 [srv 0 client]
        set node_1 [srv -1 client]
        set node_1_host [srv -1 host]
        set node_1_port [srv -1 port]

        $node_0 replicaof $node_1_host $node_1_port
        wait_for_sync $node_0

        test {soft shutdown works with master} {
            $node_1 shutdown soft
        } {OK}

        test {soft shutdown on master doesn't affect replica} {
            assert_equal [$node_0 ping] {PONG}
        }

        test {soft shutdown on master updates ping response} {
            catch {$node_1 ping} e
            assert_equal $e {SHUTDOWN PENDING}
        }

        test {master prevents new connections with soft shutdown} {
            set c1 [redis $node_1_host $node_1_port 1 $::tls]
            set catch_res [catch {$c1 read} e]
            # Over plain TCP the rejection text is visible in the error;
            # with TLS all we can check is that the read failed.
            if {$::tls} {
                assert_equal $catch_res 1
            } else {
                assert_match {*SHUTDOWN*} $e
            }
        }

        # Closing the regular client leaves only the replica link to the
        # master, which must not hold up the shutdown.
        test {master soft shutdown works after all clients disconnect} {
            $node_1 close
            after 500
            catch {set c1 [redis $node_1_host $node_1_port 1 $::tls]} e
            assert_match {*refused*} $e
        }
    }
}
start_server {tags {"soft_shutdown"} } {
    start_server {} {
        # node_0 is the inner server and becomes a replica of the outer
        # server. The soft shutdown is issued on the replica.
        set node_0 [srv 0 client]
        set node_0_host [srv 0 host]
        set node_0_port [srv 0 port]
        set node_1_host [srv -1 host]
        set node_1_port [srv -1 port]

        $node_0 replicaof $node_1_host $node_1_port
        wait_for_sync $node_0

        # The replica's link to its master must not hold up the shutdown
        # once the regular client has gone.
        test {soft shutdown on replica is not blocked by master} {
            assert_equal [$node_0 shutdown soft] {OK}
            $node_0 close
            after 500
            catch {set c0 [redis $node_0_host $node_0_port 1 $::tls]} e
            assert_match {*refused*} $e
        }
    }
}