From b002d2b4f1415f4db805081bc8f5b85d00f30e33 Mon Sep 17 00:00:00 2001 From: Wang Yuan Date: Thu, 17 Sep 2020 23:20:10 +0800 Subject: [PATCH] Remove tmp rdb file in background thread (#7762) We're already using bg_unlink in several places to delete the rdb file in the background, and avoid paying the cost of the deletion from our main thread. This commit uses bg_unlink to remove the temporary rdb file in the background too. However, in case we delete that rdb file just before exiting, we don't actually wait for the background thread or the main thread to delete it, and just let the OS clean up after us. i.e. we open the file, unlink it and exit with the fd still open. Furthermore, rdbRemoveTempFile can be called from a thread and was using snprintf which is not async-signal-safe, we now use ll2string instead. --- src/rdb.c | 29 +++++++++++++++++++----- src/rdb.h | 2 +- src/server.c | 6 ++++- src/server.h | 1 + tests/test_helper.tcl | 1 + tests/unit/shutdown.tcl | 49 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 81 insertions(+), 7 deletions(-) create mode 100644 tests/unit/shutdown.tcl diff --git a/src/rdb.c b/src/rdb.c index 37e09ef96..d0a3c3210 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -34,6 +34,7 @@ #include "stream.h" #include +#include #include #include #include @@ -1413,11 +1414,29 @@ int rdbSaveBackground(char *filename, rdbSaveInfo *rsi) { return C_OK; /* unreached */ } -void rdbRemoveTempFile(pid_t childpid) { +/* Note that we may call this function in signal handle 'sigShutdownHandler', + * so we need guarantee all functions we call are async-signal-safe. + * If we call this function from signal handle, we won't call bg_unlik that + * is not async-signal-safe. */ +void rdbRemoveTempFile(pid_t childpid, int from_signal) { char tmpfile[256]; + char pid[32]; - snprintf(tmpfile,sizeof(tmpfile),"temp-%d.rdb", (int) childpid); - unlink(tmpfile); + /* Generate temp rdb file name using aync-signal safe functions. */ + int pid_len = ll2string(pid, sizeof(pid), childpid); + strcpy(tmpfile, "temp-"); + strncpy(tmpfile+5, pid, pid_len); + strcpy(tmpfile+5+pid_len, ".rdb"); + + if (from_signal) { + /* bg_unlink is not async-signal-safe, but in this case we don't really + * need to close the fd, it'll be released when the process exists. */ + int fd = open(tmpfile, O_RDONLY|O_NONBLOCK); + UNUSED(fd); + unlink(tmpfile); + } else { + bg_unlink(tmpfile); + } } /* This function is called by rdbLoadObject() when the code is in RDB-check @@ -2419,7 +2438,7 @@ void backgroundSaveDoneHandlerDisk(int exitcode, int bysignal) { serverLog(LL_WARNING, "Background saving terminated by signal %d", bysignal); latencyStartMonitor(latency); - rdbRemoveTempFile(server.rdb_child_pid); + rdbRemoveTempFile(server.rdb_child_pid, 0); latencyEndMonitor(latency); latencyAddSampleIfNeeded("rdb-unlink-temp-file",latency); /* SIGUSR1 is whitelisted, so we have a way to kill a child without @@ -2488,7 +2507,7 @@ void backgroundSaveDoneHandler(int exitcode, int bysignal) { * the cleanup needed. */ void killRDBChild(void) { kill(server.rdb_child_pid,SIGUSR1); - rdbRemoveTempFile(server.rdb_child_pid); + rdbRemoveTempFile(server.rdb_child_pid, 0); closeChildInfoPipe(); updateDictResizePolicy(); } diff --git a/src/rdb.h b/src/rdb.h index aae682dbc..885cf49c6 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -141,7 +141,7 @@ int rdbLoadObjectType(rio *rdb); int rdbLoad(char *filename, rdbSaveInfo *rsi, int rdbflags); int rdbSaveBackground(char *filename, rdbSaveInfo *rsi); int rdbSaveToSlavesSockets(rdbSaveInfo *rsi); -void rdbRemoveTempFile(pid_t childpid); +void rdbRemoveTempFile(pid_t childpid, int from_signal); int rdbSave(char *filename, rdbSaveInfo *rsi); ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key); size_t rdbSavedObjectLen(robj *o, robj *key); diff --git a/src/server.c b/src/server.c index 31e082510..1ce20334c 100644 --- a/src/server.c +++ b/src/server.c @@ -3878,6 +3878,10 @@ int prepareForShutdown(int flags) { overwrite the synchronous saving did by SHUTDOWN. */ if (server.rdb_child_pid != -1) { serverLog(LL_WARNING,"There is a child saving an .rdb. Killing it!"); + /* Note that, in killRDBChild, we call rdbRemoveTempFile that will + * do close fd(in order to unlink file actully) in background thread. + * The temp rdb file fd may won't be closed when redis exits quickly, + * but OS will close this fd when process exits. */ killRDBChild(); } @@ -4934,7 +4938,7 @@ static void sigShutdownHandler(int sig) { * on disk. */ if (server.shutdown_asap && sig == SIGINT) { serverLogFromHandler(LL_WARNING, "You insist... exiting now."); - rdbRemoveTempFile(getpid()); + rdbRemoveTempFile(getpid(), 1); exit(1); /* Exit with an error since this was not a clean shutdown. */ } else if (server.loading) { serverLogFromHandler(LL_WARNING, "Received shutdown signal during loading, exiting now."); diff --git a/src/server.h b/src/server.h index cac8544b9..a6859f554 100644 --- a/src/server.h +++ b/src/server.h @@ -1880,6 +1880,7 @@ int writeCommandsDeniedByDiskError(void); /* RDB persistence */ #include "rdb.h" void killRDBChild(void); +int bg_unlink(const char *filename); /* AOF persistence */ void flushAppendOnlyFile(int force); diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index 7e1c5c88f..b60adb881 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -69,6 +69,7 @@ set ::all_tests { unit/tls unit/tracking unit/oom-score-adj + unit/shutdown } # Index to the next test to run in the ::all_tests list. set ::next_test 0 diff --git a/tests/unit/shutdown.tcl b/tests/unit/shutdown.tcl new file mode 100644 index 000000000..21ea8545d --- /dev/null +++ b/tests/unit/shutdown.tcl @@ -0,0 +1,49 @@ +start_server {tags {"shutdown"}} { + test {Temp rdb will be deleted if we use bg_unlink when shutdown} { + for {set i 0} {$i < 20} {incr i} { + r set $i $i + } + # It will cost 2s(20 * 100ms) to dump rdb + r config set rdb-key-save-delay 100000 + + # Child is dumping rdb + r bgsave + after 100 + set dir [lindex [r config get dir] 1] + set child_pid [get_child_pid 0] + set temp_rdb [file join [lindex [r config get dir] 1] temp-${child_pid}.rdb] + # Temp rdb must be existed + assert {[file exists $temp_rdb]} + + catch {r shutdown nosave} + # Make sure the server was killed + catch {set rd [redis_deferring_client]} e + assert_match {*connection refused*} $e + + # Temp rdb file must be deleted + assert {![file exists $temp_rdb]} + } +} + +start_server {tags {"shutdown"}} { + test {Temp rdb will be deleted in signal handle} { + for {set i 0} {$i < 20} {incr i} { + r set $i $i + } + # It will cost 2s(20 * 100ms) to dump rdb + r config set rdb-key-save-delay 100000 + + set pid [s process_id] + set temp_rdb [file join [lindex [r config get dir] 1] temp-${pid}.rdb] + + exec kill -SIGINT $pid + after 100 + # Temp rdb must be existed + assert {[file exists $temp_rdb]} + + # Temp rdb file must be deleted + exec kill -SIGINT $pid + after 100 + assert {![file exists $temp_rdb]} + } +}