From 247df4e83b87e85e8aaa1048b73d2561428c5750 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 14 Jan 2013 10:29:14 +0100 Subject: [PATCH] Whitelist SIGUSR1 to avoid auto-triggering errors. This commit fixes issue #875 that was caused by the following events: 1) There is an active child doing BGSAVE. 2) flushall is called (or any other condition that makes Redis killing the saving child process). 3) An error is sensed by Redis as the child exited with an error (killed by a singal), that stops accepting write commands until a BGSAVE happens to be executed with success. Whitelisting SIGUSR1 and making sure Redis always uses this signal in order to kill its own children fixes the issue. --- src/aof.c | 2 +- src/db.c | 2 +- src/rdb.c | 5 ++++- src/redis.c | 4 ++-- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/aof.c b/src/aof.c index 7e1512ebd..8dc1dd188 100644 --- a/src/aof.c +++ b/src/aof.c @@ -177,7 +177,7 @@ void stopAppendOnly(void) { redisLog(REDIS_NOTICE,"Killing running AOF rewrite child: %ld", (long) server.aof_child_pid); - if (kill(server.aof_child_pid,SIGKILL) != -1) + if (kill(server.aof_child_pid,SIGUSR1) != -1) wait3(&statloc,0,NULL); /* reset the buffer accumulating changes while the child saves */ aofRewriteBufferReset(); diff --git a/src/db.c b/src/db.c index eda05c85f..d1b22af69 100644 --- a/src/db.c +++ b/src/db.c @@ -221,7 +221,7 @@ void flushallCommand(redisClient *c) { server.dirty += emptyDb(); addReply(c,shared.ok); if (server.rdb_child_pid != -1) { - kill(server.rdb_child_pid,SIGKILL); + kill(server.rdb_child_pid,SIGUSR1); rdbRemoveTempFile(server.rdb_child_pid); } if (server.saveparamslen > 0) { diff --git a/src/rdb.c b/src/rdb.c index 97492f4ec..13abecced 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -1194,7 +1194,10 @@ void backgroundSaveDoneHandler(int exitcode, int bysignal) { redisLog(REDIS_WARNING, "Background saving terminated by signal %d", bysignal); rdbRemoveTempFile(server.rdb_child_pid); - server.lastbgsave_status = REDIS_ERR; + /* SIGUSR1 is whitelisted, so we have a way to kill a child without + * tirggering an error conditon. */ + if (bysignal != SIGUSR1) + server.lastbgsave_status = REDIS_ERR; } server.rdb_child_pid = -1; server.rdb_save_time_last = time(NULL)-server.rdb_save_time_start; diff --git a/src/redis.c b/src/redis.c index a2e0a1b13..20186cae2 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1755,7 +1755,7 @@ int prepareForShutdown(int flags) { overwrite the synchronous saving did by SHUTDOWN. */ if (server.rdb_child_pid != -1) { redisLog(REDIS_WARNING,"There is a child saving an .rdb. Killing it!"); - kill(server.rdb_child_pid,SIGKILL); + kill(server.rdb_child_pid,SIGUSR1); rdbRemoveTempFile(server.rdb_child_pid); } if (server.aof_state != REDIS_AOF_OFF) { @@ -1764,7 +1764,7 @@ int prepareForShutdown(int flags) { if (server.aof_child_pid != -1) { redisLog(REDIS_WARNING, "There is a child rewriting the AOF. Killing it!"); - kill(server.aof_child_pid,SIGKILL); + kill(server.aof_child_pid,SIGUSR1); } /* Append only file: fsync() the AOF and exit */ redisLog(REDIS_NOTICE,"Calling fsync() on the AOF file.");