From 49b36633324074edd3e06c334d23562edc49159d Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" <276441700@qq.com> Date: Fri, 29 Jan 2021 14:35:10 +0800 Subject: [PATCH] AOF: recover from last write error after turn on appendonly again (#8030) The key point is how to recover from last AOF write error, for example: 1. start redis with appendonly yes, and append some write commands 2. short write or something else error happen, `server.aof_last_write_status` changed to `C_ERR`, now redis doesn't accept write commands 3. execute `CONFIG SET appendonly no` to avoid the above problem, now redis can accept write commands again 4. disk error resolved, and execute `CONFIG SET appendonly yes` to reopen AOF, but `server.aof_last_write_status` cannot be changed to `C_OK` (if background aof rewrite run less then 1 second, it will free `server.aof_buf` and then serverCron cannot fix `aof_last_write_status`), then redis cannot accept write commands forever. This PR use a simple way to fix it: 1. just free `server.aof_buf` when stop appendonly to save memory, if error happens in `flushAppendOnlyFile(1)`, the `server.aof_buf` may contains some data which has not be written to aof, I think we can ignore it because we turn off the appendonly. 2. reset fsync status after stop appendonly and call `flushAppendOnlyFile` only when `aof_state` is ON 3. reset `server.last_write_status` when reopen aof to accept write commands --- src/aof.c | 19 +++++++++++++++++-- src/server.c | 8 +++++--- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/aof.c b/src/aof.c index f07f7ca2e..6753e8bcc 100644 --- a/src/aof.c +++ b/src/aof.c @@ -235,6 +235,8 @@ void stopAppendOnly(void) { serverAssert(server.aof_state != AOF_OFF); flushAppendOnlyFile(1); redis_fsync(server.aof_fd); + server.aof_fsync_offset = server.aof_current_size; + server.aof_last_fsync = server.unixtime; close(server.aof_fd); server.aof_fd = -1; @@ -242,6 +244,8 @@ void stopAppendOnly(void) { server.aof_state = AOF_OFF; server.aof_rewrite_scheduled = 0; killAppendOnlyChild(); + sdsfree(server.aof_buf); + server.aof_buf = sdsempty(); } /* Called when the user switches from "appendonly no" to "appendonly yes" @@ -285,6 +289,12 @@ int startAppendOnly(void) { server.aof_state = AOF_WAIT_REWRITE; server.aof_last_fsync = server.unixtime; server.aof_fd = newfd; + + /* If AOF was in error state, we just ignore it and log the event. */ + if (server.aof_last_write_status == C_ERR) { + serverLog(LL_WARNING,"AOF reopen, just ignore the last error."); + server.aof_last_write_status = C_OK; + } return C_OK; } @@ -1855,18 +1865,22 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) { close(newfd); goto cleanup; } + latencyEndMonitor(latency); + latencyAddSampleIfNeeded("aof-rewrite-diff-write",latency); + if (server.aof_fsync == AOF_FSYNC_EVERYSEC) { aof_background_fsync(newfd); } else if (server.aof_fsync == AOF_FSYNC_ALWAYS) { + latencyStartMonitor(latency); if (redis_fsync(newfd) == -1) { serverLog(LL_WARNING, "Error trying to fsync the parent diff to the rewritten AOF: %s", strerror(errno)); close(newfd); goto cleanup; } + latencyEndMonitor(latency); + latencyAddSampleIfNeeded("aof-rewrite-done-fsync",latency); } - latencyEndMonitor(latency); - latencyAddSampleIfNeeded("aof-rewrite-diff-write",latency); serverLog(LL_NOTICE, "Residual parent diff successfully flushed to the rewritten AOF (%.2f MB)", (double) aofRewriteBufferSize() / (1024*1024)); @@ -1938,6 +1952,7 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) { aofUpdateCurrentSize(); server.aof_rewrite_base_size = server.aof_current_size; server.aof_fsync_offset = server.aof_current_size; + server.aof_last_fsync = server.unixtime; /* Clear regular AOF buffer since its contents was just written to * the new AOF from the background rewrite buffer. */ diff --git a/src/server.c b/src/server.c index 1166c4b3e..0a12e2e31 100644 --- a/src/server.c +++ b/src/server.c @@ -2174,14 +2174,15 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { /* AOF postponed flush: Try at every cron cycle if the slow fsync * completed. */ - if (server.aof_flush_postponed_start) flushAppendOnlyFile(0); + if (server.aof_state == AOF_ON && server.aof_flush_postponed_start) + flushAppendOnlyFile(0); /* AOF write errors: in this case we have a buffer to flush as well and * clear the AOF error in case of success to make the DB writable again, * however to try every second is enough in case of 'hz' is set to * a higher frequency. */ run_with_period(1000) { - if (server.aof_last_write_status == C_ERR) + if (server.aof_state == AOF_ON && server.aof_last_write_status == C_ERR) flushAppendOnlyFile(0); } @@ -2418,7 +2419,8 @@ void beforeSleep(struct aeEventLoop *eventLoop) { trackingBroadcastInvalidationMessages(); /* Write the AOF buffer on disk */ - flushAppendOnlyFile(0); + if (server.aof_state == AOF_ON) + flushAppendOnlyFile(0); /* Handle writes with pending output buffers. */ handleClientsWithPendingWritesUsingThreads();