diff --git a/src/aof.cpp b/src/aof.cpp index 9fa9290bd..94229169b 100644 --- a/src/aof.cpp +++ b/src/aof.cpp @@ -285,7 +285,7 @@ int startAppendOnly(void) { strerror(errno)); return C_ERR; } - if (hasActiveChildProcess() && g_pserver->child_type != CHILD_TYPE_AOF) { + if (hasActiveChildProcessOrBGSave() && g_pserver->child_type != CHILD_TYPE_AOF) { g_pserver->aof_rewrite_scheduled = 1; serverLog(LL_WARNING,"AOF was enabled but there is already another background operation. An AOF background was scheduled to start when possible."); } else { @@ -438,7 +438,7 @@ void flushAppendOnlyFile(int force) { * useful for graphing / monitoring purposes. */ if (sync_in_progress) { latencyAddSampleIfNeeded("aof-write-pending-fsync",latency); - } else if (hasActiveChildProcess()) { + } else if (hasActiveChildProcessOrBGSave()) { latencyAddSampleIfNeeded("aof-write-active-child",latency); } else { latencyAddSampleIfNeeded("aof-write-alone",latency); @@ -535,7 +535,7 @@ void flushAppendOnlyFile(int force) { try_fsync: /* Don't fsync if no-appendfsync-on-rewrite is set to yes and there are * children doing I/O in the background. */ - if (g_pserver->aof_no_fsync_on_rewrite && hasActiveChildProcess()) + if (g_pserver->aof_no_fsync_on_rewrite && hasActiveChildProcessOrBGSave()) return; /* Perform the fsync if needed. */ @@ -1887,7 +1887,7 @@ void aofClosePipes(void) { int rewriteAppendOnlyFileBackground(void) { pid_t childpid; - if (hasActiveChildProcess()) return C_ERR; + if (hasActiveChildProcessOrBGSave()) return C_ERR; if (aofCreatePipes() != C_OK) return C_ERR; if ((childpid = redisFork(CHILD_TYPE_AOF)) == 0) { char tmpfile[256]; @@ -1930,7 +1930,7 @@ int rewriteAppendOnlyFileBackground(void) { void bgrewriteaofCommand(client *c) { if (g_pserver->child_type == CHILD_TYPE_AOF) { addReplyError(c,"Background append only file rewriting already in progress"); - } else if (hasActiveChildProcess()) { + } else if (hasActiveChildProcessOrBGSave()) { g_pserver->aof_rewrite_scheduled = 1; addReplyStatus(c,"Background append only file rewriting scheduled"); } else if (rewriteAppendOnlyFileBackground() == C_OK) { diff --git a/src/db.cpp b/src/db.cpp index 0446e224f..81bf96d44 100644 --- a/src/db.cpp +++ b/src/db.cpp @@ -93,7 +93,7 @@ static void lookupKeyUpdateObj(robj *val, int flags) /* Update the access time for the ageing algorithm. * Don't do it if we have a saving child, as this will trigger * a copy on write madness. */ - if (!hasActiveChildProcess() && !(flags & LOOKUP_NOTOUCH)) + if (!hasActiveChildProcessOrBGSave() && !(flags & LOOKUP_NOTOUCH)) { if (g_pserver->maxmemory_policy & MAXMEMORY_FLAG_LFU) { updateLFU(val); diff --git a/src/rdb.cpp b/src/rdb.cpp index d91093b68..f3ab5898b 100644 --- a/src/rdb.cpp +++ b/src/rdb.cpp @@ -1679,17 +1679,15 @@ int rdbSaveBackground(rdbSaveInfo *rsi) { pthread_t child; long long start; - if (hasActiveChildProcess()) return C_ERR; + if (hasActiveChildProcessOrBGSave()) return C_ERR; g_pserver->dirty_before_bgsave = g_pserver->dirty; g_pserver->lastbgsave_try = time(NULL); openChildInfoPipe(); start = ustime(); + latencyStartMonitor(g_pserver->rdb_save_latency); - - g_pserver->stat_fork_time = ustime()-start; - g_pserver->stat_fork_rate = (double) zmalloc_used_memory() * 1000000 / g_pserver->stat_fork_time / (1024*1024*1024); /* GB per second. */ if (launchRdbSaveThread(child, rsi) != C_OK) { closeChildInfoPipe(); g_pserver->lastbgsave_status = C_ERR; @@ -1697,6 +1695,9 @@ int rdbSaveBackground(rdbSaveInfo *rsi) { strerror(errno)); return C_ERR; } + + g_pserver->stat_fork_time = ustime()-start; + g_pserver->stat_fork_rate = (double) zmalloc_used_memory() * 1000000 / g_pserver->stat_fork_time / (1024*1024*1024); /* GB per second. */ latencyAddSampleIfNeeded("fork",g_pserver->stat_fork_time/1000); serverLog(LL_NOTICE,"Background saving started"); g_pserver->rdb_save_time_start = time(NULL); @@ -3601,6 +3602,8 @@ static void backgroundSaveDoneHandlerDisk(int exitcode, bool fCancelled) { g_pserver->dirty = g_pserver->dirty - g_pserver->dirty_before_bgsave; g_pserver->lastsave = time(NULL); g_pserver->lastbgsave_status = C_OK; + latencyEndMonitor(g_pserver->rdb_save_latency); + latencyAddSampleIfNeeded("rdb-save",g_pserver->rdb_save_latency); } else if (!fCancelled && exitcode != 0) { serverLog(LL_WARNING, "Background saving error"); g_pserver->lastbgsave_status = C_ERR; @@ -3782,7 +3785,7 @@ int rdbSaveToSlavesSockets(rdbSaveInfo *rsi) { int pipefds[2]; rdbSaveSocketThreadArgs *args = nullptr; - if (hasActiveChildProcess()) return C_ERR; + if (hasActiveChildProcessOrBGSave()) return C_ERR; /* Even if the previous fork child exited, don't start a new one until we * drained the pipe. */ diff --git a/src/replication.cpp b/src/replication.cpp index 7afe1d22b..6c70ba4be 100644 --- a/src/replication.cpp +++ b/src/replication.cpp @@ -1538,7 +1538,7 @@ void syncCommand(client *c) { } else { /* We don't have a BGSAVE in progress, let's start one. Diskless * or disk-based mode is determined by replica's capacity. */ - if (!hasActiveChildProcess()) { + if (!hasActiveChildProcessOrBGSave()) { startBgsaveForReplication(c->slave_capa); } else { serverLog(LL_NOTICE, @@ -4984,7 +4984,7 @@ void replicationStartPendingFork(void) { * In case of diskless replication, we make sure to wait the specified * number of seconds (according to configuration) so that other slaves * have the time to arrive before we start streaming. */ - if (!hasActiveChildProcess()) { + if (!hasActiveChildProcessOrBGSave()) { time_t idle, max_idle = 0; int slaves_waiting = 0; int mincapa = -1; diff --git a/src/server.cpp b/src/server.cpp index b498571db..8b62d88d5 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -1706,7 +1706,7 @@ int redisDbPersistentData::incrementallyRehash() { * for dict.c to resize the hash tables accordingly to the fact we have an * active fork child running. */ void updateDictResizePolicy(void) { - if (!hasActiveChildProcess() || (g_pserver->FRdbSaveInProgress() && !cserver.fForkBgSave)) + if (!hasActiveChildProcess()) dictEnableResize(); else dictDisableResize(); @@ -1725,7 +1725,11 @@ const char *strChildType(int type) { /* Return true if there are active children processes doing RDB saving, * AOF rewriting, or some side process spawned by a loaded module. */ int hasActiveChildProcess() { - return g_pserver->FRdbSaveInProgress() || g_pserver->child_pid != -1; + return g_pserver->child_pid != -1; +} + +int hasActiveChildProcessOrBGSave() { + return g_pserver->FRdbSaveInProgress() || hasActiveChildProcess(); } void resetChildState() { @@ -2074,7 +2078,7 @@ void databasesCron(bool fMainThread) { /* Perform hash tables rehashing if needed, but only if there are no * other processes saving the DB on disk. Otherwise rehashing is bad * as will cause a lot of copy-on-write of memory pages. */ - if (!hasActiveChildProcess() || g_pserver->FRdbSaveInProgress()) { + if (!hasActiveChildProcess()) { /* We use global counters so if we stop the computation at a given * DB we'll be able to start from the successive in the next * cron loop iteration. */ @@ -2468,14 +2472,14 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { /* Start a scheduled AOF rewrite if this was requested by the user while * a BGSAVE was in progress. */ - if (!hasActiveChildProcess() && + if (!hasActiveChildProcessOrBGSave() && g_pserver->aof_rewrite_scheduled) { rewriteAppendOnlyFileBackground(); } /* Check if a background saving or AOF rewrite in progress terminated. */ - if (hasActiveChildProcess() || ldbPendingChildren()) + if (hasActiveChildProcessOrBGSave() || ldbPendingChildren()) { run_with_period(1000) receiveChildInfo(); checkChildrenDone(); @@ -2517,7 +2521,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { /* Trigger an AOF rewrite if needed. */ if (g_pserver->aof_state == AOF_ON && - !hasActiveChildProcess() && + !hasActiveChildProcessOrBGSave() && g_pserver->aof_rewrite_perc && g_pserver->aof_current_size > g_pserver->aof_rewrite_min_size) { @@ -2601,7 +2605,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { * Note: this code must be after the replicationCron() call above so * make sure when refactoring this file to keep this order. This is useful * because we want to give priority to RDB savings for replication. */ - if (!hasActiveChildProcess() && + if (!hasActiveChildProcessOrBGSave() && g_pserver->rdb_bgsave_scheduled && (g_pserver->unixtime-g_pserver->lastbgsave_try > CONFIG_BGSAVE_RETRY_DELAY || g_pserver->lastbgsave_status == C_OK)) diff --git a/src/server.h b/src/server.h index eee584f2d..f052d0595 100644 --- a/src/server.h +++ b/src/server.h @@ -2470,7 +2470,8 @@ struct redisServer { time_t lastbgsave_try; /* Unix time of last attempted bgsave */ time_t rdb_save_time_last; /* Time used by last RDB save run. */ time_t rdb_save_time_start; /* Current RDB save start time. */ - pid_t rdb_child_pid = -1; /* Used only during fork bgsave */ + mstime_t rdb_save_latency; /* Used to track end to end latency of rdb save*/ + pid_t rdb_child_pid = -1; /* Used only during fork bgsave */ int rdb_bgsave_scheduled; /* BGSAVE when possible if true. */ int rdb_child_type; /* Type of save by active child. */ int lastbgsave_status; /* C_OK or C_ERR */ @@ -3254,6 +3255,7 @@ void receiveChildInfo(void); void executeWithoutGlobalLock(std::function func); int redisFork(int type); int hasActiveChildProcess(); +int hasActiveChildProcessOrBGSave(); void resetChildState(); int isMutuallyExclusiveChildType(int type);