change hasActiveChildProcess to return true only when there is an actual child process (#558)

change hasActiveChildProcess to return true only when there is an actual child process, add hasActiveChildProcessOrBGSave to catch case of forkless bgsave
This commit is contained in:
Malavan Sotheeswaran 2023-02-03 13:36:06 -05:00 committed by GitHub
parent 687850a612
commit 5123e2b3a1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 30 additions and 21 deletions

View File

@ -285,7 +285,7 @@ int startAppendOnly(void) {
strerror(errno));
return C_ERR;
}
if (hasActiveChildProcess() && g_pserver->child_type != CHILD_TYPE_AOF) {
if (hasActiveChildProcessOrBGSave() && g_pserver->child_type != CHILD_TYPE_AOF) {
g_pserver->aof_rewrite_scheduled = 1;
serverLog(LL_WARNING,"AOF was enabled but there is already another background operation. An AOF background was scheduled to start when possible.");
} else {
@ -438,7 +438,7 @@ void flushAppendOnlyFile(int force) {
* useful for graphing / monitoring purposes. */
if (sync_in_progress) {
latencyAddSampleIfNeeded("aof-write-pending-fsync",latency);
} else if (hasActiveChildProcess()) {
} else if (hasActiveChildProcessOrBGSave()) {
latencyAddSampleIfNeeded("aof-write-active-child",latency);
} else {
latencyAddSampleIfNeeded("aof-write-alone",latency);
@ -535,7 +535,7 @@ void flushAppendOnlyFile(int force) {
try_fsync:
/* Don't fsync if no-appendfsync-on-rewrite is set to yes and there are
* children doing I/O in the background. */
if (g_pserver->aof_no_fsync_on_rewrite && hasActiveChildProcess())
if (g_pserver->aof_no_fsync_on_rewrite && hasActiveChildProcessOrBGSave())
return;
/* Perform the fsync if needed. */
@ -1887,7 +1887,7 @@ void aofClosePipes(void) {
int rewriteAppendOnlyFileBackground(void) {
pid_t childpid;
if (hasActiveChildProcess()) return C_ERR;
if (hasActiveChildProcessOrBGSave()) return C_ERR;
if (aofCreatePipes() != C_OK) return C_ERR;
if ((childpid = redisFork(CHILD_TYPE_AOF)) == 0) {
char tmpfile[256];
@ -1930,7 +1930,7 @@ int rewriteAppendOnlyFileBackground(void) {
void bgrewriteaofCommand(client *c) {
if (g_pserver->child_type == CHILD_TYPE_AOF) {
addReplyError(c,"Background append only file rewriting already in progress");
} else if (hasActiveChildProcess()) {
} else if (hasActiveChildProcessOrBGSave()) {
g_pserver->aof_rewrite_scheduled = 1;
addReplyStatus(c,"Background append only file rewriting scheduled");
} else if (rewriteAppendOnlyFileBackground() == C_OK) {

View File

@ -93,7 +93,7 @@ static void lookupKeyUpdateObj(robj *val, int flags)
/* Update the access time for the ageing algorithm.
* Don't do it if we have a saving child, as this will trigger
* a copy on write madness. */
if (!hasActiveChildProcess() && !(flags & LOOKUP_NOTOUCH))
if (!hasActiveChildProcessOrBGSave() && !(flags & LOOKUP_NOTOUCH))
{
if (g_pserver->maxmemory_policy & MAXMEMORY_FLAG_LFU) {
updateLFU(val);

View File

@ -1679,17 +1679,15 @@ int rdbSaveBackground(rdbSaveInfo *rsi) {
pthread_t child;
long long start;
if (hasActiveChildProcess()) return C_ERR;
if (hasActiveChildProcessOrBGSave()) return C_ERR;
g_pserver->dirty_before_bgsave = g_pserver->dirty;
g_pserver->lastbgsave_try = time(NULL);
openChildInfoPipe();
start = ustime();
latencyStartMonitor(g_pserver->rdb_save_latency);
g_pserver->stat_fork_time = ustime()-start;
g_pserver->stat_fork_rate = (double) zmalloc_used_memory() * 1000000 / g_pserver->stat_fork_time / (1024*1024*1024); /* GB per second. */
if (launchRdbSaveThread(child, rsi) != C_OK) {
closeChildInfoPipe();
g_pserver->lastbgsave_status = C_ERR;
@ -1697,6 +1695,9 @@ int rdbSaveBackground(rdbSaveInfo *rsi) {
strerror(errno));
return C_ERR;
}
g_pserver->stat_fork_time = ustime()-start;
g_pserver->stat_fork_rate = (double) zmalloc_used_memory() * 1000000 / g_pserver->stat_fork_time / (1024*1024*1024); /* GB per second. */
latencyAddSampleIfNeeded("fork",g_pserver->stat_fork_time/1000);
serverLog(LL_NOTICE,"Background saving started");
g_pserver->rdb_save_time_start = time(NULL);
@ -3601,6 +3602,8 @@ static void backgroundSaveDoneHandlerDisk(int exitcode, bool fCancelled) {
g_pserver->dirty = g_pserver->dirty - g_pserver->dirty_before_bgsave;
g_pserver->lastsave = time(NULL);
g_pserver->lastbgsave_status = C_OK;
latencyEndMonitor(g_pserver->rdb_save_latency);
latencyAddSampleIfNeeded("rdb-save",g_pserver->rdb_save_latency);
} else if (!fCancelled && exitcode != 0) {
serverLog(LL_WARNING, "Background saving error");
g_pserver->lastbgsave_status = C_ERR;
@ -3782,7 +3785,7 @@ int rdbSaveToSlavesSockets(rdbSaveInfo *rsi) {
int pipefds[2];
rdbSaveSocketThreadArgs *args = nullptr;
if (hasActiveChildProcess()) return C_ERR;
if (hasActiveChildProcessOrBGSave()) return C_ERR;
/* Even if the previous fork child exited, don't start a new one until we
* drained the pipe. */

View File

@ -1538,7 +1538,7 @@ void syncCommand(client *c) {
} else {
/* We don't have a BGSAVE in progress, let's start one. Diskless
* or disk-based mode is determined by replica's capacity. */
if (!hasActiveChildProcess()) {
if (!hasActiveChildProcessOrBGSave()) {
startBgsaveForReplication(c->slave_capa);
} else {
serverLog(LL_NOTICE,
@ -4984,7 +4984,7 @@ void replicationStartPendingFork(void) {
* In case of diskless replication, we make sure to wait the specified
* number of seconds (according to configuration) so that other slaves
* have the time to arrive before we start streaming. */
if (!hasActiveChildProcess()) {
if (!hasActiveChildProcessOrBGSave()) {
time_t idle, max_idle = 0;
int slaves_waiting = 0;
int mincapa = -1;

View File

@ -1706,7 +1706,7 @@ int redisDbPersistentData::incrementallyRehash() {
* for dict.c to resize the hash tables accordingly to the fact we have an
* active fork child running. */
void updateDictResizePolicy(void) {
if (!hasActiveChildProcess() || (g_pserver->FRdbSaveInProgress() && !cserver.fForkBgSave))
if (!hasActiveChildProcess())
dictEnableResize();
else
dictDisableResize();
@ -1725,7 +1725,11 @@ const char *strChildType(int type) {
/* Return true if there are active children processes doing RDB saving,
* AOF rewriting, or some side process spawned by a loaded module. */
int hasActiveChildProcess() {
return g_pserver->FRdbSaveInProgress() || g_pserver->child_pid != -1;
return g_pserver->child_pid != -1;
}
int hasActiveChildProcessOrBGSave() {
return g_pserver->FRdbSaveInProgress() || hasActiveChildProcess();
}
void resetChildState() {
@ -2074,7 +2078,7 @@ void databasesCron(bool fMainThread) {
/* Perform hash tables rehashing if needed, but only if there are no
* other processes saving the DB on disk. Otherwise rehashing is bad
* as will cause a lot of copy-on-write of memory pages. */
if (!hasActiveChildProcess() || g_pserver->FRdbSaveInProgress()) {
if (!hasActiveChildProcess()) {
/* We use global counters so if we stop the computation at a given
* DB we'll be able to start from the successive in the next
* cron loop iteration. */
@ -2468,14 +2472,14 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
/* Start a scheduled AOF rewrite if this was requested by the user while
* a BGSAVE was in progress. */
if (!hasActiveChildProcess() &&
if (!hasActiveChildProcessOrBGSave() &&
g_pserver->aof_rewrite_scheduled)
{
rewriteAppendOnlyFileBackground();
}
/* Check if a background saving or AOF rewrite in progress terminated. */
if (hasActiveChildProcess() || ldbPendingChildren())
if (hasActiveChildProcessOrBGSave() || ldbPendingChildren())
{
run_with_period(1000) receiveChildInfo();
checkChildrenDone();
@ -2517,7 +2521,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
/* Trigger an AOF rewrite if needed. */
if (g_pserver->aof_state == AOF_ON &&
!hasActiveChildProcess() &&
!hasActiveChildProcessOrBGSave() &&
g_pserver->aof_rewrite_perc &&
g_pserver->aof_current_size > g_pserver->aof_rewrite_min_size)
{
@ -2601,7 +2605,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
* Note: this code must be after the replicationCron() call above so
* make sure when refactoring this file to keep this order. This is useful
* because we want to give priority to RDB savings for replication. */
if (!hasActiveChildProcess() &&
if (!hasActiveChildProcessOrBGSave() &&
g_pserver->rdb_bgsave_scheduled &&
(g_pserver->unixtime-g_pserver->lastbgsave_try > CONFIG_BGSAVE_RETRY_DELAY ||
g_pserver->lastbgsave_status == C_OK))

View File

@ -2470,7 +2470,8 @@ struct redisServer {
time_t lastbgsave_try; /* Unix time of last attempted bgsave */
time_t rdb_save_time_last; /* Time used by last RDB save run. */
time_t rdb_save_time_start; /* Current RDB save start time. */
pid_t rdb_child_pid = -1; /* Used only during fork bgsave */
mstime_t rdb_save_latency; /* Used to track end to end latency of rdb save*/
pid_t rdb_child_pid = -1; /* Used only during fork bgsave */
int rdb_bgsave_scheduled; /* BGSAVE when possible if true. */
int rdb_child_type; /* Type of save by active child. */
int lastbgsave_status; /* C_OK or C_ERR */
@ -3254,6 +3255,7 @@ void receiveChildInfo(void);
void executeWithoutGlobalLock(std::function<void()> func);
int redisFork(int type);
int hasActiveChildProcess();
int hasActiveChildProcessOrBGSave();
void resetChildState();
int isMutuallyExclusiveChildType(int type);