From 23c7f27df25527f98e6aad66010d5bb10556ce0f Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Sun, 3 Jan 2021 16:09:29 +0200 Subject: [PATCH] Fix rare assertion as a result of: active defrag while loading (#8281) In #7726 (part of 6.2), we added a mechanism for whileBlockedCron; this mechanism has an assertion to make sure the timestamp in whileBlockedCron is always set correctly before the blocking operation starts. I now found (thanks to our CI) two bugs in that area: 1) CONFIG RESETSTAT (if it was allowed during loading) would have cleared this var 2) the call to stopLoading (which calls whileBlockedCron) was made too early, while the rio is still in use, in which case the update_cksum (rdbLoadProgressCallback) may still be called and whileBlockedCron can assert. --- src/replication.c | 4 +++- src/server.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/replication.c b/src/replication.c index 520c43fa4..20eb83e72 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1690,7 +1690,6 @@ void readSyncBulkPayload(connection *conn) { * gets promoted. */ return; } - stopLoading(1); /* RDB loading succeeded if we reach this point. */ if (server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB) { @@ -1705,6 +1704,7 @@ void readSyncBulkPayload(connection *conn) { if (!rioRead(&rdb,buf,CONFIG_RUN_ID_SIZE) || memcmp(buf,eofmark,CONFIG_RUN_ID_SIZE) != 0) { + stopLoading(0); serverLog(LL_WARNING,"Replication stream EOF marker is broken"); cancelReplicationHandshake(1); rioFreeConn(&rdb, NULL); @@ -1712,6 +1712,8 @@ void readSyncBulkPayload(connection *conn) { } } + stopLoading(1); + /* Cleanup and restore the socket to the original state to continue * with the normal replication. 
*/ rioFreeConn(&rdb, NULL); diff --git a/src/server.c b/src/server.c index 49ce319c4..7ecfa8d1b 100644 --- a/src/server.c +++ b/src/server.c @@ -2955,7 +2955,6 @@ void resetServerStats(void) { server.stat_total_error_replies = 0; server.stat_dump_payload_sanitizations = 0; server.aof_delayed_fsync = 0; - server.blocked_last_cron = 0; } /* Make the thread killable at any time, so that kill threads functions @@ -3004,6 +3003,7 @@ void initServer(void) { server.clients_paused = 0; server.events_processed_while_blocked = 0; server.system_memory_size = zmalloc_get_memory_size(); + server.blocked_last_cron = 0; if ((server.tls_port || server.tls_replication || server.tls_cluster) && tlsConfigure(&server.tls_ctx_config) == C_ERR) {