From f518193862fc226b3ea93b4d25cbfb4d362f5255 Mon Sep 17 00:00:00 2001
From: Malavan Sotheeswaran
Date: Wed, 14 Jul 2021 23:41:24 +0000
Subject: [PATCH 1/9] test CI do not merge

Former-commit-id: ccdf18b1bef07ba076e5f86d74fe1e1f6ae50a8c
---
 .gitlab-ci.yml | 147 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 147 insertions(+)
 create mode 100644 .gitlab-ci.yml

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 000000000..cc67d754a
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,147 @@
+build:
+  rules:
+    - if: '$COVERAGE'
+      when: never
+    - if: '$ENDURANCE'
+      when: never
+    - when: always
+  tags:
+    - docker
+  stage: build
+  script:
+    - git submodule init && git submodule update
+    - make distclean
+    - make -j
+
+make-test:
+  rules:
+    - if: '$COVERAGE'
+      when: never
+    - if: '$ENDURANCE'
+      when: never
+    - when: always
+  tags:
+    - docker
+  stage: test
+  script:
+    - git submodule init && git submodule update
+    - make distclean
+    - make -j
+    - make test -j
+
+node-redis-test:
+  rules:
+    - if: '$COVERAGE'
+      when: never
+    - if: '$ENDURANCE'
+      when: never
+    - when: always
+  tags:
+    - docker
+    - ipv6
+  stage: test
+  script:
+    - git submodule init && git submodule update
+    - make distclean
+    - make -j
+    - make install
+    - git clone https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.eqalpha.com/keydb-dev/node-redis.git
+    - cd node-redis
+    - npm install
+    - npm run test
+
+jedis-test:
+  rules:
+    - if: '$COVERAGE'
+      when: never
+    - if: '$ENDURANCE'
+      when: never
+    - when: always
+  tags:
+    - docker
+    - ipv4
+  stage: test
+  script:
+    - git submodule init && git submodule update
+    - make distclean
+    - make -j
+    - make install
+    - git clone https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.eqalpha.com/keydb-dev/jedis.git
+    - cd jedis
+    - make test
+
+redis-rs-test:
+  rules:
+    - if: '$COVERAGE'
+      when: never
+    - if: '$ENDURANCE'
+      when: never
+    - when: always
+  tags:
+    - docker
+  stage: test
+  script:
+    - git submodule init && git submodule update
+    - make distclean
+    - make -j
+    - make install
+    - git clone https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.eqalpha.com/keydb-dev/redis-rs.git
+    - cd redis-rs
+    - make test
+
+endurance-test:
+  rules:
+    - if: '$ENDURANCE'
+  tags:
+    - docker
+  stage: test
+  script:
+    - git submodule init && git submodule update
+    - make distclean
+    - make -j
+    - ./runtest --loop --stop
+
+coverage-test:
+  rules:
+    - if: '$COVERAGE'
+  tags:
+    - docker
+  stage: test
+  script:
+    - git submodule init && git submodule update
+    - make distclean
+    - make gcov -j
+    - make install
+    - ./runtest || true
+    - pkill keydb-server || true
+    - pkill stunnel || true
+    - ./runtest-cluster || true
+    - pkill keydb-server || true
+    - pkill stunnel || true
+    - ./runtest-sentinel || true
+    - pkill keydb-server || true
+    - pkill stunnel || true
+    - ./runtest-moduleapi || true
+    - pkill keydb-server || true
+    - pkill stunnel || true
+    - git clone https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.eqalpha.com/keydb-dev/redis-rs.git
+    - cd redis-rs
+    - make test || true
+    - pkill keydb-server || true
+    - pkill stunnel || true
+    - cd ..
+    - git clone https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.eqalpha.com/keydb-dev/jedis.git
+    - cd jedis
+    - make test || true
+    - pkill keydb-server || true
+    - pkill stunnel || true
+    - cd ..
+    - git clone https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.eqalpha.com/keydb-dev/node-redis.git
+    - cd node-redis
+    - npm install
+    - npm run test || true
+    - pkill keydb-server || true
+    - pkill stunnel || true
+    - cd ..
+    - geninfo -o KeyDB.info --no-external .
+    - genhtml --legend -o lcov-html KeyDB.info
\ No newline at end of file

From 0b220bddc7ab4f08cd412616ab85b96f3f633966 Mon Sep 17 00:00:00 2001
From: John Sully
Date: Sun, 18 Jul 2021 20:28:42 +0000
Subject: [PATCH 2/9] Do not update batch variables when not in a batch

Former-commit-id: ad1e0286cf9b2d9de33c65e8e798a05ead3f7d5a
---
 src/replication.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/replication.cpp b/src/replication.cpp
index b9465680e..7eabeef6b 100644
--- a/src/replication.cpp
+++ b/src/replication.cpp
@@ -254,9 +254,11 @@ void resizeReplicationBacklog(long long newsize) {
         zfree(g_pserver->repl_backlog);
         g_pserver->repl_backlog = backlog;
         g_pserver->repl_backlog_idx = g_pserver->repl_backlog_histlen;
-        g_pserver->repl_batch_idxStart -= earliest_idx;
-        if (g_pserver->repl_batch_idxStart < 0)
-            g_pserver->repl_batch_idxStart += g_pserver->repl_backlog_size;
+        if (g_pserver->repl_batch_idxStart >= 0) {
+            g_pserver->repl_batch_idxStart -= earliest_idx;
+            if (g_pserver->repl_batch_idxStart < 0)
+                g_pserver->repl_batch_idxStart += g_pserver->repl_backlog_size;
+        }
         g_pserver->repl_backlog_start = earliest_off;
     } else {
         zfree(g_pserver->repl_backlog);

From 5f72ce931752c9aa674caa214c092116285246d3 Mon Sep 17 00:00:00 2001
From: John Sully
Date: Sun, 18 Jul 2021 20:45:32 +0000
Subject: [PATCH 3/9] Return the ring buffer to its original size if we temporarily resized it

Former-commit-id: a12ce4a0d105bf7d6ccff95f7dc0044c4676b0a7
---
 src/config.cpp      |  1 +
 src/replication.cpp | 16 ++++++++++++++++
 src/server.cpp      |  2 ++
 src/server.h        |  3 +++
 4 files changed, 22 insertions(+)

diff --git a/src/config.cpp b/src/config.cpp
index a10cdbe12..6fc957485 100644
--- a/src/config.cpp
+++ b/src/config.cpp
@@ -2470,6 +2470,7 @@ static int updateReplBacklogSize(long long val, long long prev, const char **err
      * being able to tell when the size changes, so restore prev before calling it. */
     UNUSED(err);
     g_pserver->repl_backlog_size = prev;
+    g_pserver->repl_backlog_config_size = val;
     resizeReplicationBacklog(val);
     return 1;
 }
diff --git a/src/replication.cpp b/src/replication.cpp
index 7eabeef6b..c3b902db4 100644
--- a/src/replication.cpp
+++ b/src/replication.cpp
@@ -4321,6 +4321,8 @@ void replicationCron(void) {

     replicationStartPendingFork();

+    trimReplicationBacklog();
+
     /* Remove the RDB file used for replication if Redis is not running
      * with any persistence. */
     removeRDBUsedToSyncReplicas();
@@ -5066,3 +5068,17 @@ void updateFailoverStatus(void) {
             g_pserver->target_replica_port);
     }
 }
+
+// If we automatically grew the backlog we need to trim it back to
+// the config setting when possible
+void trimReplicationBacklog() {
+    serverAssert(GlobalLocksAcquired());
+    serverAssert(g_pserver->repl_batch_offStart < 0); // we shouldn't be in a batch
+    if (g_pserver->repl_backlog_size <= g_pserver->repl_backlog_config_size)
+        return; // We're already a good size
+    if (g_pserver->repl_lowest_off > 0 && (g_pserver->master_repl_offset - g_pserver->repl_lowest_off + 1) > g_pserver->repl_backlog_config_size)
+        return; // There is untransmitted data we can't truncate
+
+    serverLog(LL_NOTICE, "Reclaiming %lld replication backlog bytes", g_pserver->repl_backlog_size - g_pserver->repl_backlog_config_size);
+    resizeReplicationBacklog(g_pserver->repl_backlog_config_size);
+}
\ No newline at end of file
diff --git a/src/server.cpp b/src/server.cpp
index 3d3b07172..183440f3b 100644
--- a/src/server.cpp
+++ b/src/server.cpp
@@ -7084,6 +7084,8 @@ static void validateConfiguration()
         serverLog(LL_WARNING, "\tKeyDB will now exit. Please update your configuration file.");
         exit(EXIT_FAILURE);
     }
+
+    g_pserver->repl_backlog_config_size = g_pserver->repl_backlog_size; // this is normally set in the update logic, but not on initial config
 }

 int iAmMaster(void) {
diff --git a/src/server.h b/src/server.h
index 32040fb72..5178e86ba 100644
--- a/src/server.h
+++ b/src/server.h
@@ -2358,6 +2358,7 @@ struct redisServer {
     int repl_ping_slave_period;     /* Master pings the replica every N seconds */
     char *repl_backlog;             /* Replication backlog for partial syncs */
     long long repl_backlog_size;    /* Backlog circular buffer size */
+    long long repl_backlog_config_size; /* The repl backlog may grow but we want to know what the user set it to */
     long long repl_backlog_histlen; /* Backlog actual data length */
     long long repl_backlog_idx;     /* Backlog circular buffer current offset,
                                        that is the next byte will'll write to.*/
@@ -3024,6 +3025,8 @@ void clearFailoverState(void);
 void updateFailoverStatus(void);
 void abortFailover(redisMaster *mi, const char *err);
 const char *getFailoverStateString();
+int canFeedReplicaReplBuffer(client *replica);
+void trimReplicationBacklog();

 /* Generic persistence functions */
 void startLoadingFile(FILE* fp, const char * filename, int rdbflags);
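
[Series note on patches 2/9 and 3/9] The backlog ring buffer can now grow past the configured repl-backlog-size under write pressure, so patch 3 records the user's setting in repl_backlog_config_size and shrinks back from replicationCron() once no replica still needs the extra history. Below is a minimal standalone sketch of the trim decision; ReplState and its fields are simplified stand-ins for the g_pserver fields, not KeyDB's actual types.

    #include <cstdio>

    // Simplified stand-ins for the g_pserver fields consulted by trimReplicationBacklog().
    struct ReplState {
        long long backlog_size;   // current ring buffer size (may have grown)
        long long config_size;    // size the user configured (repl_backlog_config_size)
        long long master_off;     // master_repl_offset
        long long lowest_off;     // lowest offset a replica still needs, -1 if none
    };

    // Returns true when the backlog can safely shrink back to the configured size:
    // it must actually be oversized, and no replica may still need a span of
    // history larger than the configured size can hold.
    bool canTrim(const ReplState &st) {
        if (st.backlog_size <= st.config_size)
            return false; // already at or below the configured size
        if (st.lowest_off > 0 && (st.master_off - st.lowest_off + 1) > st.config_size)
            return false; // untransmitted data would be truncated
        return true;
    }

    int main() {
        // Backlog grew to 4MB, configured at 1MB; replicas lag by only ~100KB.
        ReplState grown{4 * 1024 * 1024, 1 * 1024 * 1024, 5'000'000, 4'900'000};
        std::printf("trim allowed: %s\n", canTrim(grown) ? "yes" : "no"); // yes
    }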
From 4000334b1f7589645b2f816cbc948b662fe1a302 Mon Sep 17 00:00:00 2001
From: John Sully
Date: Sun, 18 Jul 2021 20:48:08 +0000
Subject: [PATCH 4/9] Do not resize replica buffer past the max client limit

Former-commit-id: ba116500ca4fd53e4e40f04fc33981e60bb21ab7
---
 src/replication.cpp | 51 ++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 44 insertions(+), 7 deletions(-)

diff --git a/src/replication.cpp b/src/replication.cpp
index c3b902db4..12142f9a5 100644
--- a/src/replication.cpp
+++ b/src/replication.cpp
@@ -303,19 +303,56 @@ void feedReplicationBacklog(const void *ptr, size_t len) {
         if (lower_bound == -1)
             lower_bound = g_pserver->repl_batch_offStart;
         long long minimumsize = g_pserver->master_repl_offset + len - lower_bound + 1;
+
         if (minimumsize > g_pserver->repl_backlog_size) {
-            flushReplBacklogToClients();
-            lower_bound = g_pserver->repl_lowest_off.load(std::memory_order_seq_cst);
-            if (lower_bound == -1)
-                lower_bound = g_pserver->repl_batch_offStart;
+            listIter li;
+            listNode *ln;
+            listRewind(g_pserver->slaves, &li);
+            long long maxClientBuffer = (long long)cserver.client_obuf_limits[CLIENT_TYPE_SLAVE].hard_limit_bytes;
+            if (maxClientBuffer <= 0)
+                maxClientBuffer = LLONG_MAX;    // infinite essentially
+            long long min_offset = LLONG_MAX;
+            int listening_replicas = 0;
+            while ((ln = listNext(&li))) {
+                client *replica = (client*)listNodeValue(ln);
+                if (!canFeedReplicaReplBuffer(replica)) continue;
+                if (replica->flags & CLIENT_CLOSE_ASAP) continue;

-            minimumsize = g_pserver->master_repl_offset + len - lower_bound + 1;
+                std::unique_lock<fastlock> ul(replica->lock);

-            if (minimumsize > g_pserver->repl_backlog_size) {
+                // Would this client overflow? If so close it
+                long long neededBuffer = g_pserver->master_repl_offset + len - replica->repl_curr_off + 1;
+                if (neededBuffer > maxClientBuffer) {
+
+                    sds clientInfo = catClientInfoString(sdsempty(),replica);
+                    freeClientAsync(replica);
+                    serverLog(LL_WARNING,"Client %s scheduled to be closed ASAP due to exceeding output buffer hard limit.", clientInfo);
+                    sdsfree(clientInfo);
+                    continue;
+                }
+                min_offset = std::min(min_offset, replica->repl_curr_off);
+                ++listening_replicas;
+            }
+
+            if (min_offset == LLONG_MAX) {
+                min_offset = g_pserver->repl_batch_offStart;
+                g_pserver->repl_lowest_off = -1;
+            } else {
+                g_pserver->repl_lowest_off = min_offset;
+            }
+
+            minimumsize = g_pserver->master_repl_offset + len - min_offset + 1;
+            serverAssert(listening_replicas == 0 || minimumsize <= maxClientBuffer);
+
+            if (minimumsize > g_pserver->repl_backlog_size && listening_replicas) {
                 // This is an emergency overflow, we better resize to fit
                 long long newsize = std::max(g_pserver->repl_backlog_size*2, minimumsize);
-                serverLog(LL_WARNING, "Replication backlog is too small, resizing to: %lld", newsize);
+                serverLog(LL_WARNING, "Replication backlog is too small, resizing to: %lld bytes", newsize);
                 resizeReplicationBacklog(newsize);
+            } else if (!listening_replicas) {
+                // We need to update a few variables or later asserts will notice we dropped data
+                g_pserver->repl_batch_offStart = g_pserver->master_repl_offset + len;
+                g_pserver->repl_lowest_off = -1;
             }
         }
     }
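
[Series note on patch 4/9] Instead of flushing and retrying, the overflow path now walks the replica list: any replica whose unsent span would exceed the slave output-buffer hard limit is scheduled for close, and the backlog grows only when at least one replica is still listening. A simplified, self-contained model of that scan follows; Replica and the field names here are illustrative stand-ins, not KeyDB's real client structure.

    #include <algorithm>
    #include <climits>
    #include <cstdio>
    #include <vector>

    // Each replica tracks the oldest backlog offset it still needs
    // (repl_curr_off in the real code).
    struct Replica {
        long long curr_off;
        bool closing = false;
    };

    // Scan replicas before growing the backlog: mark any replica whose required
    // buffer would exceed the hard limit for closing, and track the lowest
    // offset a surviving replica still needs.
    long long scanReplicas(std::vector<Replica> &replicas, long long master_off,
                           long long write_len, long long max_client_buffer,
                           int &listening) {
        long long min_offset = LLONG_MAX;
        listening = 0;
        for (auto &r : replicas) {
            long long needed = master_off + write_len - r.curr_off + 1;
            if (needed > max_client_buffer) {
                r.closing = true; // freeClientAsync() in the real code
                continue;
            }
            min_offset = std::min(min_offset, r.curr_off);
            ++listening;
        }
        return min_offset;
    }

    int main() {
        std::vector<Replica> replicas{{100}, {90'000'000}};
        int listening = 0;
        long long min_off = scanReplicas(replicas, 100'000'000, 512,
                                         64 * 1024 * 1024, listening);
        // The replica stuck at offset 100 is ~100MB behind and gets closed;
        // only the second replica now constrains the backlog size.
        std::printf("listening=%d min_offset=%lld closing[0]=%d\n",
                    listening, min_off, replicas[0].closing);
    }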
From 614860ce3c97e46466fd635e75ed260e13e4d5ac Mon Sep 17 00:00:00 2001
From: John Sully
Date: Mon, 19 Jul 2021 15:10:48 +0000
Subject: [PATCH 5/9] StorageCache dtor leaks

Former-commit-id: 0262c4dc76a320141b8a4454df2f6baab4f74ab3
---
 src/StorageCache.cpp | 6 ++++++
 src/StorageCache.h   | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/src/StorageCache.cpp b/src/StorageCache.cpp
index 29908c7f5..6e1d4af9a 100644
--- a/src/StorageCache.cpp
+++ b/src/StorageCache.cpp
@@ -25,6 +25,12 @@ StorageCache::StorageCache(IStorage *storage, bool fCache)
         m_pdict = dictCreate(&dbStorageCacheType, nullptr);
 }

+StorageCache::~StorageCache()
+{
+    if (m_pdict != nullptr)
+        dictRelease(m_pdict);
+}
+
 void StorageCache::clear()
 {
     std::unique_lock<fastlock> ul(m_lock);
diff --git a/src/StorageCache.h b/src/StorageCache.h
index ed868e74b..9f92f75c0 100644
--- a/src/StorageCache.h
+++ b/src/StorageCache.h
@@ -29,6 +29,8 @@ class StorageCache
     }

 public:
+    ~StorageCache();
+
     static StorageCache *create(IStorageFactory *pfactory, int db, IStorageFactory::key_load_iterator fn, void *privdata) {
         StorageCache *cache = new StorageCache(nullptr, pfactory->FSlow() /*fCache*/);
         load_iter_data data = {cache, fn, privdata};

From d3793efb337af7d5d4478b040804a013620b84af Mon Sep 17 00:00:00 2001
From: John Sully
Date: Mon, 19 Jul 2021 15:11:33 +0000
Subject: [PATCH 6/9] Info command should show how many keys are cached in RAM vs storage provider

Former-commit-id: 08597bee69bc16ca7c3d5ff31020472774c6eec9
---
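 src/db.cpp     | 6 +++---
 src/server.cpp | 7 ++++---
 src/server.h   | 2 +-
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/db.cpp b/src/db.cpp
index 947f03427..98b70574d 100644
--- a/src/db.cpp
+++ b/src/db.cpp
@@ -2963,13 +2963,13 @@ dict_iter redisDbPersistentData::random()
     return dict_iter(m_pdict, de);
 }

-size_t redisDbPersistentData::size() const
+size_t redisDbPersistentData::size(bool fCachedOnly) const
 {
-    if (m_spstorage != nullptr && !m_fAllChanged)
+    if (m_spstorage != nullptr && !m_fAllChanged && !fCachedOnly)
         return m_spstorage->count() + m_cnewKeysPending;

     return dictSize(m_pdict)
-        + (m_pdbSnapshot ? (m_pdbSnapshot->size() - dictSize(m_pdictTombstone)) : 0);
+        + (m_pdbSnapshot ? (m_pdbSnapshot->size(fCachedOnly) - dictSize(m_pdictTombstone)) : 0);
 }

 bool redisDbPersistentData::removeCachedValue(const char *key)
diff --git a/src/server.cpp b/src/server.cpp
index 183440f3b..ed153d599 100644
--- a/src/server.cpp
+++ b/src/server.cpp
@@ -6096,10 +6096,11 @@ sds genRedisInfoString(const char *section) {
         if (sections++) info = sdscat(info,"\r\n");
         info = sdscatprintf(info, "# Keyspace\r\n");
         for (j = 0; j < cserver.dbnum; j++) {
-            long long keys, vkeys;
+            long long keys, vkeys, cachedKeys;
             keys = g_pserver->db[j]->size();
             vkeys = g_pserver->db[j]->expireSize();
+            cachedKeys = g_pserver->db[j]->size(true /* fCachedOnly */);

             // Adjust TTL by the current time
             mstime_t mstime;
@@ -6111,8 +6112,8 @@
             if (keys || vkeys) {
                 info = sdscatprintf(info,
-                    "db%d:keys=%lld,expires=%lld,avg_ttl=%lld\r\n",
-                    j, keys, vkeys, static_cast<long long>(g_pserver->db[j]->avg_ttl));
+                    "db%d:keys=%lld,expires=%lld,avg_ttl=%lld,cached_keys=%lld\r\n",
+                    j, keys, vkeys, static_cast<long long>(g_pserver->db[j]->avg_ttl), cachedKeys);
             }
         }
     }
diff --git a/src/server.h b/src/server.h
index 5178e86ba..edbcb4c8a 100644
--- a/src/server.h
+++ b/src/server.h
@@ -1095,7 +1095,7 @@ public:
     redisDbPersistentData(redisDbPersistentData &&) = default;

     size_t slots() const { return dictSlots(m_pdict); }
-    size_t size() const;
+    size_t size(bool fCachedOnly = false) const;
     void expand(uint64_t slots) { dictExpand(m_pdict, slots); }

     void trackkey(robj_roptr o, bool fUpdate)

[Series note on patches 5/9 and 6/9] With a storage provider attached, size() answers from the provider's key count, so INFO could not previously tell how many keys were actually resident in RAM. The fCachedOnly flag added in patch 6 restricts the count to the RAM dict, surfaced in INFO as cached_keys. A toy model of the two counting modes is below; it deliberately ignores snapshots, tombstones, and pending new keys, which the real size() also folds in.

    #include <cstddef>
    #include <cstdio>

    // Simplified stand-ins for the fields size() consults: a RAM dict plus an
    // optional storage provider holding the full key set.
    struct Db {
        size_t dict_size;       // keys currently materialized in RAM
        size_t storage_count;   // keys known to the storage provider (superset)
        bool has_storage;       // m_spstorage != nullptr in the real code

        // With a storage provider, the authoritative total comes from the
        // provider; fCachedOnly restricts the answer to what is RAM-resident.
        size_t size(bool fCachedOnly = false) const {
            if (has_storage && !fCachedOnly)
                return storage_count;
            return dict_size;
        }
    };

    int main() {
        Db db{1'000, 50'000, true};
        // INFO keyspace would report keys=50000,cached_keys=1000 for this db.
        std::printf("keys=%zu cached_keys=%zu\n", db.size(), db.size(true));
    }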
From 88f5bf1d90f47d58028712af37bc046173ab9f70 Mon Sep 17 00:00:00 2001
From: John Sully
Date: Mon, 19 Jul 2021 16:50:48 +0000
Subject: [PATCH 7/9] We need to periodically flush the GC or we'll end up blocking with a huge backlog at the end of load

Former-commit-id: 29c0bf79ad1a810e808790de2f7db24f3cc603e8
---
 src/rdb.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/rdb.cpp b/src/rdb.cpp
index 4975918a8..957c3fa88 100644
--- a/src/rdb.cpp
+++ b/src/rdb.cpp
@@ -2884,6 +2884,8 @@ int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) {
            do this every 16 keys to limit the perf impact */
         if (g_pserver->m_pstorageFactory && (ckeysLoaded % 128) == 0)
         {
+            g_pserver->garbageCollector.endEpoch(serverTL->gcEpoch);
+            serverTL->gcEpoch = g_pserver->garbageCollector.startEpoch();
             bool fHighMemory = (getMaxmemoryState(NULL,NULL,NULL,NULL) != C_OK);
             if (fHighMemory || (ckeysLoaded % (1024)) == 0)
             {

From 345ec75a36ef5e9d3ddcc7c23d3fcaed1e7af535 Mon Sep 17 00:00:00 2001
From: John Sully
Date: Mon, 19 Jul 2021 18:01:39 +0000
Subject: [PATCH 8/9] We need to free in order since the first big async free is likely the largest, so don't set the hipri bit

Former-commit-id: 76a9cefa94e0f446e12a690909cbda15d03ca211
---
 src/db.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/db.cpp b/src/db.cpp
index 98b70574d..56556ff7f 100644
--- a/src/db.cpp
+++ b/src/db.cpp
@@ -3026,7 +3026,7 @@ void redisDbPersistentData::removeAllCachedValues()
         dictExpand(m_pdict, dictSize(dT)/2, false); // Make room for about half so we don't excessively rehash
         g_pserver->asyncworkqueue->AddWorkFunction([dT]{
             dictRelease(dT);
-        }, true);
+        }, false);
     } else {
         dictEmpty(m_pdict, nullptr);
     }
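
[Series note on patch 7/9] The garbage collector defers frees until the epoch that queued them ends, so holding a single epoch open across an entire RDB load accumulates everything until the end. Patch 7 cycles the epoch on the same 128-key cadence as the existing memory check. The toy collector below illustrates the pattern only; it is an assumption-level sketch, not the real GarbageCollector API.

    #include <cstdio>
    #include <vector>

    // Toy epoch-based collector: garbage queued during an epoch can only be
    // freed once that epoch ends.
    struct GarbageCollector {
        std::vector<int> pending;   // stand-in for deferred frees
        int epoch = 0;

        int startEpoch() { return ++epoch; }
        void endEpoch(int /*epochId*/) {
            std::printf("epoch %d: freeing %zu deferred objects\n",
                        epoch, pending.size());
            pending.clear();
        }
    };

    int main() {
        GarbageCollector gc;
        int e = gc.startEpoch();
        for (int keysLoaded = 1; keysLoaded <= 1024; ++keysLoaded) {
            gc.pending.push_back(keysLoaded);   // loading queues garbage
            if (keysLoaded % 128 == 0) {        // same cadence as the rdb.cpp hunk
                gc.endEpoch(e);                 // release what this epoch queued
                e = gc.startEpoch();            // open a fresh epoch, keep loading
            }
        }
        gc.endEpoch(e);
    }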
From bacaa204cfc6db6f9e69249b6f5063cc25d9e368 Mon Sep 17 00:00:00 2001
From: John Sully
Date: Mon, 19 Jul 2021 18:17:54 +0000
Subject: [PATCH 9/9] Disable async rehash during load as it interferes with eviction

Former-commit-id: 54b4f39e9d634bf53b04cd94433b051b14323bc6
---
 src/db.cpp     | 2 +-
 src/server.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/db.cpp b/src/db.cpp
index 56556ff7f..adcf4456f 100644
--- a/src/db.cpp
+++ b/src/db.cpp
@@ -3020,7 +3020,7 @@ void redisDbPersistentData::removeAllCachedValues()
         trackChanges(false);
     }

-    if (m_pdict->pauserehash == 0) {
+    if (m_pdict->pauserehash == 0 && m_pdict->refcount == 1) {
         dict *dT = m_pdict;
         m_pdict = dictCreate(&dbDictType, this);
         dictExpand(m_pdict, dictSize(dT)/2, false); // Make room for about half so we don't excessively rehash
diff --git a/src/server.cpp b/src/server.cpp
index ed153d599..d32bec710 100644
--- a/src/server.cpp
+++ b/src/server.cpp
@@ -2124,7 +2124,7 @@ void databasesCron(bool fMainThread) {
             ::dict *dict = g_pserver->db[rehash_db]->dictUnsafeKeyOnly();
             /* Are we async rehashing? And if so is it time to re-calibrate? */
             /* The recalibration limit is a prime number to ensure balancing across threads */
-            if (rehashes_per_ms > 0 && async_rehashes < 131 && !cserver.active_defrag_enabled && cserver.cthreads > 1) {
+            if (rehashes_per_ms > 0 && async_rehashes < 131 && !cserver.active_defrag_enabled && cserver.cthreads > 1 && dictSize(dict) > 2048 && dictIsRehashing(dict) && !g_pserver->loading) {
                 serverTL->rehashCtl = dictRehashAsyncStart(dict, rehashes_per_ms);
                 ++async_rehashes;
             }
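
[Series note on patch 9/9] The async-rehash handoff in databasesCron() now also requires a reasonably large dict, a rehash already in progress, and that the server not be loading. A standalone sketch of the tightened gate, with plain stand-ins for the server state:

    #include <cstdio>

    // Simplified stand-ins for the state databasesCron() consults when deciding
    // whether to hand a rehash off to a background thread.
    struct RehashState {
        double rehashes_per_ms;   // measured throughput, 0 until calibrated
        int async_rehashes;       // recalibration counter (capped at a prime, 131)
        bool defrag_enabled;      // active defrag and async rehash don't mix
        int cthreads;             // server thread count
        size_t dict_size;         // entries in the candidate dict
        bool rehashing;           // dict already has a rehash in progress
        bool loading;             // RDB/AOF load in progress
    };

    // Patch 9 adds the last three checks: tiny dicts aren't worth a thread
    // handoff, and rehashing during load interferes with eviction.
    bool shouldAsyncRehash(const RehashState &s) {
        return s.rehashes_per_ms > 0 && s.async_rehashes < 131 &&
               !s.defrag_enabled && s.cthreads > 1 &&
               s.dict_size > 2048 && s.rehashing && !s.loading;
    }

    int main() {
        RehashState duringLoad{5.0, 0, false, 4, 100'000, true, true};
        RehashState afterLoad = duringLoad;
        afterLoad.loading = false;
        std::printf("during load: %d, after load: %d\n",
                    shouldAsyncRehash(duringLoad), shouldAsyncRehash(afterLoad));
    }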