Merge branch 'keydbpro' into PRO_RELEASE_6

Former-commit-id: ed98be0ba81ffdc501847ea0d2486f5f01391319
This commit is contained in:
John Sully 2020-08-17 02:26:51 +00:00
commit dcf607622c
16 changed files with 454 additions and 86 deletions

View File

@ -1,6 +1,5 @@
![Current Release](https://img.shields.io/github/release/JohnSully/KeyDB.svg)
![CI](https://github.com/JohnSully/KeyDB/workflows/CI/badge.svg?branch=unstable)
[![Join the chat at https://gitter.im/KeyDB/community](https://badges.gitter.im/KeyDB/community.svg)](https://gitter.im/KeyDB/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![StackShare](http://img.shields.io/badge/tech-stack-0690fa.svg?style=flat)](https://stackshare.io/eq-alpha-technology-inc/eq-alpha-technology-inc)
##### New! Want to extend KeyDB with Javascript? Try [ModJS](https://github.com/JohnSully/ModJS)

5
deps/Makefile vendored
View File

@ -1,6 +1,7 @@
# Redis dependency Makefile
uname_S:= $(shell sh -c 'uname -s 2>/dev/null || echo not')
uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not')
CCCOLOR="\033[34m"
LINKCOLOR="\033[34;1m"
@ -94,6 +95,10 @@ jemalloc: .make-prerequisites
rocksdb: .make-prerequisites
@printf '%b %b\n' $(MAKECOLOR)MAKE$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR)
ifeq ($(uname_M),x86_64)
cd rocksdb && PORTABLE=1 USE_SSE=1 FORCE_SSE42=1 $(MAKE) static_lib
else
cd rocksdb && PORTABLE=1 $(MAKE) static_lib
endif
.PHONY: rocksdb

View File

@ -45,7 +45,7 @@ void AsyncWorkQueue::WorkerThreadMain()
ProcessPendingAsyncWrites();
aeReleaseLock();
g_pserver->garbageCollector.endEpoch(serverTL->gcEpoch);
serverTL->gcEpoch = 0;
serverTL->gcEpoch.reset();
}
listRelease(vars.clients_pending_asyncwrite);

View File

@ -47,6 +47,11 @@ endif
USEASM?=true
ifeq ($(NOMVCC),)
CFLAGS+= -DENABLE_MVCC
CXXFLAGS+= -DENABLE_MVCC
endif
ifneq ($(SANITIZE),)
CFLAGS+= -fsanitize=$(SANITIZE) -DSANITIZE
CXXFLAGS+= -fsanitize=$(SANITIZE) -DSANITIZE

View File

@ -261,9 +261,11 @@ int aeCreateRemoteFileEvent(aeEventLoop *eventLoop, int fd, int mask,
if (fSynchronous)
{
{
std::unique_lock<std::mutex> ulock(cmd.pctl->mutexcv, std::adopt_lock);
cmd.pctl->cv.wait(ulock);
ret = cmd.pctl->rval;
}
delete cmd.pctl;
}
@ -315,9 +317,11 @@ int aePostFunction(aeEventLoop *eventLoop, std::function<void()> fn, bool fSynch
int ret = AE_OK;
if (fSynchronous)
{
{
std::unique_lock<std::mutex> ulock(cmd.pctl->mutexcv, std::adopt_lock);
cmd.pctl->cv.wait(ulock);
ret = cmd.pctl->rval;
}
delete cmd.pctl;
}
return ret;

View File

@ -4920,9 +4920,11 @@ void createDumpPayload(rio *payload, robj_roptr o, robj *key) {
rioInitWithBuffer(payload,sdsempty());
serverAssert(rdbSaveObjectType(payload,o));
serverAssert(rdbSaveObject(payload,o,key));
#ifdef ENABLE_MVCC
char szT[32];
snprintf(szT, 32, "%" PRIu64, o->mvcc_tstamp);
serverAssert(rdbSaveAuxFieldStrStr(payload,"mvcc-tstamp", szT) != -1);
#endif
/* Write the footer, this is how it looks like:
* ----------------+---------------------+---------------+
@ -5064,9 +5066,11 @@ void restoreCommand(client *c) {
decrRefCount(auxkey);
goto eoferr;
}
#ifdef ENABLE_MVCC
if (strcasecmp(szFromObj(auxkey), "mvcc-tstamp") == 0) {
obj->mvcc_tstamp = strtoull(szFromObj(auxval), nullptr, 10);
}
#endif
decrRefCount(auxkey);
decrRefCount(auxval);
}

View File

@ -91,7 +91,9 @@ static robj* lookupKey(redisDb *db, robj *key, int flags) {
robj *val = itr.val();
lookupKeyUpdateObj(val, flags);
if (flags & LOOKUP_UPDATEMVCC) {
#ifdef ENABLE_MVCC
val->mvcc_tstamp = getMvccTstamp();
#endif
db->trackkey(key, true /* fUpdate */);
}
return val;
@ -218,8 +220,10 @@ robj *lookupKeyWriteOrReply(client *c, robj *key, robj *reply) {
bool dbAddCore(redisDb *db, robj *key, robj *val, bool fAssumeNew = false) {
serverAssert(!val->FExpires());
sds copy = sdsdupshared(szFromObj(key));
#ifdef ENABLE_MVCC
if (g_pserver->fActiveReplica)
val->mvcc_tstamp = key->mvcc_tstamp = getMvccTstamp();
#endif
bool fInserted = db->insert(copy, val, fAssumeNew);
@ -270,7 +274,9 @@ void redisDb::dbOverwriteCore(redisDb::iter itr, robj *key, robj *val, bool fUpd
if (fUpdateMvcc) {
if (val->getrefcount(std::memory_order_relaxed) == OBJ_SHARED_REFCOUNT)
val = dupStringObject(val);
#ifdef ENABLE_MVCC
val->mvcc_tstamp = getMvccTstamp();
#endif
}
if (g_pserver->lazyfree_lazy_server_del)
@ -303,13 +309,15 @@ int dbMerge(redisDb *db, robj *key, robj *val, int fReplace)
if (itr == nullptr)
return (dbAddCore(db, key, val) == true);
#ifdef ENABLE_MVCC
robj *old = itr.val();
if (old->mvcc_tstamp <= val->mvcc_tstamp)
{
db->dbOverwriteCore(itr, key, val, false, true);
return true;
}
#endif
return false;
}
else
@ -330,6 +338,7 @@ int dbMerge(redisDb *db, robj *key, robj *val, int fReplace)
* The client 'c' argument may be set to NULL if the operation is performed
* in a context where there is no clear client performing the operation. */
void genericSetKey(client *c, redisDb *db, robj *key, robj *val, int keepttl, int signal) {
db->prepOverwriteForSnapshot(szFromObj(key));
if (!dbAddCore(db, key, val)) {
dbOverwrite(db, key, val, !keepttl);
}
@ -421,8 +430,9 @@ bool redisDbPersistentData::syncDelete(robj *key)
auto itr = m_pdbSnapshot->find_cached_threadsafe(szFromObj(key));
if (itr != nullptr)
{
sds keyTombstone = sdsdup(szFromObj(key));
if (dictAdd(m_pdictTombstone, keyTombstone, nullptr) != DICT_OK)
sds keyTombstone = sdsdupshared(itr.key());
uint64_t hash = dictGetHash(m_pdict, keyTombstone);
if (dictAdd(m_pdictTombstone, keyTombstone, (void*)hash) != DICT_OK)
sdsfree(keyTombstone);
}
}
@ -2290,7 +2300,7 @@ void redisDbPersistentData::initialize()
{
m_pdbSnapshot = nullptr;
m_pdict = dictCreate(&dbDictType,this);
m_pdictTombstone = dictCreate(&dbDictType,this);
m_pdictTombstone = dictCreate(&dbTombstoneDictType,this);
m_setexpire = new(MALLOC_LOCAL) expireset();
m_fAllChanged = 0;
m_fTrackingChanges = 0;
@ -2349,6 +2359,24 @@ bool redisDbPersistentData::insert(char *key, robj *o, bool fAssumeNew)
return (res == DICT_OK);
}
// This is a performance tool to prevent us from copying over an object we're going to overwrite anyway
void redisDbPersistentData::prepOverwriteForSnapshot(char *key)
{
if (g_pserver->maxmemory_policy & MAXMEMORY_FLAG_LFU)
return;
if (m_pdbSnapshot != nullptr)
{
auto itr = m_pdbSnapshot->find_cached_threadsafe(key);
if (itr.key() != nullptr)
{
sds keyNew = sdsdupshared(itr.key());
if (dictAdd(m_pdictTombstone, keyNew, (void*)dictHashKey(m_pdict, key)) != DICT_OK)
sdsfree(keyNew);
}
}
}
void redisDbPersistentData::tryResize()
{
if (htNeedsResize(m_pdict))
@ -2470,15 +2498,20 @@ void redisDbPersistentData::ensure(const char *sdsKey, dictEntry **pde)
sdsfree(strT);
dictAdd(m_pdict, keyNew, objNew);
serverAssert(objNew->getrefcount(std::memory_order_relaxed) == 1);
#ifdef ENABLE_MVCC
serverAssert(objNew->mvcc_tstamp == itr.val()->mvcc_tstamp);
#endif
}
}
else
{
dictAdd(m_pdict, keyNew, nullptr);
}
*pde = dictFind(m_pdict, sdsKey);
dictAdd(m_pdictTombstone, sdsdupshared(itr.key()), nullptr);
uint64_t hash = dictGetHash(m_pdict, sdsKey);
dictEntry **deT;
dictht *ht;
*pde = dictFindWithPrev(m_pdict, sdsKey, hash, &deT, &ht);
dictAdd(m_pdictTombstone, sdsdupshared(itr.key()), (void*)hash);
}
}

View File

@ -179,6 +179,9 @@ int dictExpand(dict *d, unsigned long size)
int dictMerge(dict *dst, dict *src)
{
#define MERGE_BLOCK_SIZE 4
dictEntry *rgdeT[MERGE_BLOCK_SIZE];
assert(dst != src);
if (dictSize(src) == 0)
return DICT_OK;
@ -197,6 +200,8 @@ int dictMerge(dict *dst, dict *src)
std::swap(dst->iterators, src->iterators);
}
src->rehashidx = -1;
if (!dictIsRehashing(dst) && !dictIsRehashing(src))
{
if (dst->ht[0].size >= src->ht[0].size)
@ -210,6 +215,50 @@ int dictMerge(dict *dst, dict *src)
}
_dictReset(&src->ht[0]);
dst->rehashidx = 0;
assert(dictIsRehashing(dst));
assert((dictSize(src)+dictSize(dst)) == expectedSize);
return DICT_OK;
}
if (!dictIsRehashing(src) && dictSize(src) > 0 &&
(src->ht[0].size == dst->ht[0].size || src->ht[0].size == dst->ht[1].size))
{
auto &htDst = (src->ht[0].size == dst->ht[0].size) ? dst->ht[0] : dst->ht[1];
assert(src->ht[0].size == htDst.size);
for (size_t ide = 0; ide < src->ht[0].size; ide += MERGE_BLOCK_SIZE)
{
if (src->ht[0].used == 0)
break;
for (int dde = 0; dde < MERGE_BLOCK_SIZE; ++dde) {
rgdeT[dde] = src->ht[0].table[ide + dde];
src->ht[0].table[ide + dde] = nullptr;
}
for (;;) {
bool fAnyFound = false;
for (int dde = 0; dde < MERGE_BLOCK_SIZE; ++dde) {
if (rgdeT[dde] == nullptr)
continue;
dictEntry *deNext = rgdeT[dde]->next;
rgdeT[dde]->next = htDst.table[ide+dde];
htDst.table[ide+dde] = rgdeT[dde];
rgdeT[dde] = deNext;
htDst.used++;
src->ht[0].used--;
fAnyFound = fAnyFound || (deNext != nullptr);
}
if (!fAnyFound)
break;
}
}
// If we copied to the base hash table of a rehashing dst, reset the rehash
if (dictIsRehashing(dst) && src->ht[0].size == dst->ht[0].size)
dst->rehashidx = 0;
assert(dictSize(src) == 0);
assert((dictSize(src)+dictSize(dst)) == expectedSize);
return DICT_OK;
}
@ -218,10 +267,34 @@ int dictMerge(dict *dst, dict *src)
auto &htDst = dictIsRehashing(dst) ? dst->ht[1] : dst->ht[0];
for (int iht = 0; iht < 2; ++iht)
{
for (size_t ide = 0; ide < src->ht[iht].size; ++ide)
for (size_t ide = 0; ide < src->ht[iht].size; ide += MERGE_BLOCK_SIZE)
{
if (src->ht[iht].used == 0)
break;
for (int dde = 0; dde < MERGE_BLOCK_SIZE; ++dde) {
rgdeT[dde] = src->ht[iht].table[ide + dde];
src->ht[iht].table[ide + dde] = nullptr;
}
for (;;) {
bool fAnyFound = false;
for (int dde = 0; dde < MERGE_BLOCK_SIZE; ++dde) {
if (rgdeT[dde] == nullptr)
continue;
uint64_t h = dictHashKey(dst, rgdeT[dde]->key) & htDst.sizemask;
dictEntry *deNext = rgdeT[dde]->next;
rgdeT[dde]->next = htDst.table[h];
htDst.table[h] = rgdeT[dde];
rgdeT[dde] = deNext;
htDst.used++;
src->ht[iht].used--;
fAnyFound = fAnyFound || (deNext != nullptr);
}
if (!fAnyFound)
break;
}
#if 0
dictEntry *de = src->ht[iht].table[ide];
src->ht[iht].table[ide] = nullptr;
while (de != nullptr)
@ -236,6 +309,7 @@ int dictMerge(dict *dst, dict *src)
de = deNext;
src->ht[iht].used--;
}
#endif
}
}
assert((dictSize(src)+dictSize(dst)) == expectedSize);
@ -326,7 +400,7 @@ int dictRehashMilliseconds(dict *d, int ms) {
static void _dictRehashStep(dict *d) {
unsigned long iterators;
__atomic_load(&d->iterators, &iterators, __ATOMIC_RELAXED);
if (iterators == 0) dictRehash(d,1);
if (iterators == 0) dictRehash(d,2);
}
/* Add an element to the target hash table */
@ -541,21 +615,20 @@ void dictRelease(dict *d)
zfree(d);
}
dictEntry *dictFindWithPrev(dict *d, const void *key, dictEntry ***dePrevPtr, dictht **pht)
dictEntry *dictFindWithPrev(dict *d, const void *key, uint64_t h, dictEntry ***dePrevPtr, dictht **pht, bool fShallowCompare)
{
dictEntry *he;
uint64_t h, idx, table;
uint64_t idx, table;
if (dictSize(d) == 0) return NULL; /* dict is empty */
if (dictIsRehashing(d)) _dictRehashStep(d);
h = dictHashKey(d, key);
for (table = 0; table <= 1; table++) {
*pht = d->ht + table;
idx = h & d->ht[table].sizemask;
he = d->ht[table].table[idx];
*dePrevPtr = &d->ht[table].table[idx];
while(he) {
if (key==he->key || dictCompareKeys(d, key, he->key)) {
if (key==he->key || (!fShallowCompare && dictCompareKeys(d, key, he->key))) {
return he;
}
*dePrevPtr = &he->next;
@ -570,7 +643,8 @@ dictEntry *dictFind(dict *d, const void *key)
{
dictEntry **deT;
dictht *ht;
return dictFindWithPrev(d, key, &deT, &ht);
uint64_t h = dictHashKey(d, key);
return dictFindWithPrev(d, key, h, &deT, &ht);
}
void *dictFetchValue(dict *d, const void *key) {
@ -1220,7 +1294,9 @@ void dictGetStats(char *buf, size_t bufsize, dict *d) {
void dictForceRehash(dict *d)
{
while (dictIsRehashing(d)) _dictRehashStep(d);
unsigned long iterators;
__atomic_load(&d->iterators, &iterators, __ATOMIC_RELAXED);
while (iterators == 0 && dictIsRehashing(d)) _dictRehashStep(d);
}
/* ------------------------------- Benchmark ---------------------------------*/

View File

@ -167,7 +167,7 @@ dictEntry *dictUnlink(dict *ht, const void *key);
void dictFreeUnlinkedEntry(dict *d, dictEntry *he);
void dictRelease(dict *d);
dictEntry * dictFind(dict *d, const void *key);
dictEntry * dictFindWithPrev(dict *d, const void *key, dictEntry ***dePrevPtr, dictht **ht);
dictEntry * dictFindWithPrev(dict *d, const void *key, uint64_t h, dictEntry ***dePrevPtr, dictht **ht, bool fShallowCompare = false);
void *dictFetchValue(dict *d, const void *key);
int dictResize(dict *d);
dictIterator *dictGetIterator(dict *d);

View File

@ -3,6 +3,12 @@
#include <assert.h>
#include <unordered_set>
// Base interface for anything that can be handed to the garbage collector
// for deferred (epoch-based) destruction; cleanup runs via the virtual dtor.
struct ICollectable
{
virtual ~ICollectable() {}
// NOTE(review): non-virtual and hard-coded to false here; snapshot classes
// declare a similarly named debug hook — confirm the shadowing is intended.
bool FWillFreeChildDebug() { return false; }
};
template<typename T>
class GarbageCollector
{

View File

@ -62,7 +62,10 @@ bool redisDbPersistentData::asyncDelete(robj *key) {
dictEntry *de = dictUnlink(m_pdict,ptrFromObj(key));
if (de) {
if (m_pdbSnapshot != nullptr && m_pdbSnapshot->find_cached_threadsafe(szFromObj(key)) != nullptr)
dictAdd(m_pdictTombstone, sdsdup((sds)dictGetKey(de)), nullptr);
{
uint64_t hash = dictGetHash(m_pdict, szFromObj(key));
dictAdd(m_pdictTombstone, sdsdup((sds)dictGetKey(de)), (void*)hash);
}
robj *val = (robj*)dictGetVal(de);
if (val->FExpires())

View File

@ -46,7 +46,9 @@ robj *createObject(int type, void *ptr) {
o->encoding = OBJ_ENCODING_RAW;
o->m_ptr = ptr;
o->setrefcount(1);
#ifdef ENABLE_MVCC
o->mvcc_tstamp = OBJ_MVCC_INVALID;
#endif
/* Set the LRU to the current lruclock (minutes resolution), or
* alternatively the LFU counter. */
@ -92,6 +94,7 @@ robj *createRawStringObject(const char *ptr, size_t len) {
* an object where the sds string is actually an unmodifiable string
* allocated in the same chunk as the object itself. */
robj *createEmbeddedStringObject(const char *ptr, size_t len) {
serverAssert(len <= UINT8_MAX);
size_t allocsize = sizeof(struct sdshdr8)+len+1;
if (allocsize < sizeof(void*))
allocsize = sizeof(void*);
@ -101,7 +104,9 @@ robj *createEmbeddedStringObject(const char *ptr, size_t len) {
o->type = OBJ_STRING;
o->encoding = OBJ_ENCODING_EMBSTR;
o->setrefcount(1);
#ifdef ENABLE_MVCC
o->mvcc_tstamp = OBJ_MVCC_INVALID;
#endif
if (g_pserver->maxmemory_policy & MAXMEMORY_FLAG_LFU) {
o->lru = (LFUGetTimeInMinutes()<<8) | LFU_INIT_VAL;
@ -129,7 +134,12 @@ robj *createEmbeddedStringObject(const char *ptr, size_t len) {
*
* The current limit of 52 is chosen so that the biggest string object
* we allocate as EMBSTR will still fit into the 64 byte arena of jemalloc. */
#ifdef ENABLE_MVCC
#define OBJ_ENCODING_EMBSTR_SIZE_LIMIT 48
#else
#define OBJ_ENCODING_EMBSTR_SIZE_LIMIT 56
#endif
static_assert((sizeof(redisObject)+OBJ_ENCODING_EMBSTR_SIZE_LIMIT-8) == 64, "Max EMBSTR obj should be 64 bytes total");
robj *createStringObject(const char *ptr, size_t len) {
if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT)
@ -1316,10 +1326,12 @@ NULL
* because we update the access time only
* when the key is read or overwritten. */
addReplyLongLong(c,LFUDecrAndReturn(o.unsafe_robjcast()));
#ifdef ENABLE_MVCC
} else if (!strcasecmp(szFromObj(c->argv[1]), "lastmodified") && c->argc == 3) {
if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.null[c->resp]))
== nullptr) return;
addReplyLongLong(c, (g_pserver->mstime - (o->mvcc_tstamp >> MVCC_MS_SHIFT)) / 1000);
#endif
} else {
addReplySubcommandSyntaxError(c);
}
@ -1579,9 +1591,11 @@ robj *deserializeStoredObjectCore(const void *data, size_t cb)
decrRefCount(auxkey);
goto eoferr;
}
#ifdef ENABLE_MVCC
if (strcasecmp(szFromObj(auxkey), "mvcc-tstamp") == 0) {
obj->mvcc_tstamp = strtoull(szFromObj(auxval), nullptr, 10);
}
#endif
decrRefCount(auxkey);
decrRefCount(auxval);
}

View File

@ -349,20 +349,24 @@ writeerr:
}
ssize_t rdbSaveLzfStringObject(rio *rdb, const unsigned char *s, size_t len) {
char rgbuf[2048];
size_t comprlen, outlen;
void *out;
void *out = rgbuf;
/* We require at least four bytes compression for this to be worth it */
if (len <= 4) return 0;
outlen = len-4;
if ((out = zmalloc(outlen+1, MALLOC_LOCAL)) == NULL) return 0;
if (outlen >= sizeof(rgbuf))
if ((out = zmalloc(outlen+1, MALLOC_LOCAL)) == NULL) return 0;
comprlen = lzf_compress(s, len, out, outlen);
if (comprlen == 0) {
zfree(out);
if (out != rgbuf)
zfree(out);
return 0;
}
ssize_t nwritten = rdbSaveLzfBlob(rdb, out, comprlen, len);
zfree(out);
if (out != rgbuf)
zfree(out);
return nwritten;
}
@ -1092,8 +1096,12 @@ int rdbSaveKeyValuePair(rio *rdb, robj_roptr key, robj_roptr val, const expireEn
}
char szT[32];
snprintf(szT, 32, "%" PRIu64, val->mvcc_tstamp);
if (rdbSaveAuxFieldStrStr(rdb,"mvcc-tstamp", szT) == -1) return -1;
#ifdef ENABLE_MVCC
if (g_pserver->fActiveReplica) {
snprintf(szT, 32, "%" PRIu64, val->mvcc_tstamp);
if (rdbSaveAuxFieldStrStr(rdb,"mvcc-tstamp", szT) == -1) return -1;
}
#endif
/* Save type, key, value */
if (rdbSaveObjectType(rdb,val) == -1) return -1;
@ -2131,7 +2139,9 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, uint64_t mvcc_tstamp) {
return NULL;
}
#ifdef ENABLE_MVCC
o->mvcc_tstamp = mvcc_tstamp;
#endif
serverAssert(!o->FExpires());
return o;
}
@ -2489,7 +2499,11 @@ int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) {
key = nullptr;
goto eoferr;
}
#ifdef ENABLE_MVCC
bool fStaleMvccKey = (rsi) ? val->mvcc_tstamp < rsi->mvccMinThreshold : false;
#else
bool fStaleMvccKey = false;
#endif
/* Check if the key already expired. This function is used when loading
* an RDB file from disk, either at startup, or when an RDB was

View File

@ -1387,14 +1387,14 @@ dictType dbDictType = {
dictObjectDestructor /* val destructor */
};
/* db->pdict, keys are sds strings, vals uints. */
dictType dbDictTypeTombstone = {
/* Tombstone dict: keys are sds strings, vals are the key's uint64_t dict hash. */
dictType dbTombstoneDictType = {
dictSdsHash, /* hash function */
NULL, /* key dup */
NULL, /* val dup */
dictSdsKeyCompare, /* key compare */
dictDbKeyDestructor, /* key destructor */
NULL /* val destructor */
dictDbKeyDestructor, /* key destructor */
NULL /* val destructor */
};
dictType dbSnapshotDictType = {
@ -1539,8 +1539,9 @@ void tryResizeHashTables(int dbid) {
* is returned. */
int redisDbPersistentData::incrementallyRehash() {
/* Keys dictionary */
if (dictIsRehashing(m_pdict)) {
if (dictIsRehashing(m_pdict) || dictIsRehashing(m_pdictTombstone)) {
dictRehashMilliseconds(m_pdict,1);
dictRehashMilliseconds(m_pdictTombstone,1);
return 1; /* already used our millisecond for this loop... */
}
return 0;
@ -2219,11 +2220,22 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
CONFIG_BGSAVE_RETRY_DELAY ||
g_pserver->lastbgsave_status == C_OK))
{
serverLog(LL_NOTICE,"%d changes in %d seconds. Saving...",
sp->changes, (int)sp->seconds);
rdbSaveInfo rsi, *rsiptr;
rsiptr = rdbPopulateSaveInfo(&rsi);
rdbSaveBackground(rsiptr);
// Ensure rehashing is complete
bool fRehashInProgress = false;
if (g_pserver->activerehashing) {
for (int idb = 0; idb < cserver.dbnum && !fRehashInProgress; ++idb) {
if (g_pserver->db[idb]->FRehashing())
fRehashInProgress = true;
}
}
if (!fRehashInProgress) {
serverLog(LL_NOTICE,"%d changes in %d seconds. Saving...",
sp->changes, (int)sp->seconds);
rdbSaveInfo rsi, *rsiptr;
rsiptr = rdbPopulateSaveInfo(&rsi);
rdbSaveBackground(rsiptr);
}
break;
}
}
@ -2312,14 +2324,16 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
}
}
bool fAnySnapshots = false;
for (int idb = 0; idb < cserver.dbnum && !fAnySnapshots; ++idb)
fAnySnapshots = fAnySnapshots || g_pserver->db[0]->FSnapshot();
if (fAnySnapshots)
{
g_pserver->asyncworkqueue->AddWorkFunction([]{
g_pserver->db[0]->consolidate_snapshot();
}, true /*HiPri*/);
run_with_period(100) {
bool fAnySnapshots = false;
for (int idb = 0; idb < cserver.dbnum && !fAnySnapshots; ++idb)
fAnySnapshots = fAnySnapshots || g_pserver->db[0]->FSnapshot();
if (fAnySnapshots)
{
g_pserver->asyncworkqueue->AddWorkFunction([]{
g_pserver->db[0]->consolidate_snapshot();
}, true /*HiPri*/);
}
}
/* Fire the cron loop modules event. */
@ -2477,17 +2491,17 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
latencyAddSampleIfNeeded("storage-commit", commit_latency);
handleClientsWithPendingWrites(iel, aof_state);
if (serverTL->gcEpoch != 0)
if (!serverTL->gcEpoch.isReset())
g_pserver->garbageCollector.endEpoch(serverTL->gcEpoch, true /*fNoFree*/);
serverTL->gcEpoch = 0;
serverTL->gcEpoch.reset();
aeAcquireLock();
/* Close clients that need to be closed asynchronous */
freeClientsInAsyncFreeQueue(iel);
if (serverTL->gcEpoch != 0)
if (!serverTL->gcEpoch.isReset())
g_pserver->garbageCollector.endEpoch(serverTL->gcEpoch, true /*fNoFree*/);
serverTL->gcEpoch = 0;
serverTL->gcEpoch.reset();
/* Before we are going to sleep, let the threads access the dataset by
* releasing the GIL. Redis main thread will not touch anything at this
@ -2503,7 +2517,7 @@ void afterSleep(struct aeEventLoop *eventLoop) {
UNUSED(eventLoop);
if (moduleCount()) moduleAcquireGIL(TRUE /*fServerThread*/);
serverAssert(serverTL->gcEpoch == 0);
serverAssert(serverTL->gcEpoch.isReset());
serverTL->gcEpoch = g_pserver->garbageCollector.startEpoch();
aeAcquireLock();
for (int idb = 0; idb < cserver.dbnum; ++idb)
@ -5159,12 +5173,20 @@ sds genRedisInfoString(const char *section) {
}
if (allsections || defsections || !strcasecmp(section,"keydb")) {
// Compute the MVCC depth
int mvcc_depth = 0;
for (int idb = 0; idb < cserver.dbnum; ++idb) {
mvcc_depth = std::max(mvcc_depth, g_pserver->db[idb]->snapshot_depth());
}
if (sections++) info = sdscat(info,"\r\n");
info = sdscatprintf(info,
"# KeyDB\r\n"
"variant:pro\r\n"
"license_status:%s\r\n",
cserver.license_key ? "OK" : "Trial"
"license_status:%s\r\n"
"mvcc_depth:%d\r\n",
cserver.license_key ? "OK" : "Trial",
mvcc_depth
);
}

View File

@ -877,7 +877,9 @@ typedef struct redisObject {
private:
mutable std::atomic<unsigned> refcount {0};
public:
#ifdef ENABLE_MVCC
uint64_t mvcc_tstamp;
#endif
void *m_ptr;
inline bool FExpires() const { return refcount.load(std::memory_order_relaxed) >> 31; }
@ -888,7 +890,11 @@ public:
void addref() const { refcount.fetch_add(1, std::memory_order_relaxed); }
unsigned release() const { return refcount.fetch_sub(1, std::memory_order_seq_cst) & ~(1U << 31); }
} robj;
#ifdef ENABLE_MVCC
static_assert(sizeof(redisObject) == 24, "object size is critical, don't increase");
#else
static_assert(sizeof(redisObject) == 16, "object size is critical, don't increase");
#endif
__attribute__((always_inline)) inline const void *ptrFromObj(robj_roptr &o)
{
@ -1320,6 +1326,9 @@ public:
void setExpire(robj *key, robj *subkey, long long when);
void setExpire(expireEntry &&e);
void initialize();
void prepOverwriteForSnapshot(char *key);
bool FRehashing() const { return dictIsRehashing(m_pdict) || dictIsRehashing(m_pdictTombstone); }
void setStorageProvider(StorageCache *pstorage);
@ -1411,14 +1420,16 @@ private:
class redisDbPersistentDataSnapshot : protected redisDbPersistentData
{
friend class redisDbPersistentData;
private:
bool iterate_threadsafe_core(std::function<bool(const char*, robj_roptr o)> &fn, bool fKeyOnly, bool fCacheOnly, bool fTop) const;
protected:
bool m_fConsolidated = false;
static void gcDisposeSnapshot(redisDbPersistentDataSnapshot *psnapshot);
int snapshot_depth() const;
void consolidate_children(redisDbPersistentData *pdbPrimary, bool fForce);
void freeTombstoneObjects(int depth);
bool freeTombstoneObjects(int depth);
public:
int snapshot_depth() const;
bool FWillFreeChildDebug() const { return m_spdbSnapshotHOLDER != nullptr; }
bool iterate_threadsafe(std::function<bool(const char*, robj_roptr o)> fn, bool fKeyOnly = false, bool fCacheOnly = false) const;
@ -1521,6 +1532,8 @@ struct redisDb : public redisDbPersistentDataSnapshot
using redisDbPersistentData::dictUnsafeKeyOnly;
using redisDbPersistentData::resortExpire;
using redisDbPersistentData::prefetchKeysAsync;
using redisDbPersistentData::prepOverwriteForSnapshot;
using redisDbPersistentData::FRehashing;
public:
expireset::setiter expireitr;
@ -1949,6 +1962,58 @@ struct clusterState;
#define MAX_EVENT_LOOPS 16
#define IDX_EVENT_LOOP_MAIN 0
// Aggregates the two garbage collectors (snapshot objects and generic
// ICollectable objects) behind a single epoch handle so callers open/close
// one logical GC epoch instead of tracking two raw epoch counters.
class GarbageCollectorCollection
{
GarbageCollector<redisDbPersistentDataSnapshot> garbageCollectorSnapshot;
GarbageCollector<ICollectable> garbageCollectorGeneric;
public:
// Pair of epoch ids, one per underlying collector. 0 is the "no epoch"
// sentinel — assumes GarbageCollector::startEpoch() never returns 0
// (TODO confirm; the implementation is outside this view).
struct Epoch
{
uint64_t epochSnapshot = 0;
uint64_t epochGeneric = 0;
// Return to the "no epoch open" state (does not end the epoch).
void reset() {
epochSnapshot = 0;
epochGeneric = 0;
}
// True when no epoch is currently held.
bool isReset() const {
return epochSnapshot == 0 && epochGeneric == 0;
}
};
// Open an epoch on both collectors and return the combined handle.
Epoch startEpoch()
{
Epoch e;
e.epochSnapshot = garbageCollectorSnapshot.startEpoch();
e.epochGeneric = garbageCollectorGeneric.startEpoch();
return e;
}
// Close both epochs; fNoFree defers actual freeing (see callers in
// beforeSleep which pass true while still holding references).
void endEpoch(Epoch e, bool fNoFree = false)
{
garbageCollectorSnapshot.endEpoch(e.epochSnapshot, fNoFree);
garbageCollectorGeneric.endEpoch(e.epochGeneric, fNoFree);
}
void shutdown()
{
garbageCollectorSnapshot.shutdown();
garbageCollectorGeneric.shutdown();
}
// Queue a snapshot for destruction once epoch e is globally retired.
void enqueue(Epoch e, std::unique_ptr<redisDbPersistentDataSnapshot> &&sp)
{
garbageCollectorSnapshot.enqueue(e.epochSnapshot, std::move(sp));
}
// Queue any other collectable (e.g. LazyFree batches) likewise.
void enqueue(Epoch e, std::unique_ptr<ICollectable> &&sp)
{
garbageCollectorGeneric.enqueue(e.epochGeneric, std::move(sp));
}
};
// Per-thread variables that may be accessed without a lock
struct redisServerThreadVars {
aeEventLoop *el;
@ -1970,7 +2035,7 @@ struct redisServerThreadVars {
struct fastlock lockPendingWrite { "thread pending write" };
char neterr[ANET_ERR_LEN]; /* Error buffer for anet.c */
long unsigned commandsExecuted = 0;
uint64_t gcEpoch = 0;
GarbageCollectorCollection::Epoch gcEpoch;
const redisDbPersistentDataSnapshot **rgdbSnapshot = nullptr;
bool fRetrySetAofEvent = false;
@ -2424,7 +2489,7 @@ struct redisServer {
/* System hardware info */
size_t system_memory_size; /* Total memory in system as reported by OS */
GarbageCollector<redisDbPersistentDataSnapshot> garbageCollector;
GarbageCollectorCollection garbageCollector;
IStorageFactory *m_pstorageFactory = nullptr;
int storage_flush_period; // The time between flushes in the CRON job
@ -2553,7 +2618,7 @@ extern dictType zsetDictType;
extern dictType clusterNodesDictType;
extern dictType clusterNodesBlackListDictType;
extern dictType dbDictType;
extern dictType dbDictTypeTombstone;
extern dictType dbTombstoneDictType;
extern dictType dbSnapshotDictType;
extern dictType shaScriptObjectDictType;
extern double R_Zero, R_PosInf, R_NegInf, R_Nan;

View File

@ -2,6 +2,29 @@
#include "aelocker.h"
static const size_t c_elementsSmallLimit = 500000;
static fastlock s_lock {"consolidate_children"}; // this lock ensures only one thread is consolidating at a time
// Batch of dict entries, robjs, and whole dicts whose destruction is
// deferred to the garbage collector: callers unlink the items while holding
// locks, push them here, and the real freeing happens in this destructor
// when the GC epoch retires (off the critical path).
class LazyFree : public ICollectable
{
public:
virtual ~LazyFree()
{
// Entries were unlinked from a dbDictType dict, so use that type's
// destructors for key and value before freeing the entry itself.
for (auto *de : vecde)
{
dbDictType.keyDestructor(nullptr, dictGetKey(de));
dbDictType.valDestructor(nullptr, dictGetVal(de));
zfree(de);
}
for (robj *o : vecobjLazyFree)
decrRefCount(o);
for (dict *d : vecdictLazyFree)
dictRelease(d);
}
std::vector<dict*> vecdictLazyFree;   // dicts to release wholesale
std::vector<robj*> vecobjLazyFree;    // objects to decref
std::vector<dictEntry*> vecde;        // unlinked entries to destroy
};
const redisDbPersistentDataSnapshot *redisDbPersistentData::createSnapshot(uint64_t mvccCheckpoint, bool fOptional)
{
@ -70,6 +93,7 @@ const redisDbPersistentDataSnapshot *redisDbPersistentData::createSnapshot(uint6
auto spdb = std::unique_ptr<redisDbPersistentDataSnapshot>(new (MALLOC_LOCAL) redisDbPersistentDataSnapshot());
dictRehashMilliseconds(m_pdict, 50); // Give us the best chance at a fast cleanup
spdb->m_fAllChanged = false;
spdb->m_fTrackingChanges = 0;
spdb->m_pdict = m_pdict;
@ -90,8 +114,13 @@ const redisDbPersistentDataSnapshot *redisDbPersistentData::createSnapshot(uint6
spdb->m_setexpire->pause_rehash(); // needs to be const
}
if (dictIsRehashing(spdb->m_pdict) || dictIsRehashing(spdb->m_pdictTombstone)) {
serverLog(LL_NOTICE, "NOTICE: Suboptimal snapshot");
}
m_pdict = dictCreate(&dbDictType,this);
m_pdictTombstone = dictCreate(&dbDictTypeTombstone, this);
dictExpand(m_pdict, 1024); // minimize rehash overhead
m_pdictTombstone = dictCreate(&dbTombstoneDictType, this);
serverAssert(spdb->m_pdict->iterators == 1);
@ -183,7 +212,18 @@ void redisDbPersistentData::restoreSnapshot(const redisDbPersistentDataSnapshot
void redisDbPersistentData::endSnapshotAsync(const redisDbPersistentDataSnapshot *psnapshot)
{
mstime_t latency;
aeAcquireLock(); latencyStartMonitor(latency);
aeAcquireLock();
while (dictIsRehashing(m_pdict) || dictIsRehashing(m_pdictTombstone)) {
dictRehashMilliseconds(m_pdict, 1);
dictRehashMilliseconds(m_pdictTombstone, 1);
// Give someone else a chance
aeReleaseLock();
usleep(300);
aeAcquireLock();
}
latencyStartMonitor(latency);
if (m_pdbSnapshotASYNC && m_pdbSnapshotASYNC->m_mvccCheckpoint <= psnapshot->m_mvccCheckpoint)
{
// Free a stale async snapshot so consolidate_children can clean it up later
@ -209,11 +249,22 @@ void redisDbPersistentData::endSnapshotAsync(const redisDbPersistentDataSnapshot
auto psnapshotT = createSnapshot(LLONG_MAX, false);
endSnapshot(psnapshot); // this will just dec the ref count since our new snapshot has a ref
psnapshot = nullptr;
aeReleaseLock(); latencyEndMonitor(latency);
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("end-snapshot-async-phase-1", latency);
aeReleaseLock();
// do the expensive work of merging snapshots outside the ref
const_cast<redisDbPersistentDataSnapshot*>(psnapshotT)->freeTombstoneObjects(1); // depth is one because we just created it
if (const_cast<redisDbPersistentDataSnapshot*>(psnapshotT)->freeTombstoneObjects(1)) // depth is one because we just created it
{
aeAcquireLock();
if (m_pdbSnapshotASYNC != nullptr)
endSnapshot(m_pdbSnapshotASYNC);
m_pdbSnapshotASYNC = nullptr;
endSnapshot(psnapshotT);
aeReleaseLock();
return;
}
const_cast<redisDbPersistentDataSnapshot*>(psnapshotT)->consolidate_children(this, true);
// Final Cleanup
@ -222,33 +273,80 @@ void redisDbPersistentData::endSnapshotAsync(const redisDbPersistentDataSnapshot
m_pdbSnapshotASYNC = psnapshotT;
else
endSnapshot(psnapshotT); // finally clean up our temp snapshot
aeReleaseLock(); latencyEndMonitor(latency);
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("end-snapshot-async-phase-2", latency);
aeReleaseLock();
}
void redisDbPersistentDataSnapshot::freeTombstoneObjects(int depth)
bool redisDbPersistentDataSnapshot::freeTombstoneObjects(int depth)
{
if (m_pdbSnapshot == nullptr)
return;
{
serverAssert(dictSize(m_pdictTombstone) == 0);
return true;
}
const_cast<redisDbPersistentDataSnapshot*>(m_pdbSnapshot)->freeTombstoneObjects(depth+1);
if (!const_cast<redisDbPersistentDataSnapshot*>(m_pdbSnapshot)->freeTombstoneObjects(depth+1))
return false;
{
AeLocker ae;
ae.arm(nullptr);
if (m_pdbSnapshot->m_refCount != depth && (m_pdbSnapshot->m_refCount != (m_refCount+1)))
return;
return false;
ae.disarm();
}
std::unique_lock<fastlock> lock(s_lock, std::defer_lock);
if (!lock.try_lock())
return false; // this is a best effort function
std::unique_ptr<LazyFree> splazy = std::make_unique<LazyFree>();
dict *dictTombstoneNew = dictCreate(&dbTombstoneDictType, nullptr);
dictIterator *di = dictGetIterator(m_pdictTombstone);
dictEntry *de;
std::vector<dictEntry*> vecdeFree;
vecdeFree.reserve(dictSize(m_pdictTombstone));
unsigned rgcremoved[2] = {0};
while ((de = dictNext(di)) != nullptr)
{
dictEntry *deObj = dictFind(m_pdbSnapshot->m_pdict, dictGetKey(de));
if (deObj != nullptr && dictGetVal(deObj) != nullptr)
dictEntry **dePrev = nullptr;
dictht *ht = nullptr;
sds key = (sds)dictGetKey(de);
// BUG BUG: Why can't we do a shallow search here?
dictEntry *deObj = dictFindWithPrev(m_pdbSnapshot->m_pdict, key, (uint64_t)dictGetVal(de), &dePrev, &ht, false);
if (deObj != nullptr)
{
decrRefCount((robj*)dictGetVal(deObj));
void *ptrSet = nullptr;
__atomic_store(&deObj->v.val, &ptrSet, __ATOMIC_RELAXED);
// Now unlink the DE
__atomic_store(dePrev, &deObj->next, __ATOMIC_RELEASE);
if (ht == &m_pdbSnapshot->m_pdict->ht[0])
rgcremoved[0]++;
else
rgcremoved[1]++;
splazy->vecde.push_back(deObj);
} else {
serverAssert(dictFind(m_pdbSnapshot->m_pdict, key) == nullptr);
serverAssert(m_pdbSnapshot->find_cached_threadsafe(key) != nullptr);
dictAdd(dictTombstoneNew, sdsdupshared((sds)dictGetKey(de)), dictGetVal(de));
}
}
dictReleaseIterator(di);
dictForceRehash(dictTombstoneNew);
aeAcquireLock();
dict *dT = m_pdbSnapshot->m_pdict;
splazy->vecdictLazyFree.push_back(m_pdictTombstone);
__atomic_store(&m_pdictTombstone, &dictTombstoneNew, __ATOMIC_RELEASE);
__atomic_fetch_sub(&dT->ht[0].used, rgcremoved[0], __ATOMIC_RELEASE);
__atomic_fetch_sub(&dT->ht[1].used, rgcremoved[1], __ATOMIC_RELEASE);
serverLog(LL_WARNING, "tombstones removed: %u, remain: %lu", rgcremoved[0]+rgcremoved[1], dictSize(m_pdictTombstone));
g_pserver->garbageCollector.enqueue(serverTL->gcEpoch, std::move(splazy));
aeReleaseLock();
return true;
}
void redisDbPersistentData::endSnapshot(const redisDbPersistentDataSnapshot *psnapshot)
@ -299,15 +397,20 @@ void redisDbPersistentData::endSnapshot(const redisDbPersistentDataSnapshot *psn
// Stage 1 Loop through all the tracked deletes and remove them from the snapshot DB
dictIterator *di = dictGetIterator(m_pdictTombstone);
dictEntry *de;
m_spdbSnapshotHOLDER->m_pdict->iterators++;
auto splazy = std::make_unique<LazyFree>();
while ((de = dictNext(di)) != NULL)
{
dictEntry **dePrev;
dictht *ht;
dictEntry *deSnapshot = dictFindWithPrev(m_spdbSnapshotHOLDER->m_pdict, dictGetKey(de), &dePrev, &ht);
// BUG BUG Why not a shallow search?
dictEntry *deSnapshot = dictFindWithPrev(m_spdbSnapshotHOLDER->m_pdict, dictGetKey(de), (uint64_t)dictGetVal(de), &dePrev, &ht, false /*!!sdsisshared((sds)dictGetKey(de))*/);
if (deSnapshot == nullptr && m_spdbSnapshotHOLDER->m_pdbSnapshot)
{
// The tombstone is for a grandchild, propagate it (or possibly in the storage provider - but an extra tombstone won't hurt)
#ifdef CHECKED_BUILD
serverAssert(m_spdbSnapshotHOLDER->m_pdbSnapshot->find_cached_threadsafe((const char*)dictGetKey(de)) != nullptr);
#endif
dictAdd(m_spdbSnapshotHOLDER->m_pdictTombstone, sdsdupshared((sds)dictGetKey(de)), nullptr);
continue;
}
@ -318,15 +421,16 @@ void redisDbPersistentData::endSnapshot(const redisDbPersistentDataSnapshot *psn
}
// Delete the object from the source dict, we don't use dictDelete to avoid a second search
dictFreeKey(m_spdbSnapshotHOLDER->m_pdict, deSnapshot);
dictFreeVal(m_spdbSnapshotHOLDER->m_pdict, deSnapshot);
serverAssert(*dePrev == deSnapshot);
splazy->vecde.push_back(deSnapshot);
*dePrev = deSnapshot->next;
zfree(deSnapshot);
ht->used--;
}
m_spdbSnapshotHOLDER->m_pdict->iterators--;
dictReleaseIterator(di);
dictEmpty(m_pdictTombstone, nullptr);
splazy->vecdictLazyFree.push_back(m_pdictTombstone);
m_pdictTombstone = dictCreate(&dbTombstoneDictType, nullptr);
// Stage 2 Move all new keys to the snapshot DB
dictMerge(m_spdbSnapshotHOLDER->m_pdict, m_pdict);
@ -355,8 +459,10 @@ void redisDbPersistentData::endSnapshot(const redisDbPersistentDataSnapshot *psn
auto spsnapshotFree = std::move(m_spdbSnapshotHOLDER);
m_spdbSnapshotHOLDER = std::move(spsnapshotFree->m_spdbSnapshotHOLDER);
if (serverTL != nullptr)
if (serverTL != nullptr) {
g_pserver->garbageCollector.enqueue(serverTL->gcEpoch, std::move(spsnapshotFree));
g_pserver->garbageCollector.enqueue(serverTL->gcEpoch, std::move(splazy));
}
// Sanity Checks
serverAssert(m_spdbSnapshotHOLDER != nullptr || m_pdbSnapshot == nullptr);
@ -393,8 +499,10 @@ dict_iter redisDbPersistentDataSnapshot::random_cache_threadsafe(bool fPrimaryOn
dict_iter redisDbPersistentData::find_cached_threadsafe(const char *key) const
{
dict *dictTombstone;
__atomic_load(&m_pdictTombstone, &dictTombstone, __ATOMIC_ACQUIRE);
dictEntry *de = dictFind(m_pdict, key);
if (de == nullptr && m_pdbSnapshot != nullptr && dictFind(m_pdictTombstone, key) == nullptr)
if (de == nullptr && m_pdbSnapshot != nullptr && dictFind(dictTombstone, key) == nullptr)
{
auto itr = m_pdbSnapshot->find_cached_threadsafe(key);
if (itr != nullptr)
@ -460,11 +568,20 @@ unsigned long redisDbPersistentDataSnapshot::scan_threadsafe(unsigned long itera
}
bool redisDbPersistentDataSnapshot::iterate_threadsafe(std::function<bool(const char*, robj_roptr o)> fn, bool fKeyOnly, bool fCacheOnly) const
{
    // Public entry point: forward to the core iterator, flagging this call as
    // the outermost (first) level of the snapshot chain so the element-count
    // sanity assertion is only enforced at the top of the recursion.
    const bool fFirstLevel = true;
    return iterate_threadsafe_core(fn, fKeyOnly, fCacheOnly, fFirstLevel);
}
bool redisDbPersistentDataSnapshot::iterate_threadsafe_core(std::function<bool(const char*, robj_roptr o)> &fn, bool fKeyOnly, bool fCacheOnly, bool fFirst) const
{
// Take the size so we can ensure we visited every element exactly once
// use volatile to ensure it's not checked too late. This makes it more
// likely we'll detect races (but it won't guarantee it)
aeAcquireLock();
dict *dictTombstone;
__atomic_load(&m_pdictTombstone, &dictTombstone, __ATOMIC_ACQUIRE);
volatile ssize_t celem = (ssize_t)size();
aeReleaseLock();
dictEntry *de = nullptr;
bool fResult = true;
@ -510,19 +627,22 @@ bool redisDbPersistentDataSnapshot::iterate_threadsafe(std::function<bool(const
__atomic_load(&m_pdbSnapshot, &psnapshot, __ATOMIC_ACQUIRE);
if (fResult && psnapshot != nullptr)
{
fResult = psnapshot->iterate_threadsafe([this, &fn, &celem](const char *key, robj_roptr o) {
dictEntry *deTombstone = dictFind(m_pdictTombstone, key);
std::function<bool(const char*, robj_roptr o)> fnNew = [this, &fn, &celem, dictTombstone](const char *key, robj_roptr o) {
dictEntry *deTombstone = dictFind(dictTombstone, key);
if (deTombstone != nullptr)
return true;
// Alright it's a key in the use keyspace, lets ensure it and then pass it off
--celem;
return fn(key, o);
}, fKeyOnly, fCacheOnly);
};
fResult = psnapshot->iterate_threadsafe_core(fnNew, fKeyOnly, fCacheOnly, false);
}
// we should have hit all keys or had a good reason not to
serverAssert(!fResult || celem == 0 || (m_spstorage && fCacheOnly));
if (!(!fResult || celem == 0 || (m_spstorage && fCacheOnly)))
serverLog(LL_WARNING, "celem: %ld", celem);
serverAssert(!fResult || celem == 0 || (m_spstorage && fCacheOnly) || !fFirst);
return fResult;
}
@ -538,11 +658,12 @@ void redisDbPersistentData::consolidate_snapshot()
{
aeAcquireLock();
auto psnapshot = (m_pdbSnapshot != nullptr) ? m_spdbSnapshotHOLDER.get() : nullptr;
if (psnapshot == nullptr)
if (psnapshot == nullptr || psnapshot->snapshot_depth() == 0)
{
aeReleaseLock();
return;
}
psnapshot->m_refCount++; // ensure it's not free'd
aeReleaseLock();
psnapshot->consolidate_children(this, false /* fForce */);
@ -554,8 +675,6 @@ void redisDbPersistentData::consolidate_snapshot()
// only call this on the "real" database to consolidate the first child
void redisDbPersistentDataSnapshot::consolidate_children(redisDbPersistentData *pdbPrimary, bool fForce)
{
static fastlock s_lock {"consolidate_children"}; // this lock ensures only one thread is consolidating at a time
std::unique_lock<fastlock> lock(s_lock, std::defer_lock);
if (!lock.try_lock())
return; // this is a best effort function
@ -615,7 +734,6 @@ void redisDbPersistentDataSnapshot::consolidate_children(redisDbPersistentData *
serverLog(LL_VERBOSE, "cleaned %d snapshots", snapshot_depth()-1);
spdb->m_refCount = depth;
spdb->m_fConsolidated = true;
// Drop our refs from this snapshot and its children
psnapshotT = this;
std::vector<redisDbPersistentDataSnapshot*> vecT;