Merge pull request #60 from Snapchat/merge_os_apr_13

Merge in changes from Open Source. For the most part, this encompasses two changes:
- Adding partial sync capabilities to multi-master.
- Adding ReadWrite locks to prevent global lock issues when forking threads, specifically with regard to the time thread.
2022-05-02 13:26:21 -04:00 · 2022-05-02 13:26:21 -04:00 · 3724cf97d9
commit 3724cf97d9
parent 807c443199 cd715f6c52
24 changed files with 442 additions and 212 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -34,3 +34,26 @@ jobs:
    - name: rotation test
      run: |
          ./runtest-rotation
+        
+  build-ubuntu-old:
+    runs-on: ubuntu-18.04
+    steps:
+    - uses: actions/checkout@v1
+    - name: make -j2
+      run: | 
+        sudo apt-get update
+        sudo apt-get -y install uuid-dev libcurl4-openssl-dev
+        make -j2
+
+  build-macos-latest:
+    runs-on: macos-latest
+    steps:
+    - uses: actions/checkout@v2
+    - name: make
+      run: make KEYDB_CFLAGS='-Werror' KEYDB_CXXFLAGS='-Werror' -j2
+
+  build-libc-malloc:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+    - name: make
--- a/README.md
+++ b/README.md
@ -33,19 +33,13 @@ Because of this difference of opinion features which are right for KeyDB may not
 Additional Resources
 --------------------

-Try our docker container: https://hub.docker.com/r/eqalpha/keydb
+Check out KeyDB's [Docker Image](https://hub.docker.com/r/eqalpha/keydb)

-Talk on Gitter: https://gitter.im/KeyDB
+Join us on [Slack](https://docs.keydb.dev/slack/)

-Visit our Website: https://keydb.dev
+Post to the [Community Forum](https://community.keydb.dev)

-See options for channel partners and support contracts: https://keydb.dev/support.html
-
-Learn with KeyDB’s official documentation site: https://docs.keydb.dev
-
-[Subscribe to the KeyDB mailing list](https://eqalpha.us20.list-manage.com/subscribe/post?u=978f486c2f95589b24591a9cc&id=4ab9220500)
-
-Management GUI: We recommend [FastoNoSQL](https://fastonosql.com/) which has official KeyDB support.
+Learn more through KeyDB's [Documentation & Learning Center](https://docs.keydb.dev)


 Benchmarking KeyDB
--- a/src/AsyncWorkQueue.cpp
+++ b/src/AsyncWorkQueue.cpp
@ -28,6 +28,7 @@ void AsyncWorkQueue::WorkerThreadMain()
        if (m_workqueue.empty())
            m_cvWakeup.wait(lock);

+        aeThreadOnline();
        while (!m_workqueue.empty())
        {
            WorkItem task = std::move(m_workqueue.front());
@ -49,6 +50,7 @@ void AsyncWorkQueue::WorkerThreadMain()
        }
        g_pserver->garbageCollector.endEpoch(serverTL->gcEpoch);
        serverTL->gcEpoch.reset();
+        aeThreadOffline();
    }

    listRelease(vars.clients_pending_asyncwrite);
--- a/src/ae.cpp
+++ b/src/ae.cpp
@ -53,6 +53,7 @@
 #include "zmalloc.h"
 #include "config.h"
 #include "serverassert.h"
+#include "readwritelock.h"

 #ifdef USE_MUTEX
 thread_local int cOwnLock = 0;
@ -87,6 +88,7 @@ mutex_wrapper g_lock;
 #else
 fastlock g_lock("AE (global)");
 #endif
+readWriteLock g_forkLock("Fork (global)");
 thread_local aeEventLoop *g_eventLoopThisThread = NULL;

 /* Include the best multiplexing layer supported by this system.
@ -154,16 +156,22 @@ void aeProcessCmd(aeEventLoop *eventLoop, int fd, void *, int )

        case AE_ASYNC_OP::PostFunction:
            {
-            if (cmd.fLock && !ulock.owns_lock())
+            if (cmd.fLock && !ulock.owns_lock()) {
+                g_forkLock.releaseRead();
                ulock.lock();
+                g_forkLock.acquireRead();
+            }
            ((aePostFunctionProc*)cmd.proc)(cmd.clientData);
            break;
            }

        case AE_ASYNC_OP::PostCppFunction:
        {
-            if (cmd.fLock && !ulock.owns_lock())
+            if (cmd.fLock && !ulock.owns_lock()) {
+                g_forkLock.releaseRead();
                ulock.lock();
+                g_forkLock.acquireRead();
+            }
            (*cmd.pfn)();

            delete cmd.pfn;
@ -547,7 +555,11 @@ static int processTimeEvents(aeEventLoop *eventLoop) {
            if (te->next)
                te->next->prev = te->prev;
            if (te->finalizerProc) {
-                if (!ulock.owns_lock()) ulock.lock();
+                if (!ulock.owns_lock()) {
+                    g_forkLock.releaseRead();
+                    ulock.lock();
+                    g_forkLock.acquireRead();
+                }
                te->finalizerProc(eventLoop, te->clientData);
                now = getMonotonicUs();
            }
@ -567,7 +579,11 @@ static int processTimeEvents(aeEventLoop *eventLoop) {
        }

        if (te->when <= now) {
-            if (!ulock.owns_lock()) ulock.lock();
+            if (!ulock.owns_lock()) {
+                g_forkLock.releaseRead();
+                ulock.lock();
+                g_forkLock.acquireRead();
+            }
            int retval;

            id = te->id;
@ -591,8 +607,11 @@ extern "C" void ProcessEventCore(aeEventLoop *eventLoop, aeFileEvent *fe, int ma
 {
 #define LOCK_IF_NECESSARY(fe, tsmask) \
    std::unique_lock<decltype(g_lock)> ulock(g_lock, std::defer_lock); \
-    if (!(fe->mask & tsmask)) \
-        ulock.lock()
+    if (!(fe->mask & tsmask)) { \
+        g_forkLock.releaseRead(); \
+        ulock.lock(); \
+        g_forkLock.acquireRead(); \
+    }

    int fired = 0; /* Number of events fired for current fd. */

@ -704,8 +723,11 @@ int aeProcessEvents(aeEventLoop *eventLoop, int flags)

        if (eventLoop->beforesleep != NULL && flags & AE_CALL_BEFORE_SLEEP) {
            std::unique_lock<decltype(g_lock)> ulock(g_lock, std::defer_lock);
-            if (!(eventLoop->beforesleepFlags & AE_SLEEP_THREADSAFE))
+            if (!(eventLoop->beforesleepFlags & AE_SLEEP_THREADSAFE)) {
+                g_forkLock.releaseRead();
                ulock.lock();
+                g_forkLock.acquireRead();
+            }
            eventLoop->beforesleep(eventLoop);
        }

@ -716,8 +738,11 @@ int aeProcessEvents(aeEventLoop *eventLoop, int flags)
        /* After sleep callback. */
        if (eventLoop->aftersleep != NULL && flags & AE_CALL_AFTER_SLEEP) {
            std::unique_lock<decltype(g_lock)> ulock(g_lock, std::defer_lock);
-            if (!(eventLoop->aftersleepFlags & AE_SLEEP_THREADSAFE))
+            if (!(eventLoop->aftersleepFlags & AE_SLEEP_THREADSAFE)) {
+                g_forkLock.releaseRead();
                ulock.lock();
+                g_forkLock.acquireRead();
+            }
            eventLoop->aftersleep(eventLoop);
        }

@ -792,9 +817,21 @@ void setAeLockSetThreadSpinWorker(spin_worker worker)
    tl_worker = worker;
 }

+void aeThreadOnline() // Registers the calling thread as a reader of the global fork lock (g_forkLock).
+{
+    g_forkLock.acquireRead(); // waits while a writer holds or is waiting for the lock, then increments the reader count
+}
+
 void aeAcquireLock() // Takes the global AE lock (g_lock) using this thread's spin worker (tl_worker).
 {
+    g_forkLock.releaseRead(); // drop our read hold first; presumably so a writer waiting for the reader count to reach zero is not stalled while we block on g_lock
     g_lock.lock(tl_worker);
+    g_forkLock.acquireRead(); // re-register as a reader once g_lock is held; this can itself wait if a writer is active or pending
+}
+
+void aeAcquireForkLock() // Switches the calling thread from reader to writer on g_forkLock.
+{
+    g_forkLock.upgradeWrite(); // upgradeWrite() releases one read hold and then acquires the write side, so the caller is expected to already hold a read (see aeThreadOnline)
 }

 int aeTryAcquireLock(int fWeak)
@ -802,6 +839,11 @@ int aeTryAcquireLock(int fWeak)
    return g_lock.try_lock(!!fWeak);
 }

+void aeThreadOffline() // Counterpart of aeThreadOnline(): removes the calling thread's read hold on g_forkLock.
+{
+    g_forkLock.releaseRead(); // decrements the reader count and wakes every waiter on the lock's condition variable
+}
+
 void aeReleaseLock()
 {
    g_lock.unlock();
@ -812,6 +854,11 @@ void aeSetThreadOwnsLockOverride(int fOverride)
    fOwnLockOverride = fOverride;
 }

+void aeReleaseForkLock() // Counterpart of aeAcquireForkLock(): goes from writer back to reader on g_forkLock.
+{
+    g_forkLock.downgradeWrite(); // downgradeWrite() releases the write side and then re-acquires a read hold, so the caller stays registered as a reader
+}
+
 int aeThreadOwnsLock()
 {
    return fOwnLockOverride || g_lock.fOwnLock();
--- a/src/ae.h
+++ b/src/ae.h
@ -164,9 +164,13 @@ void aeSetDontWait(aeEventLoop *eventLoop, int noWait);
 void aeClosePipesForForkChild(aeEventLoop *eventLoop);

 void setAeLockSetThreadSpinWorker(spin_worker worker);
+void aeThreadOnline();
 void aeAcquireLock();
+void aeAcquireForkLock();
 int aeTryAcquireLock(int fWeak);
+void aeThreadOffline();
 void aeReleaseLock();
+void aeReleaseForkLock();
 int aeThreadOwnsLock();
 void aeSetThreadOwnsLockOverride(int fOverride);
 int aeLockContested(int threshold);
--- a/src/aof.cpp
+++ b/src/aof.cpp
@ -884,7 +884,7 @@ int loadAppendOnlyFile(char *filename) {
    } else {
        /* RDB preamble. Pass loading the RDB functions. */
        rio rdb;
-        rdbSaveInfo rsi = RDB_SAVE_INFO_INIT;
+        rdbSaveInfo rsi;

        serverLog(LL_NOTICE,"Reading RDB preamble from AOF file...");
        if (fseek(fp,0,SEEK_SET) == -1) goto readerr;
--- a/src/module.cpp
+++ b/src/module.cpp
@ -365,7 +365,7 @@ typedef struct RedisModuleCommandFilter {
 static list *moduleCommandFilters;

 /* Module GIL Variables */
-static readWriteLock s_moduleGIL;
+static readWriteLock s_moduleGIL("Module GIL");
 thread_local bool g_fModuleThread = false;

 typedef void (*RedisModuleForkDoneHandler) (int exitcode, int bysignal, void *user_data);
--- a/src/networking.cpp
+++ b/src/networking.cpp
@ -2002,18 +2002,6 @@ void ProcessPendingAsyncWrites()
        }

        c->fPendingAsyncWrite = FALSE;
-        // Now install the write event handler
-        int ae_flags = AE_WRITABLE|AE_WRITE_THREADSAFE;
-        /* For the fsync=always policy, we want that a given FD is never
-            * served for reading and writing in the same event loop iteration,
-            * so that in the middle of receiving the query, and serving it
-            * to the client, we'll call beforeSleep() that will do the
-            * actual fsync of AOF to disk. AE_BARRIER ensures that. */
-        if (g_pserver->aof_state == AOF_ON &&
-            g_pserver->aof_fsync == AOF_FSYNC_ALWAYS)
-        {
-            ae_flags |= AE_BARRIER;
-        }

        if (!((c->replstate == REPL_STATE_NONE || c->replstate == SLAVE_STATE_FASTSYNC_TX ||
         (c->replstate == SLAVE_STATE_ONLINE && !c->repl_put_online_on_ack))))
--- a/src/object.cpp
+++ b/src/object.cpp
@ -138,7 +138,7 @@ robj *createEmbeddedStringObject(const char *ptr, size_t len) {
 *
 * The current limit of 52 is chosen so that the biggest string object
 * we allocate as EMBSTR will still fit into the 64 byte arena of jemalloc. */
-#define OBJ_ENCODING_EMBSTR_SIZE_LIMIT 52
+size_t OBJ_ENCODING_EMBSTR_SIZE_LIMIT = 52;

 robj *createStringObject(const char *ptr, size_t len) {
    if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT)
--- a/src/pqsort.c
+++ b/src/pqsort.c
@ -41,6 +41,7 @@

 #include <errno.h>
 #include <stdlib.h>
+#include <stdint.h>

 static inline char	*med3 (char *, char *, char *,
    int (*)(const void *, const void *));
@ -62,7 +63,7 @@ static inline void	 swapfunc (char *, char *, size_t, int);
        } while (--i > 0);				\
 }

-#define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \
+#define SWAPINIT(a, es) swaptype = (uintptr_t)a % sizeof(long) || \
 	es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;

 static inline void
--- a/src/rdb.cpp
+++ b/src/rdb.cpp
@ -1218,6 +1218,40 @@ int rdbSaveInfoAuxFields(rio *rdb, int rdbflags, rdbSaveInfo *rsi) {
            == -1) return -1;
        if (rdbSaveAuxFieldStrInt(rdb,"repl-offset",rsi->master_repl_offset)
            == -1) return -1;
+        if (g_pserver->fActiveReplica && listLength(g_pserver->masters) > 0) {
+            sdsstring val = sdsstring(sdsempty());
+            listNode *ln;
+            listIter li;
+            redisMaster* mi;
+            listRewind(g_pserver->masters,&li);
+            while ((ln = listNext(&li)) != NULL) {
+                mi = (redisMaster*)listNodeValue(ln);
+                if (!mi->master) {
+                    // If master client is not available, use info from master struct - better than nothing
+                    serverLog(LL_NOTICE, "saving master %s", mi->master_replid);
+                    if (mi->master_replid[0] == 0) {
+                        // if replid is null, there's no reason to save it
+                        continue;
+                    }
+                    val = val.catfmt("%s:%I:%s:%i;", mi->master_replid,
+                        mi->master_initial_offset,
+                        mi->masterhost,
+                        mi->masterport);
+                }
+                else {
+                    serverLog(LL_NOTICE, "saving master %s", mi->master->replid);
+                    if (mi->master->replid[0] == 0) {
+                        // if replid is null, there's no reason to save it
+                        continue;
+                    }
+                    val = val.catfmt("%s:%I:%s:%i;", mi->master->replid,
+                        mi->master->reploff,
+                        mi->masterhost,
+                        mi->masterport);
+                }
+            }
+            if (rdbSaveAuxFieldStrStr(rdb, "repl-masters",val.get()) == -1) return -1;
+        }
    }
    if (rdbSaveAuxFieldStrInt(rdb,"aof-preamble",aof_preamble) == -1) return -1;
    return 1;
@ -1552,6 +1586,7 @@ struct rdbSaveThreadArgs

 void *rdbSaveThread(void *vargs)
 {
+    aeThreadOnline();
    serverAssert(!g_pserver->rdbThreadVars.fDone);
    rdbSaveThreadArgs *args = reinterpret_cast<rdbSaveThreadArgs*>(vargs);
    serverAssert(serverTL == nullptr);
@ -1577,7 +1612,7 @@ void *rdbSaveThread(void *vargs)
                "%s: %zd MB of memory used by copy-on-write",
                "RDB",cbDiff/(1024*1024));
    }
-
+    aeThreadOffline();
    g_pserver->rdbThreadVars.fDone = true;
    return (retval == C_OK) ? (void*)0 : (void*)1;
 }
@ -1627,10 +1662,10 @@ int launchRdbSaveThread(pthread_t &child, rdbSaveInfo *rsi)
    } else
    {
        rdbSaveThreadArgs *args = (rdbSaveThreadArgs*)zmalloc(sizeof(rdbSaveThreadArgs) + ((cserver.dbnum-1)*sizeof(redisDbPersistentDataSnapshot*)), MALLOC_LOCAL);
-        rdbSaveInfo rsiT = RDB_SAVE_INFO_INIT;
+        rdbSaveInfo rsiT;
        if (rsi == nullptr)
            rsi = &rsiT;
-        memcpy(&args->rsi, rsi, sizeof(rdbSaveInfo));
+        args->rsi = *(new (args) rdbSaveInfo(*rsi));
        memcpy(&args->rsi.repl_id, g_pserver->replid, sizeof(g_pserver->replid));
        args->rsi.master_repl_offset = g_pserver->master_repl_offset;
            
@ -2887,11 +2922,11 @@ public:
        * snapshot taken by the master may not be reflected on the replica. */
        bool fExpiredKey = iAmMaster() && !(this->rdbflags&RDBFLAGS_AOF_PREAMBLE) && job.expiretime != INVALID_EXPIRE && job.expiretime < this->now;
        if (fStaleMvccKey || fExpiredKey) {
-            if (fStaleMvccKey && !fExpiredKey && this->rsi != nullptr && this->rsi->mi != nullptr && this->rsi->mi->staleKeyMap != nullptr && lookupKeyRead(job.db, &keyobj) == nullptr) {
+            if (fStaleMvccKey && !fExpiredKey && this->rsi != nullptr && this->rsi->masters != nullptr && this->rsi->masters->staleKeyMap != nullptr && lookupKeyRead(job.db, &keyobj) == nullptr) {
                // We have a key that we've already deleted and is not back in our database.
                //  We'll need to inform the sending master of the delete if it is also a replica of us
                robj_sharedptr objKeyDup(createStringObject(job.key, sdslen(job.key)));
-                this->rsi->mi->staleKeyMap->operator[](job.db->id).push_back(objKeyDup);                
+                this->rsi->masters->staleKeyMap->operator[](job.db->id).push_back(objKeyDup);                
            }
            sdsfree(job.key);
            job.key = nullptr;
@ -3205,6 +3240,23 @@ int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) {
                    memcpy(rsi->repl_id,ptrFromObj(auxval),CONFIG_RUN_ID_SIZE+1);
                    rsi->repl_id_is_set = 1;
                }
+            } else if (!strcasecmp(szFromObj(auxkey),"repl-masters")) {
+                if (rsi) {
+                    struct redisMaster mi;
+                    char *masters = szFromObj(auxval);
+                    char *entry = strtok(masters, ":");
+                    while (entry != NULL) {
+                        memcpy(mi.master_replid, entry, sizeof(mi.master_replid));
+                        entry = strtok(NULL, ":");
+                        mi.master_initial_offset = atoi(entry);
+                        entry = strtok(NULL, ":");
+                        mi.masterhost = entry;
+                        entry = strtok(NULL, ";");
+                        mi.masterport = atoi(entry);
+                        entry = strtok(NULL, ":");
+                        rsi->addMaster(mi);
+                    }
+                }
            } else if (!strcasecmp(szFromObj(auxkey),"repl-offset")) {
                if (rsi) rsi->repl_offset = strtoll(szFromObj(auxval),NULL,10);
            } else if (!strcasecmp(szFromObj(auxkey),"lua")) {
@ -3654,6 +3706,7 @@ void *rdbSaveToSlavesSocketsThread(void *vargs)
    int retval;
    rio rdb;

+    aeThreadOnline();
    serverAssert(serverTL == nullptr);
    redisServerThreadVars vars;
    serverTL = &vars;
@ -3684,7 +3737,7 @@ void *rdbSaveToSlavesSocketsThread(void *vargs)
        g_pserver->db[idb]->endSnapshotAsync(args->rgpdb[idb]);

    g_pserver->garbageCollector.endEpoch(vars.gcEpoch);
-
+    aeThreadOffline();

    close(args->safe_to_exit_pipe);
    zfree(args);
@ -3719,7 +3772,7 @@ int rdbSaveToSlavesSockets(rdbSaveInfo *rsi) {
    args->rdb_pipe_write = pipefds[1]; /* write end */
    anetNonBlock(NULL, g_pserver->rdb_pipe_read);

-    memcpy(&args->rsi, rsi, sizeof(rdbSaveInfo));
+    args->rsi = *(new (args) rdbSaveInfo(*rsi));
    memcpy(&args->rsi.repl_id, g_pserver->replid, sizeof(g_pserver->replid));
    args->rsi.master_repl_offset = g_pserver->master_repl_offset;

@ -3859,8 +3912,8 @@ void bgsaveCommand(client *c) {
 * is returned, and the RDB saving will not persist any replication related
 * information. */
 rdbSaveInfo *rdbPopulateSaveInfo(rdbSaveInfo *rsi) {
-    rdbSaveInfo rsi_init = RDB_SAVE_INFO_INIT;
-    *rsi = rsi_init;
+    rdbSaveInfo rsi_init;
+    *rsi = std::move(rsi_init);

    memcpy(rsi->repl_id, g_pserver->replid, sizeof(g_pserver->replid));
    rsi->master_repl_offset = g_pserver->master_repl_offset;
--- a/src/readwritelock.h
+++ b/src/readwritelock.h
@ -2,22 +2,24 @@
 #include <condition_variable>

 class readWriteLock {
-    std::mutex m_readLock;
-    std::recursive_mutex m_writeLock;
-    std::condition_variable m_cv;
+    fastlock m_readLock;
+    fastlock m_writeLock;
+    std::condition_variable_any m_cv;
    int m_readCount = 0;
    int m_writeCount = 0;
    bool m_writeWaiting = false;
 public:
+    readWriteLock(const char *name) : m_readLock(name), m_writeLock(name) {}
+
    void acquireRead() { // Registers one reader; blocks while any writer is active or waiting.
-        std::unique_lock<std::mutex> rm(m_readLock);
+        std::unique_lock<fastlock> rm(m_readLock); // m_readLock guards the counters below; it is now a fastlock instead of std::mutex
        while (m_writeCount > 0 || m_writeWaiting) // a pending writer (m_writeWaiting) also holds off new readers
            m_cv.wait(rm); // m_cv is a condition_variable_any, so it can wait on a unique_lock<fastlock>
        m_readCount++;
    }
    
    bool tryAcquireRead() {
-        std::unique_lock<std::mutex> rm(m_readLock, std::defer_lock);
+        std::unique_lock<fastlock> rm(m_readLock, std::defer_lock);
        if (!rm.try_lock())
            return false;
        if (m_writeCount > 0 || m_writeWaiting)
@ -27,7 +29,7 @@ public:
    }

    void acquireWrite(bool exclusive = true) {
-        std::unique_lock<std::mutex> rm(m_readLock);
+        std::unique_lock<fastlock> rm(m_readLock);
        m_writeWaiting = true;
        while (m_readCount > 0)
            m_cv.wait(rm);
@ -43,24 +45,12 @@ public:
    }

    void upgradeWrite(bool exclusive = true) { // Converts the caller's read hold into a write hold; `exclusive` is forwarded to acquireWrite().
-        std::unique_lock<std::mutex> rm(m_readLock);
-        m_writeWaiting = true;
-        while (m_readCount > 1)
-            m_cv.wait(rm);
-        if (exclusive) {
-            /* Another thread might have the write lock while we have the read lock
-               but won't be able to release it until they can acquire the read lock
-               so release the read lock and try again instead of waiting to avoid deadlock */
-            while(!m_writeLock.try_lock())
-                m_cv.wait(rm);
-        }
-        m_writeCount++;
-        m_readCount--;
-        m_writeWaiting = false;
+        releaseRead(); // NOTE(review): the read hold is dropped before the write is requested, so this is release-then-acquire, not an atomic upgrade
+        acquireWrite(exclusive); // waits for the reader count to reach zero; another thread may have acquired the lock in between - confirm callers tolerate that
    }

    bool tryAcquireWrite(bool exclusive = true) {
-        std::unique_lock<std::mutex> rm(m_readLock, std::defer_lock);
+        std::unique_lock<fastlock> rm(m_readLock, std::defer_lock);
        if (!rm.try_lock())
            return false;
        if (m_readCount > 0)
@ -73,14 +63,13 @@ public:
    }

    void releaseRead() { // Drops one read hold and wakes all threads waiting on m_cv.
-        std::unique_lock<std::mutex> rm(m_readLock);
-        serverAssert(m_readCount > 0);
+        std::unique_lock<fastlock> rm(m_readLock); // NOTE(review): the m_readCount > 0 assertion is removed by this change, so an unmatched call is no longer caught here
        m_readCount--; // m_readCount is a plain int, so an unbalanced release would take it below zero
        m_cv.notify_all();
    }

    void releaseWrite(bool exclusive = true) {
-        std::unique_lock<std::mutex> rm(m_readLock);
+        std::unique_lock<fastlock> rm(m_readLock);
        serverAssert(m_writeCount > 0);
        if (exclusive)
            m_writeLock.unlock();
@ -89,14 +78,8 @@ public:
    }

    void downgradeWrite(bool exclusive = true) { // Converts the caller's write hold back into a read hold; `exclusive` is forwarded to releaseWrite().
-        std::unique_lock<std::mutex> rm(m_readLock);
-        serverAssert(m_writeCount > 0);
-        if (exclusive)
-            m_writeLock.unlock();
-        m_writeCount--;
-        while (m_writeCount > 0 || m_writeWaiting)
-            m_cv.wait(rm);
-        m_readCount++;
+        releaseWrite(exclusive); // the write hold is fully released first (the m_writeCount > 0 assertion now lives in releaseWrite)
+        acquireRead(); // then a fresh read hold is taken; this can wait if another writer is active or pending
    }

    bool hasReader() {
--- a/src/redis-benchmark.cpp
+++ b/src/redis-benchmark.cpp
@ -1017,7 +1017,11 @@ static void benchmark(const char *title, const char *cmd, int len) {
    createMissingClients(c);

    config.start = mstime();
-    if (!config.num_threads) aeMain(config.el);
+    if (!config.num_threads) {
+        aeThreadOnline();
+        aeMain(config.el);
+        aeThreadOffline();
+    }
    else startBenchmarkThreads();
    config.totlatency = mstime()-config.start;

@ -1057,7 +1061,9 @@ static void freeBenchmarkThreads() {

 static void *execBenchmarkThread(void *ptr) { // Thread entry point: runs the event loop of the benchmarkThread passed in `ptr`; always returns NULL.
     benchmarkThread *thread = (benchmarkThread *) ptr;
+    aeThreadOnline(); // take a read hold on the global fork lock for the lifetime of the event loop
     aeMain(thread->el);
+    aeThreadOffline(); // balance the aeThreadOnline() above once the loop exits
     return NULL;
 }

@ -1696,7 +1702,7 @@ int main(int argc, const char **argv) {
    int len;

    client c;
-
+    aeThreadOnline();
    storage_init(NULL, 0);

    srandom(time(NULL) ^ getpid());
@ -1749,6 +1755,7 @@ int main(int argc, const char **argv) {
        cliSecureInit();
    }
 #endif
+    aeThreadOffline();

    if (config.cluster_mode) {
        // We only include the slot placeholder {tag} if cluster mode is enabled
--- a/src/replication.cpp
+++ b/src/replication.cpp
@ -1196,7 +1196,6 @@ int rdbSaveSnapshotForReplication(struct rdbSaveInfo *rsi) {
        size_t cbData = 0;
        size_t cbLastUpdate = 0;
        auto &replBuf = *spreplBuf;
-
        // Databases
        replBuf.addArrayLen(cserver.dbnum);
        for (int idb = 0; idb < cserver.dbnum; ++idb) {
@ -2250,24 +2249,6 @@ void changeReplicationId(void) {
    saveMasterStatusToStorage(false);
 }

-
-int hexchToInt(char ch)
-{
-    if (ch >= '0' && ch <= '9')
-        return ch - '0';
-    if (ch >= 'a' && ch <= 'f')
-        return (ch - 'a') + 10;
-    return (ch - 'A') + 10;
-}
-void mergeReplicationId(const char *id)
-{
-    for (int i = 0; i < CONFIG_RUN_ID_SIZE; ++i)
-    {
-        const char *charset = "0123456789abcdef";
-        g_pserver->replid[i] = charset[hexchToInt(g_pserver->replid[i]) ^ hexchToInt(id[i])];
-    }
-}
-
 /* Clear (invalidate) the secondary replication ID. This happens, for
 * example, after a full resynchronization, when we start a new replication
 * history. */
@ -2953,7 +2934,7 @@ bool readSyncBulkPayloadRdb(connection *conn, redisMaster *mi, rdbSaveInfo &rsi,
                mi->staleKeyMap->clear();
            else
                mi->staleKeyMap = new (MALLOC_LOCAL) std::map<int, std::vector<robj_sharedptr>>();
-            rsi.mi = mi;
+            rsi.addMaster(*mi);
        }
        if (rdbLoadFile(rdb_filename,&rsi,RDBFLAGS_REPLICATION) != C_OK) {
            serverLog(LL_WARNING,
@ -2993,7 +2974,7 @@ error:
 }

 void readSyncBulkPayload(connection *conn) {
-    rdbSaveInfo rsi = RDB_SAVE_INFO_INIT;
+    rdbSaveInfo rsi;
    redisMaster *mi = (redisMaster*)connGetPrivateData(conn);
    static int usemark = 0;
    if (mi == nullptr) {
@ -3009,9 +2990,6 @@ void readSyncBulkPayload(connection *conn) {
            return;
    }

-    // Should we update our database, or create from scratch?
-    int fUpdate = g_pserver->fActiveReplica || g_pserver->enable_multimaster;
-
    /* Final setup of the connected slave <- master link */
    replicationCreateMasterClient(mi,mi->repl_transfer_s,rsi.repl_stream_db);
    if (mi->isRocksdbSnapshotRepl) {
@ -3038,11 +3016,7 @@ void readSyncBulkPayload(connection *conn) {
    /* After a full resynchronization we use the replication ID and
     * offset of the master. The secondary ID / offset are cleared since
     * we are starting a new history. */
-    if (fUpdate)
-    {
-        mergeReplicationId(mi->master->replid);
-    }
-    else
+    if (!g_pserver->fActiveReplica)
    {
        /* After a full resynchroniziation we use the replication ID and
        * offset of the master. The secondary ID / offset are cleared since
@ -3237,7 +3211,7 @@ int slaveTryPartialResynchronization(redisMaster *mi, connection *conn, int read
         * client structure representing the master into g_pserver->master. */
        mi->master_initial_offset = -1;

-        if (mi->cached_master && !g_pserver->fActiveReplica) {
+        if (mi->cached_master) {
            psync_replid = mi->cached_master->replid;
            snprintf(psync_offset,sizeof(psync_offset),"%lld", mi->cached_master->reploff+1);
            serverLog(LL_NOTICE,"Trying a partial resynchronization (request %s:%s).", psync_replid, psync_offset);
@ -3336,14 +3310,15 @@ int slaveTryPartialResynchronization(redisMaster *mi, connection *conn, int read
                    sizeof(g_pserver->replid2));
                g_pserver->second_replid_offset = g_pserver->master_repl_offset+1;

-                /* Update the cached master ID and our own primary ID to the
-                 * new one. */
-                memcpy(g_pserver->replid,sznew,sizeof(g_pserver->replid));
-                memcpy(mi->cached_master->replid,sznew,sizeof(g_pserver->replid));
+                if (!g_pserver->fActiveReplica) {
+                    /* Update the cached master ID and our own primary ID to the
+                     * new one. */
+                    memcpy(g_pserver->replid,sznew,sizeof(g_pserver->replid));
+                    memcpy(mi->cached_master->replid,sznew,sizeof(g_pserver->replid));

-                /* Disconnect all the sub-slaves: they need to be notified. */
-                if (!g_pserver->fActiveReplica)
+                    /* Disconnect all the replicas: they need to be notified. */
                    disconnectSlaves();
+                }
            }
        }

@ -3724,18 +3699,6 @@ retry_connect:
        disconnectSlavesExcept(mi->master_uuid); /* Force our slaves to resync with us as well. */
        freeReplicationBacklog(); /* Don't allow our chained slaves to PSYNC. */
    }
-    else
-    {
-        if (listLength(g_pserver->slaves))
-        {
-            changeReplicationId();
-            clearReplicationId2();
-        }
-        else
-        {
-            freeReplicationBacklog(); /* Don't allow our chained slaves to PSYNC. */
-        }
-    }

    /* Fall back to SYNC if needed. Otherwise psync_result == PSYNC_FULLRESYNC
     * and the g_pserver->master_replid and master_initial_offset are
@ -4404,6 +4367,26 @@ void replicationCacheMasterUsingMyself(redisMaster *mi) {
    mi->master = NULL;
 }

+/* This function is called when reloading master info from an RDB in Active Replica mode.
+ * It creates a cached master client using the info contained in the redisMaster struct.
+ *
+ * Assumes that the passed struct contains valid master info. */
+void replicationCacheMasterUsingMaster(redisMaster *mi) {
+    if (mi->cached_master) { // discard any previously cached master client before building a new one
+        freeClient(mi->cached_master);
+    }
+
+    replicationCreateMasterClient(mi, NULL, -1); // presumably populates mi->master (it is dereferenced on the next line); NULL/-1 = no connection, no selected db - confirm against its definition
+    std::lock_guard<decltype(mi->master->lock)> lock(mi->master->lock); // held until return; the guard refers to the client's lock object, not to the mi->master pointer cleared below
+
+    memcpy(mi->master->replid, mi->master_replid, sizeof(mi->master_replid)); // seed the client's replication id from the value stored on the redisMaster struct
+    mi->master->reploff = mi->master_initial_offset; // and its replication offset from the stored initial offset
+
+    unlinkClient(mi->master);
+    mi->cached_master = mi->master; // the freshly built client becomes the cached master ...
+    mi->master = NULL; // ... and there is no live master client afterwards
+}
+
 /* Free a cached master, called when there are no longer the conditions for
 * a partial resync on reconnection. */
 void replicationDiscardCachedMaster(redisMaster *mi) {
--- a/src/sds.h
+++ b/src/sds.h
@ -429,6 +429,21 @@ public:
        return *this;
    }

+    sdsstring &operator=(sdsstring &&other) // Move assignment implemented as a swap of the two m_str buffers.
+    {
+        sds tmp = m_str;
+        m_str = other.m_str;
+        other.m_str = tmp; // our previous buffer ends up owned by `other`; presumably freed when `other` is destroyed - confirm against the destructor
+        return *this;
+    }
+
+    template<typename... Args>
+    sdsstring catfmt(const char *fmt, Args... args) // Appends sdscatfmt-formatted text to this string. NOTE(review): the return type is sdsstring by value, not a reference.
+    {
+        m_str = sdscatfmt(m_str, fmt, args...); // sdscatfmt returns the buffer to use from now on, so m_str is reassigned
+        return *this; // produces a copy of this object because of the by-value return type
+    }
+
    sds release() {
        sds sdsT = m_str;
        m_str = nullptr;
--- a/src/server.cpp
+++ b/src/server.cpp
@ -62,7 +62,6 @@
 #include <sys/socket.h>
 #include <algorithm>
 #include <uuid/uuid.h>
-#include <mutex>
 #include <condition_variable>
 #include "aelocker.h"
 #include "motd.h"
@ -92,14 +91,12 @@ double R_Zero, R_PosInf, R_NegInf, R_Nan;
 /* Global vars */
 namespace GlobalHidden {
 struct redisServer server; /* Server global state */
-readWriteLock forkLock;
 }
 redisServer *g_pserver = &GlobalHidden::server;
-readWriteLock *g_forkLock = &GlobalHidden::forkLock;
 struct redisServerConst cserver;
 thread_local struct redisServerThreadVars *serverTL = NULL;   // thread local server vars
-std::mutex time_thread_mutex;
-std::condition_variable time_thread_cv;
+fastlock time_thread_lock("Time thread lock");
+std::condition_variable_any time_thread_cv;
 int sleeping_threads = 0;
 void wakeTimeThread();

@ -2697,7 +2694,7 @@ extern "C" void asyncFreeDictTable(dictEntry **de)

 void blockingOperationStarts() { // Increments the blocking-operation nesting counter; the outermost call also records the current mstime.
    if(!g_pserver->blocking_op_nesting++){ // true only when the counter was 0 before this call
-        g_pserver->blocked_last_cron = g_pserver->mstime;
+        __atomic_load(&g_pserver->mstime, &g_pserver->blocked_last_cron, __ATOMIC_ACQUIRE); // atomically reads mstime (acquire ordering) and stores the value into blocked_last_cron
    }
 }

@ -2959,9 +2956,10 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
    if (!fSentReplies)
        handleClientsWithPendingWrites(iel, aof_state);

+    aeThreadOffline();
    // Scope lock_guard
    {
-        std::lock_guard<std::mutex> lock(time_thread_mutex);
+        std::unique_lock<fastlock> lock(time_thread_lock);
        sleeping_threads++;
        serverAssert(sleeping_threads <= cserver.cthreads);
    }
@ -2975,7 +2973,7 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
            g_pserver->garbageCollector.endEpoch(epoch);
        }, true /*fHiPri*/);
    }
-
+    
    /* Determine whether the modules are enabled before sleeping, and use that result
       both here, and after wakeup to avoid double acquire or release of the GIL */
    serverTL->modulesEnabledThisAeLoop = !!moduleCount();
@ -2995,8 +2993,9 @@ void afterSleep(struct aeEventLoop *eventLoop) {
       Don't check here that modules are enabled, rather use the result from beforeSleep
       Otherwise you may double acquire the GIL and cause deadlocks in the module */
    if (!ProcessingEventsWhileBlocked) {
-        wakeTimeThread();
        if (serverTL->modulesEnabledThisAeLoop) moduleAcquireGIL(TRUE /*fServerThread*/);
+        aeThreadOnline();
+        wakeTimeThread();

        serverAssert(serverTL->gcEpoch.isReset());
        serverTL->gcEpoch = g_pserver->garbageCollector.startEpoch();
@ -6852,17 +6851,18 @@ int redisFork(int purpose) {
        openChildInfoPipe();
    }
    long long startWriteLock = ustime();
-    g_forkLock->acquireWrite();
+    aeAcquireForkLock();
    latencyAddSampleIfNeeded("fork-lock",(ustime()-startWriteLock)/1000);
    if ((childpid = fork()) == 0) {
        /* Child */
+        aeReleaseForkLock();
        g_pserver->in_fork_child = purpose;
        setOOMScoreAdj(CONFIG_OOM_BGCHILD);
        setupChildSignalHandlers();
        closeChildUnusedResourceAfterFork();
    } else {
        /* Parent */
-        g_forkLock->releaseWrite();
+        aeReleaseForkLock();
        g_pserver->stat_total_forks++;
        g_pserver->stat_fork_time = ustime()-start;
        g_pserver->stat_fork_rate = (double) zmalloc_used_memory() * 1000000 / g_pserver->stat_fork_time / (1024*1024*1024); /* GB per second. */
@ -6940,7 +6940,7 @@ void loadDataFromDisk(void) {
        if (loadAppendOnlyFile(g_pserver->aof_filename) == C_OK)
            serverLog(LL_NOTICE,"DB loaded from append only file: %.3f seconds",(float)(ustime()-start)/1000000);
    } else if (g_pserver->rdb_filename != NULL || g_pserver->rdb_s3bucketpath != NULL) {
-        rdbSaveInfo rsi = RDB_SAVE_INFO_INIT;
+        rdbSaveInfo rsi;
        rsi.fForceSetKey = false;
        errno = 0; /* Prevent a stale value from affecting error checking */
        if (rdbLoad(&rsi,RDBFLAGS_NONE) == C_OK) {
@ -6969,11 +6969,23 @@ void loadDataFromDisk(void) {
                while ((ln = listNext(&li)))
                {
                    redisMaster *mi = (redisMaster*)listNodeValue(ln);
-                    /* If we are a replica, create a cached master from this
-                    * information, in order to allow partial resynchronizations
-                    * with masters. */
-                    replicationCacheMasterUsingMyself(mi);
-                    selectDb(mi->cached_master,rsi.repl_stream_db);
+                    if (g_pserver->fActiveReplica) {
+                        for (size_t i = 0; i < rsi.numMasters(); i++) {
+                            if (!strcmp(mi->masterhost, rsi.masters[i].masterhost) && mi->masterport == rsi.masters[i].masterport) {
+                                memcpy(mi->master_replid, rsi.masters[i].master_replid, sizeof(mi->master_replid));
+                                mi->master_initial_offset = rsi.masters[i].master_initial_offset;
+                                replicationCacheMasterUsingMaster(mi);
+                                serverLog(LL_NOTICE, "Cached master recovered from RDB for %s:%d", mi->masterhost, mi->masterport);
+                            }
+                        }
+                    }
+                    else {
+                        /* If we are a replica, create a cached master from this
+                        * information, in order to allow partial resynchronizations
+                        * with masters. */
+                        replicationCacheMasterUsingMyself(mi);
+                        selectDb(mi->cached_master,rsi.repl_stream_db);
+                    }
                }
            }
        } else if (errno != ENOENT) {
@ -7207,7 +7219,9 @@ void OnTerminate()

 void wakeTimeThread() {
    updateCachedTime();
-    std::lock_guard<std::mutex> lock(time_thread_mutex);
+    aeThreadOffline();
+    std::unique_lock<fastlock> lock(time_thread_lock);
+    aeThreadOnline();
    if (sleeping_threads >= cserver.cthreads)
        time_thread_cv.notify_one();
    sleeping_threads--;
@ -7219,21 +7233,23 @@ void *timeThreadMain(void*) {
    delay.tv_sec = 0;
    delay.tv_nsec = 100;
    int cycle_count = 0;
-    g_forkLock->acquireRead();
+    aeThreadOnline();
    while (true) {
        {
-            std::unique_lock<std::mutex> lock(time_thread_mutex);
+            aeThreadOffline();
+            std::unique_lock<fastlock> lock(time_thread_lock);
+            aeThreadOnline();
            if (sleeping_threads >= cserver.cthreads) {
-                g_forkLock->releaseRead();
+                aeThreadOffline();
                time_thread_cv.wait(lock);
-                g_forkLock->acquireRead();
+                aeThreadOnline();
                cycle_count = 0;
            }
        }
        updateCachedTime();
        if (cycle_count == MAX_CYCLES_TO_HOLD_FORK_LOCK) {
-            g_forkLock->releaseRead();
-            g_forkLock->acquireRead();
+            aeThreadOffline();
+            aeThreadOnline();
            cycle_count = 0;
        }
 #if defined(__APPLE__)
@ -7243,7 +7259,7 @@ void *timeThreadMain(void*) {
 #endif
        cycle_count++;
    }
-    g_forkLock->releaseRead();
+    aeThreadOffline();
 }

 void *workerThreadMain(void *parg)
@ -7255,12 +7271,15 @@ void *workerThreadMain(void *parg)

    if (iel != IDX_EVENT_LOOP_MAIN)
    {
+        aeThreadOnline();
        aeAcquireLock();
        initNetworkingThread(iel, cserver.cthreads > 1);
        aeReleaseLock();
+        aeThreadOffline();
    }

    moduleAcquireGIL(true); // Normally afterSleep acquires this, but that won't be called on the first run
+    aeThreadOnline();
    aeEventLoop *el = g_pserver->rgthreadvar[iel].el;
    try
    {
@ -7269,6 +7288,7 @@ void *workerThreadMain(void *parg)
    catch (ShutdownException)
    {
    }
+    aeThreadOffline();
    moduleReleaseGIL(true);
    serverAssert(!GlobalLocksAcquired());
    aeDeleteEventLoop(el);
@ -7417,8 +7437,8 @@ int main(int argc, char **argv) {
    g_pserver->sentinel_mode = checkForSentinelMode(argc,argv);
    initServerConfig();
    serverTL = &g_pserver->rgthreadvar[IDX_EVENT_LOOP_MAIN];
+    aeThreadOnline();
    aeAcquireLock();    // We own the lock on boot
-
    ACLInit(); /* The ACL subsystem must be initialized ASAP because the
                  basic networking code and client creation depends on it. */
    moduleInitModulesSystem();
@ -7588,7 +7608,7 @@ int main(int argc, char **argv) {
            __AFL_INIT();
 #endif
            rio rdb;
-            rdbSaveInfo rsi = RDB_SAVE_INFO_INIT;
+            rdbSaveInfo rsi;
            startLoadingFile(stdin, (char*)"stdin", 0);
            rioInitWithFile(&rdb,stdin);
            rdbLoadRio(&rdb,0,&rsi);
@ -7653,6 +7673,7 @@ int main(int argc, char **argv) {

    redisSetCpuAffinity(g_pserver->server_cpulist);
    aeReleaseLock();    //Finally we can dump the lock
+    aeThreadOffline();
    moduleReleaseGIL(true);
    
    setOOMScoreAdj(-1);
--- a/src/server.h
+++ b/src/server.h
@ -1850,6 +1850,43 @@ struct redisMemOverhead {
    } *db;
 };

+
+struct redisMaster {  /* Per-master replication state. NOTE: rdbSaveInfo copies this struct bytewise (memcpy), so such copies share the pointer members below. */
+    char *masteruser;               /* AUTH with this user and masterauth with master */
+    char *masterauth;               /* AUTH with this password with master */
+    char *masterhost;               /* Hostname of master */
+    int masterport;                 /* Port of master */
+    client *cached_master;          /* Cached master to be reused for PSYNC. */
+    client *master;                 /* Client for this master; presumably the live link, TODO confirm */
+    /* The following two fields are where we store the master PSYNC replid/offset
+     * while the PSYNC is in progress. At the end we'll copy the fields into
+     * the server->master client structure. */
+    char master_replid[CONFIG_RUN_ID_SIZE+1];  /* Master PSYNC runid. */
+    long long master_initial_offset;           /* Master PSYNC offset. */
+
+    bool isActive = false;
+    bool isRocksdbSnapshotRepl = false;
+    int repl_state;          /* Replication status if the instance is a replica */
+    off_t repl_transfer_size; /* Size of RDB to read from master during sync. */
+    off_t repl_transfer_read; /* Amount of RDB read from master during sync. */
+    off_t repl_transfer_last_fsync_off; /* Offset when we fsync-ed last time. */
+    connection *repl_transfer_s;     /* Replica -> Master SYNC socket */
+    int repl_transfer_fd;    /* Replica -> Master SYNC temp file descriptor */
+    char *repl_transfer_tmpfile; /* Replica -> Master SYNC temp file name */
+    time_t repl_transfer_lastio; /* Unix time of the latest read, for timeout */
+    time_t repl_down_since; /* Unix time at which link with master went down */
+
+    class SnapshotPayloadParseState *parseState;
+    sds bulkreadBuffer = nullptr;
+
+    unsigned char master_uuid[UUID_BINARY_LEN];  /* Our master's UUID; used while syncing with the master. */
+                                                /* Once connected to the master, use the UUID in g_pserver->master instead. */
+    uint64_t mvccLastSync;
+    /* During a handshake the server may have stale keys; we track them here to share once a reciprocal connection is made. */
+    std::map<int, std::vector<robj_sharedptr>> *staleKeyMap;
+    int ielReplTransfer = -1;  /* presumably the event-loop index handling the transfer, -1 when none; TODO confirm */
+};
+
 /* This structure can be optionally passed to RDB save/load functions in
 * order to implement additional functionalities, by storing and loading
 * metadata to the RDB file.
@ -1858,7 +1895,66 @@ struct redisMemOverhead {
 * replication in order to make sure that chained slaves (slaves of slaves)
 * select the correct DB and are able to accept the stream coming from the
 * top-level master. */
-typedef struct rdbSaveInfo {
+class rdbSaveInfo {
+public:
+    /* Default state: no replication metadata and an empty masters list.
+     * Every data member is given a value here, including master_repl_offset,
+     * which the former aggregate initializer set to 0 and which would
+     * otherwise be read uninitialized. */
+    rdbSaveInfo() {
+        repl_stream_db = -1;
+        repl_id_is_set = 0;
+        memcpy(repl_id, "0000000000000000000000000000000000000000", sizeof(repl_id));
+        repl_offset = -1;
+        fForceSetKey = TRUE;
+        master_repl_offset = 0;
+        mvccMinThreshold = 0;
+        masters = nullptr;
+        masterCount = 0;
+    }
+    /* Copy constructor: duplicates the scalar metadata and takes a private,
+     * bytewise copy of the other object's masters array (the redisMaster
+     * entries themselves are shallow copies; their pointer members are shared). */
+    rdbSaveInfo(const rdbSaveInfo &other) {
+        repl_stream_db = other.repl_stream_db;
+        repl_id_is_set = other.repl_id_is_set;
+        memcpy(repl_id, other.repl_id, sizeof(repl_id));
+        repl_offset = other.repl_offset;
+        fForceSetKey = other.fForceSetKey;
+        master_repl_offset = other.master_repl_offset;
+        mvccMinThreshold = other.mvccMinThreshold;
+        masters = nullptr;
+        masterCount = 0;
+        /* Only allocate when there is something to copy: this avoids malloc(0)
+         * and a memcpy from a null source when the other list is empty. */
+        if (other.masterCount > 0) {
+            masters = (struct redisMaster*)malloc(sizeof(struct redisMaster) * other.masterCount);
+            if (masters == nullptr) abort();  /* out of memory: fail loudly instead of copying into a null pointer */
+            memcpy(masters, other.masters, sizeof(struct redisMaster) * other.masterCount);
+            masterCount = other.masterCount;
+        }
+    }
+    /* Move constructor: start from the default (empty) state, then exchange
+     * contents with `other`, which is left holding that default state.
+     * Marked noexcept because it only swaps scalars, an array and a pointer. */
+    rdbSaveInfo(rdbSaveInfo &&other) noexcept : rdbSaveInfo() {
+        swap(*this, other);
+    }
+    rdbSaveInfo &operator=(rdbSaveInfo other) {  /* `other` is taken by value, so the copy or move has already happened at the call site */
+        swap(*this, other);  /* take over the new contents; the previous ones now live in `other` and are released when it is destroyed */
+        return *this;
+    }
+    ~rdbSaveInfo() {
+        free(masters);  /* masters is obtained from malloc/realloc in this class, so it is released with free; a null pointer is fine here */
+    }
+    /* Exchange the complete state of two rdbSaveInfo objects. Used by the move
+     * constructor and by operator=, so every data member must be listed here;
+     * a member left out would be silently dropped on assignment or move. */
+    friend void swap(rdbSaveInfo &first, rdbSaveInfo &second) {
+        std::swap(first.repl_stream_db, second.repl_stream_db);
+        std::swap(first.repl_id_is_set, second.repl_id_is_set);
+        std::swap(first.repl_id, second.repl_id);
+        std::swap(first.repl_offset, second.repl_offset);
+        std::swap(first.fForceSetKey, second.fForceSetKey);
+        std::swap(first.master_repl_offset, second.master_repl_offset);
+        std::swap(first.mvccMinThreshold, second.mvccMinThreshold);
+        std::swap(first.masters, second.masters);
+        std::swap(first.masterCount, second.masterCount);
+    }
+
+    /* Append a bytewise copy of `mi` to the masters array, growing the array
+     * by one element. Pointer members of `mi` are shared with the stored copy,
+     * not duplicated. */
+    void addMaster(const struct redisMaster &mi) {
+        /* realloc(nullptr, n) behaves like malloc(n), so the first insertion
+         * needs no special case. The old pointer is kept until the call
+         * succeeds, so a failed allocation cannot leak it or leave masters
+         * null while masterCount is non-zero. */
+        struct redisMaster *grown = (struct redisMaster*)realloc(masters, sizeof(struct redisMaster) * (masterCount + 1));
+        if (grown == nullptr) abort();  /* out of memory: fail loudly instead of writing through a null pointer */
+        masters = grown;
+        memcpy(masters + masterCount, &mi, sizeof(struct redisMaster));
+        masterCount++;
+    }
+
+    /* Number of entries currently stored in the masters array.
+     * const so it can also be called through a const rdbSaveInfo reference. */
+    size_t numMasters() const {
+        return masterCount;
+    }
+
    /* Used saving and loading. */
    int repl_stream_db;  /* DB to select in g_pserver->master client. */

@ -1872,10 +1968,11 @@ typedef struct rdbSaveInfo {
    long long master_repl_offset;

    uint64_t mvccMinThreshold;
-    struct redisMaster *mi;
-} rdbSaveInfo;
+    struct redisMaster *masters;

-#define RDB_SAVE_INFO_INIT {-1,0,"0000000000000000000000000000000000000000",-1, TRUE, 0, 0, nullptr}
+private:
+    size_t masterCount;
+};

 struct malloc_stats {
    size_t zmalloc_used;
@ -2081,42 +2178,6 @@ private:
    int rdb_key_save_delay = -1; // thread local cache
 };

-struct redisMaster {
-    char *masteruser;               /* AUTH with this user and masterauth with master */
-    char *masterauth;               /* AUTH with this password with master */
-    char *masterhost;               /* Hostname of master */
-    int masterport;                 /* Port of master */
-    client *cached_master;          /* Cached master to be reused for PSYNC. */
-    client *master;
-    /* The following two fields is where we store master PSYNC replid/offset
-     * while the PSYNC is in progress. At the end we'll copy the fields into
-     * the server->master client structure. */
-    char master_replid[CONFIG_RUN_ID_SIZE+1];  /* Master PSYNC runid. */
-    long long master_initial_offset;           /* Master PSYNC offset. */
-
-    bool isActive = false;
-    bool isRocksdbSnapshotRepl = false;
-    int repl_state;          /* Replication status if the instance is a replica */
-    off_t repl_transfer_size; /* Size of RDB to read from master during sync. */
-    off_t repl_transfer_read; /* Amount of RDB read from master during sync. */
-    off_t repl_transfer_last_fsync_off; /* Offset when we fsync-ed last time. */
-    connection *repl_transfer_s;     /* Slave -> Master SYNC socket */
-    int repl_transfer_fd;    /* Slave -> Master SYNC temp file descriptor */
-    char *repl_transfer_tmpfile; /* Slave-> master SYNC temp file name */
-    time_t repl_transfer_lastio; /* Unix time of the latest read, for timeout */
-    time_t repl_down_since; /* Unix time at which link with master went down */
-
-    class SnapshotPayloadParseState *parseState;
-    sds bulkreadBuffer = nullptr;
-
-    unsigned char master_uuid[UUID_BINARY_LEN];  /* Used during sync with master, this is our master's UUID */
-                                                /* After we've connected with our master use the UUID in g_pserver->master */
-    uint64_t mvccLastSync;
-    /* During a handshake the server may have stale keys, we track these here to share once a reciprocal connection is made */
-    std::map<int, std::vector<robj_sharedptr>> *staleKeyMap;
-    int ielReplTransfer = -1;
-};
-
 // Const vars are not changed after worker threads are launched
 struct redisServerConst {
    pid_t pid;                  /* Main process pid. */
@ -2764,7 +2825,6 @@ typedef struct {
 *----------------------------------------------------------------------------*/

 //extern struct redisServer server;
-extern readWriteLock *g_forkLock;
 extern struct redisServerConst cserver;
 extern thread_local struct redisServerThreadVars *serverTL;   // thread local server vars
 extern struct sharedObjectsStruct shared;
@ -3100,9 +3160,9 @@ long long getPsyncInitialOffset(void);
 int replicationSetupSlaveForFullResync(client *replica, long long offset);
 void changeReplicationId(void);
 void clearReplicationId2(void);
-void mergeReplicationId(const char *);
 void chopReplicationBacklog(void);
 void replicationCacheMasterUsingMyself(struct redisMaster *mi);
+void replicationCacheMasterUsingMaster(struct redisMaster *mi);
 void feedReplicationBacklog(const void *ptr, size_t len);
 void updateMasterAuth();
 void showLatestBacklog();
--- a/src/storage.cpp
+++ b/src/storage.cpp
@ -118,6 +118,7 @@ void pool_free(struct alloc_pool *ppool, void *pv)
    return;
 }

+extern size_t OBJ_ENCODING_EMBSTR_SIZE_LIMIT;
 #define EMBSTR_ROBJ_SIZE (sizeof(robj)+sizeof(struct sdshdr8)+OBJ_ENCODING_EMBSTR_SIZE_LIMIT+1)
 struct alloc_pool poolobj;
 struct alloc_pool poolembstrobj;
--- a/src/t_zset.cpp
+++ b/src/t_zset.cpp
@ -1191,24 +1191,26 @@ void zsetConvert(robj *zobj, int encoding) {
        zs->dict = dictCreate(&zsetDictType,NULL);
        zs->zsl = zslCreate();

-        eptr = ziplistIndex(zl,0);
-        serverAssertWithInfo(NULL,zobj,eptr != NULL);
-        sptr = ziplistNext(zl,eptr);
-        serverAssertWithInfo(NULL,zobj,sptr != NULL);
+        if (ziplistLen(zl) > 0) {
+            eptr = ziplistIndex(zl,0);
+            serverAssertWithInfo(NULL,zobj,eptr != NULL);
+            sptr = ziplistNext(zl,eptr);
+            serverAssertWithInfo(NULL,zobj,sptr != NULL);

-        while (eptr != NULL) {
-            score = zzlGetScore(sptr);
-            serverAssertWithInfo(NULL,zobj,ziplistGet(eptr,&vstr,&vlen,&vlong));
-            if (vstr == NULL)
-                ele = sdsfromlonglong(vlong);
-            else
-                ele = sdsnewlen((char*)vstr,vlen);
+            while (eptr != NULL) {
+                score = zzlGetScore(sptr);
+                serverAssertWithInfo(NULL,zobj,ziplistGet(eptr,&vstr,&vlen,&vlong));
+                if (vstr == NULL)
+                    ele = sdsfromlonglong(vlong);
+                else
+                    ele = sdsnewlen((char*)vstr,vlen);

-            node = zslInsert(zs->zsl,score,ele);
-            serverAssert(dictAdd(zs->dict,ele,&node->score) == DICT_OK);
-            zzlNext(zl,&eptr,&sptr);
+                node = zslInsert(zs->zsl,score,ele);
+                serverAssert(dictAdd(zs->dict,ele,&node->score) == DICT_OK);
+                zzlNext(zl,&eptr,&sptr);
+            }
        }
-
+        
        zfree(zobj->m_ptr);
        zobj->m_ptr = zs;
        zobj->encoding = OBJ_ENCODING_SKIPLIST;
--- a/src/zmalloc.cpp
+++ b/src/zmalloc.cpp
@ -243,7 +243,7 @@ void *ztryrealloc_usable(void *ptr, size_t size, size_t *usable) {
 #else
    realptr = (char*)ptr-PREFIX_SIZE;
    oldsize = *((size_t*)realptr);
-    newptr = realloc(realptr,size+PREFIX_SIZE);
+    newptr = realloc(realptr,size+PREFIX_SIZE, MALLOC_LOCAL);
    if (newptr == NULL) {
        if (usable) *usable = 0;
        return NULL;
--- a/tests/assets/minimal.conf
+++ b/tests/assets/minimal.conf
@ -1,4 +1,5 @@
 # Minimal configuration for testing.
+bind 127.0.0.1
 always-show-logo yes
 daemonize no
 pidfile /var/run/keydb.pid
--- a/tests/cluster/run.tcl
+++ b/tests/cluster/run.tcl
@ -13,8 +13,9 @@ set ::tlsdir "../../tls"
 proc main {} {
    parse_options
    spawn_instance redis $::redis_base_port $::instances_count {
-        "cluster-enabled yes"
-        "appendonly yes"
+	"bind 127.0.0.1"
+	"cluster-enabled yes"
+	"appendonly yes"
 	"testmode yes"
 	"server-threads 3"
    }
--- a/tests/integration/multimaster-psync.tcl
+++ b/tests/integration/multimaster-psync.tcl
@ -0,0 +1,43 @@
+start_server {tags {"repl"} overrides {active-replica {yes} multi-master {yes}}} {
+    start_server {overrides {active-replica {yes} multi-master {yes}}} {
+        test {2 node cluster heals after multimaster psync} {
+            set master [srv -1 client]
+            set master_host [srv -1 host]
+            set master_port [srv -1 port]
+            set replica [srv 0 client]
+            set replica_host [srv 0 host]
+            set replica_port [srv 0 port]
+
+            # Make each node a replica of the other, then wait one second for the links to settle
+            $replica replicaof $master_host $master_port
+            $master replicaof $replica_host $replica_port
+            after 1000
+
+            # Write key x to db 7 on the master
+            $master select 7
+            $master set x 1
+
+            # Restart the replica to break the connection and force a psync; its client handle must be re-fetched afterwards
+            restart_server 0 true false
+            set replica [srv 0 client]
+
+            # Write key y on the master (its client still has db 7 selected from above)
+            $master set y 2
+
+            # NOTE(review): per the original author, enabling the delay below makes the test pass, which hints at a timing dependency - confirm
+            # after 1000
+
+            # Point the restarted replica back at the master
+            $replica replicaof $master_host $master_port
+
+            # After a one second grace period, check that x and y match on both nodes in each of the 16 databases
+            after 1000
+            for {set j 0} {$j < 16} {incr j} {
+                $master select $j
+                $replica select $j
+                assert_equal [$replica get x] [$master get x] 
+                assert_equal [$replica get y] [$master get y]
+            }
+        }
+    }
+}
--- a/tests/modules/Makefile
+++ b/tests/modules/Makefile
@ -32,6 +32,7 @@ TEST_MODULES = \
    auth.so \
    keyspace_events.so \
    blockedclient.so \
+    timers.so \
    getkeys.so \
    test_lazyfree.so \
    timer.so \