Merge pull request #386 from EQ-Alpha/fix_rdb_hang

add readwrite lock for forking

Former-commit-id: dabb81960f6ccc7f62c53648127924fc2fef7cdc
MalavanEQAlpha 2022-01-14 16:17:32 -05:00 committed by Malavan Sotheeswaran
parent ec255fea11
commit 4e2e461acc
5 changed files with 231 additions and 123 deletions


@@ -365,11 +365,7 @@ typedef struct RedisModuleCommandFilter {
 static list *moduleCommandFilters;

 /* Module GIL Variables */
-static int s_cAcquisitionsServer = 0;
-static int s_cAcquisitionsModule = 0;
-static std::mutex s_mutex;
-static std::condition_variable s_cv;
-static std::recursive_mutex s_mutexModule;
+static readWriteLock s_moduleGIL;
 thread_local bool g_fModuleThread = false;

 typedef void (*RedisModuleForkDoneHandler) (int exitcode, int bysignal, void *user_data);

@@ -5969,95 +5965,58 @@ void RM_ThreadSafeContextUnlock(RedisModuleCtx *ctx) {
 // as the server thread acquisition is sufficient. If we did try to lock we would deadlock
 static bool FModuleCallBackLock(bool fServerThread)
 {
-    return !fServerThread && aeThreadOwnsLock() && !g_fModuleThread && s_cAcquisitionsServer > 0;
+    return !fServerThread && aeThreadOwnsLock() && !g_fModuleThread && s_moduleGIL.hasReader();
 }

 void moduleAcquireGIL(int fServerThread, int fExclusive) {
-    std::unique_lock<std::mutex> lock(s_mutex);
-    int *pcheck = fServerThread ? &s_cAcquisitionsModule : &s_cAcquisitionsServer;
     if (FModuleCallBackLock(fServerThread)) {
         return;
     }
-    while (*pcheck > 0)
-        s_cv.wait(lock);
     if (fServerThread)
     {
-        ++s_cAcquisitionsServer;
+        s_moduleGIL.acquireRead();
     }
     else
     {
-        // only try to acquire the mutexModule in exclusive mode
-        if (fExclusive){
-            // It is possible that another module thread holds the GIL (and s_mutexModule as a result).
-            // When said thread goes to release the GIL, it will wait for s_mutex, which this thread owns.
-            // This thread is however waiting for the GIL (and s_mutexModule) that the other thread owns.
-            // As a result, a deadlock has occured.
-            // We release the lock on s_mutex and wait until we are able to safely acquire the GIL
-            // in order to prevent this deadlock from occuring.
-            while (!s_mutexModule.try_lock())
-                s_cv.wait(lock);
-        }
-        ++s_cAcquisitionsModule;
-        fModuleGILWlocked++;
+        s_moduleGIL.acquireWrite(fExclusive);
     }
 }

 int moduleTryAcquireGIL(bool fServerThread, int fExclusive) {
-    std::unique_lock<std::mutex> lock(s_mutex, std::defer_lock);
-    if (!lock.try_lock())
-        return 1;
-    int *pcheck = fServerThread ? &s_cAcquisitionsModule : &s_cAcquisitionsServer;
     if (FModuleCallBackLock(fServerThread)) {
         return 0;
     }
-    if (*pcheck > 0)
-        return 1;
     if (fServerThread)
     {
-        ++s_cAcquisitionsServer;
+        if (!s_moduleGIL.tryAcquireRead())
+            return 1;
     }
     else
     {
-        // only try to acquire the mutexModule in exclusive mode
-        if (fExclusive){
-            if (!s_mutexModule.try_lock())
-                return 1;
-        }
-        ++s_cAcquisitionsModule;
-        fModuleGILWlocked++;
+        if (!s_moduleGIL.tryAcquireWrite(fExclusive))
+            return 1;
     }
     return 0;
 }

 void moduleReleaseGIL(int fServerThread, int fExclusive) {
-    std::unique_lock<std::mutex> lock(s_mutex);
     if (FModuleCallBackLock(fServerThread)) {
         return;
     }
     if (fServerThread)
     {
-        --s_cAcquisitionsServer;
+        s_moduleGIL.releaseRead();
     }
     else
     {
-        // only try to release the mutexModule in exclusive mode
-        if (fExclusive)
-            s_mutexModule.unlock();
-        --s_cAcquisitionsModule;
-        fModuleGILWlocked--;
+        s_moduleGIL.releaseWrite(fExclusive);
     }
-    s_cv.notify_all();
 }

 int moduleGILAcquiredByModule(void) {
-    return fModuleGILWlocked > 0;
+    return s_moduleGIL.hasWriter();
 }
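
For reference, a minimal sketch (not part of the diff) of how the removed counter-and-mutex state maps onto the new lock: server threads now take the read side of s_moduleGIL and module threads take the write side.

// Illustrative correspondence between the old state and the new lock:
//   s_cAcquisitionsServer > 0  ->  s_moduleGIL.hasReader()
//   ++s_cAcquisitionsServer    ->  s_moduleGIL.acquireRead()
//   ++s_cAcquisitionsModule    ->  s_moduleGIL.acquireWrite(fExclusive)
//   fModuleGILWlocked > 0      ->  s_moduleGIL.hasWriter()
//   s_cv.notify_all()          ->  done internally by releaseRead()/releaseWrite()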


@@ -3863,37 +3863,10 @@ int checkClientPauseTimeoutAndReturnIfPaused(void) {
 *
 * The function returns the total number of events processed. */
 void processEventsWhileBlocked(int iel) {
+    serverAssert(GlobalLocksAcquired());
+    int eventsCount = 0;
+    executeWithoutGlobalLock([&](){
     int iterations = 4; /* See the function top-comment. */
-    std::vector<client*> vecclients;
-    listIter li;
-    listNode *ln;
-    listRewind(g_pserver->clients, &li);
-
-    // All client locks must be acquired *after* the global lock is reacquired to prevent deadlocks
-    // so unlock here, and save them for reacquisition later
-    while ((ln = listNext(&li)) != nullptr)
-    {
-        client *c = (client*)listNodeValue(ln);
-        if (c->lock.fOwnLock()) {
-            serverAssert(c->flags & CLIENT_PROTECTED); // If the client is not protected we have no gurantee they won't be free'd in the event loop
-            c->lock.unlock();
-            vecclients.push_back(c);
-        }
-    }
-
-    /* Since we're about to release our lock we need to flush the repl backlog queue */
-    bool fReplBacklog = g_pserver->repl_batch_offStart >= 0;
-    if (fReplBacklog) {
-        flushReplBacklogToClients();
-        g_pserver->repl_batch_idxStart = -1;
-        g_pserver->repl_batch_offStart = -1;
-    }
-
-    long long eventsCount = 0;
-    aeReleaseLock();
-    serverAssert(!GlobalLocksAcquired());
     try
     {
         ProcessingEventsWhileBlocked = 1;
@@ -3912,33 +3885,15 @@ void processEventsWhileBlocked(int iel) {
     }
     catch (...)
     {
-        // Caller expects us to be locked so fix and rethrow
         ProcessingEventsWhileBlocked = 0;
-        AeLocker locker;
-        locker.arm(nullptr);
-        locker.release();
-        for (client *c : vecclients)
-            c->lock.lock();
         throw;
     }
+    });

-    AeLocker locker;
-    locker.arm(nullptr);
-    locker.release();
     g_pserver->events_processed_while_blocked += eventsCount;
     whileBlockedCron();

-    // Restore it so the calling code is not confused
-    if (fReplBacklog) {
-        g_pserver->repl_batch_idxStart = g_pserver->repl_backlog_idx;
-        g_pserver->repl_batch_offStart = g_pserver->master_repl_offset;
-    }
-
-    for (client *c : vecclients)
-        c->lock.lock();
-
     // If a different thread processed the shutdown we need to abort the lua command or we will hang
     if (serverTL->el->stop)
         throw ShutdownException();

src/readwritelock.h (new file, 113 lines)

@@ -0,0 +1,113 @@
+#pragma once
+
+#include <mutex>
+#include <condition_variable>
+
+class readWriteLock {
+    std::mutex m_readLock;
+    std::recursive_mutex m_writeLock;
+    std::condition_variable m_cv;
+    int m_readCount = 0;
+    int m_writeCount = 0;
+    bool m_writeWaiting = false;
+
+public:
+    void acquireRead() {
+        std::unique_lock<std::mutex> rm(m_readLock);
+        while (m_writeCount > 0 || m_writeWaiting)
+            m_cv.wait(rm);
+        m_readCount++;
+    }
+
+    bool tryAcquireRead() {
+        std::unique_lock<std::mutex> rm(m_readLock, std::defer_lock);
+        if (!rm.try_lock())
+            return false;
+        if (m_writeCount > 0 || m_writeWaiting)
+            return false;
+        m_readCount++;
+        return true;
+    }
+
+    void acquireWrite(bool exclusive = true) {
+        std::unique_lock<std::mutex> rm(m_readLock);
+        m_writeWaiting = true;
+        while (m_readCount > 0)
+            m_cv.wait(rm);
+        if (exclusive) {
+            /* Another thread might have the write lock while we have the read lock,
+               but won't be able to release it until they can acquire the read lock,
+               so release the read lock and try again instead of waiting to avoid deadlock */
+            while (!m_writeLock.try_lock())
+                m_cv.wait(rm);
+        }
+        m_writeCount++;
+        m_writeWaiting = false;
+    }
+
+    void upgradeWrite(bool exclusive = true) {
+        std::unique_lock<std::mutex> rm(m_readLock);
+        m_writeWaiting = true;
+        while (m_readCount > 1)
+            m_cv.wait(rm);
+        if (exclusive) {
+            /* Another thread might have the write lock while we have the read lock,
+               but won't be able to release it until they can acquire the read lock,
+               so release the read lock and try again instead of waiting to avoid deadlock */
+            while (!m_writeLock.try_lock())
+                m_cv.wait(rm);
+        }
+        m_writeCount++;
+        m_readCount--;
+        m_writeWaiting = false;
+    }
+
+    bool tryAcquireWrite(bool exclusive = true) {
+        std::unique_lock<std::mutex> rm(m_readLock, std::defer_lock);
+        if (!rm.try_lock())
+            return false;
+        if (m_readCount > 0)
+            return false;
+        if (exclusive)
+            if (!m_writeLock.try_lock())
+                return false;
+        m_writeCount++;
+        return true;
+    }
+
+    void releaseRead() {
+        std::unique_lock<std::mutex> rm(m_readLock);
+        serverAssert(m_readCount > 0);
+        m_readCount--;
+        m_cv.notify_all();
+    }
+
+    void releaseWrite(bool exclusive = true) {
+        std::unique_lock<std::mutex> rm(m_readLock);
+        serverAssert(m_writeCount > 0);
+        if (exclusive)
+            m_writeLock.unlock();
+        m_writeCount--;
+        m_cv.notify_all();
+    }
+
+    void downgradeWrite(bool exclusive = true) {
+        std::unique_lock<std::mutex> rm(m_readLock);
+        serverAssert(m_writeCount > 0);
+        if (exclusive)
+            m_writeLock.unlock();
+        m_writeCount--;
+        while (m_writeCount > 0 || m_writeWaiting)
+            m_cv.wait(rm);
+        m_readCount++;
+    }
+
+    bool hasReader() {
+        return m_readCount > 0;
+    }
+
+    bool hasWriter() {
+        return m_writeCount > 0;
+    }
+
+    bool writeWaiting() {
+        return m_writeWaiting;
+    }
+};
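
As a quick sanity check, here is a minimal standalone sketch of the intended semantics (assumed setup, not part of the commit): readers share the lock concurrently, a writer drains them before proceeding. serverAssert is mapped to plain assert so the header can be exercised outside the server build.

// Sketch only: exercise readWriteLock the way the server uses g_forkLock.
#include <cassert>
#define serverAssert assert
#include "readwritelock.h"

#include <thread>
#include <vector>

static readWriteLock rwlock;

int main() {
    std::vector<std::thread> threads;

    // Readers (the server threads in KeyDB) may hold the lock concurrently.
    for (int i = 0; i < 4; ++i) {
        threads.emplace_back([] {
            rwlock.acquireRead();
            // ... work that must not overlap a fork ...
            rwlock.releaseRead();
        });
    }

    // A writer (redisFork in the real code) waits for all readers to drain.
    threads.emplace_back([] {
        rwlock.acquireWrite(true /*exclusive*/);
        // ... fork() would happen here ...
        rwlock.releaseWrite(true /*exclusive*/);
    });

    for (auto &t : threads) t.join();
    return 0;
}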


@@ -67,6 +67,7 @@
 #include "aelocker.h"
 #include "motd.h"
 #include "t_nhash.h"
+#include "readwritelock.h"
 #ifdef __linux__
 #include <sys/prctl.h>
 #include <sys/mman.h>

@@ -91,8 +92,10 @@ double R_Zero, R_PosInf, R_NegInf, R_Nan;
 /* Global vars */
 namespace GlobalHidden {
 struct redisServer server; /* Server global state */
+readWriteLock forkLock;
 }
 redisServer *g_pserver = &GlobalHidden::server;
+readWriteLock *g_forkLock = &GlobalHidden::forkLock;
 struct redisServerConst cserver;
 __thread struct redisServerThreadVars *serverTL = NULL;   // thread local server vars
 std::mutex time_thread_mutex;

@@ -2649,8 +2652,8 @@ void afterSleep(struct aeEventLoop *eventLoop) {
        Don't check here that modules are enabled, rather use the result from beforeSleep
        Otherwise you may double acquire the GIL and cause deadlocks in the module */
     if (!ProcessingEventsWhileBlocked) {
-        wakeTimeThread();
         if (serverTL->modulesEnabledThisAeLoop) moduleAcquireGIL(TRUE /*fServerThread*/);
+        wakeTimeThread();
     }
 }

@@ -6218,6 +6221,63 @@ void closeChildUnusedResourceAfterFork() {
     cserver.pidfile = NULL;
 }

+void executeWithoutGlobalLock(std::function<void()> func) {
+    serverAssert(GlobalLocksAcquired());
+
+    std::vector<client*> vecclients;
+    listIter li;
+    listNode *ln;
+    listRewind(g_pserver->clients, &li);
+
+    // All client locks must be acquired *after* the global lock is reacquired to prevent deadlocks
+    // so unlock here, and save them for reacquisition later
+    while ((ln = listNext(&li)) != nullptr)
+    {
+        client *c = (client*)listNodeValue(ln);
+        if (c->lock.fOwnLock()) {
+            serverAssert(c->flags & CLIENT_PROTECTED || c->flags & CLIENT_EXECUTING_COMMAND); // If the client is not protected we have no guarantee they won't be free'd in the event loop
+            c->lock.unlock();
+            vecclients.push_back(c);
+        }
+    }
+
+    /* Since we're about to release our lock we need to flush the repl backlog queue */
+    bool fReplBacklog = g_pserver->repl_batch_offStart >= 0;
+    if (fReplBacklog) {
+        flushReplBacklogToClients();
+        g_pserver->repl_batch_idxStart = -1;
+        g_pserver->repl_batch_offStart = -1;
+    }
+
+    aeReleaseLock();
+    serverAssert(!GlobalLocksAcquired());
+
+    try {
+        func();
+    }
+    catch (...) {
+        // Caller expects us to be locked so fix and rethrow
+        AeLocker locker;
+        locker.arm(nullptr);
+        locker.release();
+        for (client *c : vecclients)
+            c->lock.lock();
+        throw;
+    }
+
+    AeLocker locker;
+    locker.arm(nullptr);
+    locker.release();
+
+    // Restore it so the calling code is not confused
+    if (fReplBacklog) {
+        g_pserver->repl_batch_idxStart = g_pserver->repl_backlog_idx;
+        g_pserver->repl_batch_offStart = g_pserver->master_repl_offset;
+    }
+
+    for (client *c : vecclients)
+        c->lock.lock();
+}
+
 /* purpose is one of CHILD_TYPE_ types */
 int redisFork(int purpose) {
     int childpid;
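
The new helper factors out the unlock/relock dance that processEventsWhileBlocked previously did inline. A minimal sketch of a hypothetical caller (doBlockingWork is illustrative, not in the tree):

// Run a blocking operation without holding the global lock, then resume
// with the same locks held as before.
void doBlockingWork() {
    serverAssert(GlobalLocksAcquired());
    executeWithoutGlobalLock([&]() {
        // Global lock (and any client locks we owned) are released here;
        // safe to block or to pump events.
    });
    // Locks are held again on return; if the lambda throws, they are
    // reacquired before the exception propagates to us.
}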
@@ -6229,7 +6289,9 @@ int redisFork(int purpose) {
         openChildInfoPipe();
     }

+    long long startWriteLock = ustime();
+    g_forkLock->acquireWrite();
+    latencyAddSampleIfNeeded("fork-lock",(ustime()-startWriteLock)/1000);
     if ((childpid = fork()) == 0) {
         /* Child */
         g_pserver->in_fork_child = purpose;

@@ -6238,6 +6300,7 @@ int redisFork(int purpose) {
         closeChildUnusedResourceAfterFork();
     } else {
         /* Parent */
+        g_forkLock->releaseWrite();
         g_pserver->stat_total_forks++;
         g_pserver->stat_fork_time = ustime()-start;
         g_pserver->stat_fork_rate = (double) zmalloc_used_memory() * 1000000 / g_pserver->stat_fork_time / (1024*1024*1024); /* GB per second. */
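
Because acquireWrite can stall behind the time thread's read lock, the wait is recorded as a "fork-lock" latency event. Presumably it can then be inspected with the standard latency tooling (assuming latency-monitor-threshold is configured), for example:

keydb-cli latency history fork-lock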
@@ -6554,20 +6617,32 @@ void *timeThreadMain(void*) {
     timespec delay;
     delay.tv_sec = 0;
     delay.tv_nsec = 100;
+    int cycle_count = 0;
+    g_forkLock->acquireRead();
     while (true) {
         {
             std::unique_lock<std::mutex> lock(time_thread_mutex);
             if (sleeping_threads >= cserver.cthreads) {
+                g_forkLock->releaseRead();
                 time_thread_cv.wait(lock);
+                g_forkLock->acquireRead();
+                cycle_count = 0;
             }
         }
         updateCachedTime();
+        if (cycle_count == MAX_CYCLES_TO_HOLD_FORK_LOCK) {
+            g_forkLock->releaseRead();
+            g_forkLock->acquireRead();
+            cycle_count = 0;
+        }
 #if defined(__APPLE__)
         nanosleep(&delay, nullptr);
 #else
         clock_nanosleep(CLOCK_MONOTONIC, 0, &delay, NULL);
 #endif
+        cycle_count++;
     }
+    g_forkLock->releaseRead();
 }
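
A note on the cycle counter: acquireWrite sets m_writeWaiting, which blocks new readers, so a reader that never lets go would leave a pending fork and the time thread deadlocked against each other. Releasing and immediately reacquiring the read lock every MAX_CYCLES_TO_HOLD_FORK_LOCK iterations gives the writer a window. The hand-off, reduced to a sketch with the names from the diff above:

// A long-lived reader periodically cycles its read lock; if redisFork is
// waiting on acquireWrite, it gets the lock in the gap.
if (cycle_count == MAX_CYCLES_TO_HOLD_FORK_LOCK) {
    g_forkLock->releaseRead();   // pending writer (the fork) runs here
    g_forkLock->acquireRead();   // blocks until the fork has completed
    cycle_count = 0;
}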
void *workerThreadMain(void *parg) void *workerThreadMain(void *parg)


@@ -96,6 +96,7 @@ typedef long long ustime_t; /* microsecond time type. */
 #include "connection.h" /* Connection abstraction */
 #include "serverassert.h"
 #include "expire.h"
+#include "readwritelock.h"
 #define REDISMODULE_CORE 1
 #include "redismodule.h"    /* Redis modules API defines. */

@@ -733,6 +734,9 @@ typedef enum {
 #define REDISMODULE_AUX_BEFORE_RDB (1<<0)
 #define REDISMODULE_AUX_AFTER_RDB (1<<1)

+/* Number of cycles before time thread gives up fork lock */
+#define MAX_CYCLES_TO_HOLD_FORK_LOCK 10
+
 struct RedisModule;
 struct RedisModuleIO;
 struct RedisModuleDigest;

@@ -2113,6 +2117,7 @@ typedef struct {
 //extern struct redisServer server;
 extern redisServer *g_pserver;
+extern readWriteLock *g_forkLock;
 extern struct redisServerConst cserver;
 extern __thread struct redisServerThreadVars *serverTL;   // thread local server vars
 extern struct sharedObjectsStruct shared;

@@ -2496,6 +2501,7 @@ void sendChildInfo(childInfoType info_type, size_t keys, const char *pname);
 void receiveChildInfo(void);

 /* Fork helpers */
+void executeWithoutGlobalLock(std::function<void()> func);
 int redisFork(int type);
 int hasActiveChildProcess();
 void resetChildState();