deadlock fixes

This commit is contained in:
John Sully 2019-02-18 23:52:21 -05:00
parent 2526d51d1a
commit acbad0c04e
10 changed files with 81 additions and 37 deletions

View File

@ -121,9 +121,17 @@ void aeProcessCmd(aeEventLoop *eventLoop, int fd, void *, int )
case AE_ASYNC_OP::CreateFileEvent: case AE_ASYNC_OP::CreateFileEvent:
{ {
std::unique_lock<std::mutex> ulock(cmd.pctl->mutexcv); if (cmd.pctl != nullptr)
std::atomic_store(&cmd.pctl->rval, aeCreateFileEvent(eventLoop, cmd.fd, cmd.mask, cmd.fproc, cmd.clientData)); {
cmd.pctl->cv.notify_all(); cmd.pctl->mutexcv.lock();
std::atomic_store(&cmd.pctl->rval, aeCreateFileEvent(eventLoop, cmd.fd, cmd.mask, cmd.fproc, cmd.clientData));
cmd.pctl->cv.notify_all();
cmd.pctl->mutexcv.unlock();
}
else
{
aeCreateFileEvent(eventLoop, cmd.fd, cmd.mask, cmd.fproc, cmd.clientData);
}
} }
break; break;
@ -145,8 +153,8 @@ void aeProcessCmd(aeEventLoop *eventLoop, int fd, void *, int )
} }
} }
int aeCreateRemoteFileEventSync(aeEventLoop *eventLoop, int fd, int mask, int aeCreateRemoteFileEvent(aeEventLoop *eventLoop, int fd, int mask,
aeFileProc *proc, void *clientData) aeFileProc *proc, void *clientData, int fSynchronous)
{ {
if (eventLoop == g_eventLoopThisThread) if (eventLoop == g_eventLoopThisThread)
return aeCreateFileEvent(eventLoop, fd, mask, proc, clientData); return aeCreateFileEvent(eventLoop, fd, mask, proc, clientData);
@ -157,14 +165,22 @@ int aeCreateRemoteFileEventSync(aeEventLoop *eventLoop, int fd, int mask,
cmd.mask = mask; cmd.mask = mask;
cmd.fproc = proc; cmd.fproc = proc;
cmd.clientData = clientData; cmd.clientData = clientData;
cmd.pctl = new aeCommandControl(); cmd.pctl = nullptr;
if (fSynchronous)
cmd.pctl = new aeCommandControl();
std::unique_lock<std::mutex> ulock(cmd.pctl->mutexcv); std::unique_lock<std::mutex> ulock(cmd.pctl->mutexcv, std::defer_lock);
if (fSynchronous)
cmd.pctl->mutexcv.lock();
auto size = write(eventLoop->fdCmdWrite, &cmd, sizeof(cmd)); auto size = write(eventLoop->fdCmdWrite, &cmd, sizeof(cmd));
AE_ASSERT(size == sizeof(cmd)); AE_ASSERT(size == sizeof(cmd));
cmd.pctl->cv.wait(ulock); int ret = AE_OK;
int ret = cmd.pctl->rval; if (fSynchronous)
delete cmd.pctl; {
cmd.pctl->cv.wait(ulock);
ret = cmd.pctl->rval;
delete cmd.pctl;
}
return ret; return ret;
} }
@ -691,7 +707,9 @@ void aeMain(aeEventLoop *eventLoop) {
ulock.lock(); ulock.lock();
eventLoop->beforesleep(eventLoop); eventLoop->beforesleep(eventLoop);
} }
AE_ASSERT(!aeThreadOwnsLock()); // we should have relinquished it after processing
aeProcessEvents(eventLoop, AE_ALL_EVENTS|AE_CALL_AFTER_SLEEP); aeProcessEvents(eventLoop, AE_ALL_EVENTS|AE_CALL_AFTER_SLEEP);
AE_ASSERT(!aeThreadOwnsLock()); // we should have relinquished it after processing
} }
} }

View File

@ -138,8 +138,10 @@ void aeDeleteEventLoop(aeEventLoop *eventLoop);
void aeStop(aeEventLoop *eventLoop); void aeStop(aeEventLoop *eventLoop);
int aeCreateFileEvent(aeEventLoop *eventLoop, int fd, int mask, int aeCreateFileEvent(aeEventLoop *eventLoop, int fd, int mask,
aeFileProc *proc, void *clientData); aeFileProc *proc, void *clientData);
int aeCreateRemoteFileEventSync(aeEventLoop *eventLoop, int fd, int mask,
aeFileProc *proc, void *clientData); int aeCreateRemoteFileEvent(aeEventLoop *eventLoop, int fd, int mask,
aeFileProc *proc, void *clientData, int fSynchronous);
void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask); void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask);
void aeDeleteFileEventAsync(aeEventLoop *eventLoop, int fd, int mask); void aeDeleteFileEventAsync(aeEventLoop *eventLoop, int fd, int mask);
int aeGetFileEvents(aeEventLoop *eventLoop, int fd); int aeGetFileEvents(aeEventLoop *eventLoop, int fd);

View File

@ -97,7 +97,7 @@ void aofChildWriteDiffData(aeEventLoop *el, int fd, void *privdata, int mask) {
aofrwblock *block; aofrwblock *block;
ssize_t nwritten; ssize_t nwritten;
serverAssert(el == server.rgthreadvar[IDX_EVENT_LOOP_MAIN].el); serverAssert(el == server.rgthreadvar[IDX_EVENT_LOOP_MAIN].el);
UNUSED(el); UNUSED(el);
UNUSED(fd); UNUSED(fd);
UNUSED(privdata); UNUSED(privdata);
@ -1497,7 +1497,7 @@ int aofCreatePipes(void) {
/* Parent -> children data is non blocking. */ /* Parent -> children data is non blocking. */
if (anetNonBlock(NULL,fds[0]) != ANET_OK) goto error; if (anetNonBlock(NULL,fds[0]) != ANET_OK) goto error;
if (anetNonBlock(NULL,fds[1]) != ANET_OK) goto error; if (anetNonBlock(NULL,fds[1]) != ANET_OK) goto error;
if (aeCreateRemoteFileEventSync(server.rgthreadvar[IDX_EVENT_LOOP_MAIN].el, fds[2], AE_READABLE, aofChildPipeReadable, NULL) == AE_ERR) goto error; if (aeCreateRemoteFileEvent(server.rgthreadvar[IDX_EVENT_LOOP_MAIN].el, fds[2], AE_READABLE, aofChildPipeReadable, NULL, TRUE) == AE_ERR) goto error;
server.aof_pipe_write_data_to_child = fds[1]; server.aof_pipe_write_data_to_child = fds[1];
server.aof_pipe_read_data_from_parent = fds[0]; server.aof_pipe_read_data_from_parent = fds[0];

View File

@ -110,6 +110,8 @@ void blockClient(client *c, int btype) {
* in order to process the pending input buffer of clients that were * in order to process the pending input buffer of clients that were
* unblocked after a blocking operation. */ * unblocked after a blocking operation. */
void processUnblockedClients(int iel) { void processUnblockedClients(int iel) {
serverAssert(aeThreadOwnsLock());
listNode *ln; listNode *ln;
client *c; client *c;
list *unblocked_clients = server.rgthreadvar[iel].unblocked_clients; list *unblocked_clients = server.rgthreadvar[iel].unblocked_clients;
@ -127,11 +129,9 @@ void processUnblockedClients(int iel) {
* client is not blocked before to proceed, but things may change and * client is not blocked before to proceed, but things may change and
* the code is conceptually more correct this way. */ * the code is conceptually more correct this way. */
if (!(c->flags & CLIENT_BLOCKED)) { if (!(c->flags & CLIENT_BLOCKED)) {
aeAcquireLock();
if (c->querybuf && sdslen(c->querybuf) > 0) { if (c->querybuf && sdslen(c->querybuf) > 0) {
processInputBufferAndReplicate(c); processInputBufferAndReplicate(c);
} }
aeReleaseLock();
} }
} }
} }
@ -155,7 +155,7 @@ void processUnblockedClients(int iel) {
void queueClientForReprocessing(client *c) { void queueClientForReprocessing(client *c) {
/* The client may already be into the unblocked list because of a previous /* The client may already be into the unblocked list because of a previous
* blocking operation, don't add back it into the list multiple times. */ * blocking operation, don't add back it into the list multiple times. */
AssertCorrectThread(c); serverAssert(aeThreadOwnsLock());
if (!(c->flags & CLIENT_UNBLOCKED)) { if (!(c->flags & CLIENT_UNBLOCKED)) {
c->flags |= CLIENT_UNBLOCKED; c->flags |= CLIENT_UNBLOCKED;
listAddNodeTail(server.rgthreadvar[c->iel].unblocked_clients,c); listAddNodeTail(server.rgthreadvar[c->iel].unblocked_clients,c);

View File

@ -32,6 +32,7 @@
#include <sys/uio.h> #include <sys/uio.h>
#include <math.h> #include <math.h>
#include <ctype.h> #include <ctype.h>
#include <vector>
static void setProtocolError(const char *errstr, client *c); static void setProtocolError(const char *errstr, client *c);
void addReplyLongLongWithPrefixCore(client *c, long long ll, char prefix, bool fAsync); void addReplyLongLongWithPrefixCore(client *c, long long ll, char prefix, bool fAsync);
@ -233,13 +234,10 @@ void clientInstallWriteHandler(client *c) {
} }
void clientInstallAsyncWriteHandler(client *c) { void clientInstallAsyncWriteHandler(client *c) {
UNUSED(c);
#if 0 // Not yet
if (!(c->flags & CLIENT_PENDING_ASYNCWRITE)) { if (!(c->flags & CLIENT_PENDING_ASYNCWRITE)) {
c->flags |= CLIENT_PENDING_ASYNCWRITE; c->flags |= CLIENT_PENDING_ASYNCWRITE;
listAddNodeHead(serverTL->clients_pending_asyncwrite,c); listAddNodeHead(serverTL->clients_pending_asyncwrite,c);
} }
#endif
} }
/* This function is called every time we are going to transmit new data /* This function is called every time we are going to transmit new data
@ -436,6 +434,10 @@ void addReplyProto(client *c, const char *s, size_t len) {
addReplyProtoCore(c, s, len, false); addReplyProtoCore(c, s, len, false);
} }
void addReplyProtoAsync(client *c, const char *s, size_t len) {
addReplyProtoCore(c, s, len, true);
}
/* Low level function called by the addReplyError...() functions. /* Low level function called by the addReplyError...() functions.
* It emits the protocol for a Redis error, in the form: * It emits the protocol for a Redis error, in the form:
* *
@ -1127,10 +1129,24 @@ static void freeClientArgv(client *c) {
* when we resync with our own master and want to force all our slaves to * when we resync with our own master and want to force all our slaves to
* resync with us as well. */ * resync with us as well. */
void disconnectSlaves(void) { void disconnectSlaves(void) {
while (listLength(server.slaves)) { std::vector<client*> vecfreeImmediate;
listNode *ln = listFirst(server.slaves); listNode *ln;
freeClient((client*)ln->value); listIter li;
listRewind(server.slaves, &li);
while ((ln = listNext(&li))) {
client *c = (client*)ln->value;
if (c->iel == serverTL - server.rgthreadvar)
{
vecfreeImmediate.push_back(c);
}
else
{
freeClientAsync(c);
}
} }
for (client *c : vecfreeImmediate)
freeClient(c);
} }
/* Remove the specified client from global lists where the client could /* Remove the specified client from global lists where the client could
@ -2706,6 +2722,9 @@ int clientsArePaused(void) {
int processEventsWhileBlocked(int iel) { int processEventsWhileBlocked(int iel) {
int iterations = 4; /* See the function top-comment. */ int iterations = 4; /* See the function top-comment. */
int count = 0; int count = 0;
// BUGBUG - This function isn't fair - why should clients on this thread get to run, but not clients elsewhere?
// We mix up replies when releasing the lock here so more work is needed to fix this
while (iterations--) { while (iterations--) {
int events = 0; int events = 0;
events += aeProcessEvents(server.rgthreadvar[iel].el, AE_FILE_EVENTS|AE_DONT_WAIT); events += aeProcessEvents(server.rgthreadvar[iel].el, AE_FILE_EVENTS|AE_DONT_WAIT);

View File

@ -51,13 +51,13 @@ void addReplyPubsubMessage(client *c, robj *channel, robj *msg) {
* this message format also includes the pattern that matched the message. */ * this message format also includes the pattern that matched the message. */
void addReplyPubsubPatMessage(client *c, robj *pat, robj *channel, robj *msg) { void addReplyPubsubPatMessage(client *c, robj *pat, robj *channel, robj *msg) {
if (c->resp == 2) if (c->resp == 2)
addReply(c,shared.mbulkhdr[4]); addReplyAsync(c,shared.mbulkhdr[4]);
else else
addReplyPushLen(c,4); addReplyPushLenAsync(c,4);
addReply(c,shared.pmessagebulk); addReplyAsync(c,shared.pmessagebulk);
addReplyBulk(c,pat); addReplyBulkAsync(c,pat);
addReplyBulk(c,channel); addReplyBulkAsync(c,channel);
addReplyBulk(c,msg); addReplyBulkAsync(c,msg);
} }
/* Send the pubsub subscription notification to the client. */ /* Send the pubsub subscription notification to the client. */

View File

@ -296,7 +296,7 @@ void replicationFeedSlavesFromMasterStream(list *slaves, char *buf, size_t bufle
/* Don't feed slaves that are still waiting for BGSAVE to start */ /* Don't feed slaves that are still waiting for BGSAVE to start */
if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) continue; if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) continue;
addReplyProto(slave,buf,buflen); addReplyProtoAsync(slave,buf,buflen);
} }
} }
@ -334,7 +334,7 @@ void replicationFeedMonitors(client *c, list *monitors, int dictid, robj **argv,
listRewind(monitors,&li); listRewind(monitors,&li);
while((ln = listNext(&li))) { while((ln = listNext(&li))) {
client *monitor = (client*)ln->value; client *monitor = (client*)ln->value;
addReply(monitor,cmdobj); addReplyAsync(monitor,cmdobj);
} }
decrRefCount(cmdobj); decrRefCount(cmdobj);
} }

View File

@ -368,6 +368,9 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
struct redisCommand *cmd; struct redisCommand *cmd;
client *c = server.lua_client; client *c = server.lua_client;
sds reply; sds reply;
// Ensure our client is on the right thread
c->iel = serverTL - server.rgthreadvar;
/* Cached across calls. */ /* Cached across calls. */
static robj **argv = NULL; static robj **argv = NULL;
@ -1278,7 +1281,7 @@ void luaMaskCountHook(lua_State *lua, lua_Debug *ar) {
* here when the EVAL command will return. */ * here when the EVAL command will return. */
protectClient(server.lua_caller); protectClient(server.lua_caller);
} }
if (server.lua_timedout) processEventsWhileBlocked(IDX_EVENT_LOOP_MAIN); if (server.lua_timedout) processEventsWhileBlocked(serverTL - server.rgthreadvar);
if (server.lua_kill) { if (server.lua_kill) {
serverLog(LL_WARNING,"Lua script killed by user with SCRIPT KILL."); serverLog(LL_WARNING,"Lua script killed by user with SCRIPT KILL.");
lua_pushstring(lua,"Script killed by user with SCRIPT KILL..."); lua_pushstring(lua,"Script killed by user with SCRIPT KILL...");

View File

@ -2104,9 +2104,7 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
/* Try to process pending commands for clients that were just unblocked. */ /* Try to process pending commands for clients that were just unblocked. */
if (listLength(server.rgthreadvar[IDX_EVENT_LOOP_MAIN].unblocked_clients)) if (listLength(server.rgthreadvar[IDX_EVENT_LOOP_MAIN].unblocked_clients))
{ {
aeReleaseLock();
processUnblockedClients(IDX_EVENT_LOOP_MAIN); processUnblockedClients(IDX_EVENT_LOOP_MAIN);
aeAcquireLock();
} }
/* Write the AOF buffer on disk */ /* Write the AOF buffer on disk */
@ -2129,9 +2127,12 @@ void beforeSleepLite(struct aeEventLoop *eventLoop)
/* Try to process pending commands for clients that were just unblocked. */ /* Try to process pending commands for clients that were just unblocked. */
if (listLength(server.rgthreadvar[iel].unblocked_clients)) { if (listLength(server.rgthreadvar[iel].unblocked_clients)) {
aeAcquireLock();
processUnblockedClients(iel); processUnblockedClients(iel);
aeReleaseLock();
} }
/* Handle writes with pending output buffers. */ /* Handle writes with pending output buffers. */
handleClientsWithPendingWrites(iel); handleClientsWithPendingWrites(iel);
} }
@ -3213,7 +3214,7 @@ static void ProcessPendingAsyncWrites()
ae_flags |= AE_BARRIER; ae_flags |= AE_BARRIER;
} }
if (aeCreateRemoteFileEventSync(server.rgthreadvar[c->iel].el, c->fd, ae_flags, sendReplyToClient, c) == AE_ERR) if (aeCreateRemoteFileEvent(server.rgthreadvar[c->iel].el, c->fd, ae_flags, sendReplyToClient, c, FALSE) == AE_ERR)
freeClientAsync(c); freeClientAsync(c);
} }
} }
@ -5045,7 +5046,7 @@ int main(int argc, char **argv) {
initServer(); initServer();
server.cthreads = 1; //testing server.cthreads = 2; //testing
initNetworking(1 /* fReusePort */); initNetworking(1 /* fReusePort */);
if (background || server.pidfile) createPidFile(); if (background || server.pidfile) createPidFile();

View File

@ -1044,7 +1044,7 @@ struct redisServerThreadVars {
int ipfd[CONFIG_BINDADDR_MAX]; /* TCP socket file descriptors */ int ipfd[CONFIG_BINDADDR_MAX]; /* TCP socket file descriptors */
int ipfd_count; /* Used slots in ipfd[] */ int ipfd_count; /* Used slots in ipfd[] */
list *clients_pending_write; /* There is to write or install handler. */ list *clients_pending_write; /* There is to write or install handler. */
list *unblocked_clients; /* list of clients to unblock before next loop */ list *unblocked_clients; /* list of clients to unblock before next loop NOT THREADSAFE */
list *clients_pending_asyncwrite; list *clients_pending_asyncwrite;
}; };
@ -1628,6 +1628,7 @@ void unprotectClient(client *c);
// Special Thread-safe addReply() commands for posting messages to clients from a different thread // Special Thread-safe addReply() commands for posting messages to clients from a different thread
void addReplyAsync(client *c, robj *obj); void addReplyAsync(client *c, robj *obj);
void addReplyArrayLenAsync(client *c, long length); void addReplyArrayLenAsync(client *c, long length);
void addReplyProtoAsync(client *c, const char *s, size_t len);
void addReplyBulkAsync(client *c, robj *obj); void addReplyBulkAsync(client *c, robj *obj);
void addReplyBulkCBufferAsync(client *c, const void *p, size_t len); void addReplyBulkCBufferAsync(client *c, const void *p, size_t len);
void addReplyErrorAsync(client *c, const char *err); void addReplyErrorAsync(client *c, const char *err);