From 3c82c85fcfb7e80b6177a080f5d4be64c1506245 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 27 May 2013 11:17:17 +0200 Subject: [PATCH] Close connection with timed out slaves. Masters now use the time at which the last REPLCONF ACK was received to explicitly disconnect slaves that are no longer responding. Previously the only way to notice such a slave was to see a very long output buffer, which was highly suboptimal. --- src/replication.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/replication.c b/src/replication.c index 2791f3b90..3157c57ad 100644 --- a/src/replication.c +++ b/src/replication.c @@ -424,6 +424,7 @@ int masterTryPartialResynchronization(redisClient *c) { * 3) Send the backlog data (from the offset to the end) to the slave. */ c->flags |= REDIS_SLAVE; c->replstate = REDIS_REPL_ONLINE; + c->repl_ack_time = server.unixtime; listAddNodeTail(server.slaves,c); /* We can't use the connection buffers since they are used to accumulate * new commands at this stage. But we are sure the socket send buffer is @@ -655,6 +656,7 @@ void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) { slave->repldbfd = -1; aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE); slave->replstate = REDIS_REPL_ONLINE; + slave->repl_ack_time = server.unixtime; if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendReplyToClient, slave) == AE_ERR) { freeClient(slave); @@ -1477,6 +1479,31 @@ void replicationCron(void) { } } + /* Disconnect timedout slaves. 
*/ + if (listLength(server.slaves)) { + listIter li; + listNode *ln; + + listRewind(server.slaves,&li); + while((ln = listNext(&li))) { + redisClient *slave = ln->value; + + if (slave->replstate != REDIS_REPL_ONLINE) continue; + if ((server.unixtime - slave->repl_ack_time) > server.repl_timeout) + { + char ip[32]; + int port; + + if (anetPeerToString(slave->fd,ip,&port) != -1) { + redisLog(REDIS_WARNING, + "Disconnecting timedout slave: %s:%d", + ip, slave->slave_listening_port); + } + freeClient(slave); + } + } + } + /* If we have no attached slaves and there is a replication backlog * using memory, free it after some (configured) time. */ if (listLength(server.slaves) == 0 && server.repl_backlog_time_limit &&