From 6c7473623e3c7e6bbeeff3b4419b70b63d51a1f4 Mon Sep 17 00:00:00 2001 From: charsyam Date: Thu, 31 Jan 2013 12:09:16 +0900 Subject: [PATCH] Turn off TCP_NODELAY on the slave socket after SYNC. Further details from @antirez: It was reported by @StopForumSpam on Twitter that the Redis replication link was strangely using multiple TCP packets for multiple commands. This wastes a lot of bandwidth and is due to the TCP_NODELAY option we enable on the socket after accepting a new connection. However the master -> slave channel is a one-way channel since Redis replication is asynchronous, so there is no point in trying to reduce the latency; we should aim to reduce the bandwidth instead. For this reason this commit introduces the ability to turn TCP_NODELAY off (that is, to re-enable the Nagle algorithm) on the slave socket after a successful SYNC. This feature is off by default because the delay can be up to 40 milliseconds with normally configured Linux kernels. --- src/anet.c | 14 ++++++++++++-- src/anet.h | 1 + src/config.c | 7 +++++++ src/redis.c | 1 + src/redis.h | 1 + src/replication.c | 8 ++++++++ 6 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/anet.c b/src/anet.c index 4da3e28db..d002cb31c 100644 --- a/src/anet.c +++ b/src/anet.c @@ -75,9 +75,8 @@ int anetNonBlock(char *err, int fd) return ANET_OK; } -int anetTcpNoDelay(char *err, int fd) +static int _anetTcpNoDelay(char *err, int fd, int yes) { - int yes = 1; if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &yes, sizeof(yes)) == -1) { anetSetError(err, "setsockopt TCP_NODELAY: %s", strerror(errno)); @@ -86,6 +85,17 @@ int anetTcpNoDelay(char *err, int fd) return ANET_OK; } +int anetTcpNoDelay(char *err, int fd) +{ + return _anetTcpNoDelay(err, fd, 1); +} + +int anetTcpNoDelayOff(char *err, int fd) +{ + return _anetTcpNoDelay(err, fd, 0); +} + + int anetSetSendBuffer(char *err, int fd, int buffsize) { if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buffsize, sizeof(buffsize)) == -1) diff --git a/src/anet.h b/src/anet.h index 062b22c56..56ae50573 100644
--- a/src/anet.h +++ b/src/anet.h @@ -52,6 +52,7 @@ int anetUnixAccept(char *err, int serversock); int anetWrite(int fd, char *buf, int count); int anetNonBlock(char *err, int fd); int anetTcpNoDelay(char *err, int fd); +int anetTcpNoDelayOff(char *err, int fd); int anetTcpKeepAlive(char *err, int fd); int anetPeerToString(int fd, char *ip, int *port); diff --git a/src/config.c b/src/config.c index f994dd657..f4c45fc3c 100644 --- a/src/config.c +++ b/src/config.c @@ -389,6 +389,8 @@ void loadServerConfigFromString(char *config) { if ((server.stop_writes_on_bgsave_err = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; } + } else if (!strcasecmp(argv[0],"slave-tcp-nodelay-off") && argc == 2) { + server.slave_tcp_nodelay_off = atoi(argv[1]); } else if (!strcasecmp(argv[0],"slave-priority") && argc == 2) { server.slave_priority = atoi(argv[1]); } else if (!strcasecmp(argv[0],"notify-keyspace-events") && argc == 2) { @@ -722,6 +724,10 @@ void configSetCommand(redisClient *c) { if (flags == -1) goto badfmt; server.notify_keyspace_events = flags; + } else if (!strcasecmp(c->argv[2]->ptr,"slave-tcp-nodelay-off")) { + if (getLongLongFromObject(o,&ll) == REDIS_ERR ) goto badfmt; + + server.slave_tcp_nodelay_off = ll; } else if (!strcasecmp(c->argv[2]->ptr,"slave-priority")) { if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0) goto badfmt; @@ -815,6 +821,7 @@ void configGetCommand(redisClient *c) { config_get_numerical_field("repl-timeout",server.repl_timeout); config_get_numerical_field("maxclients",server.maxclients); config_get_numerical_field("watchdog-period",server.watchdog_period); + config_get_numerical_field("slave-tcp-nodelay-off",server.slave_tcp_nodelay_off); config_get_numerical_field("slave-priority",server.slave_priority); config_get_numerical_field("hz",server.hz); diff --git a/src/redis.c b/src/redis.c index f271ac186..c0c3af384 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1200,6 +1200,7 @@ void initServerConfig() { 
server.repl_serve_stale_data = 1; server.repl_slave_ro = 1; server.repl_down_since = time(NULL); + server.slave_tcp_nodelay_off = 1; server.slave_priority = REDIS_DEFAULT_SLAVE_PRIORITY; /* Client output buffer limits */ diff --git a/src/redis.h b/src/redis.h index 46f2be8b2..b4955d073 100644 --- a/src/redis.h +++ b/src/redis.h @@ -763,6 +763,7 @@ struct redisServer { int repl_serve_stale_data; /* Serve stale data when link is down? */ int repl_slave_ro; /* Slave is read only? */ time_t repl_down_since; /* Unix time at which link with master went down */ + int slave_tcp_nodelay_off; /* turn off slave's tcp nodelay */ int slave_priority; /* Reported in INFO and used by Sentinel. */ /* Limits */ unsigned int maxclients; /* Max number of simultaneous clients */ diff --git a/src/replication.c b/src/replication.c index 2f0cba701..fffac0e12 100644 --- a/src/replication.c +++ b/src/replication.c @@ -118,6 +118,14 @@ void syncCommand(redisClient *c) { /* ignore SYNC if already slave or in monitor mode */ if (c->flags & REDIS_SLAVE) return; + if (server.slave_tcp_nodelay_off) { + redisLog(REDIS_NOTICE, "Turning off slave's :%d TCP NODELAY SETTING", c->fd); + char err[1024]; + if (anetTcpNoDelayOff(err, c->fd) == ANET_ERR) + redisLog(REDIS_WARNING, + "Can't turn off %d 's tcp nodelay setting: %s", c->fd, err); + } + /* Refuse SYNC requests if we are a slave but the link with our master * is not ok... */ if (server.masterhost && server.repl_state != REDIS_REPL_CONNECTED) {