From 1aa633d61bc22ad5f8865977ac9e5ec581c6f3c2 Mon Sep 17 00:00:00 2001 From: Andy Pan Date: Wed, 27 Dec 2023 00:44:18 +0800 Subject: [PATCH] Implement TCP Keep-Alives across most Unix-like systems (#12782) ## TCP Keep-Alives [TCP Keep-Alives](https://datatracker.ietf.org/doc/html/rfc9293#name-tcp-keep-alives) provide a way to detect whether a TCP connection is alive or dead, which can be useful for reducing system resource usage by cleaning up dead connections. There is full support for TCP Keep-Alives on Linux and partial support on macOS in `redis` at present. This PR intends to complete the rest. ## Unix-like OS support `TCP_KEEPIDLE`, `TCP_KEEPINTVL`, and `TCP_KEEPCNT` are not included in the POSIX standard for `setsockopt`, but these three socket options are widely available on most Unix-like systems and Windows. ### References - [AIX](https://www.ibm.com/support/pages/ibm-aix-tcp-keepalive-probes) - [DragonflyBSD](https://man.dragonflybsd.org/?command=tcp&section=4) - [FreeBSD](https://www.freebsd.org/cgi/man.cgi?query=tcp) - [HP-UX](https://docstore.mik.ua/manuals/hp-ux/en/B2355-60130/TCP.7P.html) - [illumos](https://illumos.org/man/4P/tcp) - [Linux](https://man7.org/linux/man-pages/man7/tcp.7.html) - [NetBSD](https://man.netbsd.org/NetBSD-8.0/tcp.4) - [Windows](https://learn.microsoft.com/en-us/windows/win32/winsock/ipproto-tcp-socket-options) ### macOS In earlier versions, macOS only supported setting `TCP_KEEPALIVE` (the equivalent of `TCP_KEEPIDLE` on other platforms), but since macOS 10.8 it has supported `TCP_KEEPINTVL` and `TCP_KEEPCNT`. Check out [this mailing list](https://lists.apple.com/archives/macnetworkprog/2012/Jul/msg00005.html) and [the source code](https://github.com/apple/darwin-xnu/blob/main/bsd/netinet/tcp.h#L215-L230) for more details. ### Solaris Solaris claimed it supported the TCP Keep-Alives mechanism, but `TCP_KEEPIDLE`, `TCP_KEEPINTVL`, and `TCP_KEEPCNT` were not available on Solaris until the latest version 11.4. 
Therefore, on older Solaris releases we need to emulate the TCP Keep-Alives mechanism of other platforms via `TCP_KEEPALIVE_THRESHOLD` + `TCP_KEEPALIVE_ABORT_THRESHOLD`. - [Solaris 11.3](https://docs.oracle.com/cd/E86824_01/html/E54777/tcp-7p.html) - [Solaris 11.4](https://docs.oracle.com/cd/E88353_01/html/E37851/tcp-4p.html) --------- Co-authored-by: Oran Agra --- src/anet.c | 149 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 117 insertions(+), 32 deletions(-) diff --git a/src/anet.c b/src/anet.c index 369e1c641..6ed40b32e 100644 --- a/src/anet.c +++ b/src/anet.c @@ -130,57 +130,142 @@ int anetCloexec(int fd) { return r; } -/* Set TCP keep alive option to detect dead peers. The interval option - * is only used for Linux as we are using Linux-specific APIs to set - * the probe send time, interval, and count. */ +/* Enable TCP keep-alive mechanism to detect dead peers, + * TCP_KEEPIDLE, TCP_KEEPINTVL and TCP_KEEPCNT will be set accordingly. */ int anetKeepAlive(char *err, int fd, int interval) { - int val = 1; - - if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &val, sizeof(val)) == -1) + int enabled = 1; + if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &enabled, sizeof(enabled))) { anetSetError(err, "setsockopt SO_KEEPALIVE: %s", strerror(errno)); return ANET_ERR; } -#ifdef __linux__ - /* Default settings are more or less garbage, with the keepalive time - * set to 7200 by default on Linux. Modify settings to make the feature - * actually useful. */ + int idle; + int intvl; + int cnt; - /* Send first probe after interval. 
*/ - val = interval; - if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &val, sizeof(val)) < 0) { - anetSetError(err, "setsockopt TCP_KEEPIDLE: %s\n", strerror(errno)); - return ANET_ERR; - } +/* There are platforms that are expected to support the full mechanism of TCP keep-alive, + * we want the compiler to emit warnings of unused variables if the preprocessor directives + * somehow fail, and other than those platforms, just omit these warnings if they happen. + */ +#if !(defined(_AIX) || defined(__APPLE__) || defined(__DragonFly__) || \ + defined(__FreeBSD__) || defined(__illumos__) || defined(__linux__) || \ + defined(__NetBSD__) || defined(__sun)) + UNUSED(interval); + UNUSED(idle); + UNUSED(intvl); + UNUSED(cnt); +#endif - /* Send next probes after the specified interval. Note that we set the - * delay as interval / 3, as we send three probes before detecting - * an error (see the next setsockopt call). */ - val = interval/3; - if (val == 0) val = 1; - if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &val, sizeof(val)) < 0) { +/* The implementation of TCP keep-alive on Solaris/SmartOS is a bit unusual + * compared to other Unix-like systems. + * Thus, we need to specialize it on Solaris. */ +#ifdef __sun + /* There are two keep-alive mechanisms on Solaris: + * - By default, the first keep-alive probe is sent out after a TCP connection is idle for two hours. + * If the peer does not respond to the probe within eight minutes, the TCP connection is aborted. + * You can alter the interval for sending out the first probe using the socket option TCP_KEEPALIVE_THRESHOLD + * in milliseconds or TCP_KEEPIDLE in seconds. + * The system default is controlled by the TCP ndd parameter tcp_keepalive_interval. The minimum value is ten seconds. + * The maximum is ten days, while the default is two hours. If you receive no response to the probe, + * you can use the TCP_KEEPALIVE_ABORT_THRESHOLD socket option to change the time threshold for aborting a TCP connection. 
+ * The option value is an unsigned integer in milliseconds. The value zero indicates that TCP should never time out and + * abort the connection when probing. The system default is controlled by the TCP ndd parameter tcp_keepalive_abort_interval. + * The default is eight minutes. + + * - The second implementation is activated if socket option TCP_KEEPINTVL and/or TCP_KEEPCNT are set. + * The time between each consequent probes is set by TCP_KEEPINTVL in seconds. + * The minimum value is ten seconds. The maximum is ten days, while the default is two hours. + * The TCP connection will be aborted after certain amount of probes, which is set by TCP_KEEPCNT, without receiving response. + */ + + idle = interval; + if (idle < 10) idle = 10; // kernel expects at least 10 seconds + if (idle > 10*24*60*60) idle = 10*24*60*60; // kernel expects at most 10 days + + /* `TCP_KEEPIDLE`, `TCP_KEEPINTVL`, and `TCP_KEEPCNT` were not available on Solaris + * until version 11.4, but let's take a chance here. */ + #if defined(TCP_KEEPIDLE) && defined(TCP_KEEPINTVL) && defined(TCP_KEEPCNT) + if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle))) { + anetSetError(err, "setsockopt TCP_KEEPIDLE: %s\n", strerror(errno)); + return ANET_ERR; + } + intvl = idle/3; + if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl))) { + anetSetError(err, "setsockopt TCP_KEEPINTVL: %s\n", strerror(errno)); + return ANET_ERR; + } + cnt = 3; + if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt))) { + anetSetError(err, "setsockopt TCP_KEEPCNT: %s\n", strerror(errno)); + return ANET_ERR; + } + return ANET_OK; + #endif + + /* Fall back to the first implementation of tcp-alive mechanism for older Solaris, + * simulate the tcp-alive mechanism on other platforms via `TCP_KEEPALIVE_THRESHOLD` + `TCP_KEEPALIVE_ABORT_THRESHOLD`. 
+ */ + idle *= 1000; // kernel expects milliseconds + if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE_THRESHOLD, &idle, sizeof(idle))) { anetSetError(err, "setsockopt TCP_KEEPINTVL: %s\n", strerror(errno)); return ANET_ERR; } - /* Consider the socket in error state after three we send three ACK - * probes without getting a reply. */ - val = 3; - if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &val, sizeof(val)) < 0) { + /* Note that the consequent probes will not be sent at equal intervals on Solaris, + * but will be sent using the exponential backoff algorithm. */ + intvl = idle/3; + cnt = 3; + int time_to_abort = intvl * cnt; + if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE_ABORT_THRESHOLD, &time_to_abort, sizeof(time_to_abort))) { anetSetError(err, "setsockopt TCP_KEEPCNT: %s\n", strerror(errno)); return ANET_ERR; } -#elif defined(__APPLE__) - /* Set idle time with interval */ - val = interval; - if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &val, sizeof(val)) < 0) { + + return ANET_OK; +#endif + +#ifdef TCP_KEEPIDLE + /* Default settings are more or less garbage, with the keepalive time + * set to 7200 by default on Linux and other Unix-like systems. + * Modify settings to make the feature actually useful. */ + + /* Send first probe after interval. */ + idle = interval; + if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle))) { + anetSetError(err, "setsockopt TCP_KEEPIDLE: %s\n", strerror(errno)); + return ANET_ERR; + } +#elif defined(TCP_KEEPALIVE) + /* Darwin/macOS uses TCP_KEEPALIVE in place of TCP_KEEPIDLE. */ + idle = interval; + if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &idle, sizeof(idle))) { anetSetError(err, "setsockopt TCP_KEEPALIVE: %s\n", strerror(errno)); return ANET_ERR; } -#else - ((void) interval); /* Avoid unused var warning for non Linux systems. */ +#endif + +#ifdef TCP_KEEPINTVL + /* Send next probes after the specified interval. 
Note that we set the + * delay as interval / 3, as we send three probes before detecting + * an error (see the next setsockopt call). */ + intvl = interval/3; + if (intvl == 0) intvl = 1; + if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl))) { + anetSetError(err, "setsockopt TCP_KEEPINTVL: %s\n", strerror(errno)); + return ANET_ERR; + } +#endif + +#ifdef TCP_KEEPCNT + /* Consider the socket in error state after three we send three ACK + * probes without getting a reply. */ + cnt = 3; + if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt))) { + anetSetError(err, "setsockopt TCP_KEEPCNT: %s\n", strerror(errno)); + return ANET_ERR; + } #endif return ANET_OK;