From 08fca5ef31a1c7cdc8ee8f9c6ebfe0907ceffad3 Mon Sep 17 00:00:00 2001
From: John Sully <john@csquare.ca>
Date: Sun, 31 May 2020 23:22:25 -0400
Subject: [PATCH] sendfile has high latency in some scenarios, don't use it

Former-commit-id: 1eb0e3c1c604e71c54423f1d11b8c709c847a516
---
 src/config.h                                  |  6 ---
 src/replication.cpp                           | 50 -------------------
 tests/integration/replication-multimaster.tcl |  2 +-
 3 files changed, 1 insertion(+), 57 deletions(-)

diff --git a/src/config.h b/src/config.h
index 08b9101cb..022cb0033 100644
--- a/src/config.h
+++ b/src/config.h
@@ -139,12 +139,6 @@ void setproctitle(const char *fmt, ...);
 /* Byte ordering detection */
 #include <sys/types.h> /* This will likely define BYTE_ORDER */
 
-/* Define redis_sendfile. */
-#if defined(__linux__) || (defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_5))
-#define HAVE_SENDFILE 1
-ssize_t redis_sendfile(int out_fd, int in_fd, off_t offset, size_t count);
-#endif
-
 #ifndef BYTE_ORDER
 #if (BSD >= 199103)
 # include <machine/endian.h>
diff --git a/src/replication.cpp b/src/replication.cpp
index 2f8a74837..67a550cf8 100644
--- a/src/replication.cpp
+++ b/src/replication.cpp
@@ -1334,40 +1334,6 @@ void removeRDBUsedToSyncReplicas(void) {
     }
 }
 
-#if HAVE_SENDFILE
-/* Implements redis_sendfile to transfer data between file descriptors and
- * avoid transferring data to and from user space.
- * 
- * The function prototype is just like sendfile(2) on Linux. in_fd is a file
- * descriptor opened for reading and out_fd is a descriptor opened for writing.
- * offset specifies where to start reading data from in_fd. count is the number
- * of bytes to copy between the file descriptors.
- * 
- * The return value is the number of bytes written to out_fd, if the transfer
- * was successful. On error, -1 is returned, and errno is set appropriately. */
-#if defined(__linux__)
-#include <sys/sendfile.h>
-#endif
-ssize_t redis_sendfile(int out_fd, int in_fd, off_t offset, size_t count) {
-#if defined(__linux__)
-    return sendfile(out_fd, in_fd, &offset, count);
-
-#elif defined(__APPLE__)
-    off_t len = count;
-    /* Notice that it may return -1 and errno is set to EAGAIN even if some
-     * bytes have been sent successfully and the len argument is set correctly
-     * when using a socket marked for non-blocking I/O. */
-    if (sendfile(in_fd, out_fd, offset, &len, NULL, 0) == -1 &&
-        errno != EAGAIN) return -1;
-    else
-        return (ssize_t)len;
-
-#endif
-    errno = ENOSYS;
-    return -1;
-}
-#endif
-
 void sendBulkToSlave(connection *conn) {
     client *replica = (client*)connGetPrivateData(conn);
     serverAssert(FCorrectThread(replica));
@@ -1404,22 +1370,6 @@ void sendBulkToSlave(connection *conn) {
      * try to use sendfile system call if supported, unless tls is enabled.
      * fallback to normal read+write otherwise. */
     nwritten = 0;
-#if HAVE_SENDFILE
-    if (!g_pserver->tls_replication && !g_pserver->fActiveReplica) { // sendfile blocks too long for active replication
-        if ((nwritten = redis_sendfile(conn->fd,replica->repldbfd,
-            replica->repldboff,PROTO_IOBUF_LEN)) == -1)
-        {
-            if (errno != EAGAIN) {
-                serverLog(LL_WARNING,"Sendfile error sending DB to replica: %s",
-                    strerror(errno));
-                ul.unlock();
-                aeLock.arm(nullptr);
-                freeClient(replica);
-            }
-            return;
-        }
-    }
-#endif
     if (!nwritten) {
         ssize_t buflen;
         char buf[PROTO_IOBUF_LEN];
diff --git a/tests/integration/replication-multimaster.tcl b/tests/integration/replication-multimaster.tcl
index 86e2ff92e..858cff26a 100644
--- a/tests/integration/replication-multimaster.tcl
+++ b/tests/integration/replication-multimaster.tcl
@@ -34,7 +34,7 @@ start_server {overrides {hz 500 active-replica yes multi-master yes}} {
     test "$topology all nodes up" {
         for {set j 0} {$j < 4} {incr j} {
             wait_for_condition 50 100 {
-                [string match {*all_master_link_status:up*} [$R($j) info replication]]
+                [string match {*master_global_link_status:up*} [$R($j) info replication]]
             } else {
                 fail "Multimaster group didn't connect up in a reasonable period of time"
             }