From 624742d9b40d20afe8b694c2dc0b0349f8b7aaad Mon Sep 17 00:00:00 2001
From: antirez <antirez@gmail.com>
Date: Sat, 16 May 2020 17:15:35 +0200
Subject: [PATCH] Remove the client from CLOSE_ASAP list before caching the
 master.

This was broken in 1a7cd2c: we identified a crash in the CI, what
was happening before the fix should be like that:

1. The client gets in the async free list.
2. However freeClient() gets called again against the same client
   which is a master.
3. The client arrived in freeClient() with the CLOSE_ASAP flag set.
4. The master gets cached, but NOT removed from the CLOSE_ASAP linked
   list.
5. The master client that was cached was immediately removed since it
   was still in the list.
6. Redis accessed a freed cached master.

This is how the crash looked like:

=== REDIS BUG REPORT START: Cut & paste starting from here ===
1092:S 16 May 2020 11:44:09.731 # Redis 999.999.999 crashed by signal: 11
1092:S 16 May 2020 11:44:09.731 # Crashed running the instruction at: 0x447e18
1092:S 16 May 2020 11:44:09.731 # Accessing address: 0xffffffffffffffff
1092:S 16 May 2020 11:44:09.731 # Failed assertion:  (:0)

------ STACK TRACE ------
EIP:
src/redis-server 127.0.0.1:21300(readQueryFromClient+0x48)[0x447e18]

And the 0xffff address access likely comes from accessing an SDS that is
set to NULL (we go -1 offset to read the header).
---
 src/networking.c  | 21 ++++++++++++---------
 src/replication.c |  3 ---
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/networking.c b/src/networking.c
index 6de00be09..0a04c0489 100644
--- a/src/networking.c
+++ b/src/networking.c
@@ -1134,6 +1134,16 @@ void freeClient(client *c) {
     /* Notify module system that this client auth status changed. */
     moduleNotifyUserChanged(c);
 
+    /* If this client was scheduled for async freeing we need to remove it
+     * from the queue. Note that we need to do this here, because later
+     * we may call replicationCacheMaster() and the client should already
+     * be removed from the list of clients to free. */
+    if (c->flags & CLIENT_CLOSE_ASAP) {
+        ln = listSearchKey(server.clients_to_close,c);
+        serverAssert(ln != NULL);
+        listDelNode(server.clients_to_close,ln);
+    }
+
     /* If it is our master that's beging disconnected we should make sure
      * to cache the state to try a partial resynchronization later.
      *
@@ -1142,6 +1152,7 @@ void freeClient(client *c) {
     if (server.master && c->flags & CLIENT_MASTER) {
         serverLog(LL_WARNING,"Connection with master lost.");
         if (!(c->flags & (CLIENT_PROTOCOL_ERROR|CLIENT_BLOCKED))) {
+            c->flags &= ~(CLIENT_CLOSE_ASAP|CLIENT_CLOSE_AFTER_REPLY);
             replicationCacheMaster(c);
             return;
         }
@@ -1209,15 +1220,7 @@ void freeClient(client *c) {
      * we lost the connection with the master. */
     if (c->flags & CLIENT_MASTER) replicationHandleMasterDisconnection();
 
-    /* If this client was scheduled for async freeing we need to remove it
-     * from the queue. */
-    if (c->flags & CLIENT_CLOSE_ASAP) {
-        ln = listSearchKey(server.clients_to_close,c);
-        serverAssert(ln != NULL);
-        listDelNode(server.clients_to_close,ln);
-    }
-
-    /* Remove the contribution that this client gave to our
+   /* Remove the contribution that this client gave to our
      * incrementally computed memory usage. */
     server.stat_clients_type_memory[c->client_cron_last_memory_type] -=
         c->client_cron_last_memory_usage;
diff --git a/src/replication.c b/src/replication.c
index f433d6413..b10635f25 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -2688,9 +2688,6 @@ void replicationCacheMaster(client *c) {
     /* Unlink the client from the server structures. */
     unlinkClient(c);
 
-    /* Clear flags that can create issues once we reconnect the client. */
-    c->flags &= ~(CLIENT_CLOSE_ASAP|CLIENT_CLOSE_AFTER_REPLY);
-
     /* Reset the master client so that's ready to accept new commands:
      * we want to discard te non processed query buffers and non processed
      * offsets, including pending transactions, already populated arguments,