From f53e0337ef42d5ab6e02e12a4b31f097b427e02c Mon Sep 17 00:00:00 2001
From: John Sully <jsully@snapchat.com>
Date: Tue, 27 Jun 2023 15:37:28 -0400
Subject: [PATCH] Port load shedding and availability-zone (#202)

---
 keydb.conf     | 13 +++++++++++++
 src/config.cpp |  2 ++
 src/server.cpp | 37 +++++++++++++++++++++++++++++++++++--
 src/server.h   |  9 +++++++++
 4 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/keydb.conf b/keydb.conf
index 800657138..59ddf5abb 100644
--- a/keydb.conf
+++ b/keydb.conf
@@ -2088,3 +2088,16 @@ active-client-balancing yes
 # disk space or any other I/O error KeyDB will instead use memory.
 #
 # blob-support false
+
+# Begin load shedding if we use more than X% CPU relative to the number of server threads
+# E.g. if overload-protect-percent is set to 80 and there are 8 server-threads, then the 
+# actual CPU protection will be 8 * 100 * 0.80 = 640% CPU usage.
+#
+# Set to 0 to disable
+# overload-protect-percent 0
+
+# Inform KeyDB of the availability zone if running in a cloud environment.  Currently
+# this is only exposed via the info command for clients to use, but in the future we
+# may also use this when making decisions for replication.
+#
+# availability-zone "us-east-1a"
\ No newline at end of file
diff --git a/src/config.cpp b/src/config.cpp
index 78cdb3877..ef901a5f8 100644
--- a/src/config.cpp
+++ b/src/config.cpp
@@ -2953,6 +2953,8 @@ standardConfig configs[] = {
     createBoolConfig("soft-shutdown", NULL, MODIFIABLE_CONFIG, g_pserver->config_soft_shutdown, 0, NULL, NULL),
     createBoolConfig("flash-disable-key-cache", NULL, MODIFIABLE_CONFIG, g_pserver->flash_disable_key_cache, 0, NULL, NULL),
     createSizeTConfig("semi-ordered-set-bucket-size", NULL, MODIFIABLE_CONFIG, 0, 1024, g_semiOrderedSetTargetBucketSize, 0, INTEGER_CONFIG, NULL, NULL),
+    createSDSConfig("availability-zone", NULL, MODIFIABLE_CONFIG, 0, g_pserver->sdsAvailabilityZone, "", NULL, NULL),
+    createIntConfig("overload-protect-percent", NULL, MODIFIABLE_CONFIG, 0, 200, g_pserver->overload_protect_threshold, 0, INTEGER_CONFIG, NULL, NULL),
 
 #ifdef USE_OPENSSL
     createIntConfig("tls-port", NULL, MODIFIABLE_CONFIG, 0, 65535, g_pserver->tls_port, 0, INTEGER_CONFIG, NULL, updateTLSPort), /* TCP port. */
diff --git a/src/server.cpp b/src/server.cpp
index 95d19459b..fe1259aa2 100644
--- a/src/server.cpp
+++ b/src/server.cpp
@@ -1958,6 +1958,16 @@ void getExpansiveClientsInfo(size_t *in_usage, size_t *out_usage) {
     *out_usage = o;
 }
 
+/* Frees c to shed load while the server is overloaded, at most MAX_CLIENTS_SHED_PER_PERIOD per period. Returns true if c was freed. */
+int closeClientOnOverload(client *c) {
+    if (!g_pserver->is_overloaded || g_pserver->overload_closed_clients >= MAX_CLIENTS_SHED_PER_PERIOD) return false;
+    // Don't close masters, replicas, pub/sub or blocked clients, or clients with a pending write
+    if (c->flags & (CLIENT_MASTER | CLIENT_SLAVE | CLIENT_PENDING_WRITE | CLIENT_PUBSUB | CLIENT_BLOCKED)) return false;
+    freeClient(c);
+    ++g_pserver->overload_closed_clients;
+    return true;
+}
+
 /* This function is called by serverCron() and is used in order to perform
  * operations on clients that are important to perform constantly. For instance
  * we use this function in order to disconnect clients after a timeout, including
@@ -2028,6 +2038,7 @@ void clientsCron(int iel) {
             if (clientsCronTrackExpansiveClients(c, curr_peak_mem_usage_slot)) goto LContinue;
             if (clientsCronTrackClientsMemUsage(c)) goto LContinue;
             if (closeClientOnOutputBufferLimitReached(c, 0)) continue; // Client also free'd
+            if (closeClientOnOverload(c)) continue;
         LContinue:
             fastlock_unlock(&c->lock);
         }        
@@ -2581,6 +2592,26 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
         migrateCloseTimedoutSockets();
     }
 
+    /* Check for CPU overload once per OVERLOAD_PROTECT_PERIOD_MS; each check also resets the load shedding budget */
+    run_with_period(OVERLOAD_PROTECT_PERIOD_MS) {
+        g_pserver->is_overloaded = false;
+        g_pserver->overload_closed_clients = 0;
+        static clock_t last = 0;
+        /* Sample even while disabled so that enabling the feature at runtime never measures a stale, oversized window */
+        clock_t cur = clock();
+        if (g_pserver->overload_protect_threshold > 0) {
+            double perc = static_cast<double>(cur - last) / (CLOCKS_PER_SEC * (OVERLOAD_PROTECT_PERIOD_MS / 1000));
+            perc = perc / cserver.cthreads * 100.0;
+            serverLog(LL_WARNING, "CPU Used: %.2f", perc);
+            if (perc > g_pserver->overload_protect_threshold) {
+                serverLog(LL_WARNING, "\tWARNING: CPU overload detected.");
+                g_pserver->is_overloaded = true;
+            }
+        }
+        last = cur;
+    }
+
+    /* Tune the fastlock to CPU load */
     run_with_period(30000) {
         /* Tune the fastlock to CPU load */
         fastlock_auto_adjust_waits();
@@ -5602,7 +5633,8 @@ sds genRedisInfoString(const char *section) {
             "configured_hz:%i\r\n"
             "lru_clock:%u\r\n"
             "executable:%s\r\n"
-            "config_file:%s\r\n",
+            "config_file:%s\r\n"
+            "availability_zone:%s\r\n",
             KEYDB_SET_VERSION,
             redisGitSHA1(),
             strtol(redisGitDirty(),NULL,10) > 0,
@@ -5628,7 +5660,8 @@ sds genRedisInfoString(const char *section) {
             g_pserver->config_hz,
             lruclock,
             cserver.executable ? cserver.executable : "",
-            cserver.configfile ? cserver.configfile : "");
+            cserver.configfile ? cserver.configfile : "",
+            g_pserver->sdsAvailabilityZone);
     }
 
     /* Clients */
diff --git a/src/server.h b/src/server.h
index 022cca2dd..a99529244 100644
--- a/src/server.h
+++ b/src/server.h
@@ -122,6 +122,9 @@ typedef long long ustime_t; /* microsecond time type. */
 #define LOADING_BOOT 1
 #define LOADING_REPLICATION 2
 
+#define OVERLOAD_PROTECT_PERIOD_MS 10'000 // 10 seconds; length of one CPU overload check period in serverCron()
+#define MAX_CLIENTS_SHED_PER_PERIOD (OVERLOAD_PROTECT_PERIOD_MS / 10)  // Shed budget per period: on average one client per 10ms
+
 extern int g_fTestMode;
 extern struct redisServer *g_pserver;
 
@@ -2744,6 +2747,12 @@ struct redisServer {
     uint16_t rglockSamples[s_lockContentionSamples];
     unsigned ilockRingHead = 0;
 
+
+    sds sdsAvailabilityZone;
+    int overload_protect_threshold = 0;
+    int is_overloaded = 0;
+    int overload_closed_clients = 0;
+
         int module_blocked_pipe[2]; /* Pipe used to awake the event loop if a
                             client blocked on a module command needs
                             to be processed. */