From 29b83f1ac8dd80a9c3214c1e1f0ff3b7730fb612 Mon Sep 17 00:00:00 2001
From: ranshid <88133677+ranshid@users.noreply.github.com>
Date: Mon, 21 Oct 2024 12:56:44 +0300
Subject: [PATCH] Introduce bgsave cancel (#757)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In some cases bgsave child process can run for a long time exhausting
system resources. Although it is possible to kill the bgsave child
process from the system shell, sometimes it is not possible allowing OS
level access.

This PR adds a new subcommand to the BGSAVE command.
When user will issue `BGSAVE CANCEL`, it will do one of the 2:

1. In case a bgsave child process is currently running, the child
   process would be immediately killed thus terminating any
   save/replication full sync process.
2. In case a bgsave child process is SCHEDULED to run, the scheduled
   execution will be cancelled.

---------

Signed-off-by: ranshid <ranshid@amazon.com>
Signed-off-by: ranshid <88133677+ranshid@users.noreply.github.com>
Signed-off-by: Ran Shidlansik <ranshid@amazon.com>
Signed-off-by: Binbin <binloveplay1314@qq.com>
Co-authored-by: Binbin <binloveplay1314@qq.com>
Co-authored-by: Viktor Söderqvist <viktor.soderqvist@est.tech>
---
 src/commands.def          | 11 ++++++--
 src/commands/bgsave.json  | 30 +++++++++++++++++---
 src/rdb.c                 | 20 ++++++++++++++
 tests/integration/rdb.tcl | 58 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 113 insertions(+), 6 deletions(-)

diff --git a/src/commands.def b/src/commands.def
index cd9f8e298..98a9b40f0 100644
--- a/src/commands.def
+++ b/src/commands.def
@@ -6408,6 +6408,7 @@ struct COMMAND_STRUCT ACL_Subcommands[] = {
 /* BGSAVE history */
 commandHistory BGSAVE_History[] = {
 {"3.2.2","Added the `SCHEDULE` option."},
+{"8.0.0","Added the `CANCEL` option."},
 };
 #endif
 
@@ -6421,9 +6422,15 @@ commandHistory BGSAVE_History[] = {
 #define BGSAVE_Keyspecs NULL
 #endif
 
+/* BGSAVE operation argument table */
+struct COMMAND_ARG BGSAVE_operation_Subargs[] = {
+{MAKE_ARG("schedule",ARG_TYPE_PURE_TOKEN,-1,"SCHEDULE",NULL,"3.2.2",CMD_ARG_NONE,0,NULL)},
+{MAKE_ARG("cancel",ARG_TYPE_PURE_TOKEN,-1,"CANCEL",NULL,"8.0.0",CMD_ARG_NONE,0,NULL)},
+};
+
 /* BGSAVE argument table */
 struct COMMAND_ARG BGSAVE_Args[] = {
-{MAKE_ARG("schedule",ARG_TYPE_PURE_TOKEN,-1,"SCHEDULE",NULL,"3.2.2",CMD_ARG_OPTIONAL,0,NULL)},
+{MAKE_ARG("operation",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_OPTIONAL,2,NULL),.subargs=BGSAVE_operation_Subargs},
 };
 
 /********** COMMAND COUNT ********************/
@@ -10989,7 +10996,7 @@ struct COMMAND_STRUCT serverCommandTable[] = {
 /* server */
 {MAKE_CMD("acl","A container for Access List Control commands.","Depends on subcommand.","6.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,ACL_History,0,ACL_Tips,0,NULL,-2,CMD_SENTINEL,0,ACL_Keyspecs,0,NULL,0),.subcommands=ACL_Subcommands},
 {MAKE_CMD("bgrewriteaof","Asynchronously rewrites the append-only file to disk.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,BGREWRITEAOF_History,0,BGREWRITEAOF_Tips,0,bgrewriteaofCommand,1,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT,0,BGREWRITEAOF_Keyspecs,0,NULL,0)},
-{MAKE_CMD("bgsave","Asynchronously saves the database(s) to disk.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,BGSAVE_History,1,BGSAVE_Tips,0,bgsaveCommand,-1,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT,0,BGSAVE_Keyspecs,0,NULL,1),.args=BGSAVE_Args},
+{MAKE_CMD("bgsave","Asynchronously saves the database(s) to disk.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,BGSAVE_History,2,BGSAVE_Tips,0,bgsaveCommand,-1,CMD_NO_ASYNC_LOADING|CMD_ADMIN|CMD_NOSCRIPT,0,BGSAVE_Keyspecs,0,NULL,1),.args=BGSAVE_Args},
 {MAKE_CMD("command","Returns detailed information about all commands.","O(N) where N is the total number of commands","2.8.13",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,COMMAND_History,0,COMMAND_Tips,1,commandCommand,-1,CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,COMMAND_Keyspecs,0,NULL,0),.subcommands=COMMAND_Subcommands},
 {MAKE_CMD("config","A container for server configuration commands.","Depends on subcommand.","2.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,CONFIG_History,0,CONFIG_Tips,0,NULL,-2,0,0,CONFIG_Keyspecs,0,NULL,0),.subcommands=CONFIG_Subcommands},
 {MAKE_CMD("dbsize","Returns the number of keys in the database.","O(1)","1.0.0",CMD_DOC_NONE,NULL,NULL,"server",COMMAND_GROUP_SERVER,DBSIZE_History,0,DBSIZE_Tips,2,dbsizeCommand,1,CMD_READONLY|CMD_FAST,ACL_CATEGORY_KEYSPACE,DBSIZE_Keyspecs,0,NULL,0)},
diff --git a/src/commands/bgsave.json b/src/commands/bgsave.json
index f73d8a89b..cf1c92047 100644
--- a/src/commands/bgsave.json
+++ b/src/commands/bgsave.json
@@ -10,6 +10,10 @@
             [
                 "3.2.2",
                 "Added the `SCHEDULE` option."
+            ],
+            [
+                "8.0.0",
+                "Added the `CANCEL` option."
             ]
         ],
         "command_flags": [
@@ -19,11 +23,23 @@
         ],
         "arguments": [
             {
-                "name": "schedule",
-                "token": "SCHEDULE",
-                "type": "pure-token",
+                "name": "operation",
+                "type": "oneof",
                 "optional": true,
-                "since": "3.2.2"
+                "arguments": [
+                    {
+                        "name": "schedule",
+                        "token": "SCHEDULE",
+                        "type": "pure-token",
+                        "since": "3.2.2"
+                    },
+                    {
+                        "name": "cancel",
+                        "token": "CANCEL",
+                        "type": "pure-token",
+                        "since": "8.0.0"
+                    }
+                ]
             }
         ],
         "reply_schema": {
@@ -33,6 +49,12 @@
                 },
                 {
                     "const": "Background saving scheduled"
+                },
+                {
+                    "const": "Background saving cancelled"
+                },
+                {
+                    "const": "Scheduled background saving cancelled"
                 }
             ]
         }
diff --git a/src/rdb.c b/src/rdb.c
index bc2d03e86..e5ec4d8f3 100644
--- a/src/rdb.c
+++ b/src/rdb.c
@@ -3689,6 +3689,21 @@ void bgsaveCommand(client *c) {
     if (c->argc > 1) {
         if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr, "schedule")) {
             schedule = 1;
+        } else if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr, "cancel")) {
+            /* Terminates an in progress BGSAVE */
+            if (server.child_type == CHILD_TYPE_RDB) {
+                /* There is an ongoing bgsave */
+                serverLog(LL_NOTICE, "Background saving will be aborted due to user request");
+                killRDBChild();
+                addReplyStatus(c, "Background saving cancelled");
+            } else if (server.rdb_bgsave_scheduled == 1) {
+                serverLog(LL_NOTICE, "Scheduled background saving will be cancelled due to user request");
+                server.rdb_bgsave_scheduled = 0;
+                addReplyStatus(c, "Scheduled background saving cancelled");
+            } else {
+                addReplyError(c, "Background saving is currently not in progress or scheduled");
+            }
+            return;
         } else {
             addReplyErrorObject(c, shared.syntaxerr);
             return;
@@ -3703,6 +3718,11 @@ void bgsaveCommand(client *c) {
     } else if (hasActiveChildProcess() || server.in_exec) {
         if (schedule || server.in_exec) {
             server.rdb_bgsave_scheduled = 1;
+            if (schedule) {
+                serverLog(LL_NOTICE, "Background saving scheduled due to user request");
+            } else {
+                serverLog(LL_NOTICE, "Background saving scheduled to run after transaction execution");
+            }
             addReplyStatus(c, "Background saving scheduled");
         } else {
             addReplyError(c, "Another child process is active (AOF?): can't BGSAVE right now. "
diff --git a/tests/integration/rdb.tcl b/tests/integration/rdb.tcl
index e3f92bf52..61cb0cea7 100644
--- a/tests/integration/rdb.tcl
+++ b/tests/integration/rdb.tcl
@@ -170,6 +170,64 @@ start_server {} {
         }
         assert_equal [s rdb_changes_since_last_save] 0
     }
+
+    test {bgsave cancel aborts save} {
+        r config set save ""
+        # Generating RDB will take some 100 seconds
+        r config set rdb-key-save-delay 1000000
+        populate 100 "" 16
+
+        r bgsave
+        wait_for_condition 50 100 {
+            [s rdb_bgsave_in_progress] == 1
+        } else {
+            fail "bgsave did not start in time"
+        }
+        set fork_child_pid [get_child_pid 0]
+        
+        assert {[r bgsave cancel] eq {Background saving cancelled}}
+        set temp_rdb [file join [lindex [r config get dir] 1] temp-${fork_child_pid}.rdb]
+        # Temp rdb must be deleted
+        wait_for_condition 50 100 {
+            ![file exists $temp_rdb]
+        } else {
+            fail "bgsave temp file was not deleted after cancel"
+        }
+
+         # Make sure no save is running and that bgsave return an error
+         wait_for_condition 50 100 {
+            [s rdb_bgsave_in_progress] == 0
+        } else {
+            fail "bgsave is currently running"
+        }
+        assert_error "ERR Background saving is currently not in progress or scheduled" {r bgsave cancel}
+    }
+
+    test {bgsave cancel schedulled request} {
+        r config set save ""
+        # Generating RDB will take some 100 seconds
+        r config set rdb-key-save-delay 1000000
+        populate 100 "" 16
+
+        # start a long AOF child
+        r bgrewriteaof
+        wait_for_condition 50 100 {
+            [s aof_rewrite_in_progress] == 1
+        } else {
+            fail "aof not started"
+        }
+        
+        # Make sure cancel return valid status
+        assert {[r bgsave schedule] eq {Background saving scheduled}}
+
+        # Cancel the scheduled save
+        assert {[r bgsave cancel] eq {Scheduled background saving cancelled}}
+
+        # Make sure a second call to bgsave cancel return an error
+        assert_error "ERR Background saving is currently not in progress or scheduled" {r bgsave cancel}
+    }
+
+
 }
 
 test {client freed during loading} {