Merge pull request #6144 from madolson/dev-unstable-cluster-down

Add configuration option for allowing reads on cluster down
2019-12-17 09:15:20 +01:00 · 2019-12-17 09:15:20 +01:00 · f3d67b3842
commit f3d67b3842
parent e9b99c78df 7b3e3d6a13
7 changed files with 69 additions and 12 deletions
--- a/redis.conf
+++ b/redis.conf
@ -1194,6 +1194,22 @@ lua-time-limit 5000
 #
 # cluster-replica-no-failover no
 # This option, when set to yes, allows nodes to serve read traffic while
 # the cluster is in a down state, as long as it believes it owns the slots.
 #
 # This is useful for two cases.  The first case is for when an application 
 # doesn't require consistency of data during node failures or network partitions.
 # One example of this is a cache, where as long as the node has the data it
 # should be able to serve it. 
 #
 # The second use case is for configurations that don't meet the recommended  
 # three shards but want to enable cluster mode and scale later. A 
 # master outage in a 1 or 2 shard configuration causes a read/write outage to the
 # entire cluster without this option set; with it set there is only a write outage.
 # Without a quorum of masters, slot ownership will not change automatically. 
 #
 # cluster-allow-reads-when-down no
 # In order to set up your cluster make sure to read the documentation
 # available at http://redis.io web site.
--- a/src/cluster.c
+++ b/src/cluster.c
@ -5476,8 +5476,8 @@ void readwriteCommand(client *c) {
 * already "down" but it is fragile to rely on the update of the global state,
 * so we also handle it here.
 *
- * CLUSTER_REDIR_DOWN_STATE if the cluster is down but the user attempts to
+ * CLUSTER_REDIR_DOWN_STATE and CLUSTER_REDIR_DOWN_RO_STATE if the cluster is 
- * execute a command that addresses one or more keys. */
+ * down but the user attempts to execute a command that addresses one or more keys. */
 clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, int *error_code) {
    clusterNode *n = NULL;
    robj *firstkey = NULL;
@ -5595,10 +5595,19 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in
     * without redirections or errors in all the cases. */
    if (n == NULL) return myself;
-    /* Cluster is globally down but we got keys? We can't serve the request. */
+    /* Cluster is globally down but we got keys? We only serve the request
     * if it is a read command and when allow_reads_when_down is enabled. */
    if (server.cluster->state != CLUSTER_OK) {
-        if (error_code) *error_code = CLUSTER_REDIR_DOWN_STATE;
+        if (!server.cluster_allow_reads_when_down) {
-        return NULL;
+            if (error_code) *error_code = CLUSTER_REDIR_DOWN_STATE;
            return NULL;
        }
        if (!(cmd->flags & CMD_READONLY) && !(cmd->proc == evalCommand) 
                && !(cmd->proc == evalShaCommand)) {
            if (error_code) *error_code = CLUSTER_REDIR_DOWN_RO_STATE;
            return NULL;
        }
    }
    /* Return the hashslot by reference. */
@ -5667,6 +5676,8 @@ void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_co
        addReplySds(c,sdsnew("-TRYAGAIN Multiple keys request during rehashing of slot\r\n"));
    } else if (error_code == CLUSTER_REDIR_DOWN_STATE) {
        addReplySds(c,sdsnew("-CLUSTERDOWN The cluster is down\r\n"));
    } else if (error_code == CLUSTER_REDIR_DOWN_RO_STATE) {
        addReplySds(c,sdsnew("-CLUSTERDOWN The cluster is down and only accepts read commands\r\n"));
    } else if (error_code == CLUSTER_REDIR_DOWN_UNBOUND) {
        addReplySds(c,sdsnew("-CLUSTERDOWN Hash slot not served\r\n"));
    } else if (error_code == CLUSTER_REDIR_MOVED ||
@ -5701,7 +5712,10 @@ int clusterRedirectBlockedClientIfNeeded(client *c) {
        dictEntry *de;
        dictIterator *di;
-        /* If the cluster is down, unblock the client with the right error. */
+        /* If the cluster is down, unblock the client with the right error.
         * If the cluster is configured to allow reads on cluster down, we
         * still want to emit this error since a write will be required
         * to unblock them which may never come.  */
        if (server.cluster->state == CLUSTER_FAIL) {
            clusterRedirectClient(c,NULL,0,CLUSTER_REDIR_DOWN_STATE);
            return 1;
--- a/src/cluster.h
+++ b/src/cluster.h
@ -29,6 +29,7 @@
 #define CLUSTER_REDIR_MOVED 4         /* -MOVED redirection required. */
 #define CLUSTER_REDIR_DOWN_STATE 5    /* -CLUSTERDOWN, global state. */
 #define CLUSTER_REDIR_DOWN_UNBOUND 6  /* -CLUSTERDOWN, unbound slot. */
 #define CLUSTER_REDIR_DOWN_RO_STATE 7 /* -CLUSTERDOWN, allow reads. */
 struct clusterNode;
--- a/src/config.c
+++ b/src/config.c
@ -2166,6 +2166,8 @@ standardConfig configs[] = {
    createBoolConfig("syslog-enabled", NULL, IMMUTABLE_CONFIG, server.syslog_enabled, 0, NULL, NULL),
    createBoolConfig("cluster-enabled", NULL, IMMUTABLE_CONFIG, server.cluster_enabled, 0, NULL, NULL),
    createBoolConfig("appendonly", NULL, MODIFIABLE_CONFIG, server.aof_enabled, 0, NULL, updateAppendonly),
    createBoolConfig("cluster-allow-reads-when-down", NULL, MODIFIABLE_CONFIG, server.cluster_allow_reads_when_down, 0, NULL, NULL),
    /* String Configs */
    createStringConfig("aclfile", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.acl_filename, "", NULL, NULL),
--- a/src/module.c
+++ b/src/module.c
@ -3174,6 +3174,9 @@ fmterr:
 * EINVAL: wrong command arity.
 * ENOENT: command does not exist.
 * EPERM:  operation in Cluster instance with key in non local slot.
 * EROFS:  operation in Cluster instance when a write command is sent
 *         in a readonly state.
 * ENETDOWN: operation in Cluster instance when cluster is down.
 *
 * This API is documented here: https://redis.io/topics/modules-intro
 */
@ -3231,13 +3234,20 @@ RedisModuleCallReply *RM_Call(RedisModuleCtx *ctx, const char *cmdname, const ch
     * trying to access non-local keys, with the exception of commands
     * received from our master. */
    if (server.cluster_enabled && !(ctx->client->flags & CLIENT_MASTER)) {
        int error_code;
        /* Duplicate relevant flags in the module client. */
        c->flags &= ~(CLIENT_READONLY|CLIENT_ASKING);
        c->flags |= ctx->client->flags & (CLIENT_READONLY|CLIENT_ASKING);
-        if (getNodeByQuery(c,c->cmd,c->argv,c->argc,NULL,NULL) !=
+        if (getNodeByQuery(c,c->cmd,c->argv,c->argc,NULL,&error_code) !=
                           server.cluster->myself)
        {
-            errno = EPERM;
+            if (error_code == CLUSTER_REDIR_DOWN_RO_STATE) { 
                errno = EROFS;
            } else if (error_code == CLUSTER_REDIR_DOWN_STATE) { 
                errno = ENETDOWN;
            } else {
                errno = EPERM;
            }
            goto cleanup;
        }
    }
--- a/src/scripting.c
+++ b/src/scripting.c
@ -679,15 +679,27 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
    if (server.cluster_enabled && !server.loading &&
        !(server.lua_caller->flags & CLIENT_MASTER))
    {
        int error_code;
        /* Duplicate relevant flags in the lua client. */
        c->flags &= ~(CLIENT_READONLY|CLIENT_ASKING);
        c->flags |= server.lua_caller->flags & (CLIENT_READONLY|CLIENT_ASKING);
-        if (getNodeByQuery(c,c->cmd,c->argv,c->argc,NULL,NULL) !=
+        if (getNodeByQuery(c,c->cmd,c->argv,c->argc,NULL,&error_code) !=
                           server.cluster->myself)
        {
-            luaPushError(lua,
+            if (error_code == CLUSTER_REDIR_DOWN_RO_STATE) { 
-                "Lua script attempted to access a non local key in a "
+                luaPushError(lua,
-                "cluster node");
+                    "Lua script attempted to execute a write command while the "
                    "cluster is down and readonly");
            } else if (error_code == CLUSTER_REDIR_DOWN_STATE) { 
                luaPushError(lua,
                    "Lua script attempted to execute a command while the "
                    "cluster is down");
            } else {
                luaPushError(lua,
                    "Lua script attempted to access a non local key in a "
                    "cluster node");
            }
            goto cleanup;
        }
    }
--- a/src/server.h
+++ b/src/server.h
@ -1334,6 +1334,8 @@ struct redisServer {
                                      to set in order to suppress certain
                                      native Redis Cluster features. Check the
                                      REDISMODULE_CLUSTER_FLAG_*. */
    int cluster_allow_reads_when_down; /* Are reads allowed when the cluster
                                        is down? */
    /* Scripting */
    lua_State *lua; /* The Lua interpreter. We use just one for all clients */
    client *lua_client;   /* The "fake client" to query Redis from Lua */