diff --git a/src/config.cpp b/src/config.cpp new file mode 100644 index 000000000..be12bdcd3 --- /dev/null +++ b/src/config.cpp @@ -0,0 +1,3066 @@ +/* Configuration file parsing and CONFIG GET/SET commands implementation. + * + * Copyright (c) 2009-2012, Salvatore Sanfilippo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "server.h" +#include "storage/rocksdbfactory.h" +#include "storage/teststorageprovider.h" +#include "cluster.h" + +#include +#include +#include +#ifdef __linux__ +#include +#endif + +const char *KEYDB_SET_VERSION = KEYDB_REAL_VERSION; +size_t g_semiOrderedSetTargetBucketSize = 0; // Its a header only class so nowhere else for this to go + +/*----------------------------------------------------------------------------- + * Config file name-value maps. + *----------------------------------------------------------------------------*/ + +typedef struct configEnum { + const char *name; + const int val; +} configEnum; + +configEnum maxmemory_policy_enum[] = { + {"volatile-lru", MAXMEMORY_VOLATILE_LRU}, + {"volatile-lfu", MAXMEMORY_VOLATILE_LFU}, + {"volatile-random",MAXMEMORY_VOLATILE_RANDOM}, + {"volatile-ttl",MAXMEMORY_VOLATILE_TTL}, + {"allkeys-lru",MAXMEMORY_ALLKEYS_LRU}, + {"allkeys-lfu",MAXMEMORY_ALLKEYS_LFU}, + {"allkeys-random",MAXMEMORY_ALLKEYS_RANDOM}, + {"noeviction",MAXMEMORY_NO_EVICTION}, + {NULL, 0} +}; + +configEnum syslog_facility_enum[] = { + {"user", LOG_USER}, + {"local0", LOG_LOCAL0}, + {"local1", LOG_LOCAL1}, + {"local2", LOG_LOCAL2}, + {"local3", LOG_LOCAL3}, + {"local4", LOG_LOCAL4}, + {"local5", LOG_LOCAL5}, + {"local6", LOG_LOCAL6}, + {"local7", LOG_LOCAL7}, + {NULL, 0} +}; + +configEnum loglevel_enum[] = { + {"debug", LL_DEBUG}, + {"verbose", LL_VERBOSE}, + {"notice", LL_NOTICE}, + {"warning", LL_WARNING}, + {NULL,0} +}; + +configEnum supervised_mode_enum[] = { + {"upstart", SUPERVISED_UPSTART}, + {"systemd", SUPERVISED_SYSTEMD}, + {"auto", SUPERVISED_AUTODETECT}, + {"no", SUPERVISED_NONE}, + {NULL, 0} +}; + +configEnum aof_fsync_enum[] = { + {"everysec", AOF_FSYNC_EVERYSEC}, + {"always", AOF_FSYNC_ALWAYS}, + {"no", AOF_FSYNC_NO}, + {NULL, 0} +}; + +configEnum repl_diskless_load_enum[] = { + {"disabled", REPL_DISKLESS_LOAD_DISABLED}, + {"on-empty-db", REPL_DISKLESS_LOAD_WHEN_DB_EMPTY}, + {"swapdb", REPL_DISKLESS_LOAD_SWAPDB}, + 
{NULL, 0} +}; + +configEnum storage_memory_model_enum[] = { + {"writeback", STORAGE_WRITEBACK}, + {"writethrough", STORAGE_WRITETHROUGH}, +}; + +configEnum tls_auth_clients_enum[] = { + {"no", TLS_CLIENT_AUTH_NO}, + {"yes", TLS_CLIENT_AUTH_YES}, + {"optional", TLS_CLIENT_AUTH_OPTIONAL}, + {NULL, 0} +}; + +configEnum oom_score_adj_enum[] = { + {"no", OOM_SCORE_ADJ_NO}, + {"yes", OOM_SCORE_RELATIVE}, + {"relative", OOM_SCORE_RELATIVE}, + {"absolute", OOM_SCORE_ADJ_ABSOLUTE}, + {NULL, 0} +}; + +configEnum acl_pubsub_default_enum[] = { + {"allchannels", USER_FLAG_ALLCHANNELS}, + {"resetchannels", 0}, + {NULL, 0} +}; + +configEnum sanitize_dump_payload_enum[] = { + {"no", SANITIZE_DUMP_NO}, + {"yes", SANITIZE_DUMP_YES}, + {"clients", SANITIZE_DUMP_CLIENTS}, + {NULL, 0} +}; + +/* Output buffer limits presets. */ +clientBufferLimitsConfig clientBufferLimitsDefaults[CLIENT_TYPE_OBUF_COUNT] = { + {0, 0, 0}, /* normal */ + {1024*1024*256, 1024*1024*64, 60}, /* replica */ + {1024*1024*32, 1024*1024*8, 60} /* pubsub */ +}; + +/* OOM Score defaults */ +int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT] = { 0, 200, 800 }; + +/* Generic config infrastructure function pointers + * int is_valid_fn(val, err) + * Return 1 when val is valid, and 0 when invalid. + * Optionally set err to a static error string. + * int update_fn(val, prev, err) + * This function is called only for CONFIG SET command (not at config file parsing) + * It is called after the actual config is applied, + * Return 1 for success, and 0 for failure. + * Optionally set err to a static error string. + * On failure the config change will be reverted. + */ + +/* Configuration values that require no special handling to set, get, load or + * rewrite. 
 */

/* Type data for a yes/no configuration value stored in an int. */
typedef struct boolConfigData {
    int *config; /* The pointer to the server config this value is stored in */
    int default_value; /* The default value of the config on rewrite */
    int (*is_valid_fn)(int val, const char **err); /* Optional function to check validity of new value (generic doc above) */
    int (*update_fn)(int val, int prev, const char **err); /* Optional function to apply new value at runtime (generic doc above) */
} boolConfigData;

/* Type data for a configuration value stored as a plain C string. */
typedef struct stringConfigData {
    char **config; /* Pointer to the server config this value is stored in. */
    const char *default_value; /* Default value of the config on rewrite. */
    int (*is_valid_fn)(char* val, const char **err); /* Optional function to check validity of new value (generic doc above) */
    int (*update_fn)(char* val, char* prev, const char **err); /* Optional function to apply new value at runtime (generic doc above) */
    int convert_empty_to_null; /* Boolean indicating if empty strings should
                                  be stored as a NULL value. */
} stringConfigData;

/* Type data for a configuration value stored as an sds string. */
typedef struct sdsConfigData {
    sds *config; /* Pointer to the server config this value is stored in. */
    const char *default_value; /* Default value of the config on rewrite. */
    int (*is_valid_fn)(sds val, const char **err); /* Optional function to check validity of new value (generic doc above) */
    int (*update_fn)(sds val, sds prev, const char **err); /* Optional function to apply new value at runtime (generic doc above) */
    int convert_empty_to_null; /* Boolean indicating if empty SDS strings should
                                  be stored as a NULL value. */
} sdsConfigData;

/* Type data for a configuration value chosen from a configEnum table. */
typedef struct enumConfigData {
    int *config; /* The pointer to the server config this value is stored in */
    configEnum *enum_value; /* The underlying enum type this data represents */
    int default_value; /* The default value of the config on rewrite */
    int (*is_valid_fn)(int val, const char **err); /* Optional function to check validity of new value (generic doc above) */
    int (*update_fn)(int val, int prev, const char **err); /* Optional function to apply new value at runtime (generic doc above) */
} enumConfigData;

/* Tag naming which member of numericConfigData.config is the active one. */
typedef enum numericType {
    NUMERIC_TYPE_INT,
    NUMERIC_TYPE_UINT,
    NUMERIC_TYPE_LONG,
    NUMERIC_TYPE_ULONG,
    NUMERIC_TYPE_LONG_LONG,
    NUMERIC_TYPE_ULONG_LONG,
    NUMERIC_TYPE_SIZE_T,
    NUMERIC_TYPE_SSIZE_T,
    NUMERIC_TYPE_OFF_T,
    NUMERIC_TYPE_TIME_T,
} numericType;

/* Type data for a numeric configuration value. Bounds and default are kept
 * as long long regardless of the C type of the destination variable. */
typedef struct numericConfigData {
    int is_memory; /* Indicates if this value can be loaded as a memory value */
    long long lower_bound; /* The lower bound of this numeric value */
    long long upper_bound; /* The upper bound of this numeric value */
    long long default_value; /* The default value of the config on rewrite */
    int (*is_valid_fn)(long long val, const char **err); /* Optional function to check validity of new value (generic doc above) */
    int (*update_fn)(long long val, long long prev, const char **err); /* Optional function to apply new value at runtime (generic doc above) */
    numericType numeric_type; /* An enum indicating the type of this value */
    union {
        int *i;
        unsigned int *ui;
        long *l;
        unsigned long *ul;
        long long *ll;
        unsigned long long *ull;
        size_t *st;
        ssize_t *sst;
        off_t *ot;
        time_t *tt;
    } config; /* The pointer to the numeric config this value is stored in */
} numericConfigData;

/* One slot able to hold the type data of any supported config kind. */
typedef union typeData {
    boolConfigData yesno;
    stringConfigData string;
    sdsConfigData sds;
    enumConfigData enumd;
    numericConfigData numeric;
} typeData;

/* Per-type operations invoked by the generic config code. Note that typeData
 * is passed by value to every callback. */
typedef struct typeInterface {
    /* Called on server start, to init the server with default value */
    void (*init)(typeData data);
    /* Called on server startup and CONFIG SET, returns 1 on success, 0 on error
     * and can set a verbose err string, update is true when called from CONFIG SET */
    int (*set)(typeData data, sds value, int update, const char **err);
    /* Called on CONFIG GET, required to add output to the client */
    void (*get)(client *c, typeData data);
    /* Called on CONFIG REWRITE, required to rewrite the config state */
    void (*rewrite)(typeData data, const char *name, struct rewriteConfigState *state);
} typeInterface;

/* One entry of the configs[] table. The loops in this file that walk the
 * table stop at the first entry whose name is NULL. */
typedef struct standardConfig {
    const char *name; /* The user visible name of this config */
    const char *alias; /* An alias that can also be used for this config */
    const unsigned int flags; /* Flags for this specific config */
    typeInterface interface; /* The function pointers that define the type interface */
    typeData data; /* The type specific data exposed used by the interface */
} standardConfig;

/* Values for standardConfig.flags. */
#define MODIFIABLE_CONFIG 0 /* This is the implied default for a standard
                             * config, which is mutable. */
#define IMMUTABLE_CONFIG (1ULL<<0) /* Can this value only be set at startup? */
#define SENSITIVE_CONFIG (1ULL<<1) /* Does this value contain sensitive information */

/* The table itself is defined elsewhere in this file. */
extern standardConfig configs[];

/*-----------------------------------------------------------------------------
 * Enum access functions
 *----------------------------------------------------------------------------*/

/* Get enum value from name. If there is no match INT_MIN is returned.
 *
 * The name comparison is case insensitive. 'ce' must end with an entry whose
 * name is NULL: that entry is the only thing that stops the scan. */
int configEnumGetValue(configEnum *ce, char *name) {
    while(ce->name != NULL) {
        if (!strcasecmp(ce->name,name)) return ce->val;
        ce++;
    }
    return INT_MIN;
}

/* Get enum name from value. If no match is found NULL is returned.
*/ +const char *configEnumGetName(configEnum *ce, int val) { + while(ce->name != NULL) { + if (ce->val == val) return ce->name; + ce++; + } + return NULL; +} + +/* Wrapper for configEnumGetName() returning "unknown" instead of NULL if + * there is no match. */ +const char *configEnumGetNameOrUnknown(configEnum *ce, int val) { + const char *name = configEnumGetName(ce,val); + return name ? name : "unknown"; +} + +/* Used for INFO generation. */ +const char *evictPolicyToString(void) { + return configEnumGetNameOrUnknown(maxmemory_policy_enum,g_pserver->maxmemory_policy); +} + +/*----------------------------------------------------------------------------- + * Config file parsing + *----------------------------------------------------------------------------*/ + +int truefalsetoi(char *s) { + if (!strcasecmp(s,"true")) return 1; + else if (!strcasecmp(s,"false")) return 0; + else return -1; +} + +int yesnotoi(char *s) { + if (!strcasecmp(s,"yes")) return 1; + else if (!strcasecmp(s,"no")) return 0; + else return truefalsetoi(s); +} + + +void appendServerSaveParams(time_t seconds, int changes) { + g_pserver->saveparams = (saveparam*)zrealloc(g_pserver->saveparams,sizeof(struct saveparam)*(g_pserver->saveparamslen+1), MALLOC_LOCAL); + g_pserver->saveparams[g_pserver->saveparamslen].seconds = seconds; + g_pserver->saveparams[g_pserver->saveparamslen].changes = changes; + g_pserver->saveparamslen++; +} + +void resetServerSaveParams(void) { + zfree(g_pserver->saveparams); + g_pserver->saveparams = NULL; + g_pserver->saveparamslen = 0; +} + +void queueLoadModule(sds path, sds *argv, int argc) { + int i; + struct moduleLoadQueueEntry *loadmod; + + loadmod = (moduleLoadQueueEntry*)zmalloc(sizeof(struct moduleLoadQueueEntry), MALLOC_LOCAL); + loadmod->argv = (robj**)zmalloc(sizeof(robj*)*argc, MALLOC_LOCAL); + loadmod->path = sdsnew(path); + loadmod->argc = argc; + for (i = 0; i < argc; i++) { + loadmod->argv[i] = createRawStringObject(argv[i],sdslen(argv[i])); + } + 
listAddNodeTail(g_pserver->loadmodule_queue,loadmod); +} + +sds g_sdsProvider = nullptr; +sds g_sdsArgs = nullptr; + +bool initializeStorageProvider(const char **err) +{ + try + { + bool fTest = false; + if (g_sdsProvider == nullptr) + return true; + if (!strcasecmp(g_sdsProvider, "flash") && g_sdsArgs != nullptr) + { +#ifdef ENABLE_ROCKSDB + // Create The Storage Factory (if necessary) + serverLog(LL_NOTICE, "Initializing FLASH storage provider (this may take a long time)"); + adjustOpenFilesLimit(); + g_pserver->m_pstorageFactory = CreateRocksDBStorageFactory(g_sdsArgs, cserver.dbnum, cserver.storage_conf, cserver.storage_conf ? strlen(cserver.storage_conf) : 0); +#else + serverLog(LL_WARNING, "To use the flash storage provider please compile KeyDB with ENABLE_FLASH=yes"); + serverLog(LL_WARNING, "Exiting due to the use of an unsupported storage provider"); + exit(EXIT_FAILURE); +#endif + } + else if (!strcasecmp(g_sdsProvider, "test") && g_sdsArgs == nullptr) + { + g_pserver->m_pstorageFactory = new (MALLOC_LOCAL) TestStorageFactory(); + fTest = true; + } + + if (g_pserver->m_pstorageFactory != nullptr && !fTest) + { + // We need to set max memory to a sane default so keys are actually evicted properly + if (g_pserver->maxmemory == 0 && g_pserver->maxmemory_policy == MAXMEMORY_NO_EVICTION) + { +#ifdef __linux__ + struct sysinfo sys; + if (sysinfo(&sys) == 0) + { + // By default it's a little under half the memory. 
This gives sufficient room for background saving + g_pserver->maxmemory = static_cast(sys.totalram / 2.2); + g_pserver->maxmemory_policy = MAXMEMORY_ALLKEYS_LRU; + } +#else + serverLog(LL_WARNING, "Unable to dynamically set maxmemory, please set maxmemory and maxmemory-policy if you are using a storage provier"); +#endif + } + else if (g_pserver->maxmemory_policy == MAXMEMORY_NO_EVICTION) + { + g_pserver->maxmemory_policy = MAXMEMORY_ALLKEYS_LRU; + } + } + else + { + *err = "Unknown storage provider"; + } + return g_pserver->m_pstorageFactory != nullptr; + } + catch(std::string str) + { + serverLog(LL_WARNING, "ERROR: Failed to initialize %s storage provider. Details to follow below.", g_sdsProvider); + serverLog(LL_WARNING, "\t%s", str.c_str()); + serverLog(LL_WARNING, "KeyDB cannot start. Exiting."); + exit(EXIT_FAILURE); + } +} + +/* Parse an array of CONFIG_OOM_COUNT sds strings, validate and populate + * g_pserver->oom_score_adj_values if valid. + */ + +static int updateOOMScoreAdjValues(sds *args, const char **err, int apply) { + int i; + int values[CONFIG_OOM_COUNT]; + + for (i = 0; i < CONFIG_OOM_COUNT; i++) { + char *eptr; + long long val = strtoll(args[i], &eptr, 10); + + if (*eptr != '\0' || val < -2000 || val > 2000) { + if (err) *err = "Invalid oom-score-adj-values, elements must be between -2000 and 2000."; + return C_ERR; + } + + values[i] = val; + } + + /* Verify that the values make sense. If they don't omit a warning but + * keep the configuration, which may still be valid for privileged processes. + */ + + if (values[CONFIG_OOM_REPLICA] < values[CONFIG_OOM_MASTER] || + values[CONFIG_OOM_BGCHILD] < values[CONFIG_OOM_REPLICA]) { + serverLog(LL_WARNING, + "The oom-score-adj-values configuration may not work for non-privileged processes! " + "Please consult the documentation."); + } + + /* Store values, retain previous config for rollback in case we fail. 
*/ + int old_values[CONFIG_OOM_COUNT]; + for (i = 0; i < CONFIG_OOM_COUNT; i++) { + old_values[i] = g_pserver->oom_score_adj_values[i]; + g_pserver->oom_score_adj_values[i] = values[i]; + } + + /* When parsing the config file, we want to apply only when all is done. */ + if (!apply) + return C_OK; + + /* Update */ + if (setOOMScoreAdj(-1) == C_ERR) { + /* Roll back */ + for (i = 0; i < CONFIG_OOM_COUNT; i++) + g_pserver->oom_score_adj_values[i] = old_values[i]; + + if (err) + *err = "Failed to apply oom-score-adj-values configuration, check server logs."; + + return C_ERR; + } + + return C_OK; +} + +void initConfigValues() { + for (standardConfig *config = configs; config->name != NULL; config++) { + config->interface.init(config->data); + } +} + +void loadServerConfigFromString(char *config) { + const char *err = NULL; + int linenum = 0, totlines, i; + int slaveof_linenum = 0; + sds *lines; + int save_loaded = 0; + + lines = sdssplitlen(config,strlen(config),"\n",1,&totlines); + + for (i = 0; i < totlines; i++) { + sds *argv; + int argc; + + linenum = i+1; + lines[i] = sdstrim(lines[i]," \t\r\n"); + + /* Skip comments and blank lines */ + if (lines[i][0] == '#' || lines[i][0] == '\0') continue; + + /* Split into arguments */ + argv = sdssplitargs(lines[i],&argc); + if (argv == NULL) { + err = "Unbalanced quotes in configuration line"; + goto loaderr; + } + + /* Skip this line if the resulting command vector is empty. 
*/ + if (argc == 0) { + sdsfreesplitres(argv,argc); + continue; + } + sdstolower(argv[0]); + + /* Iterate the configs that are standard */ + int match = 0; + for (standardConfig *config = configs; config->name != NULL; config++) { + if ((!strcasecmp(argv[0],config->name) || + (config->alias && !strcasecmp(argv[0],config->alias)))) + { + if (argc != 2) { + err = "wrong number of arguments"; + goto loaderr; + } + if (!config->interface.set(config->data, argv[1], 0, &err)) { + goto loaderr; + } + + match = 1; + break; + } + } + + if (match) { + sdsfreesplitres(argv,argc); + continue; + } + + /* Execute config directives */ + if (!strcasecmp(argv[0],"bind") && argc >= 2) { + int j, addresses = argc-1; + + if (addresses > CONFIG_BINDADDR_MAX) { + err = "Too many bind addresses specified"; goto loaderr; + } + /* Free old bind addresses */ + for (j = 0; j < g_pserver->bindaddr_count; j++) { + zfree(g_pserver->bindaddr[j]); + } + for (j = 0; j < addresses; j++) + g_pserver->bindaddr[j] = zstrdup(argv[j+1]); + g_pserver->bindaddr_count = addresses; + } else if (!strcasecmp(argv[0],"unixsocketperm") && argc == 2) { + errno = 0; + g_pserver->unixsocketperm = (mode_t)strtol(argv[1], NULL, 8); + if (errno || g_pserver->unixsocketperm > 0777) { + err = "Invalid socket file permissions"; goto loaderr; + } + } else if (!strcasecmp(argv[0],"save")) { + /* We don't reset save params before loading, because if they're not part + * of the file the defaults should be used. 
+ */ + if (!save_loaded) { + save_loaded = 1; + resetServerSaveParams(); + } + + if (argc == 3) { + int seconds = atoi(argv[1]); + int changes = atoi(argv[2]); + if (seconds < 1 || changes < 0) { + err = "Invalid save parameters"; goto loaderr; + } + appendServerSaveParams(seconds,changes); + } else if (argc == 2 && !strcasecmp(argv[1],"")) { + resetServerSaveParams(); + } + } else if (!strcasecmp(argv[0],"dir") && argc == 2) { + if (chdir(argv[1]) == -1) { + serverLog(LL_WARNING,"Can't chdir to '%s': %s", + argv[1], strerror(errno)); + exit(1); + } + } else if (!strcasecmp(argv[0],"logfile") && argc == 2) { + FILE *logfp; + + zfree(g_pserver->logfile); + g_pserver->logfile = zstrdup(argv[1]); + if (g_pserver->logfile[0] != '\0') { + /* Test if we are able to open the file. The server will not + * be able to abort just for this problem later... */ + logfp = fopen(g_pserver->logfile,"a"); + if (logfp == NULL) { + err = sdscatprintf(sdsempty(), + "Can't open the log file: %s", strerror(errno)); + goto loaderr; + } + fclose(logfp); + } + } else if (!strcasecmp(argv[0],"include") && argc == 2) { + loadServerConfig(argv[1], 0, NULL); + } else if ((!strcasecmp(argv[0],"slaveof") || + !strcasecmp(argv[0],"replicaof")) && argc == 3) { + slaveof_linenum = linenum; + if (!strcasecmp(argv[1], "no") && !strcasecmp(argv[2], "one")) { + if (listLength(g_pserver->masters)) { + listIter li; + listNode *ln; + listRewind(g_pserver->masters, &li); + while ((ln = listNext(&li))) + { + struct redisMaster *mi = (struct redisMaster*)listNodeValue(ln); + sdsfree(mi->masterauth); + zfree(mi->masteruser); + zfree(mi->repl_transfer_tmpfile); + delete mi->staleKeyMap; + zfree(mi); + listDelNode(g_pserver->masters, ln); + } + } + continue; + } + char *ptr; + int port = strtol(argv[2], &ptr, 10); + if (port < 0 || port > 65535 || *ptr != '\0') { + err= "Invalid master port"; goto loaderr; + } + replicationAddMaster(argv[1], port); + } else if (!strcasecmp(argv[0],"requirepass") && argc == 2) { 
+ if (strlen(argv[1]) > CONFIG_AUTHPASS_MAX_LEN) { + err = "Password is longer than CONFIG_AUTHPASS_MAX_LEN"; + goto loaderr; + } + } else if (!strcasecmp(argv[0],"list-max-ziplist-entries") && argc == 2){ + /* DEAD OPTION */ + } else if (!strcasecmp(argv[0],"list-max-ziplist-value") && argc == 2) { + /* DEAD OPTION */ + } else if (!strcasecmp(argv[0],"rename-command") && argc == 3) { + struct redisCommand *cmd = lookupCommand(argv[1]); + int retval; + + if (!cmd) { + err = "No such command in rename-command"; + goto loaderr; + } + + /* If the target command name is the empty string we just + * remove it from the command table. */ + retval = dictDelete(g_pserver->commands, argv[1]); + serverAssert(retval == DICT_OK); + + /* Otherwise we re-add the command under a different name. */ + if (sdslen(argv[2]) != 0) { + sds copy = sdsdup(argv[2]); + + retval = dictAdd(g_pserver->commands, copy, cmd); + if (retval != DICT_OK) { + sdsfree(copy); + err = "Target command name already exists"; goto loaderr; + } + } + } else if (!strcasecmp(argv[0],"cluster-config-file") && argc == 2) { + zfree(g_pserver->cluster_configfile); + g_pserver->cluster_configfile = zstrdup(argv[1]); + } else if (!strcasecmp(argv[0],"client-output-buffer-limit") && + argc == 5) + { + int type = getClientTypeByName(argv[1]); + unsigned long long hard, soft; + int soft_seconds; + + if (type == -1 || type == CLIENT_TYPE_MASTER) { + err = "Unrecognized client limit class: the user specified " + "an invalid one, or 'master' which has no buffer limits."; + goto loaderr; + } + hard = memtoll(argv[2],NULL); + soft = memtoll(argv[3],NULL); + soft_seconds = atoi(argv[4]); + if (soft_seconds < 0) { + err = "Negative number of seconds in soft limit is invalid"; + goto loaderr; + } + cserver.client_obuf_limits[type].hard_limit_bytes = hard; + cserver.client_obuf_limits[type].soft_limit_bytes = soft; + cserver.client_obuf_limits[type].soft_limit_seconds = soft_seconds; + } else if 
(!strcasecmp(argv[0],"oom-score-adj-values") && argc == 1 + CONFIG_OOM_COUNT) { + if (updateOOMScoreAdjValues(&argv[1], &err, 0) == C_ERR) goto loaderr; + } else if (!strcasecmp(argv[0],"notify-keyspace-events") && argc == 2) { + int flags = keyspaceEventsStringToFlags(argv[1]); + + if (flags == -1) { + err = "Invalid event class character. Use 'g$lshzxeA'."; + goto loaderr; + } + g_pserver->notify_keyspace_events = flags; + } else if (!strcasecmp(argv[0],"user") && argc >= 2) { + int argc_err; + if (ACLAppendUserForLoading(argv,argc,&argc_err) == C_ERR) { + char buf[1024]; + const char *errmsg = ACLSetUserStringError(); + snprintf(buf,sizeof(buf),"Error in user declaration '%s': %s", + argv[argc_err],errmsg); + err = buf; + goto loaderr; + } + } else if (!strcasecmp(argv[0],"loadmodule") && argc >= 2) { + queueLoadModule(argv[1],&argv[2],argc-2); + } else if (!strcasecmp(argv[0],"sentinel")) { + /* argc == 1 is handled by main() as we need to enter the sentinel + * mode ASAP. */ + if (argc != 1) { + if (!g_pserver->sentinel_mode) { + err = "sentinel directive while not in sentinel mode"; + goto loaderr; + } + queueSentinelConfig(argv+1,argc-1,linenum,lines[i]); + } + } else if (!strcasecmp(argv[0],"scratch-file-path")) { +#ifdef USE_MEMKIND + storage_init(argv[1], g_pserver->maxmemory); +#else + err = "KeyDB not compliled with scratch-file support."; + goto loaderr; +#endif + } else if ((!strcasecmp(argv[0],"server-threads") || !strcasecmp(argv[0],"io-threads")) && argc == 2) { + cserver.cthreads = atoi(argv[1]); + if (cserver.cthreads <= 0 || cserver.cthreads > MAX_EVENT_LOOPS) { + err = "Invalid number of threads specified"; + goto loaderr; + } + } else if (!strcasecmp(argv[0],"server-thread-affinity") && argc == 2) { + if (strcasecmp(argv[1], "true") == 0) { + cserver.fThreadAffinity = TRUE; + } else if (strcasecmp(argv[1], "false") == 0) { + cserver.fThreadAffinity = FALSE; + } else { + int offset = atoi(argv[1]); + if (offset > 0) { + cserver.fThreadAffinity 
= TRUE; + cserver.threadAffinityOffset = offset-1; + } else { + err = "Unknown argument: server-thread-affinity expects either true or false"; + goto loaderr; + } + } + } else if (!strcasecmp(argv[0], "active-replica") && argc == 2) { + g_pserver->fActiveReplica = yesnotoi(argv[1]); + if (g_pserver->fActiveReplica && g_pserver->repl_slave_ro) { + g_pserver->repl_slave_ro = FALSE; + serverLog(LL_NOTICE, "Notice: \"active-replica yes\" implies \"replica-read-only no\""); + } + if (g_pserver->fActiveReplica == -1) { + g_pserver->fActiveReplica = CONFIG_DEFAULT_ACTIVE_REPLICA; + err = "argument must be 'yes' or 'no'"; goto loaderr; + } + if (listLength(g_pserver->masters) && g_pserver->fActiveReplica) { + err = "must not set replica-of config before active-replica config"; goto loaderr; + } + } else if (!strcasecmp(argv[0], "multi-master") && argc == 2) { + g_pserver->enable_multimaster = yesnotoi(argv[1]); + if (g_pserver->enable_multimaster == -1) { + g_pserver->enable_multimaster = CONFIG_DEFAULT_ENABLE_MULTIMASTER; + err = "argument must be 'yes' or 'no'"; goto loaderr; + } + if (listLength(g_pserver->masters) && g_pserver->enable_multimaster) { + err = "must not set replica-of config before multi-master config"; goto loaderr; + } + } else if (!strcasecmp(argv[0], "tls-allowlist")) { + if (argc < 2) { + err = "must supply at least one element in the allow list"; goto loaderr; + } + if (!g_pserver->tls_allowlist.empty()) { + err = "tls-allowlist may only be set once"; goto loaderr; + } + for (int i = 1; i < argc; i++) + g_pserver->tls_allowlist.emplace(argv[i], strlen(argv[i])); + } else if (!strcasecmp(argv[0], "tls-auditlog-blocklist")) { + if (argc < 2) { + err = "must supply at least one element in the block list"; goto loaderr; + } + if (!g_pserver->tls_auditlog_blocklist.empty()) { + err = "tls-auditlog-blocklist may only be set once"; goto loaderr; + } + for (int i = 1; i < argc; i++) + g_pserver->tls_auditlog_blocklist.emplace(argv[i], strlen(argv[i])); + } 
else if (!strcasecmp(argv[0], "version-override") && argc == 2) { + KEYDB_SET_VERSION = zstrdup(argv[1]); + serverLog(LL_WARNING, "Warning version is overriden to: %s\n", KEYDB_SET_VERSION); + } else if (!strcasecmp(argv[0],"testmode") && argc == 2){ + g_fTestMode = yesnotoi(argv[1]); + } else if (!strcasecmp(argv[0],"rdbfuzz-mode")) { + // NOP, handled in main + } else if (!strcasecmp(argv[0],"storage-provider") && argc >= 2) { + g_sdsProvider = sdsdup(argv[1]); + if (argc > 2) + g_sdsArgs = sdsdup(argv[2]); + } else if (!strcasecmp(argv[0],"is-flash-enabled") && argc == 1) { +#ifdef ENABLE_ROCKSDB + exit(EXIT_SUCCESS); +#else + exit(EXIT_FAILURE); +#endif + } else { + err = "Bad directive or wrong number of arguments"; goto loaderr; + } + sdsfreesplitres(argv,argc); + } + + /* Sanity checks. */ + if (g_pserver->cluster_enabled && listLength(g_pserver->masters)) { + linenum = slaveof_linenum; + i = linenum-1; + err = "replicaof directive not allowed in cluster mode"; + goto loaderr; + } + + /* To ensure backward compatibility and work while hz is out of range */ + if (g_pserver->config_hz < CONFIG_MIN_HZ) g_pserver->config_hz = CONFIG_MIN_HZ; + if (g_pserver->config_hz > CONFIG_MAX_HZ) g_pserver->config_hz = CONFIG_MAX_HZ; + + sdsfreesplitres(lines,totlines); + return; + +loaderr: + fprintf(stderr, "\n*** FATAL CONFIG FILE ERROR (KeyDB %s) ***\n", + KEYDB_REAL_VERSION); + fprintf(stderr, "Reading the configuration file, at line %d\n", linenum); + fprintf(stderr, ">>> '%s'\n", lines[i]); + fprintf(stderr, "%s\n", err); + exit(1); +} + +/* Load the server configuration from the specified filename. + * The function appends the additional configuration directives stored + * in the 'options' string to the config file before loading. + * + * Both filename and options can be NULL, in such a case are considered + * empty. This way loadServerConfig can be used to just load a file or + * just load a string. 
*/ +void loadServerConfig(char *filename, char config_from_stdin, char *options) { + sds config = sdsempty(); + char buf[CONFIG_MAX_LINE+1]; + FILE *fp; + + /* Load the file content */ + if (filename) { + if ((fp = fopen(filename,"r")) == NULL) { + serverLog(LL_WARNING, + "Fatal error, can't open config file '%s': %s", + filename, strerror(errno)); + exit(1); + } + while(fgets(buf,CONFIG_MAX_LINE+1,fp) != NULL) + config = sdscat(config,buf); + fclose(fp); + } + /* Append content from stdin */ + if (config_from_stdin) { + serverLog(LL_WARNING,"Reading config from stdin"); + fp = stdin; + while(fgets(buf,CONFIG_MAX_LINE+1,fp) != NULL) + config = sdscat(config,buf); + } + + /* Append the additional options */ + if (options) { + config = sdscat(config,"\n"); + config = sdscat(config,options); + } + loadServerConfigFromString(config); + sdsfree(config); +} + +/*----------------------------------------------------------------------------- + * CONFIG SET implementation + *----------------------------------------------------------------------------*/ + +#define config_set_bool_field(_name,_var) \ + } else if (!strcasecmp(szFromObj(c->argv[2]),_name)) { \ + int yn = yesnotoi(szFromObj(o)); \ + if (yn == -1) goto badfmt; \ + _var = yn; + +#define config_set_numerical_field(_name,_var,min,max) \ + } else if (!strcasecmp(szFromObj(c->argv[2]),_name)) { \ + if (getLongLongFromObject(o,&ll) == C_ERR) goto badfmt; \ + if (min != LLONG_MIN && ll < min) goto badfmt; \ + if (max != LLONG_MAX && ll > max) goto badfmt; \ + _var = ll; + +#define config_set_memory_field(_name,_var) \ + } else if (!strcasecmp(szFromObj(c->argv[2]),_name)) { \ + ll = memtoll(szFromObj(o),&err); \ + if (err || ll < 0) goto badfmt; \ + _var = ll; + +#define config_set_special_field(_name) \ + } else if (!strcasecmp(szFromObj(c->argv[2]),_name)) { + +#define config_set_special_field_with_alias(_name1,_name2) \ + } else if (!strcasecmp(szFromObj(c->argv[2]),_name1) || \ + 
!strcasecmp(szFromObj(c->argv[2]),_name2)) { + +#define config_set_else } else + +void configSetCommand(client *c) { + robj *o; + long long ll; + int err; + const char *errstr = NULL; + serverAssertWithInfo(c,c->argv[2],sdsEncodedObject(c->argv[2])); + + if (c->argc < 4 || c->argc > 4) { + o = nullptr; + // Variadic set is only supported for tls-allowlist + if (strcasecmp(szFromObj(c->argv[2]), "tls-allowlist")) { + addReplySubcommandSyntaxError(c); + return; + } + } else { + o = c->argv[3]; + serverAssertWithInfo(c,c->argv[3],sdsEncodedObject(c->argv[3])); + } + + /* Iterate the configs that are standard */ + for (standardConfig *config = configs; config->name != NULL; config++) { + if (!(config->flags & IMMUTABLE_CONFIG) && + (!strcasecmp(szFromObj(c->argv[2]),config->name) || + (config->alias && !strcasecmp(szFromObj(c->argv[2]),config->alias)))) + { + if (config->flags & SENSITIVE_CONFIG) { + redactClientCommandArgument(c,3); + } + if (!config->interface.set(config->data,szFromObj(o),1,&errstr)) { + goto badfmt; + } + addReply(c,shared.ok); + return; + } + } + + if (0) { /* this starts the config_set macros else-if chain. */ + + /* Special fields that can't be handled with general macros. 
*/ + config_set_special_field("bind") { + int vlen; + sds *v = sdssplitlen(szFromObj(o),sdslen(szFromObj(o))," ",1,&vlen); + + if (vlen < 1 || vlen > CONFIG_BINDADDR_MAX) { + addReplyError(c, "Too many bind addresses specified."); + sdsfreesplitres(v, vlen); + return; + } + + if (changeBindAddr(v, vlen, true) == C_ERR) { + addReplyError(c, "Failed to bind to specified addresses."); + sdsfreesplitres(v, vlen); + return; + } + // Now run the config change on the other threads + for (int ithread = 0; ithread < cserver.cthreads; ++ithread) { + if (&g_pserver->rgthreadvar[ithread] != serverTL) { + incrRefCount(o); + aePostFunction(g_pserver->rgthreadvar[ithread].el, [o]{ + int vlen; + sds *v = sdssplitlen(szFromObj(o),sdslen(szFromObj(o))," ",1,&vlen); + if (changeBindAddr(v, vlen, false) == C_ERR) { + serverLog(LL_WARNING, "Failed to change the bind address for a thread. Server will still be listening on old addresses."); + } + sdsfreesplitres(v, vlen); + decrRefCount(o); + }); + } + } + + sdsfreesplitres(v, vlen); + } config_set_special_field("save") { + int vlen, j; + sds *v = sdssplitlen(szFromObj(o),sdslen(szFromObj(o))," ",1,&vlen); + + /* Perform sanity check before setting the new config: + * - Even number of args + * - Seconds >= 1, changes >= 0 */ + if (vlen & 1) { + sdsfreesplitres(v,vlen); + goto badfmt; + } + for (j = 0; j < vlen; j++) { + char *eptr; + long val; + + val = strtoll(v[j], &eptr, 10); + if (eptr[0] != '\0' || + ((j & 1) == 0 && val < 1) || + ((j & 1) == 1 && val < 0)) { + sdsfreesplitres(v,vlen); + goto badfmt; + } + } + /* Finally set the new config */ + resetServerSaveParams(); + for (j = 0; j < vlen; j += 2) { + time_t seconds; + int changes; + + seconds = strtoll(v[j],NULL,10); + changes = strtoll(v[j+1],NULL,10); + appendServerSaveParams(seconds, changes); + } + sdsfreesplitres(v,vlen); + } config_set_special_field("dir") { + if (chdir((char*)ptrFromObj(o)) == -1) { + addReplyErrorFormat(c,"Changing directory: %s", strerror(errno)); + 
return; + } + } config_set_special_field("client-output-buffer-limit") { + int vlen, j; + sds *v = sdssplitlen(szFromObj(o),sdslen(szFromObj(o))," ",1,&vlen); + + /* We need a multiple of 4: */ + if (vlen % 4) { + sdsfreesplitres(v,vlen); + goto badfmt; + } + + /* Sanity check of single arguments, so that we either refuse the + * whole configuration string or accept it all, even if a single + * error in a single client class is present. */ + for (j = 0; j < vlen; j++) { + long val; + + if ((j % 4) == 0) { + int type = getClientTypeByName(v[j]); + if (type == -1 || type == CLIENT_TYPE_MASTER) { + sdsfreesplitres(v,vlen); + goto badfmt; + } + } else { + val = memtoll(v[j], &err); + if (err || val < 0) { + sdsfreesplitres(v,vlen); + goto badfmt; + } + } + } + /* Finally set the new config */ + for (j = 0; j < vlen; j += 4) { + unsigned long long hard, soft; + int soft_seconds; + + int type = getClientTypeByName(v[j]); + hard = memtoll(v[j+1],NULL); + soft = memtoll(v[j+2],NULL); + soft_seconds = strtoll(v[j+3],NULL,10); + + cserver.client_obuf_limits[type].hard_limit_bytes = hard; + cserver.client_obuf_limits[type].soft_limit_bytes = soft; + cserver.client_obuf_limits[type].soft_limit_seconds = soft_seconds; + } + sdsfreesplitres(v,vlen); + } config_set_special_field("oom-score-adj-values") { + int vlen; + int success = 1; + + sds *v = sdssplitlen(szFromObj(o), sdslen(szFromObj(o)), " ", 1, &vlen); + if (vlen != CONFIG_OOM_COUNT || updateOOMScoreAdjValues(v, &errstr, 1) == C_ERR) + success = 0; + + sdsfreesplitres(v, vlen); + if (!success) + goto badfmt; + } config_set_special_field("notify-keyspace-events") { + int flags = keyspaceEventsStringToFlags(szFromObj(o)); + + if (flags == -1) goto badfmt; + g_pserver->notify_keyspace_events = flags; + /* Numerical fields. 
+ * config_set_numerical_field(name,var,min,max) */ + } config_set_numerical_field( + "watchdog-period",ll,0,INT_MAX) { + if (ll) + enableWatchdog(ll); + else + disableWatchdog(); + } config_set_special_field("tls-allowlist") { + g_pserver->tls_allowlist.clear(); + for (int i = 3; i < c->argc; ++i) { + robj *val = c->argv[i]; + g_pserver->tls_allowlist.emplace(szFromObj(val), sdslen(szFromObj(val))); + } + /* Everything else is an error... */ + } config_set_else { + addReplyErrorFormat(c,"Unsupported CONFIG parameter: %s", + (char*)ptrFromObj(c->argv[2])); + return; + } + + /* On success we just return a generic OK for all the options. */ + addReply(c,shared.ok); + return; + +badfmt: /* Bad format errors */ + if (errstr) { + addReplyErrorFormat(c,"Invalid argument '%s' for CONFIG SET '%s' - %s", + szFromObj(o), + szFromObj(c->argv[2]), + errstr); + } else { + addReplyErrorFormat(c,"Invalid argument '%s' for CONFIG SET '%s'", + szFromObj(o), + szFromObj(c->argv[2])); + } +} + +/*----------------------------------------------------------------------------- + * CONFIG GET implementation + *----------------------------------------------------------------------------*/ + +#define config_get_string_field(_name,_var) do { \ + if (stringmatch(pattern,_name,1)) { \ + addReplyBulkCString(c,_name); \ + addReplyBulkCString(c,_var ? _var : ""); \ + matches++; \ + } \ +} while(0) + +#define config_get_bool_field(_name,_var) do { \ + if (stringmatch(pattern,_name,1)) { \ + addReplyBulkCString(c,_name); \ + addReplyBulkCString(c,_var ? 
"yes" : "no"); \ + matches++; \ + } \ +} while(0) + +#define config_get_numerical_field(_name,_var) do { \ + if (stringmatch(pattern,_name,1)) { \ + ll2string(buf,sizeof(buf),_var); \ + addReplyBulkCString(c,_name); \ + addReplyBulkCString(c,buf); \ + matches++; \ + } \ +} while(0) + +void configGetCommand(client *c) { + robj *o = c->argv[2]; + void *replylen = addReplyDeferredLen(c); + char *pattern = szFromObj(o); + char buf[128]; + int matches = 0; + serverAssertWithInfo(c,o,sdsEncodedObject(o)); + + /* Iterate the configs that are standard */ + for (standardConfig *config = configs; config->name != NULL; config++) { + if (stringmatch(pattern,config->name,1)) { + addReplyBulkCString(c,config->name); + config->interface.get(c,config->data); + matches++; + } + if (config->alias && stringmatch(pattern,config->alias,1)) { + addReplyBulkCString(c,config->alias); + config->interface.get(c,config->data); + matches++; + } + } + + /* String values */ + config_get_string_field("logfile",g_pserver->logfile); + + /* Numerical values */ + config_get_numerical_field("watchdog-period",g_pserver->watchdog_period); + + /* Everything we can't handle with macros follows. 
*/ + + if (stringmatch(pattern,"dir",1)) { + char buf[1024]; + + if (getcwd(buf,sizeof(buf)) == NULL) + buf[0] = '\0'; + + addReplyBulkCString(c,"dir"); + addReplyBulkCString(c,buf); + matches++; + } + if (stringmatch(pattern,"save",1)) { + sds buf = sdsempty(); + int j; + + for (j = 0; j < g_pserver->saveparamslen; j++) { + buf = sdscatprintf(buf,"%jd %d", + (intmax_t)g_pserver->saveparams[j].seconds, + g_pserver->saveparams[j].changes); + if (j != g_pserver->saveparamslen-1) + buf = sdscatlen(buf," ",1); + } + addReplyBulkCString(c,"save"); + addReplyBulkCString(c,buf); + sdsfree(buf); + matches++; + } + if (stringmatch(pattern,"client-output-buffer-limit",1)) { + sds buf = sdsempty(); + int j; + + for (j = 0; j < CLIENT_TYPE_OBUF_COUNT; j++) { + buf = sdscatprintf(buf,"%s %llu %llu %ld", + getClientTypeName(j), + cserver.client_obuf_limits[j].hard_limit_bytes, + cserver.client_obuf_limits[j].soft_limit_bytes, + (long) cserver.client_obuf_limits[j].soft_limit_seconds); + if (j != CLIENT_TYPE_OBUF_COUNT-1) + buf = sdscatlen(buf," ",1); + } + addReplyBulkCString(c,"client-output-buffer-limit"); + addReplyBulkCString(c,buf); + sdsfree(buf); + matches++; + } + if (stringmatch(pattern,"unixsocketperm",1)) { + char buf[32]; + snprintf(buf,sizeof(buf),"%lo",(unsigned long)g_pserver->unixsocketperm); + addReplyBulkCString(c,"unixsocketperm"); + addReplyBulkCString(c,buf); + matches++; + } + if (stringmatch(pattern,"slaveof",1) || + stringmatch(pattern,"replicaof",1)) + { + const char *optname = stringmatch(pattern,"slaveof",1) ? 
+ "slaveof" : "replicaof"; + char buf[256]; + addReplyBulkCString(c,optname); + if (listLength(g_pserver->masters) == 0) + { + buf[0] = '\0'; + addReplyBulkCString(c,buf); + } + else + { + listIter li; + listNode *ln; + listRewind(g_pserver->masters, &li); + bool fFirst = true; + while ((ln = listNext(&li))) + { + if (!fFirst) + { + addReplyBulkCString(c,optname); + matches++; + } + fFirst = false; + struct redisMaster *mi = (struct redisMaster*)listNodeValue(ln); + snprintf(buf,sizeof(buf),"%s %d", + mi->masterhost, mi->masterport); + addReplyBulkCString(c,buf); + } + } + matches++; + } + if (stringmatch(pattern,"notify-keyspace-events",1)) { + sds flags = keyspaceEventsFlagsToString(g_pserver->notify_keyspace_events); + + addReplyBulkCString(c,"notify-keyspace-events"); + addReplyBulkSds(c,flags); + matches++; + } + if (stringmatch(pattern,"bind",1)) { + sds aux = sdsjoin(g_pserver->bindaddr,g_pserver->bindaddr_count," "); + + addReplyBulkCString(c,"bind"); + addReplyBulkCString(c,aux); + sdsfree(aux); + matches++; + } + if (stringmatch(pattern,"oom-score-adj-values",0)) { + sds buf = sdsempty(); + int j; + + for (j = 0; j < CONFIG_OOM_COUNT; j++) { + buf = sdscatprintf(buf,"%d", g_pserver->oom_score_adj_values[j]); + if (j != CONFIG_OOM_COUNT-1) + buf = sdscatlen(buf," ",1); + } + + addReplyBulkCString(c,"oom-score-adj-values"); + addReplyBulkCString(c,buf); + sdsfree(buf); + matches++; + } + if (stringmatch(pattern,"active-replica",1)) { + addReplyBulkCString(c,"active-replica"); + addReplyBulkCString(c, g_pserver->fActiveReplica ? 
"yes" : "no"); + matches++; + } + if (stringmatch(pattern, "tls-allowlist", 1)) { + addReplyBulkCString(c,"tls-allowlist"); + addReplyArrayLen(c, (long)g_pserver->tls_allowlist.size()); + for (auto &elem : g_pserver->tls_allowlist) { + addReplyBulkCBuffer(c, elem.get(), elem.size()); // addReplyBulkSds will free which we absolutely don't want + } + matches++; + } + + setDeferredMapLen(c,replylen,matches); +} + +/*----------------------------------------------------------------------------- + * CONFIG REWRITE implementation + *----------------------------------------------------------------------------*/ + +#define REDIS_CONFIG_REWRITE_SIGNATURE "# Generated by CONFIG REWRITE" + +/* We use the following dictionary type to store where a configuration + * option is mentioned in the old configuration file, so it's + * like "maxmemory" -> list of line numbers (first line is zero). */ +uint64_t dictSdsCaseHash(const void *key); +int dictSdsKeyCaseCompare(void *privdata, const void *key1, const void *key2); +void dictSdsDestructor(void *privdata, void *val); +void dictListDestructor(void *privdata, void *val); + +/* Sentinel config rewriting is implemented inside sentinel.c by + * rewriteConfigSentinelOption(). */ +void rewriteConfigSentinelOption(struct rewriteConfigState *state); + +dictType optionToLineDictType = { + dictSdsCaseHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCaseCompare, /* key compare */ + dictSdsDestructor, /* key destructor */ + dictListDestructor, /* val destructor */ + NULL /* allow to expand */ +}; + +dictType optionSetDictType = { + dictSdsCaseHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCaseCompare, /* key compare */ + dictSdsDestructor, /* key destructor */ + NULL, /* val destructor */ + NULL /* allow to expand */ +}; + +/* The config rewrite state. 
*/ +struct rewriteConfigState { + dict *option_to_line; /* Option -> list of config file lines map */ + dict *rewritten; /* Dictionary of already processed options */ + int numlines; /* Number of lines in current config */ + sds *lines; /* Current lines as an array of sds strings */ + int has_tail; /* True if we already added directives that were + not present in the original config file. */ + int force_all; /* True if we want all keywords to be force + written. Currently only used for testing. */ +}; + +/* Append the new line to the current configuration state. */ +void rewriteConfigAppendLine(struct rewriteConfigState *state, sds line) { + state->lines = (sds*)zrealloc(state->lines, sizeof(char*) * (state->numlines+1), MALLOC_LOCAL); + state->lines[state->numlines++] = line; +} + +/* Populate the option -> list of line numbers map. */ +void rewriteConfigAddLineNumberToOption(struct rewriteConfigState *state, sds option, int linenum) { + list *l = (list*)dictFetchValue(state->option_to_line,option); + + if (l == NULL) { + l = listCreate(); + dictAdd(state->option_to_line,sdsdup(option),l); + } + listAddNodeTail(l,(void*)(long)linenum); +} + +/* Add the specified option to the set of processed options. + * This is useful as only unused lines of processed options will be blanked + * in the config file, while options the rewrite process does not understand + * remain untouched. */ +void rewriteConfigMarkAsProcessed(struct rewriteConfigState *state, const char *option) { + sds opt = sdsnew(option); + + if (dictAdd(state->rewritten,opt,NULL) != DICT_OK) sdsfree(opt); +} + +/* Read the old file, split it into lines to populate a newly created + * config rewrite state, and return it to the caller. + * + * If it is impossible to read the old file, NULL is returned. + * If the old file does not exist at all, an empty state is returned. 
*/ +struct rewriteConfigState *rewriteConfigReadOldFile(char *path) { + FILE *fp = fopen(path,"r"); + struct rewriteConfigState *state = (rewriteConfigState*)zmalloc(sizeof(*state), MALLOC_LOCAL); + char buf[CONFIG_MAX_LINE+1]; + int linenum = -1; + + if (fp == NULL && errno != ENOENT) return NULL; + + state->option_to_line = dictCreate(&optionToLineDictType,NULL); + state->rewritten = dictCreate(&optionSetDictType,NULL); + state->numlines = 0; + state->lines = NULL; + state->has_tail = 0; + state->force_all = 0; + if (fp == NULL) return state; + + /* Read the old file line by line, populate the state. */ + while(fgets(buf,CONFIG_MAX_LINE+1,fp) != NULL) { + int argc; + sds *argv; + sds line = sdstrim(sdsnew(buf),"\r\n\t "); + + linenum++; /* Zero based, so we init at -1 */ + + /* Handle comments and empty lines. */ + if (line[0] == '#' || line[0] == '\0') { + if (!state->has_tail && !strcmp(line,REDIS_CONFIG_REWRITE_SIGNATURE)) + state->has_tail = 1; + rewriteConfigAppendLine(state,line); + continue; + } + + /* Not a comment, split into arguments. */ + argv = sdssplitargs(line,&argc); + if (argv == NULL) { + /* Apparently the line is unparsable for some reason, for + * instance it may have unbalanced quotes. Load it as a + * comment. */ + sds aux = sdsnew("# ??? "); + aux = sdscatsds(aux,line); + sdsfree(line); + rewriteConfigAppendLine(state,aux); + continue; + } + + sdstolower(argv[0]); /* We only want lowercase config directives. */ + + /* Now we populate the state according to the content of this line. + * Append the line and populate the option -> line numbers map. */ + rewriteConfigAppendLine(state,line); + + /* Translate options using the word "slave" to the corresponding name + * "replica", before adding such option to the config name -> lines + * mapping. 
*/ + char *p = strstr(argv[0],"slave"); + if (p) { + sds alt = sdsempty(); + alt = sdscatlen(alt,argv[0],p-argv[0]); + alt = sdscatlen(alt,"replica",7); + alt = sdscatlen(alt,p+5,strlen(p+5)); + sdsfree(argv[0]); + argv[0] = alt; + } + /* If this is sentinel config, we use sentinel "sentinel " as option + to avoid messing up the sequence. */ + if (g_pserver->sentinel_mode && argc > 1 && !strcasecmp(argv[0],"sentinel")) { + sds sentinelOption = sdsempty(); + sentinelOption = sdscatfmt(sentinelOption,"%S %S",argv[0],argv[1]); + rewriteConfigAddLineNumberToOption(state,sentinelOption,linenum); + sdsfree(sentinelOption); + } else { + rewriteConfigAddLineNumberToOption(state,argv[0],linenum); + } + sdsfreesplitres(argv,argc); + } + fclose(fp); + return state; +} + +/* Rewrite the specified configuration option with the new "line". + * It progressively uses lines of the file that were already used for the same + * configuration option in the old version of the file, removing that line from + * the map of options -> line numbers. + * + * If there are lines associated with a given configuration option and + * "force" is non-zero, the line is appended to the configuration file. + * Usually "force" is true when an option has not its default value, so it + * must be rewritten even if not present previously. + * + * The first time a line is appended into a configuration file, a comment + * is added to show that starting from that point the config file was generated + * by CONFIG REWRITE. + * + * "line" is either used, or freed, so the caller does not need to free it + * in any way. */ +void rewriteConfigRewriteLine(struct rewriteConfigState *state, const char *option, sds line, int force) { + sds o = sdsnew(option); + list *l = (list*)dictFetchValue(state->option_to_line,o); + + rewriteConfigMarkAsProcessed(state,option); + + if (!l && !force && !state->force_all) { + /* Option not used previously, and we are not forced to use it. 
*/ + sdsfree(line); + sdsfree(o); + return; + } + + if (l) { + listNode *ln = listFirst(l); + int linenum = (long) ln->value; + + /* There are still lines in the old configuration file we can reuse + * for this option. Replace the line with the new one. */ + listDelNode(l,ln); + if (listLength(l) == 0) dictDelete(state->option_to_line,o); + sdsfree(state->lines[linenum]); + state->lines[linenum] = line; + } else { + /* Append a new line. */ + if (!state->has_tail) { + rewriteConfigAppendLine(state, + sdsnew(REDIS_CONFIG_REWRITE_SIGNATURE)); + state->has_tail = 1; + } + rewriteConfigAppendLine(state,line); + } + sdsfree(o); +} + +/* Write the long long 'bytes' value as a string in a way that is parsable + * inside keydb.conf. If possible uses the GB, MB, KB notation. */ +int rewriteConfigFormatMemory(char *buf, size_t len, long long bytes) { + int gb = 1024*1024*1024; + int mb = 1024*1024; + int kb = 1024; + + if (bytes && (bytes % gb) == 0) { + return snprintf(buf,len,"%lldgb",bytes/gb); + } else if (bytes && (bytes % mb) == 0) { + return snprintf(buf,len,"%lldmb",bytes/mb); + } else if (bytes && (bytes % kb) == 0) { + return snprintf(buf,len,"%lldkb",bytes/kb); + } else { + return snprintf(buf,len,"%lld",bytes); + } +} + +/* Rewrite a simple "option-name " configuration option. */ +void rewriteConfigBytesOption(struct rewriteConfigState *state, const char *option, long long value, long long defvalue) { + char buf[64]; + int force = value != defvalue; + sds line; + + rewriteConfigFormatMemory(buf,sizeof(buf),value); + line = sdscatprintf(sdsempty(),"%s %s",option,buf); + rewriteConfigRewriteLine(state,option,line,force); +} + +/* Rewrite a yes/no option. */ +void rewriteConfigYesNoOption(struct rewriteConfigState *state, const char *option, int value, int defvalue) { + int force = value != defvalue; + sds line = sdscatprintf(sdsempty(),"%s %s",option, + value ? "yes" : "no"); + + rewriteConfigRewriteLine(state,option,line,force); +} + +/* Rewrite a string option. 
*/ +void rewriteConfigStringOption(struct rewriteConfigState *state, const char *option, const char *value, const char *defvalue) { + int force = 1; + sds line; + + /* String options set to NULL need to be not present at all in the + * configuration file to be set to NULL again at the next reboot. */ + if (value == NULL) { + rewriteConfigMarkAsProcessed(state,option); + return; + } + + /* Set force to zero if the value is set to its default. */ + if (defvalue && strcmp(value,defvalue) == 0) force = 0; + + line = sdsnew(option); + line = sdscatlen(line, " ", 1); + line = sdscatrepr(line, value, strlen(value)); + + rewriteConfigRewriteLine(state,option,line,force); +} + +/* Rewrite a SDS string option. */ +void rewriteConfigSdsOption(struct rewriteConfigState *state, const char *option, sds value, const sds defvalue) { + int force = 1; + sds line; + + /* If there is no value set, we don't want the SDS option + * to be present in the configuration at all. */ + if (value == NULL) { + rewriteConfigMarkAsProcessed(state, option); + return; + } + + /* Set force to zero if the value is set to its default. */ + if (defvalue && sdscmp(value, defvalue) == 0) force = 0; + + line = sdsnew(option); + line = sdscatlen(line, " ", 1); + line = sdscatrepr(line, value, sdslen(value)); + + rewriteConfigRewriteLine(state, option, line, force); +} + +/* Rewrite a numerical (long long range) option. */ +void rewriteConfigNumericalOption(struct rewriteConfigState *state, const char *option, long long value, long long defvalue) { + int force = value != defvalue; + sds line = sdscatprintf(sdsempty(),"%s %lld",option,value); + + rewriteConfigRewriteLine(state,option,line,force); +} + +/* Rewrite an octal option. 
*/ +void rewriteConfigOctalOption(struct rewriteConfigState *state, const char *option, int value, int defvalue) { + int force = value != defvalue; + sds line = sdscatprintf(sdsempty(),"%s %o",option,value); + + rewriteConfigRewriteLine(state,option,line,force); +} + +/* Rewrite an enumeration option. It takes as usually state and option name, + * and in addition the enumeration array and the default value for the + * option. */ +void rewriteConfigEnumOption(struct rewriteConfigState *state, const char *option, int value, configEnum *ce, int defval) { + sds line; + const char *name = configEnumGetNameOrUnknown(ce,value); + int force = value != defval; + + line = sdscatprintf(sdsempty(),"%s %s",option,name); + rewriteConfigRewriteLine(state,option,line,force); +} + +/* Rewrite the save option. */ +void rewriteConfigSaveOption(struct rewriteConfigState *state) { + int j; + sds line; + + /* In Sentinel mode we don't need to rewrite the save parameters */ + if (g_pserver->sentinel_mode) { + rewriteConfigMarkAsProcessed(state,"save"); + return; + } + + /* Rewrite save parameters, or an empty 'save ""' line to avoid the + * defaults from being used. + */ + if (!g_pserver->saveparamslen) { + rewriteConfigRewriteLine(state,"save",sdsnew("save \"\""),1); + } else { + for (j = 0; j < g_pserver->saveparamslen; j++) { + line = sdscatprintf(sdsempty(),"save %ld %d", + (long) g_pserver->saveparams[j].seconds, g_pserver->saveparams[j].changes); + rewriteConfigRewriteLine(state,"save",line,1); + } + } + + /* Mark "save" as processed in case server.saveparamslen is zero. */ + rewriteConfigMarkAsProcessed(state,"save"); +} + +/* Rewrite the user option. */ +void rewriteConfigUserOption(struct rewriteConfigState *state) { + /* If there is a user file defined we just mark this configuration + * directive as processed, so that all the lines containing users + * inside the config file gets discarded. 
*/ + if (g_pserver->acl_filename[0] != '\0') { + rewriteConfigMarkAsProcessed(state,"user"); + return; + } + + /* Otherwise scan the list of users and rewrite every line. Note that + * in case the list here is empty, the effect will just be to comment + * all the users directive inside the config file. */ + raxIterator ri; + raxStart(&ri,Users); + raxSeek(&ri,"^",NULL,0); + while(raxNext(&ri)) { + user *u = (user*)ri.data; + sds line = sdsnew("user "); + line = sdscatsds(line,u->name); + line = sdscatlen(line," ",1); + sds descr = ACLDescribeUser(u); + line = sdscatsds(line,descr); + sdsfree(descr); + rewriteConfigRewriteLine(state,"user",line,1); + } + raxStop(&ri); + + /* Mark "user" as processed in case there are no defined users. */ + rewriteConfigMarkAsProcessed(state,"user"); +} + +/* Rewrite the dir option, always using absolute paths.*/ +void rewriteConfigDirOption(struct rewriteConfigState *state) { + char cwd[1024]; + + if (getcwd(cwd,sizeof(cwd)) == NULL) { + rewriteConfigMarkAsProcessed(state,"dir"); + return; /* no rewrite on error. */ + } + rewriteConfigStringOption(state,"dir",cwd,NULL); +} + +/* Rewrite the slaveof option. */ +void rewriteConfigSlaveofOption(struct rewriteConfigState *state, const char *option) { + /* If this is a master, we want all the slaveof config options + * in the file to be removed. Note that if this is a cluster instance + * we don't want a slaveof directive inside keydb.conf. */ + if (g_pserver->cluster_enabled || listLength(g_pserver->masters) == 0) { + rewriteConfigMarkAsProcessed(state,option); + return; + } + + listIter li; + listNode *ln; + listRewind(g_pserver->masters, &li); + while ((ln = listNext(&li))) + { + struct redisMaster *mi = (struct redisMaster*)listNodeValue(ln); + sds line; + + line = sdscatprintf(sdsempty(),"%s %s %d", option, + mi->masterhost, mi->masterport); + rewriteConfigRewriteLine(state,option,line,1); + } +} + +/* Rewrite the notify-keyspace-events option. 
*/ +void rewriteConfigNotifykeyspaceeventsOption(struct rewriteConfigState *state) { + int force = g_pserver->notify_keyspace_events != 0; + const char *option = "notify-keyspace-events"; + sds line, flags; + + flags = keyspaceEventsFlagsToString(g_pserver->notify_keyspace_events); + line = sdsnew(option); + line = sdscatlen(line, " ", 1); + line = sdscatrepr(line, flags, sdslen(flags)); + sdsfree(flags); + rewriteConfigRewriteLine(state,option,line,force); +} + +/* Rewrite the client-output-buffer-limit option. */ +void rewriteConfigClientoutputbufferlimitOption(struct rewriteConfigState *state) { + int j; + const char *option = "client-output-buffer-limit"; + + for (j = 0; j < CLIENT_TYPE_OBUF_COUNT; j++) { + int force = (cserver.client_obuf_limits[j].hard_limit_bytes != + clientBufferLimitsDefaults[j].hard_limit_bytes) || + (cserver.client_obuf_limits[j].soft_limit_bytes != + clientBufferLimitsDefaults[j].soft_limit_bytes) || + (cserver.client_obuf_limits[j].soft_limit_seconds != + clientBufferLimitsDefaults[j].soft_limit_seconds); + sds line; + char hard[64], soft[64]; + + rewriteConfigFormatMemory(hard,sizeof(hard), + cserver.client_obuf_limits[j].hard_limit_bytes); + rewriteConfigFormatMemory(soft,sizeof(soft), + cserver.client_obuf_limits[j].soft_limit_bytes); + + const char *tname = getClientTypeName(j); + if (!strcmp(tname,"slave")) tname = "replica"; + line = sdscatprintf(sdsempty(),"%s %s %s %s %ld", + option, tname, hard, soft, + (long) cserver.client_obuf_limits[j].soft_limit_seconds); + rewriteConfigRewriteLine(state,option,line,force); + } +} + +/* Rewrite the oom-score-adj-values option. 
*/ +void rewriteConfigOOMScoreAdjValuesOption(struct rewriteConfigState *state) { + int force = 0; + int j; + const char *option = "oom-score-adj-values"; + sds line; + + line = sdsnew(option); + line = sdscatlen(line, " ", 1); + for (j = 0; j < CONFIG_OOM_COUNT; j++) { + if (g_pserver->oom_score_adj_values[j] != configOOMScoreAdjValuesDefaults[j]) + force = 1; + + line = sdscatprintf(line, "%d", g_pserver->oom_score_adj_values[j]); + if (j+1 != CONFIG_OOM_COUNT) + line = sdscatlen(line, " ", 1); + } + rewriteConfigRewriteLine(state,option,line,force); +} + +/* Rewrite the bind option. */ +void rewriteConfigBindOption(struct rewriteConfigState *state) { + int force = 1; + sds line, addresses; + const char *option = "bind"; + + /* Nothing to rewrite if we don't have bind addresses. */ + if (g_pserver->bindaddr_count == 0) { + rewriteConfigMarkAsProcessed(state,option); + return; + } + + /* Rewrite as bind ... */ + addresses = sdsjoin(g_pserver->bindaddr,g_pserver->bindaddr_count," "); + line = sdsnew(option); + line = sdscatlen(line, " ", 1); + line = sdscatsds(line, addresses); + sdsfree(addresses); + + rewriteConfigRewriteLine(state,option,line,force); +} + +/* Glue together the configuration lines in the current configuration + * rewrite state into a single string, stripping multiple empty lines. */ +sds rewriteConfigGetContentFromState(struct rewriteConfigState *state) { + sds content = sdsempty(); + int j, was_empty = 0; + + for (j = 0; j < state->numlines; j++) { + /* Every cluster of empty lines is turned into a single empty line. */ + if (sdslen(state->lines[j]) == 0) { + if (was_empty) continue; + was_empty = 1; + } else { + was_empty = 0; + } + content = sdscatsds(content,state->lines[j]); + content = sdscatlen(content,"\n",1); + } + return content; +} + +/* Free the configuration rewrite state. 
*/ +void rewriteConfigReleaseState(struct rewriteConfigState *state) { + sdsfreesplitres(state->lines,state->numlines); + dictRelease(state->option_to_line); + dictRelease(state->rewritten); + zfree(state); +} + +/* At the end of the rewrite process the state contains the remaining + * map between "option name" => "lines in the original config file". + * Lines used by the rewrite process were removed by the function + * rewriteConfigRewriteLine(), all the other lines are "orphaned" and + * should be replaced by empty lines. + * + * This function does just this, iterating all the option names and + * blanking all the lines still associated. */ +void rewriteConfigRemoveOrphaned(struct rewriteConfigState *state) { + dictIterator *di = dictGetIterator(state->option_to_line); + dictEntry *de; + + while((de = dictNext(di)) != NULL) { + list *l = (list*)dictGetVal(de); + sds option = (sds)dictGetKey(de); + + /* Don't blank lines about options the rewrite process + * don't understand. */ + if (dictFind(state->rewritten,option) == NULL) { + serverLog(LL_DEBUG,"Not rewritten option: %s", option); + continue; + } + + while(listLength(l)) { + listNode *ln = listFirst(l); + int linenum = (long) ln->value; + + sdsfree(state->lines[linenum]); + state->lines[linenum] = sdsempty(); + listDelNode(l,ln); + } + } + dictReleaseIterator(di); +} + +/* This function replaces the old configuration file with the new content + * in an atomic manner. + * + * The function returns 0 on success, otherwise -1 is returned and errno + * is set accordingly. 
*/ +int rewriteConfigOverwriteFile(char *configfile, sds content) { + int fd = -1; + int retval = -1; + char tmp_conffile[PATH_MAX]; + const char *tmp_suffix = ".XXXXXX"; + size_t offset = 0; + ssize_t written_bytes = 0; + + int tmp_path_len = snprintf(tmp_conffile, sizeof(tmp_conffile), "%s%s", configfile, tmp_suffix); + if (tmp_path_len <= 0 || (unsigned int)tmp_path_len >= sizeof(tmp_conffile)) { + serverLog(LL_WARNING, "Config file full path is too long"); + errno = ENAMETOOLONG; + return retval; + } + +#ifdef _GNU_SOURCE + fd = mkostemp(tmp_conffile, O_CLOEXEC); +#else + /* There's a theoretical chance here to leak the FD if a module thread forks & execv in the middle */ + fd = mkstemp(tmp_conffile); +#endif + + if (fd == -1) { + serverLog(LL_WARNING, "Could not create tmp config file (%s)", strerror(errno)); + return retval; + } + + while (offset < sdslen(content)) { + written_bytes = write(fd, content + offset, sdslen(content) - offset); + if (written_bytes <= 0) { + if (errno == EINTR) continue; /* FD is blocking, no other retryable errors */ + serverLog(LL_WARNING, "Failed after writing (%zd) bytes to tmp config file (%s)", offset, strerror(errno)); + goto cleanup; + } + offset+=written_bytes; + } + + if (fsync(fd)) + serverLog(LL_WARNING, "Could not sync tmp config file to disk (%s)", strerror(errno)); + else if (fchmod(fd, 0644 & ~g_pserver->umask) == -1) + serverLog(LL_WARNING, "Could not chmod config file (%s)", strerror(errno)); + else if (rename(tmp_conffile, configfile) == -1) + serverLog(LL_WARNING, "Could not rename tmp config file (%s)", strerror(errno)); + else { + retval = 0; + serverLog(LL_DEBUG, "Rewritten config file (%s) successfully", configfile); + } + +cleanup: + close(fd); + if (retval) unlink(tmp_conffile); + return retval; +} + +/* Rewrite the configuration file at "path". + * If the configuration file already exists, we try at best to retain comments + * and overall structure. 
+ * + * Configuration parameters that are at their default value, unless already + * explicitly included in the old configuration file, are not rewritten. + * The force_all flag overrides this behavior and forces everything to be + * written. This is currently only used for testing purposes. + * + * On error -1 is returned and errno is set accordingly, otherwise 0. */ +int rewriteConfig(char *path, int force_all) { + struct rewriteConfigState *state; + sds newcontent; + int retval; + + /* Step 1: read the old config into our rewrite state. */ + if ((state = rewriteConfigReadOldFile(path)) == NULL) return -1; + if (force_all) state->force_all = 1; + + /* Step 2: rewrite every single option, replacing or appending it inside + * the rewrite state. */ + + /* Iterate the configs that are standard */ + for (standardConfig *config = configs; config->name != NULL; config++) { + config->interface.rewrite(config->data, config->name, state); + } + + rewriteConfigBindOption(state); + rewriteConfigOctalOption(state,"unixsocketperm",g_pserver->unixsocketperm,CONFIG_DEFAULT_UNIX_SOCKET_PERM); + rewriteConfigStringOption(state,"logfile",g_pserver->logfile,CONFIG_DEFAULT_LOGFILE); + rewriteConfigSaveOption(state); + rewriteConfigUserOption(state); + rewriteConfigDirOption(state); + rewriteConfigSlaveofOption(state,"replicaof"); + rewriteConfigStringOption(state,"cluster-config-file",g_pserver->cluster_configfile,CONFIG_DEFAULT_CLUSTER_CONFIG_FILE); + rewriteConfigNotifykeyspaceeventsOption(state); + rewriteConfigClientoutputbufferlimitOption(state); + rewriteConfigYesNoOption(state,"active-replica",g_pserver->fActiveReplica,CONFIG_DEFAULT_ACTIVE_REPLICA); + rewriteConfigStringOption(state, "version-override",KEYDB_SET_VERSION,KEYDB_REAL_VERSION); + rewriteConfigOOMScoreAdjValuesOption(state); + + if (!g_pserver->tls_allowlist.empty()) { + sds conf = sdsnew("tls-allowlist "); + for (auto &elem : g_pserver->tls_allowlist) { + conf = sdscatsds(conf, (sds)elem.get()); + conf = 
sdscat(conf, " "); + } + // trim the trailing space + sdsrange(conf, 0, -1); + rewriteConfigRewriteLine(state,"tls-allowlist",conf,1 /*force*/); + // note: conf is owned by rewriteConfigRewriteLine - no need to free + } else { + rewriteConfigMarkAsProcessed(state, "tls-allowlist"); // ensure the line is removed if it existed + } + + /* Rewrite Sentinel config if in Sentinel mode. */ + if (g_pserver->sentinel_mode) rewriteConfigSentinelOption(state); + + /* Step 3: remove all the orphaned lines in the old file, that is, lines + * that were used by a config option and are no longer used, like in case + * of multiple "save" options or duplicated options. */ + rewriteConfigRemoveOrphaned(state); + + /* Step 4: generate a new configuration file from the modified state + * and write it into the original file. */ + newcontent = rewriteConfigGetContentFromState(state); + retval = rewriteConfigOverwriteFile(cserver.configfile,newcontent); + + sdsfree(newcontent); + rewriteConfigReleaseState(state); + return retval; +} + +/*----------------------------------------------------------------------------- + * Configs that fit one of the major types and require no special handling + *----------------------------------------------------------------------------*/ +#define LOADBUF_SIZE 256 +static char loadbuf[LOADBUF_SIZE]; + +#define embedCommonConfig(config_name, config_alias, config_flags) \ + config_name, config_alias, config_flags, + +#define embedConfigInterface(initfn, setfn, getfn, rewritefn) { \ + initfn, setfn, getfn, rewritefn, \ +}, + +/* What follows is the generic config types that are supported. To add a new + * config with one of these types, add it to the standardConfig table with + * the creation macro for each type. + * + * Each type contains the following: + * * A function defining how to load this type on startup. + * * A function defining how to update this type on CONFIG SET. + * * A function defining how to serialize this type on CONFIG SET. 
+ * * A function defining how to rewrite this type on CONFIG REWRITE. + * * A Macro defining how to create this type. + */ + +/* Bool Configs */ +static void boolConfigInit(typeData data) { + *data.yesno.config = data.yesno.default_value; +} + +static int boolConfigSet(typeData data, sds value, int update, const char **err) { + int yn = yesnotoi(value); + if (yn == -1) { + if ((yn = truefalsetoi(value)) == -1) { + *err = "argument must be 'yes' or 'no'"; + return 0; + } + } + if (data.yesno.is_valid_fn && !data.yesno.is_valid_fn(yn, err)) + return 0; + int prev = *(data.yesno.config); + *(data.yesno.config) = yn; + if (update && data.yesno.update_fn && !data.yesno.update_fn(yn, prev, err)) { + *(data.yesno.config) = prev; + return 0; + } + return 1; +} + +static void boolConfigGet(client *c, typeData data) { + addReplyBulkCString(c, *data.yesno.config ? "yes" : "no"); +} + +static void boolConfigRewrite(typeData data, const char *name, struct rewriteConfigState *state) { + rewriteConfigYesNoOption(state, name,*(data.yesno.config), data.yesno.default_value); +} + +constexpr standardConfig createBoolConfig(const char *name, const char *alias, unsigned flags, int &config_addr, int defaultValue, int (*is_valid)(int val, const char **err), int (*update)(int val, int prev, const char **err)) +{ + standardConfig conf = { + embedCommonConfig(name, alias, flags) + { boolConfigInit, boolConfigSet, boolConfigGet, boolConfigRewrite } + }; + conf.data.yesno.config = &config_addr; + conf.data.yesno.default_value = defaultValue; + conf.data.yesno.is_valid_fn = is_valid; + conf.data.yesno.update_fn = update; + return conf; +} + +/* String Configs */ +static void stringConfigInit(typeData data) { + *data.string.config = (data.string.convert_empty_to_null && !data.string.default_value) ? 
NULL : zstrdup(data.string.default_value); +} + +static int stringConfigSet(typeData data, sds value, int update, const char **err) { + if (data.string.is_valid_fn && !data.string.is_valid_fn(value, err)) + return 0; + char *prev = *data.string.config; + *data.string.config = (data.string.convert_empty_to_null && !value[0]) ? NULL : zstrdup(value); + if (update && data.string.update_fn && !data.string.update_fn(*data.string.config, prev, err)) { + zfree(*data.string.config); + *data.string.config = prev; + return 0; + } + zfree(prev); + return 1; +} + +static void stringConfigGet(client *c, typeData data) { + addReplyBulkCString(c, *data.string.config ? *data.string.config : ""); +} + +static void stringConfigRewrite(typeData data, const char *name, struct rewriteConfigState *state) { + rewriteConfigStringOption(state, name,*(data.string.config), data.string.default_value); +} + +/* SDS Configs */ +static void sdsConfigInit(typeData data) { + *data.sds.config = (data.sds.convert_empty_to_null && !data.sds.default_value) ? NULL: sdsnew(data.sds.default_value); +} + +static int sdsConfigSet(typeData data, sds value, int update, const char **err) { + if (data.sds.is_valid_fn && !data.sds.is_valid_fn(value, err)) + return 0; + sds prev = *data.sds.config; + *data.sds.config = (data.sds.convert_empty_to_null && (sdslen(value) == 0)) ? NULL : sdsdup(value); + if (update && data.sds.update_fn && !data.sds.update_fn(*data.sds.config, prev, err)) { + sdsfree(*data.sds.config); + *data.sds.config = prev; + return 0; + } + sdsfree(prev); + return 1; +} + +static void sdsConfigGet(client *c, typeData data) { + if (*data.sds.config) { + addReplyBulkSds(c, sdsdup(*data.sds.config)); + } else { + addReplyBulkCString(c, ""); + } +} + +static void sdsConfigRewrite(typeData data, const char *name, struct rewriteConfigState *state) { + sds sdsDefault = data.sds.default_value ? 
sdsnew(data.sds.default_value) : NULL; + rewriteConfigSdsOption(state, name, *(data.sds.config), sdsDefault); + if (sdsDefault) + sdsfree(sdsDefault); +} + + +#define ALLOW_EMPTY_STRING 0 +#define EMPTY_STRING_IS_NULL 1 + +constexpr standardConfig createStringConfig(const char *name, const char *alias, unsigned flags, int empty_to_null, char *&config_addr, const char *defaultValue, int (*is_valid)(char*,const char**), int (*update)(char*,char*,const char**)) { + standardConfig conf = { + embedCommonConfig(name, alias, flags) + embedConfigInterface(stringConfigInit, stringConfigSet, stringConfigGet, stringConfigRewrite) + }; + conf.data.string = { + &(config_addr), + (defaultValue), + (is_valid), + (update), + (empty_to_null), + }; + return conf; +} + +constexpr standardConfig createSDSConfig(const char *name, const char *alias, unsigned flags, int empty_to_null, sds &config_addr, const char *defaultValue, int (*is_valid)(char*,const char**), int (*update)(char*,char*,const char**)) { + standardConfig conf = { + embedCommonConfig(name, alias, flags) + embedConfigInterface(sdsConfigInit, sdsConfigSet, sdsConfigGet, sdsConfigRewrite) + }; + conf.data.sds = { + &(config_addr), + (defaultValue), + (is_valid), + (update), + (empty_to_null), + }; + return conf; +} + +/* Enum configs */ +static void enumConfigInit(typeData data) { + *data.enumd.config = data.enumd.default_value; +} + +static int enumConfigSet(typeData data, sds value, int update, const char **err) { + int enumval = configEnumGetValue(data.enumd.enum_value, value); + if (enumval == INT_MIN) { + sds enumerr = sdsnew("argument must be one of the following: "); + configEnum *enumNode = data.enumd.enum_value; + while(enumNode->name != NULL) { + enumerr = sdscatlen(enumerr, enumNode->name, + strlen(enumNode->name)); + enumerr = sdscatlen(enumerr, ", ", 2); + enumNode++; + } + sdsrange(enumerr,0,-3); /* Remove final ", ". 
*/ + + strncpy(loadbuf, enumerr, LOADBUF_SIZE); + loadbuf[LOADBUF_SIZE - 1] = '\0'; + + sdsfree(enumerr); + *err = loadbuf; + return 0; + } + if (data.enumd.is_valid_fn && !data.enumd.is_valid_fn(enumval, err)) + return 0; + int prev = *(data.enumd.config); + *(data.enumd.config) = enumval; + if (update && data.enumd.update_fn && !data.enumd.update_fn(enumval, prev, err)) { + *(data.enumd.config) = prev; + return 0; + } + return 1; +} + +static void enumConfigGet(client *c, typeData data) { + addReplyBulkCString(c, configEnumGetNameOrUnknown(data.enumd.enum_value,*data.enumd.config)); +} + +static void enumConfigRewrite(typeData data, const char *name, struct rewriteConfigState *state) { + rewriteConfigEnumOption(state, name,*(data.enumd.config), data.enumd.enum_value, data.enumd.default_value); +} + +constexpr standardConfig createEnumConfig(const char *name, const char *alias, unsigned flags, configEnum *enumVal, int &config_addr, int defaultValue, int (*is_valid)(int,const char**), int (*update)(int,int,const char**)) { + standardConfig c = { + embedCommonConfig(name, alias, flags) + embedConfigInterface(enumConfigInit, enumConfigSet, enumConfigGet, enumConfigRewrite) + }; + c.data.enumd = { + &(config_addr), + (enumVal), + (defaultValue), + (is_valid), + (update), + }; + + return c; +} + +/* Gets a 'long long val' and sets it into the union, using a macro to get + * compile time type check. 
*/ +#define SET_NUMERIC_TYPE(val) \ + if (data.numeric.numeric_type == NUMERIC_TYPE_INT) { \ + *(data.numeric.config.i) = (int) val; \ + } else if (data.numeric.numeric_type == NUMERIC_TYPE_UINT) { \ + *(data.numeric.config.ui) = (unsigned int) val; \ + } else if (data.numeric.numeric_type == NUMERIC_TYPE_LONG) { \ + *(data.numeric.config.l) = (long) val; \ + } else if (data.numeric.numeric_type == NUMERIC_TYPE_ULONG) { \ + *(data.numeric.config.ul) = (unsigned long) val; \ + } else if (data.numeric.numeric_type == NUMERIC_TYPE_LONG_LONG) { \ + *(data.numeric.config.ll) = (long long) val; \ + } else if (data.numeric.numeric_type == NUMERIC_TYPE_ULONG_LONG) { \ + *(data.numeric.config.ull) = (unsigned long long) val; \ + } else if (data.numeric.numeric_type == NUMERIC_TYPE_SIZE_T) { \ + *(data.numeric.config.st) = (size_t) val; \ + } else if (data.numeric.numeric_type == NUMERIC_TYPE_SSIZE_T) { \ + *(data.numeric.config.sst) = (ssize_t) val; \ + } else if (data.numeric.numeric_type == NUMERIC_TYPE_OFF_T) { \ + *(data.numeric.config.ot) = (off_t) val; \ + } else if (data.numeric.numeric_type == NUMERIC_TYPE_TIME_T) { \ + *(data.numeric.config.tt) = (time_t) val; \ + } + +/* Gets a 'long long val' and sets it with the value from the union, using a + * macro to get compile time type check. 
*/ +#define GET_NUMERIC_TYPE(val) \ + if (data.numeric.numeric_type == NUMERIC_TYPE_INT) { \ + val = *(data.numeric.config.i); \ + } else if (data.numeric.numeric_type == NUMERIC_TYPE_UINT) { \ + val = *(data.numeric.config.ui); \ + } else if (data.numeric.numeric_type == NUMERIC_TYPE_LONG) { \ + val = *(data.numeric.config.l); \ + } else if (data.numeric.numeric_type == NUMERIC_TYPE_ULONG) { \ + val = *(data.numeric.config.ul); \ + } else if (data.numeric.numeric_type == NUMERIC_TYPE_LONG_LONG) { \ + val = *(data.numeric.config.ll); \ + } else if (data.numeric.numeric_type == NUMERIC_TYPE_ULONG_LONG) { \ + val = *(data.numeric.config.ull); \ + } else if (data.numeric.numeric_type == NUMERIC_TYPE_SIZE_T) { \ + val = *(data.numeric.config.st); \ + } else if (data.numeric.numeric_type == NUMERIC_TYPE_SSIZE_T) { \ + val = *(data.numeric.config.sst); \ + } else if (data.numeric.numeric_type == NUMERIC_TYPE_OFF_T) { \ + val = *(data.numeric.config.ot); \ + } else if (data.numeric.numeric_type == NUMERIC_TYPE_TIME_T) { \ + val = *(data.numeric.config.tt); \ + } + +/* Numeric configs */ +static void numericConfigInit(typeData data) { + SET_NUMERIC_TYPE(data.numeric.default_value) +} + +static int numericBoundaryCheck(typeData data, long long ll, const char **err) { + if (data.numeric.numeric_type == NUMERIC_TYPE_ULONG_LONG || + data.numeric.numeric_type == NUMERIC_TYPE_UINT || + data.numeric.numeric_type == NUMERIC_TYPE_SIZE_T) { + /* Boundary check for unsigned types */ + unsigned long long ull = ll; + unsigned long long upper_bound = data.numeric.upper_bound; + unsigned long long lower_bound = data.numeric.lower_bound; + if (ull > upper_bound || ull < lower_bound) { + snprintf(loadbuf, LOADBUF_SIZE, + "argument must be between %llu and %llu inclusive", + lower_bound, + upper_bound); + *err = loadbuf; + return 0; + } + } else { + /* Boundary check for signed types */ + if (ll > data.numeric.upper_bound || ll < data.numeric.lower_bound) { + snprintf(loadbuf, LOADBUF_SIZE, 
+ "argument must be between %lld and %lld inclusive", + data.numeric.lower_bound, + data.numeric.upper_bound); + *err = loadbuf; + return 0; + } + } + return 1; +} + +static int numericConfigSet(typeData data, sds value, int update, const char **err) { + long long ll, prev = 0; + if (data.numeric.is_memory) { + int memerr; + ll = memtoll(value, &memerr); + if (memerr || ll < 0) { + *err = "argument must be a memory value"; + return 0; + } + } else { + if (!string2ll(value, sdslen(value),&ll)) { + *err = "argument couldn't be parsed into an integer" ; + return 0; + } + } + + if (!numericBoundaryCheck(data, ll, err)) + return 0; + + if (data.numeric.is_valid_fn && !data.numeric.is_valid_fn(ll, err)) + return 0; + + GET_NUMERIC_TYPE(prev) + SET_NUMERIC_TYPE(ll) + + if (update && data.numeric.update_fn && !data.numeric.update_fn(ll, prev, err)) { + SET_NUMERIC_TYPE(prev) + return 0; + } + return 1; +} + +static void numericConfigGet(client *c, typeData data) { + char buf[128]; + long long value = 0; + + GET_NUMERIC_TYPE(value) + + ll2string(buf, sizeof(buf), value); + addReplyBulkCString(c, buf); +} + +static void numericConfigRewrite(typeData data, const char *name, struct rewriteConfigState *state) { + long long value = 0; + + GET_NUMERIC_TYPE(value) + + if (data.numeric.is_memory) { + rewriteConfigBytesOption(state, name, value, data.numeric.default_value); + } else { + rewriteConfigNumericalOption(state, name, value, data.numeric.default_value); + } +} + +#define INTEGER_CONFIG 0 +#define MEMORY_CONFIG 1 + +constexpr standardConfig embedCommonNumericalConfig(const char *name, const char *alias, unsigned flags, long long lower, long long upper, long long defaultValue, int memory, int (*is_valid)(long long, const char**), int (*update)(long long, long long, const char**)) { + standardConfig conf = { + embedCommonConfig(name, alias, flags) + embedConfigInterface(numericConfigInit, numericConfigSet, numericConfigGet, numericConfigRewrite) + }; + 
conf.data.numeric.is_memory = (memory); + conf.data.numeric.lower_bound = (lower); + conf.data.numeric.upper_bound = (upper); + conf.data.numeric.default_value = (defaultValue); + conf.data.numeric.is_valid_fn = (is_valid); + conf.data.numeric.update_fn = (update); + return conf; +} + +constexpr standardConfig createIntConfig(const char *name, const char *alias, unsigned flags, long long lower, long long upper, int &config_addr, long long defaultValue, int memory, int (*is_valid)(long long, const char**), int (*update)(long long, long long, const char**)) +{ + standardConfig conf = embedCommonNumericalConfig(name, alias, flags, lower, upper, defaultValue, memory, is_valid, update); + conf.data.numeric.numeric_type = NUMERIC_TYPE_INT; + conf.data.numeric.config.i = &config_addr; + return conf; +} + +constexpr standardConfig createUIntConfig(const char *name, const char *alias, unsigned flags, long long lower, long long upper, unsigned int &config_addr, long long defaultValue, int memory, int (*is_valid)(long long, const char**), int (*update)(long long, long long, const char**)) +{ + auto conf = embedCommonNumericalConfig(name, alias, flags, lower, upper, defaultValue, memory, is_valid, update); + conf.data.numeric.numeric_type = NUMERIC_TYPE_UINT; + conf.data.numeric.config.ui = &(config_addr); + return conf; +} + +constexpr standardConfig createLongConfig(const char *name, const char *alias, unsigned flags, long long lower, long long upper, long &config_addr, long long defaultValue, int memory, int (*is_valid)(long long, const char**), int (*update)(long long, long long, const char**)) { + auto conf = embedCommonNumericalConfig(name, alias, flags, lower, upper, defaultValue, memory, is_valid, update); + conf.data.numeric.numeric_type = NUMERIC_TYPE_LONG; + conf.data.numeric.config.l = &(config_addr); + return conf; +} + +constexpr standardConfig createULongConfig(const char *name, const char *alias, unsigned flags, long long lower, long long upper, unsigned long 
&config_addr, long long defaultValue, int memory, int (*is_valid)(long long, const char**), int (*update)(long long, long long, const char**)) { + auto conf = embedCommonNumericalConfig(name, alias, flags, lower, upper, defaultValue, memory, is_valid, update); + conf.data.numeric.numeric_type = NUMERIC_TYPE_ULONG; + conf.data.numeric.config.ul = &(config_addr); + return conf; +} + +constexpr standardConfig createLongLongConfig(const char *name, const char *alias, unsigned flags, long long lower, long long upper, long long &config_addr, long long defaultValue, int memory, int (*is_valid)(long long, const char**), int (*update)(long long, long long, const char**)) { + auto conf = embedCommonNumericalConfig(name, alias, flags, lower, upper, defaultValue, memory, is_valid, update); + conf.data.numeric.numeric_type = NUMERIC_TYPE_LONG_LONG; + conf.data.numeric.config.ll = &(config_addr); + return conf; +} + +constexpr standardConfig createULongLongConfig(const char *name, const char *alias, unsigned flags, long long lower, long long upper, unsigned long long &config_addr, long long defaultValue, int memory, int (*is_valid)(long long, const char**), int (*update)(long long, long long, const char**)) { + auto conf = embedCommonNumericalConfig(name, alias, flags, lower, upper, defaultValue, memory, is_valid, update); + conf.data.numeric.numeric_type = NUMERIC_TYPE_ULONG_LONG; + conf.data.numeric.config.ull = &(config_addr); + return conf; +} + +constexpr standardConfig createSizeTConfig(const char *name, const char *alias, unsigned flags, long long lower, long long upper, size_t &config_addr, long long defaultValue, int memory, int (*is_valid)(long long, const char**), int (*update)(long long, long long, const char**)) { + auto conf = embedCommonNumericalConfig(name, alias, flags, lower, upper, defaultValue, memory, is_valid, update); + conf.data.numeric.numeric_type = NUMERIC_TYPE_SIZE_T; + conf.data.numeric.config.st = &(config_addr); + return conf; +} + +constexpr 
standardConfig createSSizeTConfig(const char *name, const char *alias, unsigned flags, long long lower, long long upper, ssize_t &config_addr, long long defaultValue, int memory, int (*is_valid)(long long, const char**), int (*update)(long long, long long, const char**)) { + auto conf = embedCommonNumericalConfig(name, alias, flags, lower, upper, defaultValue, memory, is_valid, update); + conf.data.numeric.numeric_type = NUMERIC_TYPE_SSIZE_T; + conf.data.numeric.config.sst = &(config_addr); + return conf; +} + +constexpr standardConfig createTimeTConfig(const char *name, const char *alias, unsigned flags, long long lower, long long upper, time_t &config_addr, long long defaultValue, int memory, int (*is_valid)(long long, const char**), int (*update)(long long, long long, const char**)) { + auto conf = embedCommonNumericalConfig(name, alias, flags, lower, upper, defaultValue, memory, is_valid, update); + conf.data.numeric.numeric_type = NUMERIC_TYPE_TIME_T; + conf.data.numeric.config.tt = &(config_addr); + return conf; +} + +constexpr standardConfig createOffTConfig(const char *name, const char *alias, unsigned flags, long long lower, long long upper, off_t &config_addr, long long defaultValue, int memory, int (*is_valid)(long long, const char**), int (*update)(long long, long long, const char**)) { + auto conf = embedCommonNumericalConfig(name, alias, flags, lower, upper, defaultValue, memory, is_valid, update); + conf.data.numeric.numeric_type = NUMERIC_TYPE_OFF_T; + conf.data.numeric.config.ot = &(config_addr); + return conf; +} + +static int isValidActiveDefrag(int val, const char **err) { +#ifndef HAVE_DEFRAG + if (val) { + *err = "Active defragmentation cannot be enabled: it " + "requires a KeyDB server compiled with a modified Jemalloc " + "like the one shipped by default with the KeyDB source " + "distribution"; + return 0; + } +#else + UNUSED(val); + UNUSED(err); +#endif + return 1; +} + +static int isValidDBfilename(char *val, const char **err) { + if 
(!pathIsBaseName(val)) { + *err = "dbfilename can't be a path, just a filename"; + return 0; + } + return 1; +} + +static int isValidAOFfilename(char *val, const char **err) { + if (!pathIsBaseName(val)) { + *err = "appendfilename can't be a path, just a filename"; + return 0; + } + return 1; +} + +static int isValidS3Bucket(char *s3bucket, const char **err) { + int status = EXIT_FAILURE; + pid_t pid = fork(); + if (pid < 0) + { + *err = "couldn't fork to call aws cli"; + return 0; + } + + if (pid == 0) + { + execlp("aws", "aws", "s3", "ls", s3bucket, nullptr); + exit(EXIT_FAILURE); + } + else + { + waitpid(pid, &status, 0); + } + + if (status != EXIT_SUCCESS) { + *err = "could not access s3 bucket"; + return 0; + } + return 1; +} + +/* Validate specified string is a valid proc-title-template */ +static int isValidProcTitleTemplate(char *val, const char **err) { + if (!validateProcTitleTemplate(val)) { + *err = "template format is invalid or contains unknown variables"; + return 0; + } + return 1; +} + +static int updateProcTitleTemplate(char *val, char *prev, const char **err) { + UNUSED(val); + UNUSED(prev); + if (redisSetProcTitle(NULL) == C_ERR) { + *err = "failed to set process title"; + return 0; + } + return 1; +} + +static int updateHZ(long long val, long long prev, const char **err) { + UNUSED(prev); + UNUSED(err); + /* Hz is more a hint from the user, so we accept values out of range + * but cap them to reasonable values. 
*/ + g_pserver->config_hz = val; + if (g_pserver->config_hz < CONFIG_MIN_HZ) g_pserver->config_hz = CONFIG_MIN_HZ; + if (g_pserver->config_hz > CONFIG_MAX_HZ) g_pserver->config_hz = CONFIG_MAX_HZ; + g_pserver->hz = g_pserver->config_hz; + return 1; +} + +static int updatePort(long long val, long long prev, const char **err) { + /* Do nothing if port is unchanged */ + if (val == prev) { + return 1; + } + + // Run this thread to make sure its valid + if (changeListenPort(val, &serverTL->ipfd, acceptTcpHandler, true) == C_ERR) { + *err = "Unable to listen on this port. Check server logs."; + return 0; + } + + // Now run the config change on the other threads + for (int ithread = 0; ithread < cserver.cthreads; ++ithread) { + if (&g_pserver->rgthreadvar[ithread] != serverTL) { + aePostFunction(g_pserver->rgthreadvar[ithread].el, [val]{ + if (changeListenPort(val, &serverTL->ipfd, acceptTcpHandler, false) == C_ERR) { + serverLog(LL_WARNING, "Failed to change the listen port for a thread. Server will still be listening on old ports."); + } + }); + } + } + + return 1; +} + +static int updateJemallocBgThread(int val, int prev, const char **err) { + UNUSED(prev); + UNUSED(err); + set_jemalloc_bg_thread(val); + return 1; +} + +static int updateReplBacklogSize(long long val, long long prev, const char **err) { + /* resizeReplicationBacklog sets g_pserver->repl_backlog_size, and relies on + * being able to tell when the size changes, so restore prev before calling it. 
*/ + if (cserver.repl_backlog_disk_size) { + *err = "Unable to dynamically resize the backlog because disk backlog is enabled"; + return 0; + } + g_pserver->repl_backlog_size = prev; + g_pserver->repl_backlog_config_size = val; + resizeReplicationBacklog(val); + return 1; +} + +static int updateMaxmemory(long long val, long long prev, const char **err) { + UNUSED(prev); + UNUSED(err); + if (val) { + size_t used = zmalloc_used_memory()-freeMemoryGetNotCountedMemory(); + if ((unsigned long long)val < used) { + serverLog(LL_WARNING,"WARNING: the new maxmemory value set via CONFIG SET (%llu) is smaller than the current memory usage (%zu). This will result in key eviction and/or the inability to accept new write commands depending on the maxmemory-policy.", g_pserver->maxmemory, used); + } + performEvictions(false /*fPreSnapshot*/); + } + return 1; +} + +static int updateFlashMaxmemory(long long val, long long prev, const char **err) { + UNUSED(prev); + UNUSED(err); + if (val && g_pserver->m_pstorageFactory) { + size_t used = g_pserver->m_pstorageFactory->totalDiskspaceUsed(); + if ((unsigned long long)val < used) { + serverLog(LL_WARNING,"WARNING: the new maxstorage value set via CONFIG SET (%llu) is smaller than the current storage usage (%zu). 
This will result in key eviction and/or the inability to accept new write commands depending on the maxmemory-policy.", g_pserver->maxstorage, used); + } + performEvictions(false /*fPreSnapshot*/); + } + return 1; +} + +static int updateGoodSlaves(long long val, long long prev, const char **err) { + UNUSED(val); + UNUSED(prev); + UNUSED(err); + refreshGoodSlavesCount(); + return 1; +} + +static int updateMasterAuthConfig(sds, sds, const char **) { + updateMasterAuth(); + return 1; +} + +static int updateAppendonly(int val, int prev, const char **err) { + UNUSED(prev); + if (val == 0 && g_pserver->aof_state != AOF_OFF) { + stopAppendOnly(); + } else if (val && g_pserver->aof_state == AOF_OFF) { + if (startAppendOnly() == C_ERR) { + *err = "Unable to turn on AOF. Check server logs."; + return 0; + } + } + return 1; +} + +static int updateSighandlerEnabled(int val, int prev, const char **err) { + UNUSED(err); + UNUSED(prev); + if (val) + setupSignalHandlers(); + else + removeSignalHandlers(); + return 1; +} + +static int updateMaxclients(long long val, long long prev, const char **err) { + /* Try to check if the OS is capable of supporting so many FDs. */ + if (val > prev) { + adjustOpenFilesLimit(); + if (g_pserver->maxclients != val) { + static char msg[128]; + snprintf(msg, sizeof(msg), "The operating system is not able to handle the specified number of clients, try with %d", g_pserver->maxclients); + *err = msg; + if (g_pserver->maxclients > prev) { + g_pserver->maxclients = prev; + adjustOpenFilesLimit(); + } + return 0; + } + /* Change the SetSize for the current thread first. 
If any error, return the error message to the client, + * otherwise, continue to do the same for other threads */ + if ((unsigned int) aeGetSetSize(aeGetCurrentEventLoop()) < + g_pserver->maxclients + CONFIG_FDSET_INCR) + { + if (aeResizeSetSize(aeGetCurrentEventLoop(), + g_pserver->maxclients + CONFIG_FDSET_INCR) == AE_ERR) + { + *err = "The event loop API used by KeyDB is not able to handle the specified number of clients"; + return 0; + } + serverLog(LL_DEBUG,"Successfully changed the setsize for current thread %d", ielFromEventLoop(aeGetCurrentEventLoop())); + } + + for (int iel = 0; iel < cserver.cthreads; ++iel) + { + if (g_pserver->rgthreadvar[iel].el == aeGetCurrentEventLoop()){ + continue; + } + + if ((unsigned int) aeGetSetSize(g_pserver->rgthreadvar[iel].el) < + g_pserver->maxclients + CONFIG_FDSET_INCR) + { + int res = aePostFunction(g_pserver->rgthreadvar[iel].el, [iel] { + if (aeResizeSetSize(g_pserver->rgthreadvar[iel].el, g_pserver->maxclients + CONFIG_FDSET_INCR) == AE_ERR) { + serverLog(LL_WARNING,"Failed to change the setsize for Thread %d", iel); + } + }); + + if (res != AE_OK){ + static char msg[128]; + snprintf(msg, sizeof(msg),"Failed to post the request to change setsize for Thread %d", iel); + *err = msg; + return 0; + } + serverLog(LL_DEBUG,"Successfully post the request to change the setsize for thread %d", iel); + } + } + } + return 1; +} + +static int validateMultiMasterNoForward(int val, const char **) { + if (val) { + serverLog(LL_WARNING, "WARNING: multi-master-no-forward is set, you *must* use a mesh topology or dataloss will occur"); + } + return 1; +} + +static int updateOOMScoreAdj(int val, int prev, const char **err) { + UNUSED(prev); + + if (val) { + if (setOOMScoreAdj(-1) == C_ERR) { + *err = "Failed to set current oom_score_adj. 
Check server logs."; + return 0; + } + } + + return 1; +} + +int updateRequirePass(sds val, sds prev, const char **err) { + UNUSED(prev); + UNUSED(err); + /* The old "requirepass" directive just translates to setting + * a password to the default user. The only thing we do + * additionally is to remember the cleartext password in this + * case, for backward compatibility with Redis <= 5. */ + ACLUpdateDefaultUserPassword(val); + return 1; +} + +#ifdef USE_OPENSSL +static int updateTlsCfg(char *val, char *prev, const char **err) { + UNUSED(val); + UNUSED(prev); + UNUSED(err); + + /* If TLS is enabled, try to configure OpenSSL. */ + if ((g_pserver->tls_port || g_pserver->tls_replication || g_pserver->tls_cluster) + && tlsConfigure(&g_pserver->tls_ctx_config) == C_ERR) { + *err = "Unable to update TLS configuration. Check server logs."; + return 0; + } + return 1; +} +static int updateTlsCfgBool(int val, int prev, const char **err) { + UNUSED(val); + UNUSED(prev); + return updateTlsCfg(NULL, NULL, err); +} + +static int updateTlsCfgInt(long long val, long long prev, const char **err) { + UNUSED(val); + UNUSED(prev); + return updateTlsCfg(NULL, NULL, err); +} + +static int updateTLSPortThread(long long val, bool fFirstCall, const char **err) +{ + if (changeListenPort(val, &serverTL->tlsfd, acceptTLSHandler, fFirstCall) == C_ERR) { + *err = "Unable to listen on this port. Check server logs."; + return 0; + } + + return 1; +} + +static int updateTLSPort(long long val, long long prev, const char **err) { + /* Do nothing if port is unchanged */ + if (val == prev) { + return 1; + } + + /* Configure TLS if tls is enabled */ + if (prev == 0 && tlsConfigure(&g_pserver->tls_ctx_config) == C_ERR) { + *err = "Unable to update TLS configuration. 
Check server logs."; + return 0; + } + + // Do our thread first in case there is a config issue + if (!updateTLSPortThread(val, true /*fFirstCall*/, err)) + return 0; + + for (int ithread = 0; ithread < cserver.cthreads; ++ithread) { + if (ithread == serverTL - g_pserver->rgthreadvar) + continue; // we already did our thread + aePostFunction(g_pserver->rgthreadvar[ithread].el, [val]{ + const char **err = nullptr; + if (!updateTLSPortThread(val, false /*fFirstCall*/, err)) { + serverLog(LL_WARNING, "Failed to update TLS port for a thread: %s", *err); + serverLog(LL_WARNING, "\tKeyDB will still be listening on the old port for some threads."); + } + }); + } + + return 1; +} + +#endif /* USE_OPENSSL */ + +int fDummy = false; +standardConfig configs[] = { + /* Bool configs */ + createBoolConfig("rdbchecksum", NULL, IMMUTABLE_CONFIG, g_pserver->rdb_checksum, 1, NULL, NULL), + createBoolConfig("daemonize", NULL, IMMUTABLE_CONFIG, cserver.daemonize, 0, NULL, NULL), + createBoolConfig("lua-replicate-commands", NULL, MODIFIABLE_CONFIG, g_pserver->lua_always_replicate_commands, 1, NULL, NULL), + createBoolConfig("always-show-logo", NULL, IMMUTABLE_CONFIG, g_pserver->always_show_logo, 0, NULL, NULL), + createBoolConfig("enable-motd", NULL, IMMUTABLE_CONFIG, cserver.enable_motd, 1, NULL, NULL), + createBoolConfig("protected-mode", NULL, MODIFIABLE_CONFIG, g_pserver->protected_mode, 1, NULL, NULL), + createBoolConfig("rdbcompression", NULL, MODIFIABLE_CONFIG, g_pserver->rdb_compression, 1, NULL, NULL), + createBoolConfig("rdb-del-sync-files", NULL, MODIFIABLE_CONFIG, g_pserver->rdb_del_sync_files, 0, NULL, NULL), + createBoolConfig("activerehashing", NULL, MODIFIABLE_CONFIG, g_pserver->activerehashing, 1, NULL, NULL), + createBoolConfig("stop-writes-on-bgsave-error", NULL, MODIFIABLE_CONFIG, g_pserver->stop_writes_on_bgsave_err, 1, NULL, NULL), + createBoolConfig("set-proc-title", NULL, IMMUTABLE_CONFIG, cserver.set_proc_title, 1, NULL, NULL), /* Should setproctitle be used? 
*/ + createBoolConfig("dynamic-hz", NULL, MODIFIABLE_CONFIG, g_pserver->dynamic_hz, 1, NULL, NULL), /* Adapt hz to # of clients.*/ + createBoolConfig("lazyfree-lazy-eviction", NULL, MODIFIABLE_CONFIG, g_pserver->lazyfree_lazy_eviction, 0, NULL, NULL), + createBoolConfig("lazyfree-lazy-expire", NULL, MODIFIABLE_CONFIG, g_pserver->lazyfree_lazy_expire, 0, NULL, NULL), + createBoolConfig("lazyfree-lazy-server-del", NULL, MODIFIABLE_CONFIG, g_pserver->lazyfree_lazy_server_del, 0, NULL, NULL), + createBoolConfig("lazyfree-lazy-user-del", NULL, MODIFIABLE_CONFIG, g_pserver->lazyfree_lazy_user_del , 0, NULL, NULL), + createBoolConfig("lazyfree-lazy-user-flush", NULL, MODIFIABLE_CONFIG, g_pserver->lazyfree_lazy_user_flush , 0, NULL, NULL), + createBoolConfig("repl-disable-tcp-nodelay", NULL, MODIFIABLE_CONFIG, g_pserver->repl_disable_tcp_nodelay, 0, NULL, NULL), + createBoolConfig("repl-diskless-sync", NULL, MODIFIABLE_CONFIG, g_pserver->repl_diskless_sync, 0, NULL, NULL), + createBoolConfig("aof-rewrite-incremental-fsync", NULL, MODIFIABLE_CONFIG, g_pserver->aof_rewrite_incremental_fsync, 1, NULL, NULL), + createBoolConfig("no-appendfsync-on-rewrite", NULL, MODIFIABLE_CONFIG, g_pserver->aof_no_fsync_on_rewrite, 0, NULL, NULL), + createBoolConfig("cluster-require-full-coverage", NULL, MODIFIABLE_CONFIG, g_pserver->cluster_require_full_coverage, 1, NULL, NULL), + createBoolConfig("rdb-save-incremental-fsync", NULL, MODIFIABLE_CONFIG, g_pserver->rdb_save_incremental_fsync, 1, NULL, NULL), + createBoolConfig("aof-load-truncated", NULL, MODIFIABLE_CONFIG, g_pserver->aof_load_truncated, 1, NULL, NULL), + createBoolConfig("aof-use-rdb-preamble", NULL, MODIFIABLE_CONFIG, g_pserver->aof_use_rdb_preamble, 1, NULL, NULL), + createBoolConfig("cluster-replica-no-failover", "cluster-slave-no-failover", MODIFIABLE_CONFIG, g_pserver->cluster_slave_no_failover, 0, NULL, NULL), /* Failover by default. 
*/ + createBoolConfig("replica-lazy-flush", "slave-lazy-flush", MODIFIABLE_CONFIG, g_pserver->repl_slave_lazy_flush, 0, NULL, NULL), + createBoolConfig("replica-serve-stale-data", "slave-serve-stale-data", MODIFIABLE_CONFIG, g_pserver->repl_serve_stale_data, 1, NULL, NULL), + createBoolConfig("replica-read-only", "slave-read-only", MODIFIABLE_CONFIG, g_pserver->repl_slave_ro, 1, NULL, NULL), + createBoolConfig("replica-ignore-maxmemory", "slave-ignore-maxmemory", MODIFIABLE_CONFIG, g_pserver->repl_slave_ignore_maxmemory, 1, NULL, NULL), + createBoolConfig("jemalloc-bg-thread", NULL, MODIFIABLE_CONFIG, cserver.jemalloc_bg_thread, 1, NULL, updateJemallocBgThread), + createBoolConfig("activedefrag", NULL, MODIFIABLE_CONFIG, cserver.active_defrag_enabled, 0, isValidActiveDefrag, NULL), + createBoolConfig("syslog-enabled", NULL, IMMUTABLE_CONFIG, g_pserver->syslog_enabled, 0, NULL, NULL), + createBoolConfig("cluster-enabled", NULL, IMMUTABLE_CONFIG, g_pserver->cluster_enabled, 0, NULL, NULL), + createBoolConfig("appendonly", NULL, MODIFIABLE_CONFIG, g_pserver->aof_enabled, 0, NULL, updateAppendonly), + createBoolConfig("cluster-allow-reads-when-down", NULL, MODIFIABLE_CONFIG, g_pserver->cluster_allow_reads_when_down, 0, NULL, NULL), + createBoolConfig("delete-on-evict", NULL, MODIFIABLE_CONFIG, cserver.delete_on_evict, 0, NULL, NULL), + createBoolConfig("use-fork", NULL, IMMUTABLE_CONFIG, cserver.fForkBgSave, 1, NULL, NULL), + createBoolConfig("io-threads-do-reads", NULL, IMMUTABLE_CONFIG, fDummy, 0, NULL, NULL), + createBoolConfig("time-thread-priority", NULL, IMMUTABLE_CONFIG, cserver.time_thread_priority, 0, NULL, NULL), + createBoolConfig("prefetch-enabled", NULL, MODIFIABLE_CONFIG, g_pserver->prefetch_enabled, 1, NULL, NULL), + createBoolConfig("allow-rdb-resize-op", NULL, MODIFIABLE_CONFIG, g_pserver->allowRdbResizeOp, 1, NULL, NULL), + createBoolConfig("crash-log-enabled", NULL, MODIFIABLE_CONFIG, g_pserver->crashlog_enabled, 1, NULL, updateSighandlerEnabled), + 
createBoolConfig("crash-memcheck-enabled", NULL, MODIFIABLE_CONFIG, g_pserver->memcheck_enabled, 1, NULL, NULL), + createBoolConfig("use-exit-on-panic", NULL, MODIFIABLE_CONFIG, g_pserver->use_exit_on_panic, 0, NULL, NULL), + createBoolConfig("disable-thp", NULL, MODIFIABLE_CONFIG, g_pserver->disable_thp, 1, NULL, NULL), + createBoolConfig("cluster-allow-replica-migration", NULL, MODIFIABLE_CONFIG, g_pserver->cluster_allow_replica_migration, 1, NULL, NULL), + createBoolConfig("replica-announced", NULL, MODIFIABLE_CONFIG, g_pserver->replica_announced, 1, NULL, NULL), + createBoolConfig("enable-async-commands", NULL, MODIFIABLE_CONFIG, g_pserver->enable_async_commands, 0, NULL, NULL), + createBoolConfig("multithread-load-enabled", NULL, MODIFIABLE_CONFIG, g_pserver->multithread_load_enabled, 0, NULL, NULL), + createBoolConfig("active-client-balancing", NULL, MODIFIABLE_CONFIG, g_pserver->active_client_balancing, 1, NULL, NULL), + + /* String Configs */ + createStringConfig("aclfile", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, g_pserver->acl_filename, "", NULL, NULL), + createStringConfig("unixsocket", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->unixsocket, NULL, NULL, NULL), + createStringConfig("pidfile", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, cserver.pidfile, NULL, NULL, NULL), + createStringConfig("replica-announce-ip", "slave-announce-ip", MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->slave_announce_ip, NULL, NULL, NULL), + createStringConfig("masteruser", NULL, MODIFIABLE_CONFIG | SENSITIVE_CONFIG, EMPTY_STRING_IS_NULL, cserver.default_masteruser, NULL, NULL, updateMasterAuthConfig), + createStringConfig("cluster-announce-ip", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->cluster_announce_ip, NULL, NULL, NULL), + createStringConfig("syslog-ident", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, g_pserver->syslog_ident, "redis", NULL, NULL), + createStringConfig("dbfilename", NULL, MODIFIABLE_CONFIG, ALLOW_EMPTY_STRING, 
g_pserver->rdb_filename, CONFIG_DEFAULT_RDB_FILENAME, isValidDBfilename, NULL), + createStringConfig("db-s3-object", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->rdb_s3bucketpath, NULL, isValidS3Bucket, NULL), + createStringConfig("appendfilename", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, g_pserver->aof_filename, "appendonly.aof", isValidAOFfilename, NULL), + createStringConfig("server_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->server_cpulist, NULL, NULL, NULL), + createStringConfig("bio_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->bio_cpulist, NULL, NULL, NULL), + createStringConfig("aof_rewrite_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->aof_rewrite_cpulist, NULL, NULL, NULL), + createStringConfig("bgsave_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->bgsave_cpulist, NULL, NULL, NULL), + createStringConfig("storage-provider-options", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, cserver.storage_conf, NULL, NULL, NULL), + createStringConfig("ignore-warnings", NULL, MODIFIABLE_CONFIG, ALLOW_EMPTY_STRING, g_pserver->ignore_warnings, "", NULL, NULL), + createStringConfig("proc-title-template", NULL, MODIFIABLE_CONFIG, ALLOW_EMPTY_STRING, cserver.proc_title_template, CONFIG_DEFAULT_PROC_TITLE_TEMPLATE, isValidProcTitleTemplate, updateProcTitleTemplate), + + /* SDS Configs */ + createSDSConfig("masterauth", NULL, MODIFIABLE_CONFIG | SENSITIVE_CONFIG, EMPTY_STRING_IS_NULL, cserver.default_masterauth, NULL, NULL, updateMasterAuthConfig), + createSDSConfig("requirepass", NULL, MODIFIABLE_CONFIG | SENSITIVE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->requirepass, NULL, NULL, updateRequirePass), + + /* Enum Configs */ + createEnumConfig("supervised", NULL, IMMUTABLE_CONFIG, supervised_mode_enum, cserver.supervised_mode, SUPERVISED_NONE, NULL, NULL), + createEnumConfig("syslog-facility", NULL, IMMUTABLE_CONFIG, syslog_facility_enum, g_pserver->syslog_facility, LOG_LOCAL0, 
NULL, NULL), + createEnumConfig("repl-diskless-load", NULL, MODIFIABLE_CONFIG, repl_diskless_load_enum, g_pserver->repl_diskless_load, REPL_DISKLESS_LOAD_DISABLED, NULL, NULL), + createEnumConfig("loglevel", NULL, MODIFIABLE_CONFIG, loglevel_enum, cserver.verbosity, LL_NOTICE, NULL, NULL), + createEnumConfig("maxmemory-policy", NULL, MODIFIABLE_CONFIG, maxmemory_policy_enum, g_pserver->maxmemory_policy, MAXMEMORY_NO_EVICTION, NULL, NULL), + createEnumConfig("appendfsync", NULL, MODIFIABLE_CONFIG, aof_fsync_enum, g_pserver->aof_fsync, AOF_FSYNC_EVERYSEC, NULL, NULL), + createEnumConfig("storage-cache-mode", NULL, IMMUTABLE_CONFIG, storage_memory_model_enum, cserver.storage_memory_model, STORAGE_WRITETHROUGH, NULL, NULL), + createEnumConfig("oom-score-adj", NULL, MODIFIABLE_CONFIG, oom_score_adj_enum, g_pserver->oom_score_adj, OOM_SCORE_ADJ_NO, NULL, updateOOMScoreAdj), + createEnumConfig("acl-pubsub-default", NULL, MODIFIABLE_CONFIG, acl_pubsub_default_enum, g_pserver->acl_pubsub_default, USER_FLAG_ALLCHANNELS, NULL, NULL), + createEnumConfig("sanitize-dump-payload", NULL, MODIFIABLE_CONFIG, sanitize_dump_payload_enum, cserver.sanitize_dump_payload, SANITIZE_DUMP_NO, NULL, NULL), + + /* Integer configs */ + createIntConfig("databases", NULL, IMMUTABLE_CONFIG, 1, INT_MAX, cserver.dbnum, 16, INTEGER_CONFIG, NULL, NULL), + createIntConfig("port", NULL, MODIFIABLE_CONFIG, 0, 65535, g_pserver->port, 9880, INTEGER_CONFIG, NULL, updatePort), /* TCP port. */ + createIntConfig("auto-aof-rewrite-percentage", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, g_pserver->aof_rewrite_perc, 100, INTEGER_CONFIG, NULL, NULL), + createIntConfig("cluster-replica-validity-factor", "cluster-slave-validity-factor", MODIFIABLE_CONFIG, 0, INT_MAX, g_pserver->cluster_slave_validity_factor, 10, INTEGER_CONFIG, NULL, NULL), /* Slave max data age factor. 
*/ + createIntConfig("list-max-ziplist-size", NULL, MODIFIABLE_CONFIG, INT_MIN, INT_MAX, g_pserver->list_max_ziplist_size, -2, INTEGER_CONFIG, NULL, NULL), + createIntConfig("tcp-keepalive", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, cserver.tcpkeepalive, 300, INTEGER_CONFIG, NULL, NULL), + createIntConfig("cluster-migration-barrier", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, g_pserver->cluster_migration_barrier, 1, INTEGER_CONFIG, NULL, NULL), + createIntConfig("active-defrag-cycle-min", NULL, MODIFIABLE_CONFIG, 1, 99, cserver.active_defrag_cycle_min, 1, INTEGER_CONFIG, NULL, NULL), /* Default: 1% CPU min (at lower threshold) */ + createIntConfig("active-defrag-cycle-max", NULL, MODIFIABLE_CONFIG, 1, 99, cserver.active_defrag_cycle_max, 25, INTEGER_CONFIG, NULL, NULL), /* Default: 25% CPU max (at upper threshold) */ + createIntConfig("active-defrag-threshold-lower", NULL, MODIFIABLE_CONFIG, 0, 1000, cserver.active_defrag_threshold_lower, 10, INTEGER_CONFIG, NULL, NULL), /* Default: don't defrag when fragmentation is below 10% */ + createIntConfig("active-defrag-threshold-upper", NULL, MODIFIABLE_CONFIG, 0, 1000, cserver.active_defrag_threshold_upper, 100, INTEGER_CONFIG, NULL, NULL), /* Default: maximum defrag force at 100% fragmentation */ + createIntConfig("lfu-log-factor", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, g_pserver->lfu_log_factor, 10, INTEGER_CONFIG, NULL, NULL), + createIntConfig("lfu-decay-time", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, g_pserver->lfu_decay_time, 1, INTEGER_CONFIG, NULL, NULL), + createIntConfig("replica-priority", "slave-priority", MODIFIABLE_CONFIG, 0, INT_MAX, g_pserver->slave_priority, 100, INTEGER_CONFIG, NULL, NULL), + createIntConfig("repl-diskless-sync-delay", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, g_pserver->repl_diskless_sync_delay, 5, INTEGER_CONFIG, NULL, NULL), + createIntConfig("maxmemory-samples", NULL, MODIFIABLE_CONFIG, 1, INT_MAX, g_pserver->maxmemory_samples, 16, INTEGER_CONFIG, NULL, NULL), + 
createIntConfig("maxmemory-eviction-tenacity", NULL, MODIFIABLE_CONFIG, 0, 100, g_pserver->maxmemory_eviction_tenacity, 10, INTEGER_CONFIG, NULL, NULL), + createIntConfig("timeout", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, cserver.maxidletime, 0, INTEGER_CONFIG, NULL, NULL), /* Default client timeout: infinite */ + createIntConfig("replica-announce-port", "slave-announce-port", MODIFIABLE_CONFIG, 0, 65535, g_pserver->slave_announce_port, 0, INTEGER_CONFIG, NULL, NULL), + createIntConfig("tcp-backlog", NULL, IMMUTABLE_CONFIG, 0, INT_MAX, g_pserver->tcp_backlog, 511, INTEGER_CONFIG, NULL, NULL), /* TCP listen backlog. */ + createIntConfig("cluster-announce-bus-port", NULL, MODIFIABLE_CONFIG, 0, 65535, g_pserver->cluster_announce_bus_port, 0, INTEGER_CONFIG, NULL, NULL), /* Default: Use +10000 offset. */ + createIntConfig("cluster-announce-port", NULL, MODIFIABLE_CONFIG, 0, 65535, g_pserver->cluster_announce_port, 0, INTEGER_CONFIG, NULL, NULL), /* Use g_pserver->port */ + createIntConfig("cluster-announce-tls-port", NULL, MODIFIABLE_CONFIG, 0, 65535, g_pserver->cluster_announce_tls_port, 0, INTEGER_CONFIG, NULL, NULL), /* Use server.tls_port */ + createIntConfig("repl-timeout", NULL, MODIFIABLE_CONFIG, 1, INT_MAX, g_pserver->repl_timeout, 60, INTEGER_CONFIG, NULL, NULL), + createIntConfig("repl-ping-replica-period", "repl-ping-slave-period", MODIFIABLE_CONFIG, 1, INT_MAX, g_pserver->repl_ping_slave_period, 10, INTEGER_CONFIG, NULL, NULL), + createIntConfig("list-compress-depth", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, g_pserver->list_compress_depth, 0, INTEGER_CONFIG, NULL, NULL), + createIntConfig("rdb-key-save-delay", NULL, MODIFIABLE_CONFIG, INT_MIN, INT_MAX, g_pserver->rdb_key_save_delay, 0, INTEGER_CONFIG, NULL, NULL), + createIntConfig("key-load-delay", NULL, MODIFIABLE_CONFIG, INT_MIN, INT_MAX, g_pserver->key_load_delay, 0, INTEGER_CONFIG, NULL, NULL), + createIntConfig("active-expire-effort", NULL, MODIFIABLE_CONFIG, 1, 10, g_pserver->active_expire_effort, 1, 
INTEGER_CONFIG, NULL, NULL), /* From 1 to 10. */ + createIntConfig("hz", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, g_pserver->config_hz, CONFIG_DEFAULT_HZ, INTEGER_CONFIG, NULL, updateHZ), + createIntConfig("min-replicas-to-write", "min-slaves-to-write", MODIFIABLE_CONFIG, 0, INT_MAX, g_pserver->repl_min_slaves_to_write, 0, INTEGER_CONFIG, NULL, updateGoodSlaves), + createIntConfig("min-replicas-max-lag", "min-slaves-max-lag", MODIFIABLE_CONFIG, 0, INT_MAX, g_pserver->repl_min_slaves_max_lag, 10, INTEGER_CONFIG, NULL, updateGoodSlaves), + createIntConfig("min-clients-per-thread", NULL, MODIFIABLE_CONFIG, 0, 400, cserver.thread_min_client_threshold, 20, INTEGER_CONFIG, NULL, NULL), + createIntConfig("storage-flush-period", NULL, MODIFIABLE_CONFIG, 1, 10000, g_pserver->storage_flush_period, 500, INTEGER_CONFIG, NULL, NULL), + createIntConfig("replica-quorum", NULL, MODIFIABLE_CONFIG, -1, INT_MAX, g_pserver->repl_quorum, -1, INTEGER_CONFIG, NULL, NULL), + createIntConfig("replica-weighting-factor", NULL, MODIFIABLE_CONFIG, 1, INT_MAX, g_pserver->replicaIsolationFactor, 2, INTEGER_CONFIG, NULL, NULL), + /* Unsigned int configs */ + createUIntConfig("maxclients", NULL, MODIFIABLE_CONFIG, 1, UINT_MAX, g_pserver->maxclients, 10000, INTEGER_CONFIG, NULL, updateMaxclients), + createUIntConfig("loading-process-events-interval-keys", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, g_pserver->loading_process_events_interval_keys, 8192, MEMORY_CONFIG, NULL, NULL), + createUIntConfig("maxclients-reserved", NULL, MODIFIABLE_CONFIG, 0, 100, g_pserver->maxclientsReserved, 0, INTEGER_CONFIG, NULL, NULL), + + /* Unsigned Long configs */ + createULongConfig("active-defrag-max-scan-fields", NULL, MODIFIABLE_CONFIG, 1, LONG_MAX, cserver.active_defrag_max_scan_fields, 1000, INTEGER_CONFIG, NULL, NULL), /* Default: keys with more than 1000 fields will be processed separately */ + createULongConfig("slowlog-max-len", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, g_pserver->slowlog_max_len, 128, INTEGER_CONFIG, 
NULL, NULL), + createULongConfig("acllog-max-len", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, g_pserver->acllog_max_len, 128, INTEGER_CONFIG, NULL, NULL), + + /* Long Long configs */ + createLongLongConfig("lua-time-limit", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, g_pserver->lua_time_limit, 5000, INTEGER_CONFIG, NULL, NULL),/* milliseconds */ + createLongLongConfig("cluster-node-timeout", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, g_pserver->cluster_node_timeout, 15000, INTEGER_CONFIG, NULL, NULL), + createLongLongConfig("slowlog-log-slower-than", NULL, MODIFIABLE_CONFIG, -1, LLONG_MAX, g_pserver->slowlog_log_slower_than, 10000, INTEGER_CONFIG, NULL, NULL), + createLongLongConfig("latency-monitor-threshold", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, g_pserver->latency_monitor_threshold, 0, INTEGER_CONFIG, NULL, NULL), + createLongLongConfig("proto-max-bulk-len", NULL, MODIFIABLE_CONFIG, 1024*1024, LLONG_MAX, g_pserver->proto_max_bulk_len, 512ll*1024*1024, MEMORY_CONFIG, NULL, NULL), /* Bulk request max size */ + createLongLongConfig("stream-node-max-entries", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, g_pserver->stream_node_max_entries, 100, INTEGER_CONFIG, NULL, NULL), + createLongLongConfig("repl-backlog-size", NULL, MODIFIABLE_CONFIG, 1, LLONG_MAX, g_pserver->repl_backlog_config_size, 1024*1024, MEMORY_CONFIG, NULL, updateReplBacklogSize), /* Default: 1mb */ + createLongLongConfig("repl-backlog-disk-reserve", NULL, IMMUTABLE_CONFIG, 0, LLONG_MAX, cserver.repl_backlog_disk_size, 0, MEMORY_CONFIG, NULL, NULL), + createLongLongConfig("max-snapshot-slip", NULL, MODIFIABLE_CONFIG, 0, 5000, g_pserver->snapshot_slip, 400, 0, NULL, NULL), + createLongLongConfig("max-rand-count", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX/2, g_pserver->rand_total_threshold, LONG_MAX/2, 0, NULL, NULL), + + /* Unsigned Long Long configs */ + createULongLongConfig("maxmemory", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, g_pserver->maxmemory, 0, MEMORY_CONFIG, NULL, updateMaxmemory), + createULongLongConfig("maxstorage", 
NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, g_pserver->maxstorage, 0, MEMORY_CONFIG, NULL, updateFlashMaxmemory), + + /* Size_t configs */ + createSizeTConfig("hash-max-ziplist-entries", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, g_pserver->hash_max_ziplist_entries, 512, INTEGER_CONFIG, NULL, NULL), + createSizeTConfig("set-max-intset-entries", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, g_pserver->set_max_intset_entries, 512, INTEGER_CONFIG, NULL, NULL), + createSizeTConfig("zset-max-ziplist-entries", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, g_pserver->zset_max_ziplist_entries, 128, INTEGER_CONFIG, NULL, NULL), + createSizeTConfig("active-defrag-ignore-bytes", NULL, MODIFIABLE_CONFIG, 1, LLONG_MAX, cserver.active_defrag_ignore_bytes, 100<<20, MEMORY_CONFIG, NULL, NULL), /* Default: don't defrag if frag overhead is below 100mb */ + createSizeTConfig("hash-max-ziplist-value", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, g_pserver->hash_max_ziplist_value, 64, MEMORY_CONFIG, NULL, NULL), + createSizeTConfig("stream-node-max-bytes", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, g_pserver->stream_node_max_bytes, 4096, MEMORY_CONFIG, NULL, NULL), + createSizeTConfig("zset-max-ziplist-value", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, g_pserver->zset_max_ziplist_value, 64, MEMORY_CONFIG, NULL, NULL), + createSizeTConfig("hll-sparse-max-bytes", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, g_pserver->hll_sparse_max_bytes, 3000, MEMORY_CONFIG, NULL, NULL), + createSizeTConfig("tracking-table-max-keys", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, g_pserver->tracking_table_max_keys, 1000000, INTEGER_CONFIG, NULL, NULL), /* Default: 1 million keys max. */ + createSizeTConfig("client-query-buffer-limit", NULL, MODIFIABLE_CONFIG, 1024*1024, LONG_MAX, cserver.client_max_querybuf_len, 1024*1024*1024, MEMORY_CONFIG, NULL, NULL), /* Default: 1GB max query buffer. 
*/ + + /* Other configs */ + createTimeTConfig("repl-backlog-ttl", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, g_pserver->repl_backlog_time_limit, 60*60, INTEGER_CONFIG, NULL, NULL), /* Default: 1 hour */ + createOffTConfig("auto-aof-rewrite-min-size", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, g_pserver->aof_rewrite_min_size, 64*1024*1024, MEMORY_CONFIG, NULL, NULL), + + /* KeyDB Specific Configs */ + createULongConfig("loading-process-events-interval-bytes", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, g_pserver->loading_process_events_interval_bytes, 2*1024*1024, MEMORY_CONFIG, NULL, NULL), + createBoolConfig("multi-master-no-forward", NULL, MODIFIABLE_CONFIG, cserver.multimaster_no_forward, 0, validateMultiMasterNoForward, NULL), + createBoolConfig("allow-write-during-load", NULL, MODIFIABLE_CONFIG, g_pserver->fWriteDuringActiveLoad, 0, NULL, NULL), + createBoolConfig("force-backlog-disk-reserve", NULL, MODIFIABLE_CONFIG, cserver.force_backlog_disk, 0, NULL, NULL), + createBoolConfig("soft-shutdown", NULL, MODIFIABLE_CONFIG, g_pserver->config_soft_shutdown, 0, NULL, NULL), + createBoolConfig("flash-disable-key-cache", NULL, MODIFIABLE_CONFIG, g_pserver->flash_disable_key_cache, 0, NULL, NULL), + createSizeTConfig("semi-ordered-set-bucket-size", NULL, MODIFIABLE_CONFIG, 0, 1024, g_semiOrderedSetTargetBucketSize, 0, INTEGER_CONFIG, NULL, NULL), + createSDSConfig("availability-zone", NULL, MODIFIABLE_CONFIG, 0, g_pserver->sdsAvailabilityZone, "", NULL, NULL), + createIntConfig("overload-protect-percent", NULL, MODIFIABLE_CONFIG, 0, 200, g_pserver->overload_protect_threshold, 0, INTEGER_CONFIG, NULL, NULL), + createIntConfig("force-eviction-percent", NULL, MODIFIABLE_CONFIG, 0, 100, g_pserver->force_eviction_percent, 0, INTEGER_CONFIG, NULL, NULL), + createBoolConfig("enable-async-rehash", NULL, MODIFIABLE_CONFIG, g_pserver->enable_async_rehash, 1, NULL, NULL), + createBoolConfig("enable-keydb-fastsync", NULL, MODIFIABLE_CONFIG, g_pserver->fEnableFastSync, 0, NULL, NULL), + +#ifdef 
USE_OPENSSL + createIntConfig("tls-port", NULL, MODIFIABLE_CONFIG, 0, 65535, g_pserver->tls_port, 0, INTEGER_CONFIG, NULL, updateTLSPort), /* TCP port. */ + createIntConfig("tls-session-cache-size", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, g_pserver->tls_ctx_config.session_cache_size, 20*1024, INTEGER_CONFIG, NULL, updateTlsCfgInt), + createIntConfig("tls-session-cache-timeout", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, g_pserver->tls_ctx_config.session_cache_timeout, 300, INTEGER_CONFIG, NULL, updateTlsCfgInt), + createBoolConfig("tls-cluster", NULL, MODIFIABLE_CONFIG, g_pserver->tls_cluster, 0, NULL, updateTlsCfgBool), + createBoolConfig("tls-replication", NULL, MODIFIABLE_CONFIG, g_pserver->tls_replication, 0, NULL, updateTlsCfgBool), + createEnumConfig("tls-auth-clients", NULL, MODIFIABLE_CONFIG, tls_auth_clients_enum, g_pserver->tls_auth_clients, TLS_CLIENT_AUTH_YES, NULL, NULL), + createBoolConfig("tls-prefer-server-ciphers", NULL, MODIFIABLE_CONFIG, g_pserver->tls_ctx_config.prefer_server_ciphers, 0, NULL, updateTlsCfgBool), + createBoolConfig("tls-session-caching", NULL, MODIFIABLE_CONFIG, g_pserver->tls_ctx_config.session_caching, 1, NULL, updateTlsCfgBool), + createBoolConfig("tls-rotation", NULL, MODIFIABLE_CONFIG, g_pserver->tls_rotation, 0, NULL, updateTlsCfgBool), + createStringConfig("tls-cert-file", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->tls_ctx_config.cert_file, NULL, NULL, updateTlsCfg), + createStringConfig("tls-key-file", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->tls_ctx_config.key_file, NULL, NULL, updateTlsCfg), + createStringConfig("tls-key-file-pass", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->tls_ctx_config.key_file_pass, NULL, NULL, updateTlsCfg), + createStringConfig("tls-client-cert-file", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->tls_ctx_config.client_cert_file, NULL, NULL, updateTlsCfg), + createStringConfig("tls-client-key-file", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, 
g_pserver->tls_ctx_config.client_key_file, NULL, NULL, updateTlsCfg), + createStringConfig("tls-client-key-file-pass", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->tls_ctx_config.client_key_file_pass, NULL, NULL, updateTlsCfg), + createStringConfig("tls-dh-params-file", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->tls_ctx_config.dh_params_file, NULL, NULL, updateTlsCfg), + createStringConfig("tls-ca-cert-file", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->tls_ctx_config.ca_cert_file, NULL, NULL, updateTlsCfg), + createStringConfig("tls-ca-cert-dir", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->tls_ctx_config.ca_cert_dir, NULL, NULL, updateTlsCfg), + createStringConfig("tls-protocols", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->tls_ctx_config.protocols, NULL, NULL, updateTlsCfg), + createStringConfig("tls-ciphers", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->tls_ctx_config.ciphers, NULL, NULL, updateTlsCfg), + createStringConfig("tls-ciphersuites", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->tls_ctx_config.ciphersuites, NULL, NULL, updateTlsCfg), +#endif + + /* NULL Terminator */ + {NULL} +}; + +/*----------------------------------------------------------------------------- + * CONFIG command entry point + *----------------------------------------------------------------------------*/ + +void configCommand(client *c) { + /* Only allow CONFIG GET while loading. 
*/ + if (g_pserver->loading && strcasecmp(szFromObj(c->argv[1]),"get")) { + addReplyError(c,"Only CONFIG GET is allowed during loading"); + return; + } + + if (c->argc == 2 && !strcasecmp(szFromObj(c->argv[1]),"help")) { + const char *help[] = { +"GET ", +" Return parameters matching the glob-like and their values.", +"SET ", +" Set the configuration to .", +"RESETSTAT", +" Reset statistics reported by the INFO command.", +"REWRITE", +" Rewrite the configuration file.", +NULL + }; + + addReplyHelp(c, help); + } else if (!strcasecmp(szFromObj(c->argv[1]),"set") && c->argc >= 3) { + configSetCommand(c); + } else if (!strcasecmp(szFromObj(c->argv[1]),"get") && c->argc == 3) { + configGetCommand(c); + } else if (!strcasecmp(szFromObj(c->argv[1]),"resetstat") && c->argc == 2) { + resetServerStats(); + resetCommandTableStats(); + resetErrorTableStats(); + addReply(c,shared.ok); + } else if (!strcasecmp(szFromObj(c->argv[1]),"rewrite") && c->argc == 2) { + if (cserver.configfile == NULL) { + addReplyError(c,"The server is running without a config file"); + return; + } + if (rewriteConfig(cserver.configfile, 0) == -1) { + serverLog(LL_WARNING,"CONFIG REWRITE failed: %s", strerror(errno)); + addReplyErrorFormat(c,"Rewriting config file: %s", strerror(errno)); + } else { + serverLog(LL_WARNING,"CONFIG REWRITE executed with success."); + addReply(c,shared.ok); + } + } else { + addReplySubcommandSyntaxError(c); + return; + } +} diff --git a/src/redis-cli.c b/src/redis-cli.c new file mode 100644 index 000000000..31640de36 --- /dev/null +++ b/src/redis-cli.c @@ -0,0 +1,7379 @@ +/* Redis CLI (command line interface) + * + * Copyright (c) 2009-2012, Salvatore Sanfilippo + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "fmacros.h" +#include "version.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#ifdef USE_OPENSSL +#include +#include +#include +#endif +#include "sdscompat.h" /* Use hiredis' sds compat header that maps sds calls to their hi_ variants */ +#include /* use sds.h from hiredis, so that only one set of sds functions will be present in the binary */ +#include "adlist.h" +#include "zmalloc.h" +#include "linenoise.h" +#include "help.h" +#include "anet.h" +#include "ae.h" +#include "storage.h" +#include "motd.h" +#include "cli_common.h" +#include "mt19937-64.h" + +#include "redis-cli.h" + +redisContext *context; +struct config config; + +int g_fTestMode = 0; + +/* User preferences. */ +static struct pref { + int hints; +} pref; + +static volatile sig_atomic_t force_cancel_loop = 0; +static void usage(void); +static void slaveMode(void); +char *redisGitSHA1(void); +char *redisGitDirty(void); +static int cliConnect(int force); + +static char *getInfoField(char *info, char *field); +static long getLongInfoField(char *info, char *field); + +/* --latency-dist palettes. */ +int spectrum_palette_color_size = 19; +int spectrum_palette_color[] = {0,233,234,235,237,239,241,243,245,247,144,143,142,184,226,214,208,202,196}; + +int spectrum_palette_mono_size = 13; +int spectrum_palette_mono[] = {0,233,234,235,237,239,241,243,245,247,249,251,253}; + +/* The actual palette in use. 
*/ +int *spectrum_palette; +int spectrum_palette_size; + +int g_fInCrash = 0; + +const char *motd_url = ""; +const char *motd_cache_file = ""; + +/*------------------------------------------------------------------------------ + * Utility functions + *--------------------------------------------------------------------------- */ + +static void cliPushHandler(void *, void *); + +uint16_t crc16(const char *buf, int len); + +static long long ustime(void) { + struct timeval tv; + long long ust; + + gettimeofday(&tv, NULL); + ust = ((long long)tv.tv_sec)*1000000; + ust += tv.tv_usec; + return ust; +} + +static long long mstime(void) { + return ustime()/1000; +} + +static void cliRefreshPrompt(void) { + if (config.eval_ldb) return; + + sds prompt = sdsempty(); + if (config.hostsocket != NULL) { + prompt = sdscatfmt(prompt, "futriix %s", config.hostsocket); + } else { + char addr[256]; + anetFormatAddr(addr, sizeof(addr), config.hostip, config.hostport); + printf("\n"); + prompt = sdscatlen(prompt,addr,strlen(addr)); + prompt = sdscatlen(prompt, " futriix:~> ", 11); + } + + /* Add [dbnum] if needed */ + if (config.dbnum != 0) + prompt = sdscatfmt(prompt,"[%i]",config.dbnum); + + /* Add TX if in transaction state*/ + if (config.in_multi) + prompt = sdscatlen(prompt,"(TX)",4); + + /* Copy the prompt in the static buffer. */ + // prompt = sdscatlen(prompt,"> ",2); + snprintf(config.prompt,sizeof(config.prompt),"%s",prompt); + sdsfree(prompt); +} + +struct dictEntry; +void asyncFreeDictTable(struct dictEntry **de) { + zfree(de); +} + +/* Return the name of the dotfile for the specified 'dotfilename'. + * Normally it just concatenates user $HOME to the file specified + * in 'dotfilename'. However if the environment variable 'envoverride' + * is set, its value is taken as the path. + * + * The function returns NULL (if the file is /dev/null or cannot be + * obtained for some error), or an SDS string that must be freed by + * the user. 
*/ +static sds getDotfilePath(char *envoverride, char *dotfilename) { + char *path = NULL; + sds dotPath = NULL; + + /* Check the env for a dotfile override. */ + path = getenv(envoverride); + if (path != NULL && *path != '\0') { + if (!strcmp("/dev/null", path)) { + return NULL; + } + + /* If the env is set, return it. */ + dotPath = sdsnew(path); + } else { + char *home = getenv("HOME"); + if (home != NULL && *home != '\0') { + /* If no override is set use $HOME/. */ + dotPath = sdscatprintf(sdsempty(), "%s/%s", home, dotfilename); + } + } + return dotPath; +} + +/* URL-style percent decoding. */ +#define isHexChar(c) (isdigit(c) || (c >= 'a' && c <= 'f')) +#define decodeHexChar(c) (isdigit(c) ? c - '0' : c - 'a' + 10) +#define decodeHex(h, l) ((decodeHexChar(h) << 4) + decodeHexChar(l)) + +static sds percentDecode(const char *pe, size_t len) { + const char *end = pe + len; + sds ret = sdsempty(); + const char *curr = pe; + + while (curr < end) { + if (*curr == '%') { + if ((end - curr) < 2) { + fprintf(stderr, "Incomplete URI encoding\n"); + exit(1); + } + + char h = tolower(*(++curr)); + char l = tolower(*(++curr)); + if (!isHexChar(h) || !isHexChar(l)) { + fprintf(stderr, "Illegal character in URI encoding\n"); + exit(1); + } + char c = decodeHex(h, l); + ret = sdscatlen(ret, &c, 1); + curr++; + } else { + ret = sdscatlen(ret, curr++, 1); + } + } + + return ret; +} + +/* Parse a URI and extract the server connection information. + * URI scheme is based on the the provisional specification[1] excluding support + * for query parameters. 
Valid URIs are: + * scheme: "redis://" + * authority: [[ ":"] "@"] [ [":" ]] + * path: ["/" []] + * + * [1]: https://www.iana.org/assignments/uri-schemes/prov/redis */ +static void parseRedisUri(const char *uri) { + + const char *scheme = "redis://"; + const char *tlsscheme = "rediss://"; + const char *curr = uri; + const char *end = uri + strlen(uri); + const char *userinfo, *username, *port, *host, *path; + + /* URI must start with a valid scheme. */ + if (!strncasecmp(tlsscheme, curr, strlen(tlsscheme))) { +#ifdef USE_OPENSSL + config.tls = 1; + curr += strlen(tlsscheme); +#else + fprintf(stderr,"rediss:// is only supported when futriix-cli is compiled with OpenSSL\n"); + exit(1); +#endif + } else if (!strncasecmp(scheme, curr, strlen(scheme))) { + curr += strlen(scheme); + } else { + fprintf(stderr,"Invalid URI scheme\n"); + exit(1); + } + if (curr == end) return; + + /* Extract user info. */ + if ((userinfo = strchr(curr,'@'))) { + if ((username = strchr(curr, ':')) && username < userinfo) { + config.user = percentDecode(curr, username - curr); + curr = username + 1; + } + + config.auth = percentDecode(curr, userinfo - curr); + curr = userinfo + 1; + } + if (curr == end) return; + + /* Extract host and port. */ + path = strchr(curr, '/'); + if (*curr != '/') { + host = path ? path - 1 : end; + if ((port = strchr(curr, ':'))) { + config.hostport = atoi(port + 1); + host = port - 1; + } + config.hostip = sdsnewlen(curr, host - curr + 1); + } + curr = path ? path + 1 : end; + if (curr == end) return; + + /* Extract database number. 
*/ + config.input_dbnum = atoi(curr); +} + +/* _serverAssert is needed by dict */ +void _serverAssert(const char *estr, const char *file, int line) { + fprintf(stderr, "=== ASSERTION FAILED ==="); + fprintf(stderr, "==> %s:%d '%s' is not true",file,line,estr); + *((char*)-1) = 'x'; +} + +/*------------------------------------------------------------------------------ + * Help functions + *--------------------------------------------------------------------------- */ + +#define CLI_HELP_COMMAND 1 +#define CLI_HELP_GROUP 2 + +typedef struct { + int type; + int argc; + sds *argv; + sds full; + + /* Only used for help on commands */ + struct commandHelp *org; +} helpEntry; + +static helpEntry *helpEntries; +static int helpEntriesLen; + +static sds cliVersion(void) { + sds version; + version = sdscatprintf(sdsempty(), "%s", KEYDB_REAL_VERSION); + + /* Add git commit and working tree status when available */ + if (strtoll(redisGitSHA1(),NULL,16)) { + version = sdscatprintf(version, " (git:%s", redisGitSHA1()); + if (strtoll(redisGitDirty(),NULL,10)) + version = sdscatprintf(version, "-dirty"); + version = sdscat(version, ")"); + } + return version; +} + +static void cliInitHelp(void) { + int commandslen = sizeof(commandHelp)/sizeof(struct commandHelp); + int groupslen = sizeof(commandGroups)/sizeof(char*); + int i, len, pos = 0; + helpEntry tmp; + + helpEntriesLen = len = commandslen+groupslen; + helpEntries = zmalloc(sizeof(helpEntry)*len, MALLOC_LOCAL); + + for (i = 0; i < groupslen; i++) { + tmp.argc = 1; + tmp.argv = zmalloc(sizeof(sds), MALLOC_LOCAL); + tmp.argv[0] = sdscatprintf(sdsempty(),"@%s",commandGroups[i]); + tmp.full = tmp.argv[0]; + tmp.type = CLI_HELP_GROUP; + tmp.org = NULL; + helpEntries[pos++] = tmp; + } + + for (i = 0; i < commandslen; i++) { + tmp.argv = sdssplitargs(commandHelp[i].name,&tmp.argc); + tmp.full = sdsnew(commandHelp[i].name); + tmp.type = CLI_HELP_COMMAND; + tmp.org = &commandHelp[i]; + helpEntries[pos++] = tmp; + } +} + +/* 
cliInitHelp() setups the helpEntries array with the command and group + * names from the help.h file. However the Redis instance we are connecting + * to may support more commands, so this function integrates the previous + * entries with additional entries obtained using the COMMAND command + * available in recent versions of Redis. */ +static void cliIntegrateHelp(void) { + if (cliConnect(CC_QUIET) == REDIS_ERR) return; + + redisReply *reply = redisCommand(context, "COMMAND"); + if(reply == NULL || reply->type != REDIS_REPLY_ARRAY) return; + + /* Scan the array reported by COMMAND and fill only the entries that + * don't already match what we have. */ + for (size_t j = 0; j < reply->elements; j++) { + redisReply *entry = reply->element[j]; + if (entry->type != REDIS_REPLY_ARRAY || entry->elements < 4 || + entry->element[0]->type != REDIS_REPLY_STRING || + entry->element[1]->type != REDIS_REPLY_INTEGER || + entry->element[3]->type != REDIS_REPLY_INTEGER) return; + char *cmdname = entry->element[0]->str; + int i; + + for (i = 0; i < helpEntriesLen; i++) { + helpEntry *he = helpEntries+i; + if (!strcasecmp(he->argv[0],cmdname)) + break; + } + if (i != helpEntriesLen) continue; + + helpEntriesLen++; + helpEntries = zrealloc(helpEntries,sizeof(helpEntry)*helpEntriesLen, MALLOC_LOCAL); + helpEntry *new = helpEntries+(helpEntriesLen-1); + + new->argc = 1; + new->argv = zmalloc(sizeof(sds), MALLOC_LOCAL); + new->argv[0] = sdsnew(cmdname); + new->full = new->argv[0]; + new->type = CLI_HELP_COMMAND; + sdstoupper(new->argv[0]); + + struct commandHelp *ch = zmalloc(sizeof(*ch), MALLOC_LOCAL); + ch->name = new->argv[0]; + ch->params = sdsempty(); + int args = llabs(entry->element[1]->integer); + args--; /* Remove the command name itself. 
*/ + if (entry->element[3]->integer == 1) { + ch->params = sdscat(ch->params,"key "); + args--; + } + while(args-- > 0) ch->params = sdscat(ch->params,"arg "); + if (entry->element[1]->integer < 0) + ch->params = sdscat(ch->params,"...options..."); + ch->summary = "Help not available"; + ch->group = 0; + ch->since = "not known"; + new->org = ch; + } + freeReplyObject(reply); +} + +/* Output command help to stdout. */ +static void cliOutputCommandHelp(struct commandHelp *help, int group) { + printf("\r\n \x1b[1m%s\x1b[0m \x1b[90m%s\x1b[0m\r\n", help->name, help->params); + printf(" \x1b[33msummary:\x1b[0m %s\r\n", help->summary); + printf(" \x1b[33msince:\x1b[0m %s\r\n", help->since); + if (group) { + printf(" \x1b[33mgroup:\x1b[0m %s\r\n", commandGroups[help->group]); + } +} + +/* Print generic help. */ +static void cliOutputGenericHelp(void) { + sds version = cliVersion(); + printf( + "futriix-cli %s\n" + "To get help about Redis commands type:\n" + " \"help @\" to get a list of commands in \n" + " \"help \" for help on \n" + " \"help \" to get a list of possible help topics\n" + " \"quit\" to exit\n" + "\n" + "To set futriix -cli preferences:n" + " \":set hints\" enable online hints\n" + " \":set nohints\" disable online hints\n" + "Set your preferences in ~/.redisclirc\n", + version + ); + sdsfree(version); +} + +/* Output all command help, filtering by group or command name. 
*/ +static void cliOutputHelp(int argc, char **argv) { + int i, j, len; + int group = -1; + helpEntry *entry; + struct commandHelp *help; + + if (argc == 0) { + cliOutputGenericHelp(); + return; + } else if (argc > 0 && argv[0][0] == '@') { + len = sizeof(commandGroups)/sizeof(char*); + for (i = 0; i < len; i++) { + if (strcasecmp(argv[0]+1,commandGroups[i]) == 0) { + group = i; + break; + } + } + } + + assert(argc > 0); + for (i = 0; i < helpEntriesLen; i++) { + entry = &helpEntries[i]; + if (entry->type != CLI_HELP_COMMAND) continue; + + help = entry->org; + if (group == -1) { + /* Compare all arguments */ + if (argc <= entry->argc) { + for (j = 0; j < argc; j++) { + if (strcasecmp(argv[j],entry->argv[j]) != 0) break; + } + if (j == argc) { + cliOutputCommandHelp(help,1); + } + } + } else { + if (group == help->group) { + cliOutputCommandHelp(help,0); + } + } + } + printf("\r\n"); +} + +/* Linenoise completion callback. */ +static void completionCallback(const char *buf, linenoiseCompletions *lc) { + size_t startpos = 0; + int mask; + int i; + size_t matchlen; + sds tmp; + + if (strncasecmp(buf,"help ",5) == 0) { + startpos = 5; + while (isspace(buf[startpos])) startpos++; + mask = CLI_HELP_COMMAND | CLI_HELP_GROUP; + } else { + mask = CLI_HELP_COMMAND; + } + + for (i = 0; i < helpEntriesLen; i++) { + if (!(helpEntries[i].type & mask)) continue; + + matchlen = strlen(buf+startpos); + if (strncasecmp(buf+startpos,helpEntries[i].full,matchlen) == 0) { + tmp = sdsnewlen(buf,startpos); + tmp = sdscat(tmp,helpEntries[i].full); + linenoiseAddCompletion(lc,tmp); + sdsfree(tmp); + } + } +} + +/* Linenoise hints callback. */ +static char *hintsCallback(const char *buf, int *color, int *bold) { + if (!pref.hints) return NULL; + + int i, argc, buflen = strlen(buf); + sds *argv = sdssplitargs(buf,&argc); + int endspace = buflen && isspace(buf[buflen-1]); + + /* Check if the argument list is empty and return ASAP. 
*/ + if (argc == 0) { + sdsfreesplitres(argv,argc); + return NULL; + } + + for (i = 0; i < helpEntriesLen; i++) { + if (!(helpEntries[i].type & CLI_HELP_COMMAND)) continue; + + if (strcasecmp(argv[0],helpEntries[i].full) == 0 || + strcasecmp(buf,helpEntries[i].full) == 0) + { + *color = 90; + *bold = 0; + sds hint = sdsnew(helpEntries[i].org->params); + + /* Remove arguments from the returned hint to show only the + * ones the user did not yet typed. */ + int toremove = argc-1; + while(toremove > 0 && sdslen(hint)) { + if (hint[0] == '[') break; + if (hint[0] == ' ') toremove--; + sdsrange(hint,1,-1); + } + + /* Add an initial space if needed. */ + if (!endspace) { + sds newhint = sdsnewlen(" ",1); + newhint = sdscatsds(newhint,hint); + sdsfree(hint); + hint = newhint; + } + + sdsfreesplitres(argv,argc); + return hint; + } + } + sdsfreesplitres(argv,argc); + return NULL; +} + +static void freeHintsCallback(void *ptr) { + sdsfree(ptr); +} + +/*------------------------------------------------------------------------------ + * Networking / parsing + *--------------------------------------------------------------------------- */ + +/* Unquote a null-terminated string and return it as a binary-safe sds. 
*/ +static sds unquoteCString(char *str) { + int count; + sds *unquoted = sdssplitargs(str, &count); + sds res = NULL; + + if (unquoted && count == 1) { + res = unquoted[0]; + unquoted[0] = NULL; + } + + if (unquoted) + sdsfreesplitres(unquoted, count); + + return res; +} + +/* Send AUTH command to the server */ +static int cliAuth(redisContext *ctx, char *user, char *auth) { + redisReply *reply; + if (auth == NULL) return REDIS_OK; + + if (user == NULL) + reply = redisCommand(ctx,"AUTH %s",auth); + else + reply = redisCommand(ctx,"AUTH %s %s",user,auth); + + if (reply == NULL) { + fprintf(stderr, "\nI/O error\n"); + return REDIS_ERR; + } + + int result = REDIS_OK; + if (reply->type == REDIS_REPLY_ERROR) { + result = REDIS_ERR; + fprintf(stderr, "AUTH failed: %s\n", reply->str); + } + freeReplyObject(reply); + return result; +} + +/* Send SELECT input_dbnum to the server */ +static int cliSelect(void) { + redisReply *reply; + if (config.input_dbnum == config.dbnum) return REDIS_OK; + + reply = redisCommand(context,"SELECT %d",config.input_dbnum); + if (reply == NULL) { + fprintf(stderr, "\nI/O error\n"); + return REDIS_ERR; + } + + int result = REDIS_OK; + if (reply->type == REDIS_REPLY_ERROR) { + result = REDIS_ERR; + fprintf(stderr,"SELECT %d failed: %s\n",config.input_dbnum,reply->str); + } else { + config.dbnum = config.input_dbnum; + cliRefreshPrompt(); + } + freeReplyObject(reply); + return result; +} + +/* Select RESP3 mode if redis-cli was started with the -3 option. */ +static int cliSwitchProto(void) { + redisReply *reply; + if (config.resp3 == 0) return REDIS_OK; + + reply = redisCommand(context,"HELLO 3"); + if (reply == NULL) { + fprintf(stderr, "\nI/O error\n"); + return REDIS_ERR; + } + + int result = REDIS_OK; + if (reply->type == REDIS_REPLY_ERROR) { + result = REDIS_ERR; + fprintf(stderr,"HELLO 3 failed: %s\n",reply->str); + } + freeReplyObject(reply); + return result; +} + +/* Connect to the server. 
It is possible to pass certain flags to the function: + * CC_FORCE: The connection is performed even if there is already + * a connected socket. + * CC_QUIET: Don't print errors if connection fails. */ +static int cliConnect(int flags) { + if (context == NULL || flags & CC_FORCE) { + if (context != NULL) { + redisFree(context); + config.dbnum = 0; + config.in_multi = 0; + cliRefreshPrompt(); + } + + /* Do not use hostsocket when we got redirected in cluster mode */ + if (config.hostsocket == NULL || + (config.cluster_mode && config.cluster_reissue_command)) { + context = redisConnect(config.hostip,config.hostport); + } else { + context = redisConnectUnix(config.hostsocket); + } + + if (!context->err && config.tls) { + const char *err = NULL; + if (cliSecureConnection(context, config.sslconfig, &err) == REDIS_ERR && err) { + fprintf(stderr, "Could not negotiate a TLS connection: %s\n", err); + redisFree(context); + context = NULL; + return REDIS_ERR; + } + } + + if (context->err) { + if (!(flags & CC_QUIET)) { + fprintf(stderr,"Could not connect to futriix at "); + if (config.hostsocket == NULL || + (config.cluster_mode && config.cluster_reissue_command)) + { + fprintf(stderr, "%s:%d: %s\n", + config.hostip,config.hostport,context->errstr); + } else { + fprintf(stderr,"%s: %s\n", + config.hostsocket,context->errstr); + } + } + redisFree(context); + context = NULL; + return REDIS_ERR; + } + + + /* Set aggressive KEEP_ALIVE socket option in the Redis context socket + * in order to prevent timeouts caused by the execution of long + * commands. At the same time this improves the detection of real + * errors. */ + anetKeepAlive(NULL, context->fd, REDIS_CLI_KEEPALIVE_INTERVAL); + + /* Do AUTH, select the right DB, switch to RESP3 if needed. 
*/ + if (cliAuth(context, config.user, config.auth) != REDIS_OK) + return REDIS_ERR; + if (cliSelect() != REDIS_OK) + return REDIS_ERR; + if (cliSwitchProto() != REDIS_OK) + return REDIS_ERR; + } + + /* Set a PUSH handler if configured to do so. */ + if (config.push_output) { + redisSetPushCallback(context, cliPushHandler); + } + + return REDIS_OK; +} + +/* In cluster, if server replies ASK, we will redirect to a different node. + * Before sending the real command, we need to send ASKING command first. */ +static int cliSendAsking() { + redisReply *reply; + + config.cluster_send_asking = 0; + if (context == NULL) { + return REDIS_ERR; + } + reply = redisCommand(context,"ASKING"); + if (reply == NULL) { + fprintf(stderr, "\nI/O error\n"); + return REDIS_ERR; + } + int result = REDIS_OK; + if (reply->type == REDIS_REPLY_ERROR) { + result = REDIS_ERR; + fprintf(stderr,"ASKING failed: %s\n",reply->str); + } + freeReplyObject(reply); + return result; +} + +static void cliPrintContextError(void) { + if (context == NULL) return; + fprintf(stderr,"Error: %s\n",context->errstr); +} + +static int isInvalidateReply(redisReply *reply) { + return reply->type == REDIS_REPLY_PUSH && reply->elements == 2 && + reply->element[0]->type == REDIS_REPLY_STRING && + !strncmp(reply->element[0]->str, "invalidate", 10) && + reply->element[1]->type == REDIS_REPLY_ARRAY; +} + +/* Special display handler for RESP3 'invalidate' messages. 
+ * This function does not validate the reply, so it should + * already be confirmed correct */ +static sds cliFormatInvalidateTTY(redisReply *r) { + sds out = sdsnew("-> invalidate: "); + + for (size_t i = 0; i < r->element[1]->elements; i++) { + redisReply *key = r->element[1]->element[i]; + assert(key->type == REDIS_REPLY_STRING); + + out = sdscatfmt(out, "'%s'", key->str, key->len); + if (i < r->element[1]->elements - 1) + out = sdscatlen(out, ", ", 2); + } + + return sdscatlen(out, "\n", 1); +} + +static sds cliFormatReplyTTY(redisReply *r, char *prefix) { + sds out = sdsempty(); + switch (r->type) { + case REDIS_REPLY_ERROR: + out = sdscatprintf(out,"(error) %s\n", r->str); + break; + case REDIS_REPLY_STATUS: + out = sdscat(out,r->str); + out = sdscat(out,"\n"); + break; + case REDIS_REPLY_INTEGER: + out = sdscatprintf(out,"(integer) %lld\n",r->integer); + break; + case REDIS_REPLY_DOUBLE: + out = sdscatprintf(out,"(double) %s\n",r->str); + break; + case REDIS_REPLY_STRING: + case REDIS_REPLY_VERB: + /* If you are producing output for the standard output we want + * a more interesting output with quoted characters and so forth, + * unless it's a verbatim string type. */ + if (r->type == REDIS_REPLY_STRING) { + out = sdscatrepr(out,r->str,r->len); + out = sdscat(out,"\n"); + } else { + out = sdscatlen(out,r->str,r->len); + out = sdscat(out,"\n"); + } + break; + case REDIS_REPLY_NIL: + out = sdscat(out,"(nil)\n"); + break; + case REDIS_REPLY_BOOL: + out = sdscat(out,r->integer ? 
"(true)\n" : "(false)\n"); + break; + case REDIS_REPLY_ARRAY: + case REDIS_REPLY_MAP: + case REDIS_REPLY_SET: + case REDIS_REPLY_PUSH: + if (r->elements == 0) { + if (r->type == REDIS_REPLY_ARRAY) + out = sdscat(out,"(empty array)\n"); + else if (r->type == REDIS_REPLY_MAP) + out = sdscat(out,"(empty hash)\n"); + else if (r->type == REDIS_REPLY_SET) + out = sdscat(out,"(empty set)\n"); + else if (r->type == REDIS_REPLY_PUSH) + out = sdscat(out,"(empty push)\n"); + else + out = sdscat(out,"(empty aggregate type)\n"); + } else { + unsigned int i, idxlen = 0; + char _prefixlen[16]; + char _prefixfmt[16]; + sds _prefix; + sds tmp; + + /* Calculate chars needed to represent the largest index */ + i = r->elements; + if (r->type == REDIS_REPLY_MAP) i /= 2; + do { + idxlen++; + i /= 10; + } while(i); + + /* Prefix for nested multi bulks should grow with idxlen+2 spaces */ + memset(_prefixlen,' ',idxlen+2); + _prefixlen[idxlen+2] = '\0'; + _prefix = sdscat(sdsnew(prefix),_prefixlen); + + /* Setup prefix format for every entry */ + char numsep; + if (r->type == REDIS_REPLY_SET) numsep = '~'; + else if (r->type == REDIS_REPLY_MAP) numsep = '#'; + else numsep = ')'; + snprintf(_prefixfmt,sizeof(_prefixfmt),"%%s%%%ud%c ",idxlen,numsep); + + for (i = 0; i < r->elements; i++) { + unsigned int human_idx = (r->type == REDIS_REPLY_MAP) ? + i/2 : i; + human_idx++; /* Make it 1-based. */ + + /* Don't use the prefix for the first element, as the parent + * caller already prepended the index number. */ + out = sdscatprintf(out,_prefixfmt,i == 0 ? "" : prefix,human_idx); + + /* Format the multi bulk entry */ + tmp = cliFormatReplyTTY(r->element[i],_prefix); + out = sdscatlen(out,tmp,sdslen(tmp)); + sdsfree(tmp); + + /* For maps, format the value as well. 
*/ + if (r->type == REDIS_REPLY_MAP) { + i++; + sdsrange(out,0,-2); + out = sdscat(out," => "); + tmp = cliFormatReplyTTY(r->element[i],_prefix); + out = sdscatlen(out,tmp,sdslen(tmp)); + sdsfree(tmp); + } + } + sdsfree(_prefix); + } + break; + default: + fprintf(stderr,"Unknown reply type: %d\n", r->type); + exit(1); + } + return out; +} + +int isColorTerm(void) { + char *t = getenv("TERM"); + return t != NULL && strstr(t,"xterm") != NULL; +} + +/* Helper function for sdsCatColorizedLdbReply() appending colorize strings + * to an SDS string. */ +sds sdscatcolor(sds o, char *s, size_t len, char *color) { + if (!isColorTerm()) return sdscatlen(o,s,len); + + int bold = strstr(color,"bold") != NULL; + int ccode = 37; /* Defaults to white. */ + if (strstr(color,"red")) ccode = 31; + else if (strstr(color,"green")) ccode = 32; + else if (strstr(color,"yellow")) ccode = 33; + else if (strstr(color,"blue")) ccode = 34; + else if (strstr(color,"magenta")) ccode = 35; + else if (strstr(color,"cyan")) ccode = 36; + else if (strstr(color,"white")) ccode = 37; + + o = sdscatfmt(o,"\033[%i;%i;49m",bold,ccode); + o = sdscatlen(o,s,len); + o = sdscat(o,"\033[0m"); + return o; +} + +/* Colorize Lua debugger status replies according to the prefix they + * have. */ +sds sdsCatColorizedLdbReply(sds o, char *s, size_t len) { + char *color = "white"; + + if (strstr(s,"")) color = "bold"; + if (strstr(s,"")) color = "green"; + if (strstr(s,"")) color = "cyan"; + if (strstr(s,"")) color = "red"; + if (strstr(s,"")) color = "bold"; + if (strstr(s,"") || strstr(s,"")) color = "magenta"; + if (len > 4 && isdigit(s[3])) { + if (s[1] == '>') color = "yellow"; /* Current line. */ + else if (s[2] == '#') color = "bold"; /* Break point. */ + } + return sdscatcolor(o,s,len,color); +} + +static sds cliFormatReplyRaw(redisReply *r) { + sds out = sdsempty(), tmp; + size_t i; + + switch (r->type) { + case REDIS_REPLY_NIL: + /* Nothing... 
*/ + break; + case REDIS_REPLY_ERROR: + out = sdscatlen(out,r->str,r->len); + out = sdscatlen(out,"\n",1); + break; + case REDIS_REPLY_STATUS: + case REDIS_REPLY_STRING: + case REDIS_REPLY_VERB: + if (r->type == REDIS_REPLY_STATUS && config.eval_ldb) { + /* The Lua debugger replies with arrays of simple (status) + * strings. We colorize the output for more fun if this + * is a debugging session. */ + + /* Detect the end of a debugging session. */ + if (strstr(r->str,"") == r->str) { + config.enable_ldb_on_eval = 0; + config.eval_ldb = 0; + config.eval_ldb_end = 1; /* Signal the caller session ended. */ + config.output = OUTPUT_STANDARD; + cliRefreshPrompt(); + } else { + out = sdsCatColorizedLdbReply(out,r->str,r->len); + } + } else { + out = sdscatlen(out,r->str,r->len); + } + break; + case REDIS_REPLY_BOOL: + out = sdscat(out,r->integer ? "(true)" : "(false)"); + break; + case REDIS_REPLY_INTEGER: + out = sdscatprintf(out,"%lld",r->integer); + break; + case REDIS_REPLY_DOUBLE: + out = sdscatprintf(out,"%s",r->str); + break; + case REDIS_REPLY_SET: + case REDIS_REPLY_ARRAY: + case REDIS_REPLY_PUSH: + for (i = 0; i < r->elements; i++) { + if (i > 0) out = sdscat(out,config.mb_delim); + tmp = cliFormatReplyRaw(r->element[i]); + out = sdscatlen(out,tmp,sdslen(tmp)); + sdsfree(tmp); + } + break; + case REDIS_REPLY_MAP: + for (i = 0; i < r->elements; i += 2) { + if (i > 0) out = sdscat(out,config.mb_delim); + tmp = cliFormatReplyRaw(r->element[i]); + out = sdscatlen(out,tmp,sdslen(tmp)); + sdsfree(tmp); + + out = sdscatlen(out," ",1); + tmp = cliFormatReplyRaw(r->element[i+1]); + out = sdscatlen(out,tmp,sdslen(tmp)); + sdsfree(tmp); + } + break; + default: + fprintf(stderr,"Unknown reply type: %d\n", r->type); + exit(1); + } + return out; +} + +static sds cliFormatReplyCSV(redisReply *r) { + unsigned int i; + + sds out = sdsempty(); + switch (r->type) { + case REDIS_REPLY_ERROR: + out = sdscat(out,"ERROR,"); + out = sdscatrepr(out,r->str,strlen(r->str)); + break; + 
case REDIS_REPLY_STATUS: + out = sdscatrepr(out,r->str,r->len); + break; + case REDIS_REPLY_INTEGER: + out = sdscatprintf(out,"%lld",r->integer); + break; + case REDIS_REPLY_DOUBLE: + out = sdscatprintf(out,"%s",r->str); + break; + case REDIS_REPLY_STRING: + case REDIS_REPLY_VERB: + out = sdscatrepr(out,r->str,r->len); + break; + case REDIS_REPLY_NIL: + out = sdscat(out,"NULL"); + break; + case REDIS_REPLY_BOOL: + out = sdscat(out,r->integer ? "true" : "false"); + break; + case REDIS_REPLY_ARRAY: + case REDIS_REPLY_SET: + case REDIS_REPLY_PUSH: + case REDIS_REPLY_MAP: /* CSV has no map type, just output flat list. */ + for (i = 0; i < r->elements; i++) { + sds tmp = cliFormatReplyCSV(r->element[i]); + out = sdscatlen(out,tmp,sdslen(tmp)); + if (i != r->elements-1) out = sdscat(out,","); + sdsfree(tmp); + } + break; + default: + fprintf(stderr,"Unknown reply type: %d\n", r->type); + exit(1); + } + return out; +} + +/* Generate reply strings in various output modes */ +static sds cliFormatReply(redisReply *reply, int mode, int verbatim) { + sds out; + + if (verbatim) { + out = cliFormatReplyRaw(reply); + } else if (mode == OUTPUT_STANDARD) { + out = cliFormatReplyTTY(reply, ""); + } else if (mode == OUTPUT_RAW) { + out = cliFormatReplyRaw(reply); + out = sdscatsds(out, config.cmd_delim); + } else if (mode == OUTPUT_CSV) { + out = cliFormatReplyCSV(reply); + out = sdscatlen(out, "\n", 1); + } else { + fprintf(stderr, "Error: Unknown output encoding %d\n", mode); + exit(1); + } + + return out; +} + +/* Output any spontaneous PUSH reply we receive */ +static void cliPushHandler(void *privdata, void *reply) { + UNUSED(privdata); + sds out; + + if (config.output == OUTPUT_STANDARD && isInvalidateReply(reply)) { + out = cliFormatInvalidateTTY(reply); + } else { + out = cliFormatReply(reply, config.output, 0); + } + + fwrite(out, sdslen(out), 1, stdout); + + freeReplyObject(reply); + sdsfree(out); +} + +static int cliReadReply(int output_raw_strings) { + void *_reply; + 
redisReply *reply; + sds out = NULL; + int output = 1; + + if (redisGetReply(context,&_reply) != REDIS_OK) { + if (config.shutdown) { + redisFree(context); + context = NULL; + return REDIS_OK; + } + if (config.interactive) { + /* Filter cases where we should reconnect */ + if (context->err == REDIS_ERR_IO && + (errno == ECONNRESET || errno == EPIPE)) + return REDIS_ERR; + if (context->err == REDIS_ERR_EOF) + return REDIS_ERR; + } + cliPrintContextError(); + exit(1); + return REDIS_ERR; /* avoid compiler warning */ + } + + reply = (redisReply*)_reply; + + config.last_cmd_type = reply->type; + + /* Check if we need to connect to a different node and reissue the + * request. */ + if (config.cluster_mode && reply->type == REDIS_REPLY_ERROR && + (!strncmp(reply->str,"MOVED ",6) || !strncmp(reply->str,"ASK ",4))) + { + char *p = reply->str, *s; + int slot; + + output = 0; + /* Comments show the position of the pointer as: + * + * [S] for pointer 's' + * [P] for pointer 'p' + */ + s = strchr(p,' '); /* MOVED[S]3999 127.0.0.1:6381 */ + p = strchr(s+1,' '); /* MOVED[S]3999[P]127.0.0.1:6381 */ + *p = '\0'; + slot = atoi(s+1); + s = strrchr(p+1,':'); /* MOVED 3999[P]127.0.0.1[S]6381 */ + *s = '\0'; + sdsfree(config.hostip); + config.hostip = sdsnew(p+1); + config.hostport = atoi(s+1); + if (config.interactive) + printf("-> Redirected to slot [%d] located at %s:%d\n", + slot, config.hostip, config.hostport); + config.cluster_reissue_command = 1; + if (!strncmp(reply->str,"ASK ",4)) { + config.cluster_send_asking = 1; + } + cliRefreshPrompt(); + } else if (!config.interactive && config.set_errcode && + reply->type == REDIS_REPLY_ERROR) + { + fprintf(stderr,"%s\n",reply->str); + exit(1); + return REDIS_ERR; /* avoid compiler warning */ + } + + if (output) { + out = cliFormatReply(reply, config.output, output_raw_strings); + fwrite(out,sdslen(out),1,stdout); + fflush(stdout); + sdsfree(out); + } + freeReplyObject(reply); + return REDIS_OK; +} + +static int cliSendCommand(int 
argc, char **argv, long repeat) { + char *command = argv[0]; + size_t *argvlen; + int j, output_raw; + + if (!config.eval_ldb && /* In debugging mode, let's pass "help" to Redis. */ + (!strcasecmp(command,"help") || !strcasecmp(command,"?"))) { + cliOutputHelp(--argc, ++argv); + return REDIS_OK; + } + + if (context == NULL) return REDIS_ERR; + + output_raw = 0; + if (!strcasecmp(command,"info") || + !strcasecmp(command,"lolwut") || + (argc >= 2 && !strcasecmp(command,"debug") && + !strcasecmp(argv[1],"htstats")) || + (argc >= 2 && !strcasecmp(command,"debug") && + !strcasecmp(argv[1],"htstats-key")) || + (argc >= 2 && !strcasecmp(command,"memory") && + (!strcasecmp(argv[1],"malloc-stats") || + !strcasecmp(argv[1],"doctor"))) || + (argc == 2 && !strcasecmp(command,"cluster") && + (!strcasecmp(argv[1],"nodes") || + !strcasecmp(argv[1],"info"))) || + (argc >= 2 && !strcasecmp(command,"client") && + (!strcasecmp(argv[1],"list") || + !strcasecmp(argv[1],"info"))) || + (argc == 3 && !strcasecmp(command,"latency") && + !strcasecmp(argv[1],"graph")) || + (argc == 2 && !strcasecmp(command,"latency") && + !strcasecmp(argv[1],"doctor")) || + /* Format PROXY INFO command for Redis Cluster Proxy: + * https://github.com/artix75/redis-cluster-proxy */ + (argc >= 2 && !strcasecmp(command,"proxy") && + !strcasecmp(argv[1],"info"))) + { + output_raw = 1; + } + + if (!strcasecmp(command,"shutdown")) config.shutdown = 1; + if (!strcasecmp(command,"monitor")) config.monitor_mode = 1; + if (!strcasecmp(command,"subscribe") || + !strcasecmp(command,"psubscribe")) config.pubsub_mode = 1; + if (!strcasecmp(command,"sync") || + !strcasecmp(command,"psync")) config.slave_mode = 1; + + /* When the user manually calls SCRIPT DEBUG, setup the activation of + * debugging mode on the next eval if needed. 
*/ + if (argc == 3 && !strcasecmp(argv[0],"script") && + !strcasecmp(argv[1],"debug")) + { + if (!strcasecmp(argv[2],"yes") || !strcasecmp(argv[2],"sync")) { + config.enable_ldb_on_eval = 1; + } else { + config.enable_ldb_on_eval = 0; + } + } + + /* Actually activate LDB on EVAL if needed. */ + if (!strcasecmp(command,"eval") && config.enable_ldb_on_eval) { + config.eval_ldb = 1; + config.output = OUTPUT_RAW; + } + + /* Setup argument length */ + argvlen = zmalloc(argc*sizeof(size_t), MALLOC_LOCAL); + for (j = 0; j < argc; j++) + argvlen[j] = sdslen(argv[j]); + + /* Negative repeat is allowed and causes infinite loop, + works well with the interval option. */ + while(repeat < 0 || repeat-- > 0) { + redisAppendCommandArgv(context,argc,(const char**)argv,argvlen); + while (config.monitor_mode) { + if (cliReadReply(output_raw) != REDIS_OK) exit(1); + fflush(stdout); + } + + if (config.pubsub_mode) { + if (config.output != OUTPUT_RAW) + printf("Reading messages... (press Ctrl-C to quit)\n"); + + /* Unset our default PUSH handler so this works in RESP2/RESP3 */ + redisSetPushCallback(context, NULL); + + while (config.pubsub_mode) { + if (cliReadReply(output_raw) != REDIS_OK) exit(1); + if (config.last_cmd_type == REDIS_REPLY_ERROR) { + if (config.push_output) { + redisSetPushCallback(context, cliPushHandler); + } + config.pubsub_mode = 0; + } + } + continue; + } + + if (config.slave_mode) { + printf("Entering replica output mode... (press Ctrl-C to quit)\n"); + slaveMode(); + config.slave_mode = 0; + zfree(argvlen); + return REDIS_ERR; /* Error = slaveMode lost connection to master */ + } + + if (cliReadReply(output_raw) != REDIS_OK) { + zfree(argvlen); + return REDIS_ERR; + } else { + /* Store database number when SELECT was successfully executed. 
*/ + if (!strcasecmp(command,"select") && argc == 2 && + config.last_cmd_type != REDIS_REPLY_ERROR) + { + config.input_dbnum = config.dbnum = atoi(argv[1]); + cliRefreshPrompt(); + } else if (!strcasecmp(command,"auth") && (argc == 2 || argc == 3)) { + cliSelect(); + } else if (!strcasecmp(command,"multi") && argc == 1 && + config.last_cmd_type != REDIS_REPLY_ERROR) + { + config.in_multi = 1; + config.pre_multi_dbnum = config.dbnum; + cliRefreshPrompt(); + } else if (!strcasecmp(command,"exec") && argc == 1 && config.in_multi) { + config.in_multi = 0; + if (config.last_cmd_type == REDIS_REPLY_ERROR || + config.last_cmd_type == REDIS_REPLY_NIL) + { + config.input_dbnum = config.dbnum = config.pre_multi_dbnum; + } + cliRefreshPrompt(); + } else if (!strcasecmp(command,"discard") && argc == 1 && + config.last_cmd_type != REDIS_REPLY_ERROR) + { + config.in_multi = 0; + config.input_dbnum = config.dbnum = config.pre_multi_dbnum; + cliRefreshPrompt(); + } + } + if (config.cluster_reissue_command){ + /* If we need to reissue the command, break to prevent a + further 'repeat' number of dud interactions */ + break; + } + if (config.interval) usleep(config.interval); + fflush(stdout); /* Make it grep friendly */ + } + + zfree(argvlen); + return REDIS_OK; +} + +/* Send a command reconnecting the link if needed. */ +static redisReply *reconnectingRedisCommand(redisContext *c, const char *fmt, ...) { + redisReply *reply = NULL; + int tries = 0; + va_list ap; + + assert(!c->err); + while(reply == NULL) { + while (c->err & (REDIS_ERR_IO | REDIS_ERR_EOF)) { + printf("\r\x1b[0K"); /* Cursor to left edge + clear line. */ + printf("Reconnecting... 
%d\r", ++tries); + fflush(stdout); + + redisFree(c); + c = redisConnect(config.hostip,config.hostport); + if (!c->err && config.tls) { + const char *err = NULL; + if (cliSecureConnection(c, config.sslconfig, &err) == REDIS_ERR && err) { + fprintf(stderr, "TLS Error: %s\n", err); + exit(1); + } + } + usleep(1000000); + } + + va_start(ap,fmt); + reply = redisvCommand(c,fmt,ap); + va_end(ap); + + if (c->err && !(c->err & (REDIS_ERR_IO | REDIS_ERR_EOF))) { + fprintf(stderr, "Error: %s\n", c->errstr); + exit(1); + } else if (tries > 0) { + printf("\r\x1b[0K"); /* Cursor to left edge + clear line. */ + } + } + + context = c; + return reply; +} + +/*------------------------------------------------------------------------------ + * User interface + *--------------------------------------------------------------------------- */ + +static int parseOptions(int argc, char **argv) { + int i; + + for (i = 1; i < argc; i++) { + int lastarg = i==argc-1; + + if (!strcmp(argv[i],"-h") && !lastarg) { + sdsfree(config.hostip); + config.hostip = sdsnew(argv[++i]); + } else if (!strcmp(argv[i],"-h") && lastarg) { + usage(); + } else if (!strcmp(argv[i],"--help")) { + usage(); + } else if (!strcmp(argv[i],"-x")) { + config.stdinarg = 1; + } else if (!strcmp(argv[i],"-p") && !lastarg) { + config.hostport = atoi(argv[++i]); + } else if (!strcmp(argv[i],"-s") && !lastarg) { + config.hostsocket = argv[++i]; + } else if (!strcmp(argv[i],"-r") && !lastarg) { + config.repeat = strtoll(argv[++i],NULL,10); + } else if (!strcmp(argv[i],"-i") && !lastarg) { + double seconds = atof(argv[++i]); + config.interval = seconds*1000000; + } else if (!strcmp(argv[i],"-n") && !lastarg) { + config.input_dbnum = atoi(argv[++i]); + } else if (!strcmp(argv[i], "--no-auth-warning")) { + config.no_auth_warning = 1; + } else if (!strcmp(argv[i], "--askpass")) { + config.askpass = 1; + } else if ((!strcmp(argv[i],"-a") || !strcmp(argv[i],"--pass")) + && !lastarg) + { + config.auth = argv[++i]; + } else if 
(!strcmp(argv[i],"--user") && !lastarg) { + config.user = argv[++i]; + } else if (!strcmp(argv[i],"-u") && !lastarg) { + parseRedisUri(argv[++i]); + } else if (!strcmp(argv[i],"--raw")) { + config.output = OUTPUT_RAW; + } else if (!strcmp(argv[i],"--no-raw")) { + config.output = OUTPUT_STANDARD; + } else if (!strcmp(argv[i],"--quoted-input")) { + config.quoted_input = 1; + } else if (!strcmp(argv[i],"--csv")) { + config.output = OUTPUT_CSV; + } else if (!strcmp(argv[i],"--latency")) { + config.latency_mode = 1; + } else if (!strcmp(argv[i],"--latency-dist")) { + config.latency_dist_mode = 1; + } else if (!strcmp(argv[i],"--mono")) { + spectrum_palette = spectrum_palette_mono; + spectrum_palette_size = spectrum_palette_mono_size; + } else if (!strcmp(argv[i],"--latency-history")) { + config.latency_mode = 1; + config.latency_history = 1; + } else if (!strcmp(argv[i],"--lru-test") && !lastarg) { + config.lru_test_mode = 1; + config.lru_test_sample_size = strtoll(argv[++i],NULL,10); + } else if (!strcmp(argv[i],"--slave")) { + config.slave_mode = 1; + } else if (!strcmp(argv[i],"--replica")) { + config.slave_mode = 1; + } else if (!strcmp(argv[i],"--stat")) { + config.stat_mode = 1; + } else if (!strcmp(argv[i],"--scan")) { + config.scan_mode = 1; + } else if (!strcmp(argv[i],"--pattern") && !lastarg) { + sdsfree(config.pattern); + config.pattern = sdsnew(argv[++i]); + } else if (!strcmp(argv[i],"--quoted-pattern") && !lastarg) { + sdsfree(config.pattern); + config.pattern = unquoteCString(argv[++i]); + if (!config.pattern) { + fprintf(stderr,"Invalid quoted string specified for --quoted-pattern.\n"); + exit(1); + } + } else if (!strcmp(argv[i],"--intrinsic-latency") && !lastarg) { + config.intrinsic_latency_mode = 1; + config.intrinsic_latency_duration = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--rdb") && !lastarg) { + config.getrdb_mode = 1; + config.rdb_filename = argv[++i]; + } else if (!strcmp(argv[i],"--pipe")) { + config.pipe_mode = 1; + } else if 
(!strcmp(argv[i],"--pipe-timeout") && !lastarg) { + config.pipe_timeout = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--bigkeys")) { + config.bigkeys = 1; + } else if (!strcmp(argv[i],"--memkeys")) { + config.memkeys = 1; + config.memkeys_samples = 0; /* use redis default */ + } else if (!strcmp(argv[i],"--memkeys-samples")) { + config.memkeys = 1; + config.memkeys_samples = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--hotkeys")) { + config.hotkeys = 1; + } else if (!strcmp(argv[i],"--eval") && !lastarg) { + config.eval = argv[++i]; + } else if (!strcmp(argv[i],"--ldb")) { + config.eval_ldb = 1; + config.output = OUTPUT_RAW; + } else if (!strcmp(argv[i],"--ldb-sync-mode")) { + config.eval_ldb = 1; + config.eval_ldb_sync = 1; + config.output = OUTPUT_RAW; + } else if (!strcmp(argv[i],"-c")) { + config.cluster_mode = 1; + } else if (!strcmp(argv[i],"-d") && !lastarg) { + sdsfree(config.mb_delim); + config.mb_delim = sdsnew(argv[++i]); + } else if (!strcmp(argv[i],"-D") && !lastarg) { + sdsfree(config.cmd_delim); + config.cmd_delim = sdsnew(argv[++i]); + } else if (!strcmp(argv[i],"-e")) { + config.set_errcode = 1; + } else if (!strcmp(argv[i],"--verbose")) { + config.verbose = 1; + } else if (!strcmp(argv[i],"--cluster") && !lastarg) { + if (CLUSTER_MANAGER_MODE()) usage(); + char *cmd = argv[++i]; + int j = i; + while (j < argc && argv[j][0] != '-') j++; + if (j > i) j--; + createClusterManagerCommand(cmd, j - i, argv + i + 1); + i = j; + } else if (!strcmp(argv[i],"--cluster") && lastarg) { + usage(); + } else if ((!strcmp(argv[i],"--cluster-only-masters"))) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_MASTERS_ONLY; + } else if ((!strcmp(argv[i],"--cluster-only-replicas"))) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_SLAVES_ONLY; + } else if (!strcmp(argv[i],"--cluster-replicas") && !lastarg) { + config.cluster_manager_command.replicas = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-master-id") 
&& !lastarg) { + config.cluster_manager_command.master_id = argv[++i]; + } else if (!strcmp(argv[i],"--cluster-from") && !lastarg) { + config.cluster_manager_command.from = argv[++i]; + } else if (!strcmp(argv[i],"--cluster-to") && !lastarg) { + config.cluster_manager_command.to = argv[++i]; + } else if (!strcmp(argv[i],"--cluster-from-user") && !lastarg) { + config.cluster_manager_command.from_user = argv[++i]; + } else if (!strcmp(argv[i],"--cluster-from-pass") && !lastarg) { + config.cluster_manager_command.from_pass = argv[++i]; + } else if (!strcmp(argv[i], "--cluster-from-askpass")) { + config.cluster_manager_command.from_askpass = 1; + } else if (!strcmp(argv[i],"--cluster-weight") && !lastarg) { + if (config.cluster_manager_command.weight != NULL) { + fprintf(stderr, "WARNING: you cannot use --cluster-weight " + "more than once.\n" + "You can set more weights by adding them " + "as a space-separated list, ie:\n" + "--cluster-weight n1=w n2=w\n"); + exit(1); + } + int widx = i + 1; + char **weight = argv + widx; + int wargc = 0; + for (; widx < argc; widx++) { + if (strstr(argv[widx], "--") == argv[widx]) break; + if (strchr(argv[widx], '=') == NULL) break; + wargc++; + } + if (wargc > 0) { + config.cluster_manager_command.weight = weight; + config.cluster_manager_command.weight_argc = wargc; + i += wargc; + } + } else if (!strcmp(argv[i],"--cluster-slots") && !lastarg) { + config.cluster_manager_command.slots = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-timeout") && !lastarg) { + config.cluster_manager_command.timeout = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-pipeline") && !lastarg) { + config.cluster_manager_command.pipeline = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-threshold") && !lastarg) { + config.cluster_manager_command.threshold = atof(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-yes")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_YES; + } else if 
(!strcmp(argv[i],"--cluster-simulate")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_SIMULATE; + } else if (!strcmp(argv[i],"--cluster-replace")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_REPLACE; + } else if (!strcmp(argv[i],"--cluster-copy")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_COPY; + } else if (!strcmp(argv[i],"--cluster-slave")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_SLAVE; + } else if (!strcmp(argv[i],"--cluster-use-empty-masters")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; + } else if (!strcmp(argv[i],"--cluster-search-multiple-owners")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_CHECK_OWNERS; + } else if (!strcmp(argv[i],"--cluster-fix-with-unreachable-masters")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_FIX_WITH_UNREACHABLE_MASTERS; +#ifdef USE_OPENSSL + } else if (!strcmp(argv[i],"--tls")) { + config.tls = 1; + } else if (!strcmp(argv[i],"--sni") && !lastarg) { + config.sslconfig.sni = argv[++i]; + } else if (!strcmp(argv[i],"--cacertdir") && !lastarg) { + config.sslconfig.cacertdir = argv[++i]; + } else if (!strcmp(argv[i],"--cacert") && !lastarg) { + config.sslconfig.cacert = argv[++i]; + } else if (!strcmp(argv[i],"--cert") && !lastarg) { + config.sslconfig.cert = argv[++i]; + } else if (!strcmp(argv[i],"--key") && !lastarg) { + config.sslconfig.key = argv[++i]; + } else if (!strcmp(argv[i],"--tls-ciphers") && !lastarg) { + config.sslconfig.ciphers = argv[++i]; + } else if (!strcmp(argv[i],"--insecure")) { + config.sslconfig.skip_cert_verify = 1; + #ifdef TLS1_3_VERSION + } else if (!strcmp(argv[i],"--tls-ciphersuites") && !lastarg) { + config.sslconfig.ciphersuites = argv[++i]; + #endif +#endif + } else if (!strcmp(argv[i],"-v") || !strcmp(argv[i], "--version")) { + sds version = cliVersion(); + printf("futriix-cli%s\n", version); + 
sdsfree(version); + exit(0); + } else if (!strcmp(argv[i],"--no-motd")) { + config.disable_motd = 1; + } else if (!strcmp(argv[i],"-3")) { + config.resp3 = 1; + } else if (!strcmp(argv[i],"--show-pushes") && !lastarg) { + char *argval = argv[++i]; + if (!strncasecmp(argval, "n", 1)) { + config.push_output = 0; + } else if (!strncasecmp(argval, "y", 1)) { + config.push_output = 1; + } else { + fprintf(stderr, "Unknown --show-pushes value '%s' " + "(valid: '[y]es', '[n]o')\n", argval); + } + } else if (!strcmp(argv[i],"--force")) { + config.force_mode = 1; + } else if (CLUSTER_MANAGER_MODE() && argv[i][0] != '-') { + if (config.cluster_manager_command.argc == 0) { + int j = i + 1; + while (j < argc && argv[j][0] != '-') j++; + int cmd_argc = j - i; + config.cluster_manager_command.argc = cmd_argc; + config.cluster_manager_command.argv = argv + i; + if (cmd_argc > 1) i = j - 1; + } + } else { + if (argv[i][0] == '-') { + fprintf(stderr, + "Unrecognized option or bad number of args for: '%s'\n", + argv[i]); + exit(1); + } else { + /* Likely the command name, stop here. */ + break; + } + } + } + + if (config.hostsocket && config.cluster_mode) { + fprintf(stderr,"Options -c and -s are mutually exclusive.\n"); + exit(1); + } + + /* --ldb requires --eval. 
*/ + if (config.eval_ldb && config.eval == NULL) { + fprintf(stderr,"Options --ldb and --ldb-sync-mode require --eval.\n"); + fprintf(stderr,"Try %s --help for more information.\n", argv[0]); + exit(1); + } + + if (!config.no_auth_warning && config.auth != NULL) { + fputs("Warning: Using a password with '-a' or '-u' option on the command" + " line interface may not be safe.\n", stderr); + } + + return i; +} + +static void parseEnv() { + /* Set auth from env, but do not overwrite CLI arguments if passed */ + char *auth = getenv(REDIS_CLI_AUTH_ENV); + if (auth != NULL && config.auth == NULL) { + config.auth = auth; + } + + char *cluster_yes = getenv(REDIS_CLI_CLUSTER_YES_ENV); + if (cluster_yes != NULL && !strcmp(cluster_yes, "1")) { + config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_YES; + } +} + +static sds readArgFromStdin(void) { + char buf[1024]; + sds arg = sdsempty(); + + while(1) { + int nread = read(fileno(stdin),buf,1024); + + if (nread == 0) break; + else if (nread == -1) { + perror("Reading from standard input"); + exit(1); + } + arg = sdscatlen(arg,buf,nread); + } + return arg; +} + +static void usage(void) { + sds version = cliVersion(); + fprintf(stderr, +"futriix-cli%s\n" +"\n" +"Usage: futriix-cli[OPTIONS] [cmd [arg [arg ...]]]\n" +" -h Server hostname (default: 127.0.0.1).\n" +" -p Server port (default: 9880).\n" +" -s Server socket (overrides hostname and port).\n" +" -a Password to use when connecting to the server.\n" +" You can also use the " REDIS_CLI_AUTH_ENV " environment\n" +" variable to pass this password more safely\n" +" (if both are used, this argument takes precedence).\n" +" --user Used to send ACL style 'AUTH username pass'. 
Needs -a.\n" +" --pass Alias of -a for consistency with the new --user option.\n" +" --askpass Force user to input password with mask from STDIN.\n" +" If this argument is used, '-a' and " REDIS_CLI_AUTH_ENV "\n" +" environment variable will be ignored.\n" +" -u Server URI.\n" +" -r Execute specified command N times.\n" +" -i When -r is used, waits seconds per command.\n" +" It is possible to specify sub-second times like -i 0.1.\n" +" -n Database number.\n" +" -3 Start session in RESP3 protocol mode.\n" +" -x Read last argument from STDIN.\n" +" -d Delimiter between response bulks for raw formatting (default: \\n).\n" +" -D Delimiter between responses for raw formatting (default: \\n).\n" +" -c Enable cluster mode (follow -ASK and -MOVED redirections).\n" +" -e Return exit error code when command execution fails.\n" +#ifdef USE_OPENSSL +" --tls Establish a secure TLS connection.\n" +" --sni Server name indication for TLS.\n" +" --cacert CA Certificate file to verify with.\n" +" --cacertdir Directory where trusted CA certificates are stored.\n" +" If neither cacert nor cacertdir are specified, the default\n" +" system-wide trusted root certs configuration will apply.\n" +" --insecure Allow insecure TLS connection by skipping cert validation.\n" +" --cert Client certificate to authenticate with.\n" +" --key Private key file to authenticate with.\n" +" --tls-ciphers Sets the list of prefered ciphers (TLSv1.2 and below)\n" +" in order of preference from highest to lowest separated by colon (\":\").\n" +" See the ciphers(1ssl) manpage for more information about the syntax of this string.\n" +#ifdef TLS1_3_VERSION +" --tls-ciphersuites Sets the list of prefered ciphersuites (TLSv1.3)\n" +" in order of preference from highest to lowest separated by colon (\":\").\n" +" See the ciphers(1ssl) manpage for more information about the syntax of this string,\n" +" and specifically for TLSv1.3 ciphersuites.\n" +#endif +#endif +" --raw Use raw formatting for replies (default when 
STDOUT is\n" +" not a tty).\n" +" --no-raw Force formatted output even when STDOUT is not a tty.\n" +" --quoted-input Force input to be handled as quoted strings.\n" +" --csv Output in CSV format.\n" +" --show-pushes Whether to print RESP3 PUSH messages. Enabled by default when\n" +" STDOUT is a tty but can be overriden with --show-pushes no.\n" +" --stat Print rolling stats about server: mem, clients, ...\n" +" --latency Enter a special mode continuously sampling latency.\n" +" If you use this mode in an interactive session it runs\n" +" forever displaying real-time stats. Otherwise if --raw or\n" +" --csv is specified, or if you redirect the output to a non\n" +" TTY, it samples the latency for 1 second (you can use\n" +" -i to change the interval), then produces a single output\n" +" and exits.\n",version); + + fprintf(stderr, +" --latency-history Like --latency but tracking latency changes over time.\n" +" Default time interval is 15 sec. Change it using -i.\n" +" --latency-dist Shows latency as a spectrum, requires xterm 256 colors.\n" +" Default time interval is 1 sec. Change it using -i.\n" +" --lru-test Simulate a cache workload with an 80-20 distribution.\n" +" --replica Simulate a replica showing commands received from the master.\n" +" --rdb Transfer an RDB dump from remote server to local file.\n" +" Use filename of \"-\" to write to stdout.\n" +" --pipe Transfer raw KeyDB protocol from stdin to server.\n" +" --pipe-timeout In --pipe mode, abort with error if after sending all data.\n" +" no reply is received within seconds.\n" +" Default timeout: %d. 
Use 0 to wait forever.\n", + REDIS_CLI_DEFAULT_PIPE_TIMEOUT); + fprintf(stderr, +" --bigkeys Sample KeyDB keys looking for keys with many elements (complexity).\n" +" --memkeys Sample KeyDB keys looking for keys consuming a lot of memory.\n" +" --memkeys-samples Sample KeyDB keys looking for keys consuming a lot of memory.\n" +" And define number of key elements to sample\n" +" --hotkeys Sample KeyDB keys looking for hot keys.\n" +" only works when maxmemory-policy is *lfu.\n" +" --scan List all keys using the SCAN command.\n" +" --pattern Keys pattern when using the --scan, --bigkeys or --hotkeys\n" +" options (default: *).\n" +" --quoted-pattern Same as --pattern, but the specified string can be\n" +" quoted, in order to pass an otherwise non binary-safe string.\n" +" --intrinsic-latency Run a test to measure intrinsic system latency.\n" +" The test will run for the specified amount of seconds.\n" +" --eval Send an EVAL command using the Lua script at .\n" +" --ldb Used with --eval enable the Redis Lua debugger.\n" +" --ldb-sync-mode Like --ldb but uses the synchronous Lua debugger, in\n" +" this mode the server is blocked and script changes are\n" +" not rolled back from the server memory.\n" +" --cluster [args...] 
[opts...]\n" +" Cluster Manager command and arguments (see below).\n" +" --verbose Verbose mode.\n" +" --no-auth-warning Don't show warning message when using password on command\n" +" line interface.\n" +" --force Ignore validation and safety checks\n" +" --help Output this help and exit.\n" +" --version Output version and exit.\n" +"\n"); + /* Using another fprintf call to avoid -Woverlength-strings compile warning */ + fprintf(stderr, +"Cluster Manager Commands:\n" +" Use --cluster help to list all available cluster manager commands.\n" +"\n" +"Examples:\n" +" cat /etc/passwd | futriix-cli-x set mypasswd\n" +" futriix-cliget mypasswd\n" +" futriix-cli-r 100 lpush mylist x\n" +" futriix-cli-r 100 -i 1 info | grep used_memory_human:\n" +" futriix-cli--quoted-input set '\"null-\\x00-separated\"' value\n" +" futriix-cli--eval myscript.lua key1 key2 , arg1 arg2 arg3\n" +" futriix-cli--scan --pattern '*:12345*'\n" +"\n" +" (Note: when using --eval the comma separates KEYS[] from ARGV[] items)\n" +"\n" +"When no command is given, futriix-clistarts in interactive mode.\n" +"Type \"help\" in interactive mode for information on available commands\n" +"and settings.\n" +"\n"); + sdsfree(version); + exit(1); +} + +int confirmWithYes(const char *msg, int ignore_force) { + /* if --cluster-yes option is set and ignore_force is false, + * do not prompt for an answer */ + if (!ignore_force && + (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_YES)) { + return 1; + } + + printf("%s (type 'yes' to accept): ", msg); + fflush(stdout); + char buf[4]; + int nread = read(fileno(stdin),buf,4); + buf[3] = '\0'; + return (nread != 0 && !strcmp("yes", buf)); +} + +/* Create an sds array from argv, either as-is or by dequoting every + * element. When quoted is non-zero, may return a NULL to indicate an + * invalid quoted string. 
+ */ +static sds *getSdsArrayFromArgv(int argc, char **argv, int quoted) { + sds *res = sds_malloc(sizeof(sds) * argc); + + for (int j = 0; j < argc; j++) { + if (quoted) { + sds unquoted = unquoteCString(argv[j]); + if (!unquoted) { + while (--j >= 0) sdsfree(res[j]); + sds_free(res); + return NULL; + } + res[j] = unquoted; + } else { + res[j] = sdsnew(argv[j]); + } + } + + return res; +} + +static int issueCommandRepeat(int argc, char **argv, long repeat) { + while (1) { + if (config.cluster_reissue_command || context == NULL || + context->err == REDIS_ERR_IO || context->err == REDIS_ERR_EOF) + { + if (cliConnect(CC_FORCE) != REDIS_OK) { + cliPrintContextError(); + config.cluster_reissue_command = 0; + return REDIS_ERR; + } + } + config.cluster_reissue_command = 0; + if (config.cluster_send_asking) { + if (cliSendAsking() != REDIS_OK) { + cliPrintContextError(); + return REDIS_ERR; + } + } + if (cliSendCommand(argc,argv,repeat) != REDIS_OK) { + cliPrintContextError(); + return REDIS_ERR; + } + + /* Issue the command again if we got redirected in cluster mode */ + if (config.cluster_mode && config.cluster_reissue_command) { + continue; + } + break; + } + return REDIS_OK; +} + +static int issueCommand(int argc, char **argv) { + return issueCommandRepeat(argc, argv, config.repeat); +} + +/* Split the user provided command into multiple SDS arguments. + * This function normally uses sdssplitargs() from sds.c which is able + * to understand "quoted strings", escapes and so forth. However when + * we are in Lua debugging mode and the "eval" command is used, we want + * the remaining Lua script (after "e " or "eval ") to be passed verbatim + * as a single big argument. */ +static sds *cliSplitArgs(char *line, int *argc) { + if (config.eval_ldb && (strstr(line,"eval ") == line || + strstr(line,"e ") == line)) + { + sds *argv = sds_malloc(sizeof(sds)*2); + *argc = 2; + int len = strlen(line); + int elen = line[1] == ' ' ? 2 : 5; /* "e " or "eval "? 
*/ + argv[0] = sdsnewlen(line,elen-1); + argv[1] = sdsnewlen(line+elen,len-elen); + return argv; + } else { + return sdssplitargs(line,argc); + } +} + +/* Set the CLI preferences. This function is invoked when an interactive + * ":command" is called, or when reading ~/.redisclirc file, in order to + * set user preferences. */ +void cliSetPreferences(char **argv, int argc, int interactive) { + if (!strcasecmp(argv[0],":set") && argc >= 2) { + if (!strcasecmp(argv[1],"hints")) pref.hints = 1; + else if (!strcasecmp(argv[1],"nohints")) pref.hints = 0; + else { + printf("%sunknown futriix-clipreference '%s'\n", + interactive ? "" : ".redisclirc: ", + argv[1]); + } + } else { + printf("%sunknown futriix-cliinternal command '%s'\n", + interactive ? "" : ".redisclirc: ", + argv[0]); + } +} + +/* Load the ~/.redisclirc file if any. */ +void cliLoadPreferences(void) { + sds rcfile = getDotfilePath(REDIS_CLI_RCFILE_ENV,REDIS_CLI_RCFILE_DEFAULT); + if (rcfile == NULL) return; + FILE *fp = fopen(rcfile,"r"); + char buf[1024]; + + if (fp) { + while(fgets(buf,sizeof(buf),fp) != NULL) { + sds *argv; + int argc; + + argv = sdssplitargs(buf,&argc); + if (argc > 0) cliSetPreferences(argv,argc,0); + sdsfreesplitres(argv,argc); + } + fclose(fp); + } + sdsfree(rcfile); +} + +static void repl(void) { + sds historyfile = NULL; + int history = 0; + char *line; + int argc; + sds *argv; + + /* Initialize the help and, if possible, use the COMMAND command in order + * to retrieve missing entries. */ + cliInitHelp(); + cliIntegrateHelp(); + + config.interactive = 1; + linenoiseSetMultiLine(1); + linenoiseSetCompletionCallback(completionCallback); + linenoiseSetHintsCallback(hintsCallback); + linenoiseSetFreeHintsCallback(freeHintsCallback); + + /* Only use history and load the rc file when stdin is a tty. 
*/ + if (isatty(fileno(stdin))) { + historyfile = getDotfilePath(REDIS_CLI_HISTFILE_ENV,REDIS_CLI_HISTFILE_DEFAULT); + //keep in-memory history always regardless if history file can be determined + history = 1; + if (historyfile != NULL) { + linenoiseHistoryLoad(historyfile); + } + cliLoadPreferences(); + } + + cliRefreshPrompt(); + while((line = linenoise(context ? config.prompt : "not connected> ")) != NULL) { + if (line[0] != '\0') { + long repeat = 1; + int skipargs = 0; + char *endptr = NULL; + + argv = cliSplitArgs(line,&argc); + + /* check if we have a repeat command option and + * need to skip the first arg */ + if (argv && argc > 0) { + errno = 0; + repeat = strtol(argv[0], &endptr, 10); + if (argc > 1 && *endptr == '\0') { + if (errno == ERANGE || errno == EINVAL || repeat <= 0) { + fputs("Invalid futriix-clirepeat command option value.\n", stdout); + sdsfreesplitres(argv, argc); + linenoiseFree(line); + continue; + } + skipargs = 1; + } else { + repeat = 1; + } + } + + /* Won't save auth or acl setuser commands in history file */ + int dangerous = 0; + if (argv && argc > 0) { + if (!strcasecmp(argv[skipargs], "auth")) { + dangerous = 1; + } else if (skipargs+1 < argc && + !strcasecmp(argv[skipargs], "acl") && + !strcasecmp(argv[skipargs+1], "setuser")) + { + dangerous = 1; + } + } + + if (!dangerous) { + if (history) linenoiseHistoryAdd(line); + if (historyfile) linenoiseHistorySave(historyfile); + } + + if (argv == NULL) { + printf("Invalid argument(s)\n"); + fflush(stdout); + linenoiseFree(line); + continue; + } else if (argc > 0) { + if (strcasecmp(argv[0],"quit") == 0 || + strcasecmp(argv[0],"exit") == 0) + { + exit(0); + } else if (argv[0][0] == ':') { + cliSetPreferences(argv,argc,1); + sdsfreesplitres(argv,argc); + linenoiseFree(line); + continue; + } else if (strcasecmp(argv[0],"restart") == 0) { + if (config.eval) { + config.eval_ldb = 1; + config.output = OUTPUT_RAW; + sdsfreesplitres(argv,argc); + linenoiseFree(line); + return; /* Return to 
evalMode to restart the session. */ + } else { + printf("Use 'restart' only in Lua debugging mode."); + } + } else if (argc == 3 && !strcasecmp(argv[0],"connect")) { + sdsfree(config.hostip); + config.hostip = sdsnew(argv[1]); + config.hostport = atoi(argv[2]); + cliRefreshPrompt(); + cliConnect(CC_FORCE); + } else if (argc == 1 && !strcasecmp(argv[0],"clear")) { + linenoiseClearScreen(); + } else { + long long start_time = mstime(), elapsed; + + issueCommandRepeat(argc-skipargs, argv+skipargs, repeat); + + /* If our debugging session ended, show the EVAL final + * reply. */ + if (config.eval_ldb_end) { + config.eval_ldb_end = 0; + cliReadReply(0); + printf("\n(Lua debugging session ended%s)\n\n", + config.eval_ldb_sync ? "" : + " -- dataset changes rolled back"); + } + + elapsed = mstime()-start_time; + if (elapsed >= 500 && + config.output == OUTPUT_STANDARD) + { + printf("(%.2fs)\n",(double)elapsed/1000); + } + } + } + /* Free the argument vector */ + sdsfreesplitres(argv,argc); + } + /* linenoise() returns malloc-ed lines like readline() */ + linenoiseFree(line); + } + exit(0); +} + +static int noninteractive(int argc, char **argv) { + int retval = 0; + sds *sds_args = getSdsArrayFromArgv(argc, argv, config.quoted_input); + if (!sds_args) { + printf("Invalid quoted string\n"); + return 1; + } + if (config.stdinarg) { + sds_args = sds_realloc(sds_args, (argc + 1) * sizeof(sds)); + sds_args[argc] = readArgFromStdin(); + argc++; + } + + retval = issueCommand(argc, sds_args); + sdsfreesplitres(sds_args, argc); + return retval; +} + +/*------------------------------------------------------------------------------ + * Eval mode + *--------------------------------------------------------------------------- */ + +static int evalMode(int argc, char **argv) { + sds script = NULL; + FILE *fp; + char buf[1024]; + size_t nread; + char **argv2; + int j, got_comma, keys; + int retval = REDIS_OK; + + while(1) { + if (config.eval_ldb) { + printf( + "Lua debugging session 
started, please use:\n" + "quit -- End the session.\n" + "restart -- Restart the script in debug mode again.\n" + "help -- Show Lua script debugging commands.\n\n" + ); + } + + sdsfree(script); + script = sdsempty(); + got_comma = 0; + keys = 0; + + /* Load the script from the file, as an sds string. */ + fp = fopen(config.eval,"r"); + if (!fp) { + fprintf(stderr, + "Can't open file '%s': %s\n", config.eval, strerror(errno)); + exit(1); + } + while((nread = fread(buf,1,sizeof(buf),fp)) != 0) { + script = sdscatlen(script,buf,nread); + } + fclose(fp); + + /* If we are debugging a script, enable the Lua debugger. */ + if (config.eval_ldb) { + redisReply *reply = redisCommand(context, + config.eval_ldb_sync ? + "SCRIPT DEBUG sync": "SCRIPT DEBUG yes"); + if (reply) freeReplyObject(reply); + } + + /* Create our argument vector */ + argv2 = zmalloc(sizeof(sds)*(argc+3), MALLOC_LOCAL); + argv2[0] = sdsnew("EVAL"); + argv2[1] = script; + for (j = 0; j < argc; j++) { + if (!got_comma && argv[j][0] == ',' && argv[j][1] == 0) { + got_comma = 1; + continue; + } + argv2[j+3-got_comma] = sdsnew(argv[j]); + if (!got_comma) keys++; + } + argv2[2] = sdscatprintf(sdsempty(),"%d",keys); + + /* Call it */ + int eval_ldb = config.eval_ldb; /* Save it, may be reverted. */ + retval = issueCommand(argc+3-got_comma, argv2); + if (eval_ldb) { + if (!config.eval_ldb) { + /* If the debugging session ended immediately, there was an + * error compiling the script. Show it and they don't enter + * the REPL at all. */ + printf("Eval debugging session can't start:\n"); + cliReadReply(0); + break; /* Return to the caller. */ + } else { + strncpy(config.prompt,"lua debugger> ",sizeof(config.prompt)); + repl(); + /* Restart the session if repl() returned. */ + cliConnect(CC_FORCE); + printf("\n"); + } + } else { + break; /* Return to the caller. 
*/ + } + } + return retval; +} + +/*------------------------------------------------------------------------------ + * Cluster Manager + *--------------------------------------------------------------------------- */ + +/* The Cluster Manager global structure */ +struct cluster_manager; + + +typedef int clusterManagerCommandProc(int argc, char **argv); +typedef int (*clusterManagerOnReplyError)(redisReply *reply, + clusterManagerNode *n, int bulk_idx); + +/* Cluster Manager helper functions */ + +static clusterManagerNode *clusterManagerNewNode(char *ip, int port); +static clusterManagerNode *clusterManagerNodeByName(const char *name); +static clusterManagerNode *clusterManagerNodeByAbbreviatedName(const char *n); +static void clusterManagerNodeResetSlots(clusterManagerNode *node); +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); +static void clusterManagerPrintNotClusterNodeError(clusterManagerNode *node, + char *err); +static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, + char **err); +static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts); +static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err); +static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, + int ip_count); +static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent); +static void clusterManagerShowClusterInfo(void); +static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); +void clusterManagerWaitForClusterJoin(void); +int clusterManagerCheckCluster(int quiet); +void clusterManagerOnError(sds err); +static void clusterManagerNodeArrayInit(clusterManagerNodeArray *array, + int len); +static void clusterManagerNodeArrayReset(clusterManagerNodeArray *array); +static void clusterManagerNodeArrayShift(clusterManagerNodeArray *array, + clusterManagerNode **nodeptr); +static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, + 
clusterManagerNode *node); + +/* Cluster Manager commands. */ + +static int clusterManagerCommandCreate(int argc, char **argv); +static int clusterManagerCommandAddNode(int argc, char **argv); +static int clusterManagerCommandDeleteNode(int argc, char **argv); +static int clusterManagerCommandInfo(int argc, char **argv); +static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandFix(int argc, char **argv); +static int clusterManagerCommandReshard(int argc, char **argv); +static int clusterManagerCommandRebalance(int argc, char **argv); +static int clusterManagerCommandSetTimeout(int argc, char **argv); +static int clusterManagerCommandImport(int argc, char **argv); +static int clusterManagerCommandCall(int argc, char **argv); +static int clusterManagerCommandHelp(int argc, char **argv); +static int clusterManagerCommandBackup(int argc, char **argv); + +typedef struct clusterManagerCommandDef { + char *name; + clusterManagerCommandProc *proc; + int arity; + char *args; + char *options; +} clusterManagerCommandDef; + +clusterManagerCommandDef clusterManagerCommands[] = { + {"create", clusterManagerCommandCreate, -2, "host1:port1 ... 
hostN:portN", + "replicas "}, + {"check", clusterManagerCommandCheck, -1, "host:port", + "search-multiple-owners"}, + {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, + {"fix", clusterManagerCommandFix, -1, "host:port", + "search-multiple-owners,fix-with-unreachable-masters"}, + {"reshard", clusterManagerCommandReshard, -1, "host:port", + "from ,to ,slots ,yes,timeout ,pipeline ," + "replace"}, + {"rebalance", clusterManagerCommandRebalance, -1, "host:port", + "weight ,use-empty-masters," + "timeout ,simulate,pipeline ,threshold ,replace"}, + {"add-node", clusterManagerCommandAddNode, 2, + "new_host:new_port existing_host:existing_port", "slave,master-id "}, + {"del-node", clusterManagerCommandDeleteNode, 2, "host:port node_id",NULL}, + {"call", clusterManagerCommandCall, -2, + "host:port command arg arg .. arg", "only-masters,only-replicas"}, + {"set-timeout", clusterManagerCommandSetTimeout, 2, + "host:port milliseconds", NULL}, + {"import", clusterManagerCommandImport, 1, "host:port", + "from ,from-user ,from-pass ,from-askpass,copy,replace"}, + {"backup", clusterManagerCommandBackup, 2, "host:port backup_directory", + NULL}, + {"help", clusterManagerCommandHelp, 0, NULL, NULL} +}; + +typedef struct clusterManagerOptionDef { + char *name; + char *desc; +} clusterManagerOptionDef; + +clusterManagerOptionDef clusterManagerOptions[] = { + {"--cluster-yes", "Automatic yes to cluster commands prompts"} +}; + +static void getRDB(clusterManagerNode *node); + +void createClusterManagerCommand(char *cmdname, int argc, char **argv) { + clusterManagerCommand *cmd = &config.cluster_manager_command; + cmd->name = cmdname; + cmd->argc = argc; + cmd->argv = argc ? 
argv : NULL; + if (isColorTerm()) cmd->flags |= CLUSTER_MANAGER_CMD_FLAG_COLOR; +} + + +static clusterManagerCommandProc *validateClusterManagerCommand(void) { + int i, commands_count = sizeof(clusterManagerCommands) / + sizeof(clusterManagerCommandDef); + clusterManagerCommandProc *proc = NULL; + char *cmdname = config.cluster_manager_command.name; + int argc = config.cluster_manager_command.argc; + for (i = 0; i < commands_count; i++) { + clusterManagerCommandDef cmddef = clusterManagerCommands[i]; + if (!strcmp(cmddef.name, cmdname)) { + if ((cmddef.arity > 0 && argc != cmddef.arity) || + (cmddef.arity < 0 && argc < (cmddef.arity * -1))) { + fprintf(stderr, "[ERR] Wrong number of arguments for " + "specified --cluster sub command\n"); + return NULL; + } + proc = cmddef.proc; + } + } + if (!proc) fprintf(stderr, "Unknown --cluster subcommand\n"); + return proc; +} + +int parseClusterNodeAddress(char *addr, char **ip_ptr, int *port_ptr, + int *bus_port_ptr) +{ + char *c = strrchr(addr, '@'); + if (c != NULL) { + *c = '\0'; + if (bus_port_ptr != NULL) + *bus_port_ptr = atoi(c + 1); + } + c = strrchr(addr, ':'); + if (c != NULL) { + *c = '\0'; + *ip_ptr = addr; + *port_ptr = atoi(++c); + } else return 0; + return 1; +} + +/* Get host ip and port from command arguments. If only one argument has + * been provided it must be in the form of 'ip:port', elsewhere + * the first argument must be the ip and the second one the port. + * If host and port can be detected, it returns 1 and it stores host and + * port into variables referenced by'ip_ptr' and 'port_ptr' pointers, + * elsewhere it returns 0. 
*/ +static int getClusterHostFromCmdArgs(int argc, char **argv, + char **ip_ptr, int *port_ptr) { + int port = 0; + char *ip = NULL; + if (argc == 1) { + char *addr = argv[0]; + if (!parseClusterNodeAddress(addr, &ip, &port, NULL)) return 0; + } else { + ip = argv[0]; + port = atoi(argv[1]); + } + if (!ip || !port) return 0; + else { + *ip_ptr = ip; + *port_ptr = port; + } + return 1; +} + +static void freeClusterManagerNodeFlags(list *flags) { + listIter li; + listNode *ln; + listRewind(flags, &li); + while ((ln = listNext(&li)) != NULL) { + sds flag = ln->value; + sdsfree(flag); + } + listRelease(flags); +} + +void freeClusterManagerNode(clusterManagerNode *node) { + if (node->context != NULL) redisFree(node->context); + if (node->friends != NULL) { + listIter li; + listNode *ln; + listRewind(node->friends,&li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *fn = ln->value; + freeClusterManagerNode(fn); + } + listRelease(node->friends); + node->friends = NULL; + } + if (node->name != NULL) sdsfree(node->name); + if (node->replicate != NULL) sdsfree(node->replicate); + if ((node->flags & CLUSTER_MANAGER_FLAG_FRIEND) && node->ip) + sdsfree(node->ip); + int i; + if (node->migrating != NULL) { + for (i = 0; i < node->migrating_count; i++) sdsfree(node->migrating[i]); + zfree(node->migrating); + } + if (node->importing != NULL) { + for (i = 0; i < node->importing_count; i++) sdsfree(node->importing[i]); + zfree(node->importing); + } + if (node->flags_str != NULL) { + freeClusterManagerNodeFlags(node->flags_str); + node->flags_str = NULL; + } + zfree(node); +} + +void freeClusterManager(void); + +static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { + clusterManagerNode *node = zmalloc(sizeof(*node), MALLOC_LOCAL); + node->context = NULL; + node->name = NULL; + node->ip = ip; + node->port = port; + node->current_epoch = 0; + node->ping_sent = 0; + node->ping_recv = 0; + node->flags = 0; + node->flags_str = NULL; + node->replicate = 
NULL; + node->dirty = 0; + node->friends = NULL; + node->migrating = NULL; + node->importing = NULL; + node->migrating_count = 0; + node->importing_count = 0; + node->replicas_count = 0; + node->weight = 1.0f; + node->balance = 0; + clusterManagerNodeResetSlots(node); + return node; +} + +static sds clusterManagerGetNodeRDBFilename(clusterManagerNode *node) { + assert(config.cluster_manager_command.backup_dir); + sds filename = sdsnew(config.cluster_manager_command.backup_dir); + if (filename[sdslen(filename) - 1] != '/') + filename = sdscat(filename, "/"); + filename = sdscatprintf(filename, "redis-node-%s-%d-%s.rdb", node->ip, + node->port, node->name); + return filename; +} + +/* Check whether reply is NULL or its type is REDIS_REPLY_ERROR. In the + * latest case, if the 'err' arg is not NULL, it gets allocated with a copy + * of reply error (it's up to the caller function to free it), elsewhere + * the error is directly printed. */ +int clusterManagerCheckRedisReply(clusterManagerNode *n, + redisReply *r, char **err) +{ + int is_err = 0; + if (!r || (is_err = (r->type == REDIS_REPLY_ERROR))) { + if (is_err) { + if (err != NULL) { + *err = zmalloc((r->len + 1) * sizeof(char), MALLOC_LOCAL); + strcpy(*err, r->str); + } else CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, r->str); + } + return 0; + } + return 1; +} + +/* Call MULTI command on a cluster node. */ +static int clusterManagerStartTransaction(clusterManagerNode *node) { + redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "MULTI"); + int success = clusterManagerCheckRedisReply(node, reply, NULL); + if (reply) freeReplyObject(reply); + return success; +} + +/* Call EXEC command on a cluster node. 
*/ +static int clusterManagerExecTransaction(clusterManagerNode *node, + clusterManagerOnReplyError onerror) +{ + redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "EXEC"); + int success = clusterManagerCheckRedisReply(node, reply, NULL); + if (success) { + if (reply->type != REDIS_REPLY_ARRAY) { + success = 0; + goto cleanup; + } + size_t i; + for (i = 0; i < reply->elements; i++) { + redisReply *r = reply->element[i]; + char *err = NULL; + success = clusterManagerCheckRedisReply(node, r, &err); + if (!success && onerror) success = onerror(r, node, i); + if (err) { + if (!success) + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + zfree(err); + } + if (!success) break; + } + } +cleanup: + if (reply) freeReplyObject(reply); + return success; +} + +static int clusterManagerNodeConnect(clusterManagerNode *node) { + if (node->context) redisFree(node->context); + struct timeval tv; + tv.tv_sec = config.cluster_manager_command.timeout / 1000; + tv.tv_usec = (config.cluster_manager_command.timeout % 1000) * 1000; + node->context = redisConnectWithTimeout(node->ip, node->port, tv); + if (!node->context->err && config.tls) { + const char *err = NULL; + if (cliSecureConnection(node->context, config.sslconfig, &err) == REDIS_ERR && err) { + fprintf(stderr,"TLS Error: %s\n", err); + redisFree(node->context); + node->context = NULL; + return 0; + } + } + if (node->context->err) { + fprintf(stderr,"Could not connect to KeyDB at "); + fprintf(stderr,"%s:%d: %s\n", node->ip, node->port, + node->context->errstr); + redisFree(node->context); + node->context = NULL; + return 0; + } + /* Set aggressive KEEP_ALIVE socket option in the Redis context socket + * in order to prevent timeouts caused by the execution of long + * commands. At the same time this improves the detection of real + * errors. 
*/ + anetKeepAlive(NULL, node->context->fd, REDIS_CLI_KEEPALIVE_INTERVAL); + if (config.auth) { + redisReply *reply; + if (config.user == NULL) + reply = redisCommand(node->context,"AUTH %s", config.auth); + else + reply = redisCommand(node->context,"AUTH %s %s", + config.user,config.auth); + int ok = clusterManagerCheckRedisReply(node, reply, NULL); + if (reply != NULL) freeReplyObject(reply); + if (!ok) return 0; + } + return 1; +} + +static void clusterManagerRemoveNodeFromList(list *nodelist, + clusterManagerNode *node) { + listIter li; + listNode *ln; + listRewind(nodelist, &li); + while ((ln = listNext(&li)) != NULL) { + if (node == ln->value) { + listDelNode(nodelist, ln); + break; + } + } +} + +/* Return the node with the specified name (ID) or NULL. */ +static clusterManagerNode *clusterManagerNodeByName(const char *name) { + if (cluster_manager.nodes == NULL) return NULL; + clusterManagerNode *found = NULL; + sds lcname = sdsempty(); + lcname = sdscpy(lcname, name); + sdstolower(lcname); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->name && !sdscmp(n->name, lcname)) { + found = n; + break; + } + } + sdsfree(lcname); + return found; +} + +/* Like clusterManagerNodeByName but the specified name can be just the first + * part of the node ID as long as the prefix in unique across the + * cluster. 
+ */ +static clusterManagerNode *clusterManagerNodeByAbbreviatedName(const char*name) +{ + if (cluster_manager.nodes == NULL) return NULL; + clusterManagerNode *found = NULL; + sds lcname = sdsempty(); + lcname = sdscpy(lcname, name); + sdstolower(lcname); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->name && + strstr(n->name, lcname) == n->name) { + found = n; + break; + } + } + sdsfree(lcname); + return found; +} + +static void clusterManagerNodeResetSlots(clusterManagerNode *node) { + memset(node->slots, 0, sizeof(node->slots)); + node->slots_count = 0; +} + +/* Call "INFO" redis command on the specified node and return the reply. */ +static redisReply *clusterManagerGetNodeRedisInfo(clusterManagerNode *node, + char **err) +{ + redisReply *info = CLUSTER_MANAGER_COMMAND(node, "INFO"); + if (err != NULL) *err = NULL; + if (info == NULL) return NULL; + if (info->type == REDIS_REPLY_ERROR) { + if (err != NULL) { + *err = zmalloc((info->len + 1) * sizeof(char), MALLOC_LOCAL); + strcpy(*err, info->str); + } + freeReplyObject(info); + return NULL; + } + return info; +} + +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { + redisReply *info = clusterManagerGetNodeRedisInfo(node, err); + if (info == NULL) return 0; + int is_cluster = (int) getLongInfoField(info->str, "cluster_enabled"); + freeReplyObject(info); + return is_cluster; +} + +/* Checks whether the node is empty. 
Node is considered not-empty if it has + * some key or if it already knows other nodes */ +static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) { + redisReply *info = clusterManagerGetNodeRedisInfo(node, err); + int is_empty = 1; + if (info == NULL) return 0; + if (strstr(info->str, "db0:") != NULL) { + is_empty = 0; + goto result; + } + freeReplyObject(info); + info = CLUSTER_MANAGER_COMMAND(node, "CLUSTER INFO"); + if (err != NULL) *err = NULL; + if (!clusterManagerCheckRedisReply(node, info, err)) { + is_empty = 0; + goto result; + } + long known_nodes = getLongInfoField(info->str, "cluster_known_nodes"); + is_empty = (known_nodes == 1); +result: + freeReplyObject(info); + return is_empty; +} + +static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, + int ip_count) +{ + clusterManagerNode **offenders = NULL; + int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_count, + NULL, NULL); + if (score == 0) goto cleanup; + clusterManagerLogInfo(">>> Trying to optimize slaves allocation " + "for anti-affinity\n"); + int node_len = cluster_manager.nodes->len; + int maxiter = 500 * node_len; // Effort is proportional to cluster size... + srand(time(NULL)); + while (maxiter > 0) { + int offending_len = 0; + if (offenders != NULL) { + zfree(offenders); + offenders = NULL; + } + score = clusterManagerGetAntiAffinityScore(ipnodes, + ip_count, + &offenders, + &offending_len); + if (score == 0 || offending_len == 0) break; // Optimal anti affinity reached + /* We'll try to randomly swap a slave's assigned master causing + * an affinity problem with another random slave, to see if we + * can improve the affinity. 
*/ + int rand_idx = rand() % offending_len; + clusterManagerNode *first = offenders[rand_idx], + *second = NULL; + clusterManagerNode **other_replicas = zcalloc((node_len - 1) * + sizeof(*other_replicas), MALLOC_LOCAL); + int other_replicas_count = 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n != first && n->replicate != NULL) + other_replicas[other_replicas_count++] = n; + } + if (other_replicas_count == 0) { + zfree(other_replicas); + break; + } + rand_idx = rand() % other_replicas_count; + second = other_replicas[rand_idx]; + char *first_master = first->replicate, + *second_master = second->replicate; + first->replicate = second_master, first->dirty = 1; + second->replicate = first_master, second->dirty = 1; + int new_score = clusterManagerGetAntiAffinityScore(ipnodes, + ip_count, + NULL, NULL); + /* If the change actually makes thing worse, revert. Otherwise + * leave as it is because the best solution may need a few + * combined swaps. */ + if (new_score > score) { + first->replicate = first_master; + second->replicate = second_master; + } + zfree(other_replicas); + maxiter--; + } + score = clusterManagerGetAntiAffinityScore(ipnodes, ip_count, NULL, NULL); + char *msg; + int perfect = (score == 0); + int log_level = (perfect ? 
CLUSTER_MANAGER_LOG_LVL_SUCCESS : + CLUSTER_MANAGER_LOG_LVL_WARN); + if (perfect) msg = "[OK] Perfect anti-affinity obtained!"; + else if (score >= 10000) + msg = ("[WARNING] Some slaves are in the same host as their master"); + else + msg=("[WARNING] Some slaves of the same master are in the same host"); + clusterManagerLog(log_level, "%s\n", msg); +cleanup: + zfree(offenders); +} + +/* Return a representable string of the node's flags */ +static sds clusterManagerNodeFlagString(clusterManagerNode *node) { + sds flags = sdsempty(); + if (!node->flags_str) return flags; + int empty = 1; + listIter li; + listNode *ln; + listRewind(node->flags_str, &li); + while ((ln = listNext(&li)) != NULL) { + sds flag = ln->value; + if (strcmp(flag, "myself") == 0) continue; + if (!empty) flags = sdscat(flags, ","); + flags = sdscatfmt(flags, "%S", flag); + empty = 0; + } + return flags; +} + +/* Return a representable string of the node's slots */ +static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { + sds slots = sdsempty(); + int first_range_idx = -1, last_slot_idx = -1, i; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + int has_slot = node->slots[i]; + if (has_slot) { + if (first_range_idx == -1) { + if (sdslen(slots)) slots = sdscat(slots, ","); + first_range_idx = i; + slots = sdscatfmt(slots, "[%u", i); + } + last_slot_idx = i; + } else { + if (last_slot_idx >= 0) { + if (first_range_idx == last_slot_idx) + slots = sdscat(slots, "]"); + else slots = sdscatfmt(slots, "-%u]", last_slot_idx); + } + last_slot_idx = -1; + first_range_idx = -1; + } + } + if (last_slot_idx >= 0) { + if (first_range_idx == last_slot_idx) slots = sdscat(slots, "]"); + else slots = sdscatfmt(slots, "-%u]", last_slot_idx); + } + return slots; +} + +static sds clusterManagerNodeGetJSON(clusterManagerNode *node, + unsigned long error_count) +{ + sds json = sdsempty(); + sds replicate = sdsempty(); + if (node->replicate) + replicate = sdscatprintf(replicate, "\"%s\"", node->replicate); 
+ else + replicate = sdscat(replicate, "null"); + sds slots = clusterManagerNodeSlotsString(node); + sds flags = clusterManagerNodeFlagString(node); + char *p = slots; + while ((p = strchr(p, '-')) != NULL) + *(p++) = ','; + json = sdscatprintf(json, + " {\n" + " \"name\": \"%s\",\n" + " \"host\": \"%s\",\n" + " \"port\": %d,\n" + " \"replicate\": %s,\n" + " \"slots\": [%s],\n" + " \"slots_count\": %d,\n" + " \"flags\": \"%s\",\n" + " \"current_epoch\": %llu", + node->name, + node->ip, + node->port, + replicate, + slots, + node->slots_count, + flags, + (unsigned long long)node->current_epoch + ); + if (error_count > 0) { + json = sdscatprintf(json, ",\n \"cluster_errors\": %lu", + error_count); + } + if (node->migrating_count > 0 && node->migrating != NULL) { + int i = 0; + sds migrating = sdsempty(); + for (; i < node->migrating_count; i += 2) { + sds slot = node->migrating[i]; + sds dest = node->migrating[i + 1]; + if (slot && dest) { + if (sdslen(migrating) > 0) migrating = sdscat(migrating, ","); + migrating = sdscatfmt(migrating, "\"%S\": \"%S\"", slot, dest); + } + } + if (sdslen(migrating) > 0) + json = sdscatfmt(json, ",\n \"migrating\": {%S}", migrating); + sdsfree(migrating); + } + if (node->importing_count > 0 && node->importing != NULL) { + int i = 0; + sds importing = sdsempty(); + for (; i < node->importing_count; i += 2) { + sds slot = node->importing[i]; + sds from = node->importing[i + 1]; + if (slot && from) { + if (sdslen(importing) > 0) importing = sdscat(importing, ","); + importing = sdscatfmt(importing, "\"%S\": \"%S\"", slot, from); + } + } + if (sdslen(importing) > 0) + json = sdscatfmt(json, ",\n \"importing\": {%S}", importing); + sdsfree(importing); + } + json = sdscat(json, "\n }"); + sdsfree(replicate); + sdsfree(slots); + sdsfree(flags); + return json; +} + + +/* ----------------------------------------------------------------------------- + * Key space handling + * 
-------------------------------------------------------------------------- */ + +/* We have 16384 hash slots. The hash slot of a given key is obtained + * as the least significant 14 bits of the crc16 of the key. + * + * However if the key contains the {...} pattern, only the part between + * { and } is hashed. This may be useful in the future to force certain + * keys to be in the same node (assuming no resharding is in progress). */ +static unsigned int clusterManagerKeyHashSlot(char *key, int keylen) { + int s, e; /* start-end indexes of { and } */ + + for (s = 0; s < keylen; s++) + if (key[s] == '{') break; + + /* No '{' ? Hash the whole key. This is the base case. */ + if (s == keylen) return crc16(key,keylen) & 0x3FFF; + + /* '{' found? Check if we have the corresponding '}'. */ + for (e = s+1; e < keylen; e++) + if (key[e] == '}') break; + + /* No '}' or nothing between {} ? Hash the whole key. */ + if (e == keylen || e == s+1) return crc16(key,keylen) & 0x3FFF; + + /* If we are here there is both a { and a } on its right. Hash + * what is in the middle between { and }. */ + return crc16(key+s+1,e-s-1) & 0x3FFF; +} + +/* Return a string representation of the cluster node. */ +static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent) { + sds info = sdsempty(); + sds spaces = sdsempty(); + int i; + for (i = 0; i < indent; i++) spaces = sdscat(spaces, " "); + if (indent) info = sdscat(info, spaces); + int is_master = !(node->flags & CLUSTER_MANAGER_FLAG_SLAVE); + char *role = (is_master ? 
"M" : "S"); + sds slots = NULL; + if (node->dirty && node->replicate != NULL) + info = sdscatfmt(info, "S: %S %s:%u", node->name, node->ip, node->port); + else { + slots = clusterManagerNodeSlotsString(node); + sds flags = clusterManagerNodeFlagString(node); + info = sdscatfmt(info, "%s: %S %s:%u\n" + "%s slots:%S (%u slots) " + "%S", + role, node->name, node->ip, node->port, spaces, + slots, node->slots_count, flags); + sdsfree(slots); + sdsfree(flags); + } + if (node->replicate != NULL) + info = sdscatfmt(info, "\n%s replicates %S", spaces, node->replicate); + else if (node->replicas_count) + info = sdscatfmt(info, "\n%s %U additional replica(s)", + spaces, node->replicas_count); + sdsfree(spaces); + return info; +} + +void clusterManagerShowNodes(void) { + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + sds info = clusterManagerNodeInfo(node, 0); + printf("%s\n", (char *) info); + sdsfree(info); + } +} + +static void clusterManagerShowClusterInfo(void) { + int masters = 0; + int keys = 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + if (!(node->flags & CLUSTER_MANAGER_FLAG_SLAVE)) { + if (!node->name) continue; + int replicas = 0; + int dbsize = -1; + char name[9]; + memcpy(name, node->name, 8); + name[8] = '\0'; + listIter ri; + listNode *rn; + listRewind(cluster_manager.nodes, &ri); + while ((rn = listNext(&ri)) != NULL) { + clusterManagerNode *n = rn->value; + if (n == node || !(n->flags & CLUSTER_MANAGER_FLAG_SLAVE)) + continue; + if (n->replicate && !strcmp(n->replicate, node->name)) + replicas++; + } + redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "DBSIZE"); + if (reply != NULL && reply->type == REDIS_REPLY_INTEGER) + dbsize = reply->integer; + if (dbsize < 0) { + char *err = ""; + if (reply != NULL && reply->type == REDIS_REPLY_ERROR) + 
err = reply->str; + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + if (reply != NULL) freeReplyObject(reply); + return; + }; + if (reply != NULL) freeReplyObject(reply); + printf("%s:%d (%s...) -> %d keys | %d slots | %d slaves.\n", + node->ip, node->port, name, dbsize, + node->slots_count, replicas); + masters++; + keys += dbsize; + } + } + clusterManagerLogOk("[OK] %d keys in %d masters.\n", keys, masters); + float keys_per_slot = keys / (float) CLUSTER_MANAGER_SLOTS; + printf("%.2f keys per slot on average.\n", keys_per_slot); +} + +/* Flush dirty slots configuration of the node by calling CLUSTER ADDSLOTS */ +static int clusterManagerAddSlots(clusterManagerNode *node, char**err) +{ + redisReply *reply = NULL; + void *_reply = NULL; + int success = 1; + /* First two args are used for the command itself. */ + int argc = node->slots_count + 2; + sds *argv = zmalloc(argc * sizeof(*argv), MALLOC_LOCAL); + size_t *argvlen = zmalloc(argc * sizeof(*argvlen), MALLOC_LOCAL); + argv[0] = "CLUSTER"; + argv[1] = "ADDSLOTS"; + argvlen[0] = 7; + argvlen[1] = 8; + *err = NULL; + int i, argv_idx = 2; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + if (argv_idx >= argc) break; + if (node->slots[i]) { + argv[argv_idx] = sdsfromlonglong((long long) i); + argvlen[argv_idx] = sdslen(argv[argv_idx]); + argv_idx++; + } + } + if (!argv_idx) { + success = 0; + goto cleanup; + } + redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen); + if (redisGetReply(node->context, &_reply) != REDIS_OK) { + success = 0; + goto cleanup; + } + reply = (redisReply*) _reply; + success = clusterManagerCheckRedisReply(node, reply, err); +cleanup: + zfree(argvlen); + if (argv != NULL) { + for (i = 2; i < argc; i++) sdsfree(argv[i]); + zfree(argv); + } + if (reply != NULL) freeReplyObject(reply); + return success; +} + +/* Get the node the slot is assigned to from the point of view of node *n. + * If the slot is unassigned or if the reply is an error, return NULL. 
+ * Use the **err argument in order to check wether the slot is unassigned + * or the reply resulted in an error. */ +static clusterManagerNode *clusterManagerGetSlotOwner(clusterManagerNode *n, + int slot, char **err) +{ + assert(slot >= 0 && slot < CLUSTER_MANAGER_SLOTS); + clusterManagerNode *owner = NULL; + redisReply *reply = CLUSTER_MANAGER_COMMAND(n, "CLUSTER SLOTS"); + if (clusterManagerCheckRedisReply(n, reply, err)) { + assert(reply->type == REDIS_REPLY_ARRAY); + size_t i; + for (i = 0; i < reply->elements; i++) { + redisReply *r = reply->element[i]; + assert(r->type == REDIS_REPLY_ARRAY && r->elements >= 3); + int from, to; + from = r->element[0]->integer; + to = r->element[1]->integer; + if (slot < from || slot > to) continue; + redisReply *nr = r->element[2]; + assert(nr->type == REDIS_REPLY_ARRAY && nr->elements >= 2); + char *name = NULL; + if (nr->elements >= 3) + name = nr->element[2]->str; + if (name != NULL) + owner = clusterManagerNodeByName(name); + else { + char *ip = nr->element[0]->str; + assert(ip != NULL); + int port = (int) nr->element[1]->integer; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *nd = ln->value; + if (strcmp(nd->ip, ip) == 0 && port == nd->port) { + owner = nd; + break; + } + } + } + if (owner) break; + } + } + if (reply) freeReplyObject(reply); + return owner; +} + +/* Set slot status to "importing" or "migrating" */ +int clusterManagerSetSlot(clusterManagerNode *node1, + clusterManagerNode *node2, + int slot, const char *status, char **err) { + redisReply *reply = CLUSTER_MANAGER_COMMAND(node1, "CLUSTER " + "SETSLOT %d %s %s", + slot, status, + (char *) node2->name); + if (err != NULL) *err = NULL; + if (!reply) return 0; + int success = 1; + if (reply->type == REDIS_REPLY_ERROR) { + success = 0; + if (err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char), MALLOC_LOCAL); + strcpy(*err, reply->str); + } else 
CLUSTER_MANAGER_PRINT_REPLY_ERROR(node1, reply->str); + goto cleanup; + } +cleanup: + freeReplyObject(reply); + return success; +} + +int clusterManagerClearSlotStatus(clusterManagerNode *node, int slot) { + redisReply *reply = CLUSTER_MANAGER_COMMAND(node, + "CLUSTER SETSLOT %d %s", slot, "STABLE"); + int success = clusterManagerCheckRedisReply(node, reply, NULL); + if (reply) freeReplyObject(reply); + return success; +} + +static int clusterManagerDelSlot(clusterManagerNode *node, int slot, + int ignore_unassigned_err) +{ + redisReply *reply = CLUSTER_MANAGER_COMMAND(node, + "CLUSTER DELSLOTS %d", slot); + char *err = NULL; + int success = clusterManagerCheckRedisReply(node, reply, &err); + if (!success && reply && reply->type == REDIS_REPLY_ERROR && + ignore_unassigned_err) + { + char *get_owner_err = NULL; + clusterManagerNode *assigned_to = + clusterManagerGetSlotOwner(node, slot, &get_owner_err); + if (!assigned_to) { + if (get_owner_err == NULL) success = 1; + else { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, get_owner_err); + zfree(get_owner_err); + } + } + } + if (!success && err != NULL) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + zfree(err); + } + if (reply) freeReplyObject(reply); + return success; +} + +static int clusterManagerAddSlot(clusterManagerNode *node, int slot) { + redisReply *reply = CLUSTER_MANAGER_COMMAND(node, + "CLUSTER ADDSLOTS %d", slot); + int success = clusterManagerCheckRedisReply(node, reply, NULL); + if (reply) freeReplyObject(reply); + return success; +} + +signed int clusterManagerCountKeysInSlot(clusterManagerNode *node, + int slot) +{ + redisReply *reply = CLUSTER_MANAGER_COMMAND(node, + "CLUSTER COUNTKEYSINSLOT %d", slot); + int count = -1; + int success = clusterManagerCheckRedisReply(node, reply, NULL); + if (success && reply->type == REDIS_REPLY_INTEGER) count = reply->integer; + if (reply) freeReplyObject(reply); + return count; +} + +static int clusterManagerBumpEpoch(clusterManagerNode *node) { + redisReply 
*reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER BUMPEPOCH"); + int success = clusterManagerCheckRedisReply(node, reply, NULL); + if (reply) freeReplyObject(reply); + return success; +} + +/* Callback used by clusterManagerSetSlotOwner transaction. It should ignore + * errors except for ADDSLOTS errors. + * Return 1 if the error should be ignored. */ +static int clusterManagerOnSetOwnerErr(redisReply *reply, + clusterManagerNode *n, int bulk_idx) +{ + UNUSED(reply); + UNUSED(n); + /* Only raise error when ADDSLOTS fail (bulk_idx == 1). */ + return (bulk_idx != 1); +} + +int clusterManagerSetSlotOwner(clusterManagerNode *owner, + int slot, + int do_clear) +{ + int success = clusterManagerStartTransaction(owner); + if (!success) return 0; + /* Ensure the slot is not already assigned. */ + clusterManagerDelSlot(owner, slot, 1); + /* Add the slot and bump epoch. */ + clusterManagerAddSlot(owner, slot); + if (do_clear) clusterManagerClearSlotStatus(owner, slot); + clusterManagerBumpEpoch(owner); + success = clusterManagerExecTransaction(owner, clusterManagerOnSetOwnerErr); + return success; +} + +/* Get the hash for the values of the specified keys in *keys_reply for the + * specified nodes *n1 and *n2, by calling DEBUG DIGEST-VALUE redis command + * on both nodes. Every key with same name on both nodes but having different + * values will be added to the *diffs list. Return 0 in case of reply + * error. 
*/ +static int clusterManagerCompareKeysValues(clusterManagerNode *n1, + clusterManagerNode *n2, + redisReply *keys_reply, + list *diffs) +{ + size_t i, argc = keys_reply->elements + 2; + static const char *hash_zero = "0000000000000000000000000000000000000000"; + char **argv = zcalloc(argc * sizeof(char *), MALLOC_LOCAL); + size_t *argv_len = zcalloc(argc * sizeof(size_t), MALLOC_LOCAL); + argv[0] = "DEBUG"; + argv_len[0] = 5; + argv[1] = "DIGEST-VALUE"; + argv_len[1] = 12; + for (i = 0; i < keys_reply->elements; i++) { + redisReply *entry = keys_reply->element[i]; + int idx = i + 2; + argv[idx] = entry->str; + argv_len[idx] = entry->len; + } + int success = 0; + void *_reply1 = NULL, *_reply2 = NULL; + redisReply *r1 = NULL, *r2 = NULL; + redisAppendCommandArgv(n1->context,argc, (const char**)argv,argv_len); + success = (redisGetReply(n1->context, &_reply1) == REDIS_OK); + if (!success) goto cleanup; + r1 = (redisReply *) _reply1; + redisAppendCommandArgv(n2->context,argc, (const char**)argv,argv_len); + success = (redisGetReply(n2->context, &_reply2) == REDIS_OK); + if (!success) goto cleanup; + r2 = (redisReply *) _reply2; + success = (r1->type != REDIS_REPLY_ERROR && r2->type != REDIS_REPLY_ERROR); + if (r1->type == REDIS_REPLY_ERROR) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(n1, r1->str); + success = 0; + } + if (r2->type == REDIS_REPLY_ERROR) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(n2, r2->str); + success = 0; + } + if (!success) goto cleanup; + assert(keys_reply->elements == r1->elements && + keys_reply->elements == r2->elements); + for (i = 0; i < keys_reply->elements; i++) { + char *key = keys_reply->element[i]->str; + char *hash1 = r1->element[i]->str; + char *hash2 = r2->element[i]->str; + /* Ignore keys that don't exist in both nodes. 
*/ + if (strcmp(hash1, hash_zero) == 0 || strcmp(hash2, hash_zero) == 0) + continue; + if (strcmp(hash1, hash2) != 0) listAddNodeTail(diffs, key); + } +cleanup: + if (r1) freeReplyObject(r1); + if (r2) freeReplyObject(r2); + zfree(argv); + zfree(argv_len); + return success; +} + +/* Migrate keys taken from reply->elements. It returns the reply from the + * MIGRATE command, or NULL if something goes wrong. If the argument 'dots' + * is not NULL, a dot will be printed for every migrated key. */ +static redisReply *clusterManagerMigrateKeysInReply(clusterManagerNode *source, + clusterManagerNode *target, + redisReply *reply, + int replace, int timeout, + char *dots) +{ + redisReply *migrate_reply = NULL; + char **argv = NULL; + size_t *argv_len = NULL; + int c = (replace ? 8 : 7); + if (config.auth) c += 2; + if (config.user) c += 1; + size_t argc = c + reply->elements; + size_t i, offset = 6; // Keys Offset + argv = zcalloc(argc * sizeof(char *), MALLOC_LOCAL); + argv_len = zcalloc(argc * sizeof(size_t), MALLOC_LOCAL); + char portstr[255]; + char timeoutstr[255]; + snprintf(portstr, sizeof(portstr), "%d", target->port); + snprintf(timeoutstr, sizeof(timeoutstr), "%d", timeout); + argv[0] = "MIGRATE"; + argv_len[0] = 7; + argv[1] = target->ip; + argv_len[1] = strlen(target->ip); + argv[2] = portstr; + argv_len[2] = strlen(portstr); + argv[3] = ""; + argv_len[3] = 0; + argv[4] = "0"; + argv_len[4] = 1; + argv[5] = timeoutstr; + argv_len[5] = strlen(timeoutstr); + if (replace) { + argv[offset] = "REPLACE"; + argv_len[offset] = 7; + offset++; + } + if (config.auth) { + if (config.user) { + argv[offset] = "AUTH2"; + argv_len[offset] = 5; + offset++; + argv[offset] = config.user; + argv_len[offset] = strlen(config.user); + offset++; + argv[offset] = config.auth; + argv_len[offset] = strlen(config.auth); + offset++; + } else { + argv[offset] = "AUTH"; + argv_len[offset] = 4; + offset++; + argv[offset] = config.auth; + argv_len[offset] = strlen(config.auth); + offset++; + } 
+ } + argv[offset] = "KEYS"; + argv_len[offset] = 4; + offset++; + for (i = 0; i < reply->elements; i++) { + redisReply *entry = reply->element[i]; + size_t idx = i + offset; + assert(entry->type == REDIS_REPLY_STRING); + argv[idx] = (char *) sdsnewlen(entry->str, entry->len); + argv_len[idx] = entry->len; + if (dots) dots[i] = '.'; + } + if (dots) dots[reply->elements] = '\0'; + void *_reply = NULL; + redisAppendCommandArgv(source->context,argc, + (const char**)argv,argv_len); + int success = (redisGetReply(source->context, &_reply) == REDIS_OK); + for (i = 0; i < reply->elements; i++) sdsfree(argv[i + offset]); + if (!success) goto cleanup; + migrate_reply = (redisReply *) _reply; +cleanup: + zfree(argv); + zfree(argv_len); + return migrate_reply; +} + +/* Migrate all keys in the given slot from source to target.*/ +static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, + clusterManagerNode *target, + int slot, int timeout, + int pipeline, int verbose, + char **err) +{ + int success = 1; + int do_fix = config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_FIX; + int do_replace = config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_REPLACE; + while (1) { + char *dots = NULL; + redisReply *reply = NULL, *migrate_reply = NULL; + reply = CLUSTER_MANAGER_COMMAND(source, "CLUSTER " + "GETKEYSINSLOT %d %d", slot, + pipeline); + success = (reply != NULL); + if (!success) return 0; + if (reply->type == REDIS_REPLY_ERROR) { + success = 0; + if (err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char), MALLOC_LOCAL); + strcpy(*err, reply->str); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, *err); + } + goto next; + } + assert(reply->type == REDIS_REPLY_ARRAY); + size_t count = reply->elements; + if (count == 0) { + freeReplyObject(reply); + break; + } + if (verbose) dots = zmalloc((count+1) * sizeof(char), MALLOC_LOCAL); + /* Calling MIGRATE command. 
*/ + migrate_reply = clusterManagerMigrateKeysInReply(source, target, + reply, 0, timeout, + dots); + if (migrate_reply == NULL) goto next; + if (migrate_reply->type == REDIS_REPLY_ERROR) { + int is_busy = strstr(migrate_reply->str, "BUSYKEY") != NULL; + int not_served = 0; + if (!is_busy) { + /* Check if the slot is unassigned (not served) in the + * source node's configuration. */ + char *get_owner_err = NULL; + clusterManagerNode *served_by = + clusterManagerGetSlotOwner(source, slot, &get_owner_err); + if (!served_by) { + if (get_owner_err == NULL) not_served = 1; + else { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, + get_owner_err); + zfree(get_owner_err); + } + } + } + /* Try to handle errors. */ + if (is_busy || not_served) { + /* If the key's slot is not served, try to assign slot + * to the target node. */ + if (do_fix && not_served) { + clusterManagerLogWarn("*** Slot was not served, setting " + "owner to node %s:%d.\n", + target->ip, target->port); + clusterManagerSetSlot(source, target, slot, "node", NULL); + } + /* If the key already exists in the target node (BUSYKEY), + * check whether its value is the same in both nodes. + * In case of equal values, retry migration with the + * REPLACE option. + * In case of different values: + * - If the migration is requested by the fix command, stop + * and warn the user. + * - In other cases (ie. 
reshard), proceed only if the user + * launched the command with the --cluster-replace option.*/ + if (is_busy) { + clusterManagerLogWarn("\n*** Target key exists\n"); + if (!do_replace) { + clusterManagerLogWarn("*** Checking key values on " + "both nodes...\n"); + list *diffs = listCreate(); + success = clusterManagerCompareKeysValues(source, + target, reply, diffs); + if (!success) { + clusterManagerLogErr("*** Value check failed!\n"); + listRelease(diffs); + goto next; + } + if (listLength(diffs) > 0) { + success = 0; + clusterManagerLogErr( + "*** Found %d key(s) in both source node and " + "target node having different values.\n" + " Source node: %s:%d\n" + " Target node: %s:%d\n" + " Keys(s):\n", + listLength(diffs), + source->ip, source->port, + target->ip, target->port); + listIter dli; + listNode *dln; + listRewind(diffs, &dli); + while((dln = listNext(&dli)) != NULL) { + char *k = dln->value; + clusterManagerLogErr(" - %s\n", k); + } + clusterManagerLogErr("Please fix the above key(s) " + "manually and try again " + "or relaunch the command \n" + "with --cluster-replace " + "option to force key " + "overriding.\n"); + listRelease(diffs); + goto next; + } + listRelease(diffs); + } + clusterManagerLogWarn("*** Replacing target keys...\n"); + } + freeReplyObject(migrate_reply); + migrate_reply = clusterManagerMigrateKeysInReply(source, + target, + reply, + is_busy, + timeout, + NULL); + success = (migrate_reply != NULL && + migrate_reply->type != REDIS_REPLY_ERROR); + } else success = 0; + if (!success) { + if (migrate_reply != NULL) { + if (err) { + *err = zmalloc((migrate_reply->len + 1) * sizeof(char), MALLOC_LOCAL); + strcpy(*err, migrate_reply->str); + } + printf("\n"); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, + migrate_reply->str); + } + goto next; + } + } + if (verbose) { + printf("%s", dots); + fflush(stdout); + } +next: + if (reply != NULL) freeReplyObject(reply); + if (migrate_reply != NULL) freeReplyObject(migrate_reply); + if (dots) 
zfree(dots); + if (!success) break; + } + return success; +} + +/* Move slots between source and target nodes using MIGRATE. + * + * Options: + * CLUSTER_MANAGER_OPT_VERBOSE -- Print a dot for every moved key. + * CLUSTER_MANAGER_OPT_COLD -- Move keys without opening slots / + * reconfiguring the nodes. + * CLUSTER_MANAGER_OPT_UPDATE -- Update node->slots for source/target nodes. + * CLUSTER_MANAGER_OPT_QUIET -- Don't print info messages. +*/ +int clusterManagerMoveSlot(clusterManagerNode *source, + clusterManagerNode *target, + int slot, int opts, char**err) +{ + if (!(opts & CLUSTER_MANAGER_OPT_QUIET)) { + printf("Moving slot %d from %s:%d to %s:%d: ", slot, source->ip, + source->port, target->ip, target->port); + fflush(stdout); + } + if (err != NULL) *err = NULL; + int pipeline = config.cluster_manager_command.pipeline, + timeout = config.cluster_manager_command.timeout, + print_dots = (opts & CLUSTER_MANAGER_OPT_VERBOSE), + option_cold = (opts & CLUSTER_MANAGER_OPT_COLD), + success = 1; + if (!option_cold) { + success = clusterManagerSetSlot(target, source, slot, + "importing", err); + if (!success) return 0; + success = clusterManagerSetSlot(source, target, slot, + "migrating", err); + if (!success) return 0; + } + success = clusterManagerMigrateKeysInSlot(source, target, slot, timeout, + pipeline, print_dots, err); + if (!(opts & CLUSTER_MANAGER_OPT_QUIET)) printf("\n"); + if (!success) return 0; + /* Set the new node as the owner of the slot in all the known nodes. 
*/ + if (!option_cold) { + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER " + "SETSLOT %d %s %s", + slot, "node", + target->name); + success = (r != NULL); + if (!success) return 0; + if (r->type == REDIS_REPLY_ERROR) { + success = 0; + if (err != NULL) { + *err = zmalloc((r->len + 1) * sizeof(char), MALLOC_LOCAL); + strcpy(*err, r->str); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, *err); + } + } + freeReplyObject(r); + if (!success) return 0; + } + } + /* Update the node logical config */ + if (opts & CLUSTER_MANAGER_OPT_UPDATE) { + source->slots[slot] = 0; + target->slots[slot] = 1; + } + return 1; +} + +/* Flush the dirty node configuration by calling replicate for slaves or + * adding the slots defined in the masters. */ +static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { + if (!node->dirty) return 0; + redisReply *reply = NULL; + int is_err = 0, success = 1; + if (err != NULL) *err = NULL; + if (node->replicate != NULL) { + reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER REPLICATE %s", + node->replicate); + if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char), MALLOC_LOCAL); + strcpy(*err, reply->str); + } + success = 0; + /* If the cluster did not already joined it is possible that + * the slave does not know the master node yet. So on errors + * we return ASAP leaving the dirty flag set, to flush the + * config later. */ + goto cleanup; + } + } else { + int added = clusterManagerAddSlots(node, err); + if (!added || *err != NULL) success = 0; + } + node->dirty = 0; +cleanup: + if (reply != NULL) freeReplyObject(reply); + return success; +} + +/* Load node's cluster configuration by calling "CLUSTER NODES" command. 
+ * Node's configuration (name, replicate, slots, ...) is then updated. + * If CLUSTER_MANAGER_OPT_GETFRIENDS flag is set into 'opts' argument, + * and node already knows other nodes, the node's friends list is populated + * with the other nodes info. */ +static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, + char **err) +{ + redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); + int success = 1; + *err = NULL; + if (!clusterManagerCheckRedisReply(node, reply, err)) { + success = 0; + goto cleanup; + } + int getfriends = (opts & CLUSTER_MANAGER_OPT_GETFRIENDS); + char *lines = reply->str, *p, *line; + while ((p = strstr(lines, "\n")) != NULL) { + *p = '\0'; + line = lines; + lines = p + 1; + char *name = NULL, *addr = NULL, *flags = NULL, *master_id = NULL, + *ping_sent = NULL, *ping_recv = NULL, *config_epoch = NULL, + *link_status = NULL; + UNUSED(link_status); + int i = 0; + while ((p = strchr(line, ' ')) != NULL) { + *p = '\0'; + char *token = line; + line = p + 1; + switch(i++){ + case 0: name = token; break; + case 1: addr = token; break; + case 2: flags = token; break; + case 3: master_id = token; break; + case 4: ping_sent = token; break; + case 5: ping_recv = token; break; + case 6: config_epoch = token; break; + case 7: link_status = token; break; + } + if (i == 8) break; // Slots + } + if (!flags) { + success = 0; + goto cleanup; + } + int myself = (strstr(flags, "myself") != NULL); + clusterManagerNode *currentNode = NULL; + if (myself) { + node->flags |= CLUSTER_MANAGER_FLAG_MYSELF; + currentNode = node; + clusterManagerNodeResetSlots(node); + if (i == 8) { + int remaining = strlen(line); + while (remaining > 0) { + p = strchr(line, ' '); + if (p == NULL) p = line + remaining; + remaining -= (p - line); + + char *slotsdef = line; + *p = '\0'; + if (remaining) { + line = p + 1; + remaining--; + } else line = p; + char *dash = NULL; + if (slotsdef[0] == '[') { + slotsdef++; + if ((p = strstr(slotsdef, "->-"))) { // 
Migrating + *p = '\0'; + p += 3; + char *closing_bracket = strchr(p, ']'); + if (closing_bracket) *closing_bracket = '\0'; + sds slot = sdsnew(slotsdef); + sds dst = sdsnew(p); + node->migrating_count += 2; + node->migrating = zrealloc(node->migrating, + (node->migrating_count * sizeof(sds)), MALLOC_LOCAL); + node->migrating[node->migrating_count - 2] = + slot; + node->migrating[node->migrating_count - 1] = + dst; + } else if ((p = strstr(slotsdef, "-<-"))) {//Importing + *p = '\0'; + p += 3; + char *closing_bracket = strchr(p, ']'); + if (closing_bracket) *closing_bracket = '\0'; + sds slot = sdsnew(slotsdef); + sds src = sdsnew(p); + node->importing_count += 2; + node->importing = zrealloc(node->importing, + (node->importing_count * sizeof(sds)), MALLOC_LOCAL); + node->importing[node->importing_count - 2] = + slot; + node->importing[node->importing_count - 1] = + src; + } + } else if ((dash = strchr(slotsdef, '-')) != NULL) { + p = dash; + int start, stop; + *p = '\0'; + start = atoi(slotsdef); + stop = atoi(p + 1); + node->slots_count += (stop - (start - 1)); + while (start <= stop) node->slots[start++] = 1; + } else if (p > slotsdef) { + node->slots[atoi(slotsdef)] = 1; + node->slots_count++; + } + } + } + node->dirty = 0; + } else if (!getfriends) { + if (!(node->flags & CLUSTER_MANAGER_FLAG_MYSELF)) continue; + else break; + } else { + if (addr == NULL) { + fprintf(stderr, "Error: invalid CLUSTER NODES reply\n"); + success = 0; + goto cleanup; + } + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c == NULL) { + fprintf(stderr, "Error: invalid CLUSTER NODES reply\n"); + success = 0; + goto cleanup; + } + *c = '\0'; + int port = atoi(++c); + currentNode = clusterManagerNewNode(sdsnew(addr), port); + currentNode->flags |= CLUSTER_MANAGER_FLAG_FRIEND; + if (node->friends == NULL) node->friends = listCreate(); + listAddNodeTail(node->friends, currentNode); + } + if (name != NULL) { + if (currentNode->name) 
sdsfree(currentNode->name); + currentNode->name = sdsnew(name); + } + if (currentNode->flags_str != NULL) + freeClusterManagerNodeFlags(currentNode->flags_str); + currentNode->flags_str = listCreate(); + int flag_len; + while ((flag_len = strlen(flags)) > 0) { + sds flag = NULL; + char *fp = strchr(flags, ','); + if (fp) { + *fp = '\0'; + flag = sdsnew(flags); + flags = fp + 1; + } else { + flag = sdsnew(flags); + flags += flag_len; + } + if (strcmp(flag, "noaddr") == 0) + currentNode->flags |= CLUSTER_MANAGER_FLAG_NOADDR; + else if (strcmp(flag, "disconnected") == 0) + currentNode->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT; + else if (strcmp(flag, "fail") == 0) + currentNode->flags |= CLUSTER_MANAGER_FLAG_FAIL; + else if (strcmp(flag, "slave") == 0) { + currentNode->flags |= CLUSTER_MANAGER_FLAG_SLAVE; + if (master_id != NULL) { + if (currentNode->replicate) sdsfree(currentNode->replicate); + currentNode->replicate = sdsnew(master_id); + } + } + listAddNodeTail(currentNode->flags_str, flag); + } + if (config_epoch != NULL) + currentNode->current_epoch = atoll(config_epoch); + if (ping_sent != NULL) currentNode->ping_sent = atoll(ping_sent); + if (ping_recv != NULL) currentNode->ping_recv = atoll(ping_recv); + if (!getfriends && myself) break; + } +cleanup: + if (reply) freeReplyObject(reply); + return success; +} + +/* Retrieves info about the cluster using argument 'node' as the starting + * point. All nodes will be loaded inside the cluster_manager.nodes list. + * Warning: if something goes wrong, it will free the starting node before + * returning 0. 
*/ +static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { + if (node->context == NULL && !clusterManagerNodeConnect(node)) { + freeClusterManagerNode(node); + return 0; + } + opts |= CLUSTER_MANAGER_OPT_GETFRIENDS; + char *e = NULL; + if (!clusterManagerNodeIsCluster(node, &e)) { + clusterManagerPrintNotClusterNodeError(node, e); + if (e) zfree(e); + freeClusterManagerNode(node); + return 0; + } + e = NULL; + if (!clusterManagerNodeLoadInfo(node, opts, &e)) { + if (e) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, e); + zfree(e); + } + freeClusterManagerNode(node); + return 0; + } + listIter li; + listNode *ln; + if (cluster_manager.nodes != NULL) { + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) + freeClusterManagerNode((clusterManagerNode *) ln->value); + listRelease(cluster_manager.nodes); + } + cluster_manager.nodes = listCreate(); + listAddNodeTail(cluster_manager.nodes, node); + if (node->friends != NULL) { + listRewind(node->friends, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *friend = ln->value; + if (!friend->ip || !friend->port) goto invalid_friend; + if (!friend->context && !clusterManagerNodeConnect(friend)) + goto invalid_friend; + e = NULL; + if (clusterManagerNodeLoadInfo(friend, 0, &e)) { + if (friend->flags & (CLUSTER_MANAGER_FLAG_NOADDR | + CLUSTER_MANAGER_FLAG_DISCONNECT | + CLUSTER_MANAGER_FLAG_FAIL)) + { + goto invalid_friend; + } + listAddNodeTail(cluster_manager.nodes, friend); + } else { + clusterManagerLogErr("[ERR] Unable to load info for " + "node %s:%d\n", + friend->ip, friend->port); + goto invalid_friend; + } + continue; +invalid_friend: + if (!(friend->flags & CLUSTER_MANAGER_FLAG_SLAVE)) + cluster_manager.unreachable_masters++; + freeClusterManagerNode(friend); + } + listRelease(node->friends); + node->friends = NULL; + } + // Count replicas for each node + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + 
clusterManagerNode *n = ln->value; + if (n->replicate != NULL) { + clusterManagerNode *master = clusterManagerNodeByName(n->replicate); + if (master == NULL) { + clusterManagerLogWarn("*** WARNING: %s:%d claims to be " + "slave of unknown node ID %s.\n", + n->ip, n->port, n->replicate); + } else master->replicas_count++; + } + } + return 1; +} + +/* Compare functions used by various sorting operations. */ +int clusterManagerSlotCompare(const void *slot1, const void *slot2) { + const char **i1 = (const char **)slot1; + const char **i2 = (const char **)slot2; + return strcmp(*i1, *i2); +} + +int clusterManagerSlotCountCompareDesc(const void *n1, const void *n2) { + clusterManagerNode *node1 = *((clusterManagerNode **) n1); + clusterManagerNode *node2 = *((clusterManagerNode **) n2); + return node2->slots_count - node1->slots_count; +} + +int clusterManagerCompareNodeBalance(const void *n1, const void *n2) { + clusterManagerNode *node1 = *((clusterManagerNode **) n1); + clusterManagerNode *node2 = *((clusterManagerNode **) n2); + return node1->balance - node2->balance; +} + +static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { + sds signature = NULL; + int node_count = 0, i = 0, name_len = 0; + char **node_configs = NULL; + redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); + if (reply == NULL || reply->type == REDIS_REPLY_ERROR) + goto cleanup; + char *lines = reply->str, *p, *line; + while ((p = strstr(lines, "\n")) != NULL) { + i = 0; + *p = '\0'; + line = lines; + lines = p + 1; + char *nodename = NULL; + int tot_size = 0; + while ((p = strchr(line, ' ')) != NULL) { + *p = '\0'; + char *token = line; + line = p + 1; + if (i == 0) { + nodename = token; + tot_size = (p - token); + name_len = tot_size++; // Make room for ':' in tot_size + } + if (++i == 8) break; + } + if (i != 8) continue; + if (nodename == NULL) continue; + int remaining = strlen(line); + if (remaining == 0) continue; + char **slots = NULL; + int c = 0; + while 
(remaining > 0) { + p = strchr(line, ' '); + if (p == NULL) p = line + remaining; + int size = (p - line); + remaining -= size; + tot_size += size; + char *slotsdef = line; + *p = '\0'; + if (remaining) { + line = p + 1; + remaining--; + } else line = p; + if (slotsdef[0] != '[') { + c++; + slots = zrealloc(slots, (c * sizeof(char *)), MALLOC_LOCAL); + slots[c - 1] = slotsdef; + } + } + if (c > 0) { + if (c > 1) + qsort(slots, c, sizeof(char *), clusterManagerSlotCompare); + node_count++; + node_configs = + zrealloc(node_configs, (node_count * sizeof(char *)), MALLOC_LOCAL); + /* Make room for '|' separators. */ + tot_size += (sizeof(char) * (c - 1)); + char *cfg = zmalloc((sizeof(char) * tot_size) + 1, MALLOC_LOCAL); + memcpy(cfg, nodename, name_len); + char *sp = cfg + name_len; + *(sp++) = ':'; + for (i = 0; i < c; i++) { + if (i > 0) *(sp++) = ','; + int slen = strlen(slots[i]); + memcpy(sp, slots[i], slen); + sp += slen; + } + *(sp++) = '\0'; + node_configs[node_count - 1] = cfg; + } + zfree(slots); + } + if (node_count > 0) { + if (node_count > 1) { + qsort(node_configs, node_count, sizeof(char *), + clusterManagerSlotCompare); + } + signature = sdsempty(); + for (i = 0; i < node_count; i++) { + if (i > 0) signature = sdscatprintf(signature, "%c", '|'); + signature = sdscatfmt(signature, "%s", node_configs[i]); + } + } +cleanup: + if (reply != NULL) freeReplyObject(reply); + if (node_configs != NULL) { + for (i = 0; i < node_count; i++) zfree(node_configs[i]); + zfree(node_configs); + } + return signature; +} + +int clusterManagerIsConfigConsistent(int fLog) { + if (cluster_manager.nodes == NULL) return 0; + int consistent = (listLength(cluster_manager.nodes) <= 1); + // If the Cluster has only one node, it's always consistent + if (consistent) return 1; + sds first_cfg = NULL; + const char *firstNode = NULL; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = 
ln->value; + sds cfg = clusterManagerGetConfigSignature(node); + if (cfg == NULL) { + consistent = 0; + break; + } + if (first_cfg == NULL) { + first_cfg = cfg; + firstNode = node->name; + } else { + consistent = !sdscmp(first_cfg, cfg); + sdsfree(cfg); + if (fLog && !consistent) + clusterManagerLogInfo("\tNode %s (%s:%d) is inconsistent with %s\n", node->name, node->ip, node->port, firstNode); + if (!consistent) break; + } + } + if (first_cfg != NULL) sdsfree(first_cfg); + return consistent; +} + +/* Add the error string to cluster_manager.errors and print it. */ +void clusterManagerOnError(sds err) { + if (cluster_manager.errors == NULL) + cluster_manager.errors = listCreate(); + listAddNodeTail(cluster_manager.errors, err); + clusterManagerLogErr("%s\n", (char *) err); +} + +/* Check the slots coverage of the cluster. The 'all_slots' argument must be + * and array of 16384 bytes. Every covered slot will be set to 1 in the + * 'all_slots' array. The function returns the total number if covered slots.*/ +int clusterManagerGetCoveredSlots(char *all_slots) { + if (cluster_manager.nodes == NULL) return 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + int totslots = 0, i; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + if (node->slots[i] && !all_slots[i]) { + all_slots[i] = 1; + totslots++; + } + } + } + return totslots; +} + +void clusterManagerPrintSlotsList(list *slots) { + clusterManagerNode n = {0}; + listIter li; + listNode *ln; + listRewind(slots, &li); + while ((ln = listNext(&li)) != NULL) { + int slot = atoi(ln->value); + if (slot >= 0 && slot < CLUSTER_MANAGER_SLOTS) + n.slots[slot] = 1; + } + sds nodeslist = clusterManagerNodeSlotsString(&n); + printf("%s\n", nodeslist); + sdsfree(nodeslist); +} + +/* Return the node, among 'nodes' with the greatest number of keys + * in the specified slot. 
*/ +clusterManagerNode * clusterManagerGetNodeWithMostKeysInSlot(list *nodes, + int slot, + char **err) +{ + clusterManagerNode *node = NULL; + int numkeys = 0; + listIter li; + listNode *ln; + listRewind(nodes, &li); + if (err) *err = NULL; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + redisReply *r = + CLUSTER_MANAGER_COMMAND(n, "CLUSTER COUNTKEYSINSLOT %d", slot); + int success = clusterManagerCheckRedisReply(n, r, err); + if (success) { + if (r->integer > numkeys || node == NULL) { + numkeys = r->integer; + node = n; + } + } + if (r != NULL) freeReplyObject(r); + /* If the reply contains errors */ + if (!success) { + if (err != NULL && *err != NULL) + CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err); + node = NULL; + break; + } + } + return node; +} + +/* This function returns the master that has the least number of replicas + * in the cluster. If there are multiple masters with the same smaller + * number of replicas, one at random is returned. */ + +static clusterManagerNode *clusterManagerNodeWithLeastReplicas() { + clusterManagerNode *node = NULL; + int lowest_count = 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (node == NULL || n->replicas_count < lowest_count) { + node = n; + lowest_count = n->replicas_count; + } + } + return node; +} + +/* Slot 'slot' was found to be in importing or migrating state in one or + * more nodes. This function fixes this condition by migrating keys where + * it seems more sensible. 
*/ +int clusterManagerFixOpenSlot(int slot) { + int force_fix = config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_FIX_WITH_UNREACHABLE_MASTERS; + + if (cluster_manager.unreachable_masters > 0 && !force_fix) { + clusterManagerLogWarn("*** Fixing open slots with %d unreachable masters is dangerous: futriix-cliwill assume that slots about masters that are not reachable are not covered, and will try to reassign them to the reachable nodes. This can cause data loss and is rarely what you want to do. If you really want to proceed use the --cluster-fix-with-unreachable-masters option.\n", cluster_manager.unreachable_masters); + exit(1); + } + + clusterManagerLogInfo(">>> Fixing open slot %d\n", slot); + /* Try to obtain the current slot owner, according to the current + * nodes configuration. */ + int success = 1; + list *owners = listCreate(); /* List of nodes claiming some ownership. + it could be stating in the configuration + to have the node ownership, or just + holding keys for such slot. */ + list *migrating = listCreate(); + list *importing = listCreate(); + sds migrating_str = sdsempty(); + sds importing_str = sdsempty(); + clusterManagerNode *owner = NULL; /* The obvious slot owner if any. */ + + /* Iterate all the nodes, looking for potential owners of this slot. 
*/ + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->slots[slot]) { + listAddNodeTail(owners, n); + } else { + redisReply *r = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER COUNTKEYSINSLOT %d", slot); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (success && r->integer > 0) { + clusterManagerLogWarn("*** Found keys about slot %d " + "in non-owner node %s:%d!\n", slot, + n->ip, n->port); + listAddNodeTail(owners, n); + } + if (r) freeReplyObject(r); + if (!success) goto cleanup; + } + } + + /* If we have only a single potential owner for this slot, + * set it as "owner". */ + if (listLength(owners) == 1) owner = listFirst(owners)->value; + + /* Scan the list of nodes again, in order to populate the + * list of nodes in importing or migrating state for + * this slot. */ + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + int is_migrating = 0, is_importing = 0; + if (n->migrating) { + for (int i = 0; i < n->migrating_count; i += 2) { + sds migrating_slot = n->migrating[i]; + if (atoi(migrating_slot) == slot) { + char *sep = (listLength(migrating) == 0 ? "" : ","); + migrating_str = sdscatfmt(migrating_str, "%s%s:%u", + sep, n->ip, n->port); + listAddNodeTail(migrating, n); + is_migrating = 1; + break; + } + } + } + if (!is_migrating && n->importing) { + for (int i = 0; i < n->importing_count; i += 2) { + sds importing_slot = n->importing[i]; + if (atoi(importing_slot) == slot) { + char *sep = (listLength(importing) == 0 ? 
"" : ","); + importing_str = sdscatfmt(importing_str, "%s%s:%u", + sep, n->ip, n->port); + listAddNodeTail(importing, n); + is_importing = 1; + break; + } + } + } + + /* If the node is neither migrating nor importing and it's not + * the owner, then is added to the importing list in case + * it has keys in the slot. */ + if (!is_migrating && !is_importing && n != owner) { + redisReply *r = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER COUNTKEYSINSLOT %d", slot); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (success && r->integer > 0) { + clusterManagerLogWarn("*** Found keys about slot %d " + "in node %s:%d!\n", slot, n->ip, + n->port); + char *sep = (listLength(importing) == 0 ? "" : ","); + importing_str = sdscatfmt(importing_str, "%s%S:%u", + sep, n->ip, n->port); + listAddNodeTail(importing, n); + } + if (r) freeReplyObject(r); + if (!success) goto cleanup; + } + } + if (sdslen(migrating_str) > 0) + printf("Set as migrating in: %s\n", migrating_str); + if (sdslen(importing_str) > 0) + printf("Set as importing in: %s\n", importing_str); + + /* If there is no slot owner, set as owner the node with the biggest + * number of keys, among the set of migrating / importing nodes. */ + if (owner == NULL) { + clusterManagerLogInfo(">>> No single clear owner for the slot, " + "selecting an owner by # of keys...\n"); + owner = clusterManagerGetNodeWithMostKeysInSlot(cluster_manager.nodes, + slot, NULL); + // If we still don't have an owner, we can't fix it. + if (owner == NULL) { + clusterManagerLogErr("[ERR] Can't select a slot owner. " + "Impossible to fix.\n"); + success = 0; + goto cleanup; + } + + // Use ADDSLOTS to assign the slot. 
+ clusterManagerLogWarn("*** Configuring %s:%d as the slot owner\n", + owner->ip, owner->port); + success = clusterManagerClearSlotStatus(owner, slot); + if (!success) goto cleanup; + success = clusterManagerSetSlotOwner(owner, slot, 0); + if (!success) goto cleanup; + /* Since CLUSTER ADDSLOTS succeeded, we also update the slot + * info into the node struct, in order to keep it synced */ + owner->slots[slot] = 1; + /* Make sure this information will propagate. Not strictly needed + * since there is no past owner, so all the other nodes will accept + * whatever epoch this node will claim the slot with. */ + success = clusterManagerBumpEpoch(owner); + if (!success) goto cleanup; + /* Remove the owner from the list of migrating/importing + * nodes. */ + clusterManagerRemoveNodeFromList(migrating, owner); + clusterManagerRemoveNodeFromList(importing, owner); + } + + /* If there are multiple owners of the slot, we need to fix it + * so that a single node is the owner and all the other nodes + * are in importing state. Later the fix can be handled by one + * of the base cases above. + * + * Note that this case also covers multiple nodes having the slot + * in migrating state, since migrating is a valid state only for + * slot owners. */ + if (listLength(owners) > 1) { + /* Owner cannot be NULL at this point, since if there are more owners, + * the owner has been set in the previous condition (owner == NULL). */ + assert(owner != NULL); + listRewind(owners, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n == owner) continue; + success = clusterManagerDelSlot(n, slot, 1); + if (!success) goto cleanup; + n->slots[slot] = 0; + /* Assign the slot to the owner in the node 'n' configuration.' */ + success = clusterManagerSetSlot(n, owner, slot, "node", NULL); + if (!success) goto cleanup; + success = clusterManagerSetSlot(n, owner, slot, "importing", NULL); + if (!success) goto cleanup; + /* Avoid duplicates. 
*/ + clusterManagerRemoveNodeFromList(importing, n); + listAddNodeTail(importing, n); + /* Ensure that the node is not in the migrating list. */ + clusterManagerRemoveNodeFromList(migrating, n); + } + } + int move_opts = CLUSTER_MANAGER_OPT_VERBOSE; + + /* Case 1: The slot is in migrating state in one node, and in + * importing state in 1 node. That's trivial to address. */ + if (listLength(migrating) == 1 && listLength(importing) == 1) { + clusterManagerNode *src = listFirst(migrating)->value; + clusterManagerNode *dst = listFirst(importing)->value; + clusterManagerLogInfo(">>> Case 1: Moving slot %d from " + "%s:%d to %s:%d\n", slot, + src->ip, src->port, dst->ip, dst->port); + move_opts |= CLUSTER_MANAGER_OPT_UPDATE; + success = clusterManagerMoveSlot(src, dst, slot, move_opts, NULL); + } + + /* Case 2: There are multiple nodes that claim the slot as importing, + * they probably got keys about the slot after a restart so opened + * the slot. In this case we just move all the keys to the owner + * according to the configuration. */ + else if (listLength(migrating) == 0 && listLength(importing) > 0) { + clusterManagerLogInfo(">>> Case 2: Moving all the %d slot keys to its " + "owner %s:%d\n", slot, owner->ip, owner->port); + move_opts |= CLUSTER_MANAGER_OPT_COLD; + listRewind(importing, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n == owner) continue; + success = clusterManagerMoveSlot(n, owner, slot, move_opts, NULL); + if (!success) goto cleanup; + clusterManagerLogInfo(">>> Setting %d as STABLE in " + "%s:%d\n", slot, n->ip, n->port); + success = clusterManagerClearSlotStatus(n, slot); + if (!success) goto cleanup; + } + /* Since the slot has been moved in "cold" mode, ensure that all the + * other nodes update their own configuration about the slot itself. 
*/ + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n == owner) continue; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + success = clusterManagerSetSlot(n, owner, slot, "NODE", NULL); + if (!success) goto cleanup; + } + } + + /* Case 3: The slot is in migrating state in one node but multiple + * other nodes claim to be in importing state and don't have any key in + * the slot. We search for the importing node having the same ID as + * the destination node of the migrating node. + * In that case we move the slot from the migrating node to this node and + * we close the importing states on all the other importing nodes. + * If no importing node has the same ID as the destination node of the + * migrating node, the slot's state is closed on both the migrating node + * and the importing nodes. */ + else if (listLength(migrating) == 1 && listLength(importing) > 1) { + int try_to_fix = 1; + clusterManagerNode *src = listFirst(migrating)->value; + clusterManagerNode *dst = NULL; + sds target_id = NULL; + for (int i = 0; i < src->migrating_count; i += 2) { + sds migrating_slot = src->migrating[i]; + if (atoi(migrating_slot) == slot) { + target_id = src->migrating[i + 1]; + break; + } + } + assert(target_id != NULL); + listIter li; + listNode *ln; + listRewind(importing, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + int count = clusterManagerCountKeysInSlot(n, slot); + if (count > 0) { + try_to_fix = 0; + break; + } + if (strcmp(n->name, target_id) == 0) dst = n; + } + if (!try_to_fix) goto unhandled_case; + if (dst != NULL) { + clusterManagerLogInfo(">>> Case 3: Moving slot %d from %s:%d to " + "%s:%d and closing it on all the other " + "importing nodes.\n", + slot, src->ip, src->port, + dst->ip, dst->port); + /* Move the slot to the destination node. 
*/ + success = clusterManagerMoveSlot(src, dst, slot, move_opts, NULL); + if (!success) goto cleanup; + /* Close slot on all the other importing nodes. */ + listRewind(importing, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (dst == n) continue; + success = clusterManagerClearSlotStatus(n, slot); + if (!success) goto cleanup; + } + } else { + clusterManagerLogInfo(">>> Case 3: Closing slot %d on both " + "migrating and importing nodes.\n", slot); + /* Close the slot on both the migrating node and the importing + * nodes. */ + success = clusterManagerClearSlotStatus(src, slot); + if (!success) goto cleanup; + listRewind(importing, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + success = clusterManagerClearSlotStatus(n, slot); + if (!success) goto cleanup; + } + } + } else { + int try_to_close_slot = (listLength(importing) == 0 && + listLength(migrating) == 1); + if (try_to_close_slot) { + clusterManagerNode *n = listFirst(migrating)->value; + if (!owner || owner != n) { + redisReply *r = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER GETKEYSINSLOT %d %d", slot, 10); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) { + if (success) try_to_close_slot = (r->elements == 0); + freeReplyObject(r); + } + if (!success) goto cleanup; + } + } + /* Case 4: There are no slots claiming to be in importing state, but + * there is a migrating node that actually don't have any key or is the + * slot owner. We can just close the slot, probably a reshard + * interrupted in the middle. 
*/ + if (try_to_close_slot) { + clusterManagerNode *n = listFirst(migrating)->value; + clusterManagerLogInfo(">>> Case 4: Closing slot %d on %s:%d\n", + slot, n->ip, n->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER SETSLOT %d %s", + slot, "STABLE"); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) goto cleanup; + } else { +unhandled_case: + success = 0; + clusterManagerLogErr("[ERR] Sorry, futriix-clican't fix this slot " + "yet (work in progress). Slot is set as " + "migrating in %s, as importing in %s, " + "owner is %s:%d\n", migrating_str, + importing_str, owner->ip, owner->port); + } + } +cleanup: + listRelease(owners); + listRelease(migrating); + listRelease(importing); + sdsfree(migrating_str); + sdsfree(importing_str); + return success; +} + +int clusterManagerFixMultipleSlotOwners(int slot, list *owners) { + clusterManagerLogInfo(">>> Fixing multiple owners for slot %d...\n", slot); + int success = 0; + assert(listLength(owners) > 1); + clusterManagerNode *owner = clusterManagerGetNodeWithMostKeysInSlot(owners, + slot, + NULL); + if (!owner) owner = listFirst(owners)->value; + clusterManagerLogInfo(">>> Setting slot %d owner: %s:%d\n", + slot, owner->ip, owner->port); + /* Set the slot owner. */ + if (!clusterManagerSetSlotOwner(owner, slot, 0)) return 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + /* Update configuration in all the other master nodes by assigning the slot + * itself to the new owner, and by eventually migrating keys if the node + * has keys for the slot. 
*/ + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n == owner) continue; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + int count = clusterManagerCountKeysInSlot(n, slot); + success = (count >= 0); + if (!success) break; + clusterManagerDelSlot(n, slot, 1); + if (!clusterManagerSetSlot(n, owner, slot, "node", NULL)) return 0; + if (count > 0) { + int opts = CLUSTER_MANAGER_OPT_VERBOSE | + CLUSTER_MANAGER_OPT_COLD; + success = clusterManagerMoveSlot(n, owner, slot, opts, NULL); + if (!success) break; + } + } + return success; +} + +static clusterManagerNode *clusterNodeForResharding(char *id, + clusterManagerNode *target, + int *raise_err) +{ + clusterManagerNode *node = NULL; + const char *invalid_node_msg = "*** The specified node (%s) is not known " + "or not a master, please retry.\n"; + node = clusterManagerNodeByName(id); + *raise_err = 0; + if (!node || node->flags & CLUSTER_MANAGER_FLAG_SLAVE) { + clusterManagerLogErr(invalid_node_msg, id); + *raise_err = 1; + return NULL; + } else if (target != NULL) { + if (!strcmp(node->name, target->name)) { + clusterManagerLogErr( "*** It is not possible to use " + "the target node as " + "source node.\n"); + return NULL; + } + } + return node; +} + +static list *clusterManagerComputeReshardTable(list *sources, int numslots) { + list *moved = listCreate(); + int src_count = listLength(sources), i = 0, tot_slots = 0, j; + clusterManagerNode **sorted = zmalloc(src_count * sizeof(*sorted), MALLOC_LOCAL); + listIter li; + listNode *ln; + listRewind(sources, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + tot_slots += node->slots_count; + sorted[i++] = node; + } + qsort(sorted, src_count, sizeof(clusterManagerNode *), + clusterManagerSlotCountCompareDesc); + for (i = 0; i < src_count; i++) { + clusterManagerNode *node = sorted[i]; + float n = ((float) numslots / tot_slots * node->slots_count); + if (i == 0) n = ceil(n); + else n = 
floor(n); + int max = (int) n, count = 0; + for (j = 0; j < CLUSTER_MANAGER_SLOTS; j++) { + int slot = node->slots[j]; + if (!slot) continue; + if (count >= max || (int)listLength(moved) >= numslots) break; + clusterManagerReshardTableItem *item = zmalloc(sizeof(*item), MALLOC_LOCAL); + item->source = node; + item->slot = j; + listAddNodeTail(moved, item); + count++; + } + } + zfree(sorted); + return moved; +} + +static void clusterManagerShowReshardTable(list *table) { + listIter li; + listNode *ln; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + clusterManagerNode *n = item->source; + printf(" Moving slot %d from %s\n", item->slot, (char *) n->name); + } +} + +static void clusterManagerReleaseReshardTable(list *table) { + if (table != NULL) { + listIter li; + listNode *ln; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + zfree(item); + } + listRelease(table); + } +} + +void clusterManagerLog(int level, const char* fmt, ...) 
{ + int use_colors = + (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COLOR); + if (use_colors) { + printf("\033["); + switch (level) { + case CLUSTER_MANAGER_LOG_LVL_INFO: printf(LOG_COLOR_BOLD); break; + case CLUSTER_MANAGER_LOG_LVL_WARN: printf(LOG_COLOR_YELLOW); break; + case CLUSTER_MANAGER_LOG_LVL_ERR: printf(LOG_COLOR_RED); break; + case CLUSTER_MANAGER_LOG_LVL_SUCCESS: printf(LOG_COLOR_GREEN); break; + default: printf(LOG_COLOR_RESET); break; + } + } + va_list ap; + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); + if (use_colors) printf("\033[" LOG_COLOR_RESET); +} + +static void clusterManagerNodeArrayInit(clusterManagerNodeArray *array, + int alloc_len) +{ + array->nodes = zcalloc(alloc_len * sizeof(clusterManagerNode*), MALLOC_LOCAL); + array->alloc = array->nodes; + array->len = alloc_len; + array->count = 0; +} + +/* Reset array->nodes to the original array allocation and re-count non-NULL + * nodes. */ +static void clusterManagerNodeArrayReset(clusterManagerNodeArray *array) { + if (array->nodes > array->alloc) { + array->len = array->nodes - array->alloc; + array->nodes = array->alloc; + array->count = 0; + int i = 0; + for(; i < array->len; i++) { + if (array->nodes[i] != NULL) array->count++; + } + } +} + +/* Shift array->nodes and store the shifted node into 'nodeptr'. */ +static void clusterManagerNodeArrayShift(clusterManagerNodeArray *array, + clusterManagerNode **nodeptr) +{ + assert(array->len > 0); + /* If the first node to be shifted is not NULL, decrement count. */ + if (*array->nodes != NULL) array->count--; + /* Store the first node to be shifted into 'nodeptr'. */ + *nodeptr = *array->nodes; + /* Shift the nodes array and decrement length. 
*/ + array->nodes++; + array->len--; +} + +static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, + clusterManagerNode *node) +{ + assert(array->len > 0); + assert(node != NULL); + assert(array->count < array->len); + array->nodes[array->count++] = node; +} + +static void clusterManagerPrintNotEmptyNodeError(clusterManagerNode *node, + char *err) +{ + char *msg; + if (err) msg = err; + else { + msg = "is not empty. Either the node already knows other " + "nodes (check with CLUSTER NODES) or contains some " + "key in database 0."; + } + clusterManagerLogErr("[ERR] Node %s:%d %s\n", node->ip, node->port, msg); +} + +static void clusterManagerPrintNotClusterNodeError(clusterManagerNode *node, + char *err) +{ + char *msg = (err ? err : "is not configured as a cluster node."); + clusterManagerLogErr("[ERR] Node %s:%d %s\n", node->ip, node->port, msg); +} + +/* Execute futriix-cliin Cluster Manager mode */ +static void clusterManagerMode(clusterManagerCommandProc *proc) { + int argc = config.cluster_manager_command.argc; + char **argv = config.cluster_manager_command.argv; + cluster_manager.nodes = NULL; + if (!proc(argc, argv)) goto cluster_manager_err; + freeClusterManager(); + exit(0); +cluster_manager_err: + freeClusterManager(); + exit(1); +} + +/* Cluster Manager Commands */ + +static int clusterManagerCommandCreate(int argc, char **argv) { + int i, j, success = 1; + cluster_manager.nodes = listCreate(); + for (i = 0; i < argc; i++) { + char *addr = argv[i]; + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c == NULL) { + fprintf(stderr, "Invalid address format: %s\n", addr); + return 0; + } + *c = '\0'; + char *ip = addr; + int port = atoi(++c); + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerNodeConnect(node)) { + freeClusterManagerNode(node); + return 0; + } + char *err = NULL; + if (!clusterManagerNodeIsCluster(node, &err)) { + 
clusterManagerPrintNotClusterNodeError(node, err); + if (err) zfree(err); + freeClusterManagerNode(node); + return 0; + } + err = NULL; + if (!clusterManagerNodeLoadInfo(node, 0, &err)) { + if (err) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + zfree(err); + } + freeClusterManagerNode(node); + return 0; + } + err = NULL; + if (!clusterManagerNodeIsEmpty(node, &err)) { + clusterManagerPrintNotEmptyNodeError(node, err); + if (err) zfree(err); + freeClusterManagerNode(node); + return 0; + } + listAddNodeTail(cluster_manager.nodes, node); + } + int node_len = cluster_manager.nodes->len; + int replicas = config.cluster_manager_command.replicas; + int masters_count = CLUSTER_MANAGER_MASTERS_COUNT(node_len, replicas); + if (masters_count < 3) { + clusterManagerLogErr( + "*** ERROR: Invalid configuration for cluster creation.\n" + "*** KeyDB Cluster requires at least 3 master nodes.\n" + "*** This is not possible with %d nodes and %d replicas per node.", + node_len, replicas); + clusterManagerLogErr("\n*** At least %d nodes are required.\n", + 3 * (replicas + 1)); + return 0; + } + clusterManagerLogInfo(">>> Performing hash slots allocation " + "on %d nodes...\n", node_len); + int interleaved_len = 0, ip_count = 0; + clusterManagerNode **interleaved = zcalloc(node_len*sizeof(**interleaved), MALLOC_LOCAL); + char **ips = zcalloc(node_len * sizeof(char*), MALLOC_LOCAL); + clusterManagerNodeArray *ip_nodes = zcalloc(node_len * sizeof(*ip_nodes), MALLOC_LOCAL); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + int found = 0; + for (i = 0; i < ip_count; i++) { + char *ip = ips[i]; + if (!strcmp(ip, n->ip)) { + found = 1; + break; + } + } + if (!found) { + ips[ip_count++] = n->ip; + } + clusterManagerNodeArray *node_array = &(ip_nodes[i]); + if (node_array->nodes == NULL) + clusterManagerNodeArrayInit(node_array, node_len); + clusterManagerNodeArrayAdd(node_array, n); + } 
+ while (interleaved_len < node_len) { + for (i = 0; i < ip_count; i++) { + clusterManagerNodeArray *node_array = &(ip_nodes[i]); + if (node_array->count > 0) { + clusterManagerNode *n = NULL; + clusterManagerNodeArrayShift(node_array, &n); + interleaved[interleaved_len++] = n; + } + } + } + clusterManagerNode **masters = interleaved; + interleaved += masters_count; + interleaved_len -= masters_count; + float slots_per_node = CLUSTER_MANAGER_SLOTS / (float) masters_count; + long first = 0; + float cursor = 0.0f; + for (i = 0; i < masters_count; i++) { + clusterManagerNode *master = masters[i]; + long last = lround(cursor + slots_per_node - 1); + if (last > CLUSTER_MANAGER_SLOTS || i == (masters_count - 1)) + last = CLUSTER_MANAGER_SLOTS - 1; + if (last < first) last = first; + printf("Master[%d] -> Slots %ld - %ld\n", i, first, last); + master->slots_count = 0; + for (j = first; j <= last; j++) { + master->slots[j] = 1; + master->slots_count++; + } + master->dirty = 1; + first = last + 1; + cursor += slots_per_node; + } + + /* Rotating the list sometimes helps to get better initial + * anti-affinity before the optimizer runs. 
*/ + clusterManagerNode *first_node = interleaved[0]; + for (i = 0; i < (interleaved_len - 1); i++) + interleaved[i] = interleaved[i + 1]; + interleaved[interleaved_len - 1] = first_node; + int assign_unused = 0, available_count = interleaved_len; +assign_replicas: + for (i = 0; i < masters_count; i++) { + clusterManagerNode *master = masters[i]; + int assigned_replicas = 0; + while (assigned_replicas < replicas) { + if (available_count == 0) break; + clusterManagerNode *found = NULL, *slave = NULL; + int firstNodeIdx = -1; + for (j = 0; j < interleaved_len; j++) { + clusterManagerNode *n = interleaved[j]; + if (n == NULL) continue; + if (strcmp(n->ip, master->ip)) { + found = n; + interleaved[j] = NULL; + break; + } + if (firstNodeIdx < 0) firstNodeIdx = j; + } + if (found) slave = found; + else if (firstNodeIdx >= 0) { + slave = interleaved[firstNodeIdx]; + interleaved_len -= (interleaved - (interleaved + firstNodeIdx)); + interleaved += (firstNodeIdx + 1); + } + if (slave != NULL) { + assigned_replicas++; + available_count--; + if (slave->replicate) sdsfree(slave->replicate); + slave->replicate = sdsnew(master->name); + slave->dirty = 1; + } else break; + printf("Adding replica %s:%d to %s:%d\n", slave->ip, slave->port, + master->ip, master->port); + if (assign_unused) break; + } + } + if (!assign_unused && available_count > 0) { + assign_unused = 1; + printf("Adding extra replicas...\n"); + goto assign_replicas; + } + for (i = 0; i < ip_count; i++) { + clusterManagerNodeArray *node_array = ip_nodes + i; + clusterManagerNodeArrayReset(node_array); + } + clusterManagerOptimizeAntiAffinity(ip_nodes, ip_count); + clusterManagerShowNodes(); + int ignore_force = 0; + if (confirmWithYes("Can I set the above configuration?", ignore_force)) { + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + char *err = NULL; + int flushed = clusterManagerFlushNodeConfig(node, &err); + if (!flushed && node->dirty 
&& !node->replicate) { + if (err != NULL) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + zfree(err); + } + success = 0; + goto cleanup; + } else if (err != NULL) zfree(err); + } + clusterManagerLogInfo(">>> Nodes configuration updated\n"); + clusterManagerLogInfo(">>> Assign a different config epoch to " + "each node\n"); + int config_epoch = 1; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + redisReply *reply = NULL; + reply = CLUSTER_MANAGER_COMMAND(node, + "cluster set-config-epoch %d", + config_epoch++); + if (reply != NULL) freeReplyObject(reply); + } + clusterManagerLogInfo(">>> Sending CLUSTER MEET messages to join " + "the cluster\n"); + clusterManagerNode *first = NULL; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + if (first == NULL) { + first = node; + continue; + } + redisReply *reply = NULL; + reply = CLUSTER_MANAGER_COMMAND(node, "cluster meet %s %d", + first->ip, first->port); + int is_err = 0; + if (reply != NULL) { + if ((is_err = reply->type == REDIS_REPLY_ERROR)) + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, reply->str); + freeReplyObject(reply); + } else { + is_err = 1; + fprintf(stderr, "Failed to send CLUSTER MEET command.\n"); + } + if (is_err) { + success = 0; + goto cleanup; + } + } + /* Give one second for the join to start, in order to avoid that + * waiting for cluster join will find all the nodes agree about + * the config as they are still empty with unassigned slots. 
*/ + sleep(1); + clusterManagerWaitForClusterJoin(); + /* Useful for the replicas */ + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + if (!node->dirty) continue; + char *err = NULL; + int flushed = clusterManagerFlushNodeConfig(node, &err); + if (!flushed && !node->replicate) { + if (err != NULL) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + zfree(err); + } + success = 0; + goto cleanup; + } + } + // Reset Nodes + listRewind(cluster_manager.nodes, &li); + clusterManagerNode *first_node = NULL; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + if (!first_node) first_node = node; + else freeClusterManagerNode(node); + } + listEmpty(cluster_manager.nodes); + if (!clusterManagerLoadInfoFromNode(first_node, 0)) { + success = 0; + goto cleanup; + } + clusterManagerCheckCluster(0); + } +cleanup: + /* Free everything */ + zfree(masters); + zfree(ips); + for (i = 0; i < node_len; i++) { + clusterManagerNodeArray *node_array = ip_nodes + i; + CLUSTER_MANAGER_NODE_ARRAY_FREE(node_array); + } + zfree(ip_nodes); + return success; +} + +static int clusterManagerCommandAddNode(int argc, char **argv) { + int success = 1; + redisReply *reply = NULL; + char *ref_ip = NULL, *ip = NULL; + int ref_port = 0, port = 0; + if (!getClusterHostFromCmdArgs(argc - 1, argv + 1, &ref_ip, &ref_port)) + goto invalid_args; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) + goto invalid_args; + clusterManagerLogInfo(">>> Adding node %s:%d to cluster %s:%d\n", ip, port, + ref_ip, ref_port); + // Check the existing cluster + clusterManagerNode *refnode = clusterManagerNewNode(ref_ip, ref_port); + if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + if (!clusterManagerCheckCluster(0)) return 0; + + /* If --cluster-master-id was specified, try to resolve it now so that we + * abort before starting with the node configuration. 
*/ + clusterManagerNode *master_node = NULL; + if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_SLAVE) { + char *master_id = config.cluster_manager_command.master_id; + if (master_id != NULL) { + master_node = clusterManagerNodeByName(master_id); + if (master_node == NULL) { + clusterManagerLogErr("[ERR] No such master ID %s\n", master_id); + return 0; + } + } else { + master_node = clusterManagerNodeWithLeastReplicas(); + assert(master_node != NULL); + printf("Automatically selected master %s:%d\n", master_node->ip, + master_node->port); + } + } + + // Add the new node + clusterManagerNode *new_node = clusterManagerNewNode(ip, port); + int added = 0; + if (!clusterManagerNodeConnect(new_node)) { + clusterManagerLogErr("[ERR] Sorry, can't connect to node %s:%d\n", + ip, port); + success = 0; + goto cleanup; + } + char *err = NULL; + if (!(success = clusterManagerNodeIsCluster(new_node, &err))) { + clusterManagerPrintNotClusterNodeError(new_node, err); + if (err) zfree(err); + goto cleanup; + } + if (!clusterManagerNodeLoadInfo(new_node, 0, &err)) { + if (err) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(new_node, err); + zfree(err); + } + success = 0; + goto cleanup; + } + if (!(success = clusterManagerNodeIsEmpty(new_node, &err))) { + clusterManagerPrintNotEmptyNodeError(new_node, err); + if (err) zfree(err); + goto cleanup; + } + clusterManagerNode *first = listFirst(cluster_manager.nodes)->value; + listAddNodeTail(cluster_manager.nodes, new_node); + added = 1; + + // Send CLUSTER MEET command to the new node + clusterManagerLogInfo(">>> Send CLUSTER MEET to node %s:%d to make it " + "join the cluster.\n", ip, port); + reply = CLUSTER_MANAGER_COMMAND(new_node, "CLUSTER MEET %s %d", + first->ip, first->port); + if (!(success = clusterManagerCheckRedisReply(new_node, reply, NULL))) + goto cleanup; + + /* Additional configuration is needed if the node is added as a slave. 
*/ + if (master_node) { + sleep(1); + clusterManagerWaitForClusterJoin(); + clusterManagerLogInfo(">>> Configure node as replica of %s:%d.\n", + master_node->ip, master_node->port); + freeReplyObject(reply); + reply = CLUSTER_MANAGER_COMMAND(new_node, "CLUSTER REPLICATE %s", + master_node->name); + if (!(success = clusterManagerCheckRedisReply(new_node, reply, NULL))) + goto cleanup; + } + clusterManagerLogOk("[OK] New node added correctly.\n"); +cleanup: + if (!added && new_node) freeClusterManagerNode(new_node); + if (reply) freeReplyObject(reply); + return success; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + +static int clusterManagerCommandDeleteNode(int argc, char **argv) { + UNUSED(argc); + int success = 1; + int port = 0; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args; + char *node_id = argv[1]; + clusterManagerLogInfo(">>> Removing node %s from cluster %s:%d\n", + node_id, ip, port); + clusterManagerNode *ref_node = clusterManagerNewNode(ip, port); + clusterManagerNode *node = NULL; + + // Load cluster information + if (!clusterManagerLoadInfoFromNode(ref_node, 0)) return 0; + + // Check if the node exists and is not empty + node = clusterManagerNodeByName(node_id); + if (node == NULL) { + clusterManagerLogErr("[ERR] No such node ID %s\n", node_id); + return 0; + } + if (node->slots_count != 0) { + clusterManagerLogErr("[ERR] Node %s:%d is not empty! 
Reshard data " + "away and try again.\n", node->ip, node->port); + return 0; + } + + // Send CLUSTER FORGET to all the nodes but the node to remove + clusterManagerLogInfo(">>> Sending CLUSTER FORGET messages to the " + "cluster...\n"); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n == node) continue; + if (n->replicate && !strcasecmp(n->replicate, node_id)) { + // Reconfigure the slave to replicate with some other node + clusterManagerNode *master = clusterManagerNodeWithLeastReplicas(); + assert(master != NULL); + clusterManagerLogInfo(">>> %s:%d as replica of %s:%d\n", + n->ip, n->port, master->ip, master->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER REPLICATE %s", + master->name); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) return 0; + } + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER FORGET %s", + node_id); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) return 0; + } + + /* Finally send CLUSTER RESET to the node. 
*/ + clusterManagerLogInfo(">>> Sending CLUSTER RESET SOFT to the " + "deleted node.\n"); + redisReply *r = redisCommand(node->context, "CLUSTER RESET %s", "SOFT"); + success = clusterManagerCheckRedisReply(node, r, NULL); + if (r) freeReplyObject(r); + return success; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + +static int clusterManagerCommandInfo(int argc, char **argv) { + int port = 0; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + clusterManagerShowClusterInfo(); + return 1; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + +static int clusterManagerCommandCheck(int argc, char **argv) { + int port = 0; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + clusterManagerShowClusterInfo(); + return clusterManagerCheckCluster(0); +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + +static int clusterManagerCommandFix(int argc, char **argv) { + config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_FIX; + return clusterManagerCommandCheck(argc, argv); +} + +static int clusterManagerCommandReshard(int argc, char **argv) { + int port = 0; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + clusterManagerCheckCluster(0); + if (cluster_manager.errors && listLength(cluster_manager.errors) > 0 && !config.force_mode) { + fflush(stdout); + fprintf(stderr, + "*** Please fix your cluster problems before resharding\n"); + return 0; + } + int slots = 
config.cluster_manager_command.slots; + if (!slots) { + while (slots <= 0 || slots > CLUSTER_MANAGER_SLOTS) { + printf("How many slots do you want to move (from 1 to %d)? ", + CLUSTER_MANAGER_SLOTS); + fflush(stdout); + char buf[6]; + int nread = read(fileno(stdin),buf,6); + if (nread <= 0) continue; + int last_idx = nread - 1; + if (buf[last_idx] != '\n') { + int ch; + while ((ch = getchar()) != '\n' && ch != EOF) {} + } + buf[last_idx] = '\0'; + slots = atoi(buf); + } + } + char buf[255]; + char *to = config.cluster_manager_command.to, + *from = config.cluster_manager_command.from; + while (to == NULL) { + printf("What is the receiving node ID? "); + fflush(stdout); + int nread = read(fileno(stdin),buf,255); + if (nread <= 0) continue; + int last_idx = nread - 1; + if (buf[last_idx] != '\n') { + int ch; + while ((ch = getchar()) != '\n' && ch != EOF) {} + } + buf[last_idx] = '\0'; + if (strlen(buf) > 0) to = buf; + } + int raise_err = 0; + clusterManagerNode *target = clusterNodeForResharding(to, NULL, &raise_err); + if (target == NULL) return 0; + list *sources = listCreate(); + list *table = NULL; + int all = 0, result = 1; + if (from == NULL) { + printf("Please enter all the source node IDs.\n"); + printf(" Type 'all' to use all the nodes as source nodes for " + "the hash slots.\n"); + printf(" Type 'done' once you entered all the source nodes IDs.\n"); + while (1) { + printf("Source node #%lu: ", listLength(sources) + 1); + fflush(stdout); + int nread = read(fileno(stdin),buf,255); + if (nread <= 0) continue; + int last_idx = nread - 1; + if (buf[last_idx] != '\n') { + int ch; + while ((ch = getchar()) != '\n' && ch != EOF) {} + } + buf[last_idx] = '\0'; + if (!strcmp(buf, "done")) break; + else if (!strcmp(buf, "all")) { + all = 1; + break; + } else { + clusterManagerNode *src = + clusterNodeForResharding(buf, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } + } + } + } else { + 
char *p; + while((p = strchr(from, ',')) != NULL) { + *p = '\0'; + if (!strcmp(from, "all")) { + all = 1; + break; + } else { + clusterManagerNode *src = + clusterNodeForResharding(from, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } + } + from = p + 1; + } + /* Check if there's still another source to process. */ + if (!all && strlen(from) > 0) { + if (!strcmp(from, "all")) all = 1; + if (!all) { + clusterManagerNode *src = + clusterNodeForResharding(from, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } + } + } + } + listIter li; + listNode *ln; + if (all) { + listEmpty(sources); + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + if (!sdscmp(n->name, target->name)) continue; + listAddNodeTail(sources, n); + } + } + if (listLength(sources) == 0) { + fprintf(stderr, "*** No source nodes given, operation aborted.\n"); + result = 0; + goto cleanup; + } + printf("\nReady to move %d slots.\n", slots); + printf(" Source nodes:\n"); + listRewind(sources, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *src = ln->value; + sds info = clusterManagerNodeInfo(src, 4); + printf("%s\n", info); + sdsfree(info); + } + printf(" Destination node:\n"); + sds info = clusterManagerNodeInfo(target, 4); + printf("%s\n", info); + sdsfree(info); + table = clusterManagerComputeReshardTable(sources, slots); + printf(" Resharding plan:\n"); + clusterManagerShowReshardTable(table); + if (!(config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_YES)) + { + printf("Do you want to proceed with the proposed " + "reshard plan (yes/no)? 
"); + fflush(stdout); + char buf[4]; + int nread = read(fileno(stdin),buf,4); + buf[3] = '\0'; + if (nread <= 0 || strcmp("yes", buf) != 0) { + result = 0; + goto cleanup; + } + } + int opts = CLUSTER_MANAGER_OPT_VERBOSE; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + char *err = NULL; + result = clusterManagerMoveSlot(item->source, target, item->slot, + opts, &err); + if (!result) { + if (err != NULL) { + //clusterManagerLogErr("\n%s\n", err); + zfree(err); + } + goto cleanup; + } + } +cleanup: + listRelease(sources); + clusterManagerReleaseReshardTable(table); + return result; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + +static int clusterManagerCommandRebalance(int argc, char **argv) { + int port = 0; + char *ip = NULL; + clusterManagerNode **weightedNodes = NULL; + list *involved = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + int result = 1, i; + if (config.cluster_manager_command.weight != NULL) { + for (i = 0; i < config.cluster_manager_command.weight_argc; i++) { + char *name = config.cluster_manager_command.weight[i]; + char *p = strchr(name, '='); + if (p == NULL) { + result = 0; + goto cleanup; + } + *p = '\0'; + float w = atof(++p); + clusterManagerNode *n = clusterManagerNodeByAbbreviatedName(name); + if (n == NULL) { + clusterManagerLogErr("*** No such master node %s\n", name); + result = 0; + goto cleanup; + } + n->weight = w; + } + } + float total_weight = 0; + int nodes_involved = 0; + int use_empty = config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; + involved = listCreate(); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + /* Compute the total cluster weight. 
*/ + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + if (!use_empty && n->slots_count == 0) { + n->weight = 0; + continue; + } + total_weight += n->weight; + nodes_involved++; + listAddNodeTail(involved, n); + } + weightedNodes = zmalloc(nodes_involved * sizeof(clusterManagerNode *), MALLOC_LOCAL); + if (weightedNodes == NULL) goto cleanup; + /* Check cluster, only proceed if it looks sane. */ + clusterManagerCheckCluster(1); + if (cluster_manager.errors && listLength(cluster_manager.errors) > 0 && !config.force_mode) { + clusterManagerLogErr("*** Please fix your cluster problems " + "before rebalancing\n"); + result = 0; + goto cleanup; + } + /* Calculate the slots balance for each node. It's the number of + * slots the node should lose (if positive) or gain (if negative) + * in order to be balanced. */ + int threshold_reached = 0, total_balance = 0; + float threshold = config.cluster_manager_command.threshold; + i = 0; + listRewind(involved, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + weightedNodes[i++] = n; + int expected = (int) (((float)CLUSTER_MANAGER_SLOTS / total_weight) * + n->weight); + n->balance = n->slots_count - expected; + total_balance += n->balance; + /* Compute the percentage of difference between the + * expected number of slots and the real one, to see + * if it's over the threshold specified by the user. */ + int over_threshold = 0; + if (threshold > 0) { + if (n->slots_count > 0) { + float err_perc = fabs((100-(100.0*expected/n->slots_count))); + if (err_perc > threshold) over_threshold = 1; + } else if (expected > 1) { + over_threshold = 1; + } + } + if (over_threshold) threshold_reached = 1; + } + if (!threshold_reached) { + clusterManagerLogWarn("*** No rebalancing needed! 
" + "All nodes are within the %.2f%% threshold.\n", + config.cluster_manager_command.threshold); + goto cleanup; + } + /* Because of rounding, it is possible that the balance of all nodes + * summed does not give 0. Make sure that nodes that have to provide + * slots are always matched by nodes receiving slots. */ + while (total_balance > 0) { + listRewind(involved, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->balance <= 0 && total_balance > 0) { + n->balance--; + total_balance--; + } + } + } + /* Sort nodes by their slots balance. */ + qsort(weightedNodes, nodes_involved, sizeof(clusterManagerNode *), + clusterManagerCompareNodeBalance); + clusterManagerLogInfo(">>> Rebalancing across %d nodes. " + "Total weight = %.2f\n", + nodes_involved, total_weight); + if (config.verbose) { + for (i = 0; i < nodes_involved; i++) { + clusterManagerNode *n = weightedNodes[i]; + printf("%s:%d balance is %d slots\n", n->ip, n->port, n->balance); + } + } + /* Now we have at the start of the 'sn' array nodes that should get + * slots, at the end nodes that must give slots. + * We take two indexes, one at the start, and one at the end, + * incrementing or decrementing the indexes accordingly til we + * find nodes that need to get/provide slots. */ + int dst_idx = 0; + int src_idx = nodes_involved - 1; + int simulate = config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_SIMULATE; + while (dst_idx < src_idx) { + clusterManagerNode *dst = weightedNodes[dst_idx]; + clusterManagerNode *src = weightedNodes[src_idx]; + int db = abs(dst->balance); + int sb = abs(src->balance); + int numslots = (db < sb ? db : sb); + if (numslots > 0) { + printf("Moving %d slots from %s:%d to %s:%d\n", numslots, + src->ip, + src->port, + dst->ip, + dst->port); + /* Actually move the slots. 
*/ + list *lsrc = listCreate(), *table = NULL; + listAddNodeTail(lsrc, src); + table = clusterManagerComputeReshardTable(lsrc, numslots); + listRelease(lsrc); + int table_len = 0; + if (!table || (table_len = (int) listLength(table)) != numslots) { + clusterManagerLogErr("*** Assertion failed: Reshard table " + "!= number of slots"); + result = 0; + goto end_move; + } + if (simulate) { + for (i = 0; i < table_len; i++) printf("#"); + } else { + int opts = CLUSTER_MANAGER_OPT_QUIET | + CLUSTER_MANAGER_OPT_UPDATE; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + result = clusterManagerMoveSlot(item->source, + dst, + item->slot, + opts, NULL); + if (!result) goto end_move; + printf("#"); + fflush(stdout); + } + + } + printf("\n"); +end_move: + clusterManagerReleaseReshardTable(table); + if (!result) goto cleanup; + } + /* Update nodes balance. */ + dst->balance += numslots; + src->balance -= numslots; + if (dst->balance == 0) dst_idx++; + if (src->balance == 0) src_idx --; + } +cleanup: + if (involved != NULL) listRelease(involved); + if (weightedNodes != NULL) zfree(weightedNodes); + return result; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + +static int clusterManagerCommandSetTimeout(int argc, char **argv) { + UNUSED(argc); + int port = 0; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args; + int timeout = atoi(argv[1]); + if (timeout < 100) { + fprintf(stderr, "Setting a node timeout of less than 100 " + "milliseconds is a bad idea.\n"); + return 0; + } + // Load cluster information + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + int ok_count = 0, err_count = 0; + + clusterManagerLogInfo(">>> Reconfiguring node timeout in every " + "cluster node...\n"); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = 
listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + char *err = NULL; + redisReply *reply = CLUSTER_MANAGER_COMMAND(n, "CONFIG %s %s %d", + "SET", + "cluster-node-timeout", + timeout); + if (reply == NULL) goto reply_err; + int ok = clusterManagerCheckRedisReply(n, reply, &err); + freeReplyObject(reply); + if (!ok) goto reply_err; + reply = CLUSTER_MANAGER_COMMAND(n, "CONFIG %s", "REWRITE"); + if (reply == NULL) goto reply_err; + ok = clusterManagerCheckRedisReply(n, reply, &err); + freeReplyObject(reply); + if (!ok) goto reply_err; + clusterManagerLogWarn("*** New timeout set for %s:%d\n", n->ip, + n->port); + ok_count++; + continue; +reply_err:; + int need_free = 0; + if (err == NULL) err = ""; + else need_free = 1; + clusterManagerLogErr("ERR setting node-timeot for %s:%d: %s\n", n->ip, + n->port, err); + if (need_free) zfree(err); + err_count++; + } + clusterManagerLogInfo(">>> New node timeout set. %d OK, %d ERR.\n", + ok_count, err_count); + return 1; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + +static int clusterManagerCommandImport(int argc, char **argv) { + int success = 1; + int port = 0, src_port = 0; + char *ip = NULL, *src_ip = NULL; + char *invalid_args_msg = NULL; + sds cmdfmt = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) { + invalid_args_msg = CLUSTER_MANAGER_INVALID_HOST_ARG; + goto invalid_args; + } + if (config.cluster_manager_command.from == NULL) { + invalid_args_msg = "[ERR] Option '--cluster-from' is required for " + "subcommand 'import'.\n"; + goto invalid_args; + } + char *src_host[] = {config.cluster_manager_command.from}; + if (!getClusterHostFromCmdArgs(1, src_host, &src_ip, &src_port)) { + invalid_args_msg = "[ERR] Invalid --cluster-from host. You need to " + "pass a valid address (ie. 
120.0.0.1:7000).\n"; + goto invalid_args; + } + clusterManagerLogInfo(">>> Importing data from %s:%d to cluster %s:%d\n", + src_ip, src_port, ip, port); + + clusterManagerNode *refnode = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + if (!clusterManagerCheckCluster(0)) return 0; + char *reply_err = NULL; + redisReply *src_reply = NULL; + // Connect to the source node. + struct timeval tv; + tv.tv_sec = config.cluster_manager_command.timeout / 1000; + tv.tv_usec = (config.cluster_manager_command.timeout % 1000) * 1000; + redisContext *src_ctx = redisConnectWithTimeout(src_ip, src_port, tv); + if (src_ctx->err) { + success = 0; + fprintf(stderr,"Could not connect to KeyDB at %s:%d: %s.\n", src_ip, + src_port, src_ctx->errstr); + goto cleanup; + } + // Auth for the source node. + char *from_user = config.cluster_manager_command.from_user; + char *from_pass = config.cluster_manager_command.from_pass; + if (cliAuth(src_ctx, from_user, from_pass) == REDIS_ERR) { + success = 0; + goto cleanup; + } + + src_reply = reconnectingRedisCommand(src_ctx, "INFO"); + if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) { + if (src_reply && src_reply->str) reply_err = src_reply->str; + success = 0; + goto cleanup; + } + if (getLongInfoField(src_reply->str, "cluster_enabled")) { + clusterManagerLogErr("[ERR] The source node should not be a " + "cluster node.\n"); + success = 0; + goto cleanup; + } + freeReplyObject(src_reply); + src_reply = reconnectingRedisCommand(src_ctx, "DBSIZE"); + if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) { + if (src_reply && src_reply->str) reply_err = src_reply->str; + success = 0; + goto cleanup; + } + int size = src_reply->integer, i; + clusterManagerLogWarn("*** Importing %d keys from DB 0\n", size); + + // Build a slot -> node map + clusterManagerNode *slots_map[CLUSTER_MANAGER_SLOTS]; + memset(slots_map, 0, sizeof(slots_map)); + listIter li; + listNode *ln; + for (i = 0; i < 
CLUSTER_MANAGER_SLOTS; i++) { + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->slots_count == 0) continue; + if (n->slots[i]) { + slots_map[i] = n; + break; + } + } + } + cmdfmt = sdsnew("MIGRATE %s %d %s %d %d"); + if (config.auth) { + if (config.user) { + cmdfmt = sdscatfmt(cmdfmt," AUTH2 %s %s", config.user, config.auth); + } else { + cmdfmt = sdscatfmt(cmdfmt," AUTH %s", config.auth); + } + } + + if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COPY) + cmdfmt = sdscat(cmdfmt," COPY"); + if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_REPLACE) + cmdfmt = sdscat(cmdfmt," REPLACE"); + + /* Use SCAN to iterate over the keys, migrating to the + * right node as needed. */ + int cursor = -999, timeout = config.cluster_manager_command.timeout; + while (cursor != 0) { + if (cursor < 0) cursor = 0; + freeReplyObject(src_reply); + src_reply = reconnectingRedisCommand(src_ctx, "SCAN %d COUNT %d", + cursor, 1000); + if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) { + if (src_reply && src_reply->str) reply_err = src_reply->str; + success = 0; + goto cleanup; + } + assert(src_reply->type == REDIS_REPLY_ARRAY); + assert(src_reply->elements >= 2); + assert(src_reply->element[1]->type == REDIS_REPLY_ARRAY); + if (src_reply->element[0]->type == REDIS_REPLY_STRING) + cursor = atoi(src_reply->element[0]->str); + else if (src_reply->element[0]->type == REDIS_REPLY_INTEGER) + cursor = src_reply->element[0]->integer; + int keycount = src_reply->element[1]->elements; + for (i = 0; i < keycount; i++) { + redisReply *kr = src_reply->element[1]->element[i]; + assert(kr->type == REDIS_REPLY_STRING); + char *key = kr->str; + uint16_t slot = clusterManagerKeyHashSlot(key, kr->len); + clusterManagerNode *target = slots_map[slot]; + printf("Migrating %s to %s:%d: ", key, target->ip, target->port); + redisReply *r = 
reconnectingRedisCommand(src_ctx, cmdfmt, + target->ip, target->port, + key, 0, timeout); + if (!r || r->type == REDIS_REPLY_ERROR) { + if (r && r->str) { + clusterManagerLogErr("Source %s:%d replied with " + "error:\n%s\n", src_ip, src_port, + r->str); + } + success = 0; + } + freeReplyObject(r); + if (!success) goto cleanup; + clusterManagerLogOk("OK\n"); + } + } +cleanup: + if (reply_err) + clusterManagerLogErr("Source %s:%d replied with error:\n%s\n", + src_ip, src_port, reply_err); + if (src_ctx) redisFree(src_ctx); + if (src_reply) freeReplyObject(src_reply); + if (cmdfmt) sdsfree(cmdfmt); + return success; +invalid_args: + fprintf(stderr, "%s", invalid_args_msg); + return 0; +} + +static int clusterManagerCommandCall(int argc, char **argv) { + int port = 0, i; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args; + clusterManagerNode *refnode = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + argc--; + argv++; + size_t *argvlen = zmalloc(argc*sizeof(size_t), MALLOC_LOCAL); + clusterManagerLogInfo(">>> Calling"); + for (i = 0; i < argc; i++) { + argvlen[i] = strlen(argv[i]); + printf(" %s", argv[i]); + } + printf("\n"); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if ((config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_MASTERS_ONLY) + && (n->replicate != NULL)) continue; // continue if node is slave + if ((config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_SLAVES_ONLY) + && (n->replicate == NULL)) continue; // continue if node is master + if (!n->context && !clusterManagerNodeConnect(n)) continue; + redisReply *reply = NULL; + redisAppendCommandArgv(n->context, argc, (const char **) argv, argvlen); + int status = redisGetReply(n->context, (void **)(&reply)); + if (status != REDIS_OK || reply == NULL ) + printf("%s:%d: Failed!\n", n->ip, 
n->port); + else { + sds formatted_reply = cliFormatReplyRaw(reply); + printf("%s:%d: %s\n", n->ip, n->port, (char *) formatted_reply); + sdsfree(formatted_reply); + } + if (reply != NULL) freeReplyObject(reply); + } + zfree(argvlen); + return 1; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + +static int clusterManagerCommandBackup(int argc, char **argv) { + UNUSED(argc); + int success = 1, port = 0; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args; + clusterManagerNode *refnode = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + int no_issues = clusterManagerCheckCluster(0); + int cluster_errors_count = (no_issues ? 0 : + listLength(cluster_manager.errors)); + config.cluster_manager_command.backup_dir = argv[1]; + /* TODO: check if backup_dir is a valid directory. */ + sds json = sdsnew("[\n"); + int first_node = 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + if (!first_node) first_node = 1; + else json = sdscat(json, ",\n"); + clusterManagerNode *node = ln->value; + sds node_json = clusterManagerNodeGetJSON(node, cluster_errors_count); + json = sdscat(json, node_json); + sdsfree(node_json); + if (node->replicate) + continue; + clusterManagerLogInfo(">>> Node %s:%d -> Saving RDB...\n", + node->ip, node->port); + fflush(stdout); + getRDB(node); + } + json = sdscat(json, "\n]"); + sds jsonpath = sdsnew(config.cluster_manager_command.backup_dir); + if (jsonpath[sdslen(jsonpath) - 1] != '/') + jsonpath = sdscat(jsonpath, "/"); + jsonpath = sdscat(jsonpath, "nodes.json"); + fflush(stdout); + clusterManagerLogInfo("Saving cluster configuration to: %s\n", jsonpath); + FILE *out = fopen(jsonpath, "w+"); + if (!out) { + clusterManagerLogErr("Could not save nodes to: %s\n", jsonpath); + success = 0; + goto cleanup; + } + fputs(json, out); + fclose(out); +cleanup: + 
sdsfree(json); + sdsfree(jsonpath); + if (success) { + if (!no_issues) { + clusterManagerLogWarn("*** Cluster seems to have some problems, " + "please be aware of it if you're going " + "to restore this backup.\n"); + } + clusterManagerLogOk("[OK] Backup created into: %s\n", + config.cluster_manager_command.backup_dir); + } else clusterManagerLogOk("[ERR] Failed to back cluster!\n"); + return success; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + +static int clusterManagerCommandHelp(int argc, char **argv) { + UNUSED(argc); + UNUSED(argv); + int commands_count = sizeof(clusterManagerCommands) / + sizeof(clusterManagerCommandDef); + int i = 0, j; + fprintf(stderr, "Cluster Manager Commands:\n"); + int padding = 15; + for (; i < commands_count; i++) { + clusterManagerCommandDef *def = &(clusterManagerCommands[i]); + int namelen = strlen(def->name), padlen = padding - namelen; + fprintf(stderr, " %s", def->name); + for (j = 0; j < padlen; j++) fprintf(stderr, " "); + fprintf(stderr, "%s\n", (def->args ? 
def->args : "")); + if (def->options != NULL) { + int optslen = strlen(def->options); + char *p = def->options, *eos = p + optslen; + char *comma = NULL; + while ((comma = strchr(p, ',')) != NULL) { + int deflen = (int)(comma - p); + char buf[255]; + memcpy(buf, p, deflen); + buf[deflen] = '\0'; + for (j = 0; j < padding; j++) fprintf(stderr, " "); + fprintf(stderr, " --cluster-%s\n", buf); + p = comma + 1; + if (p >= eos) break; + } + if (p < eos) { + for (j = 0; j < padding; j++) fprintf(stderr, " "); + fprintf(stderr, " --cluster-%s\n", p); + } + } + } + fprintf(stderr, "\nFor check, fix, reshard, del-node, set-timeout you " + "can specify the host and port of any working node in " + "the cluster.\n"); + + int options_count = sizeof(clusterManagerOptions) / + sizeof(clusterManagerOptionDef); + i = 0; + fprintf(stderr, "\nCluster Manager Options:\n"); + for (; i < options_count; i++) { + clusterManagerOptionDef *def = &(clusterManagerOptions[i]); + int namelen = strlen(def->name), padlen = padding - namelen; + fprintf(stderr, " %s", def->name); + for (j = 0; j < padlen; j++) fprintf(stderr, " "); + fprintf(stderr, "%s\n", def->desc); + } + + fprintf(stderr, "\n"); + return 0; +} + +/*------------------------------------------------------------------------------ + * Latency and latency history modes + *--------------------------------------------------------------------------- */ + +static void latencyModePrint(long long min, long long max, double avg, long long count) { + if (config.output == OUTPUT_STANDARD) { + printf("min: %lld, max: %lld, avg: %.2f (%lld samples)", + min, max, avg, count); + fflush(stdout); + } else if (config.output == OUTPUT_CSV) { + printf("%lld,%lld,%.2f,%lld\n", min, max, avg, count); + } else if (config.output == OUTPUT_RAW) { + printf("%lld %lld %.2f %lld\n", min, max, avg, count); + } +} + +#define LATENCY_SAMPLE_RATE 10 /* milliseconds. */ +#define LATENCY_HISTORY_DEFAULT_INTERVAL 15000 /* milliseconds. 
*/ +static void latencyMode(void) { + redisReply *reply; + long long start, latency, min = 0, max = 0, tot = 0, count = 0; + long long history_interval = + config.interval ? config.interval/1000 : + LATENCY_HISTORY_DEFAULT_INTERVAL; + double avg; + long long history_start = mstime(); + + /* Set a default for the interval in case of --latency option + * with --raw, --csv or when it is redirected to non tty. */ + if (config.interval == 0) { + config.interval = 1000; + } else { + config.interval /= 1000; /* We need to convert to milliseconds. */ + } + + if (!context) exit(1); + while(1) { + start = mstime(); + reply = reconnectingRedisCommand(context,"PING"); + if (reply == NULL) { + fprintf(stderr,"\nI/O error\n"); + exit(1); + } + latency = mstime()-start; + freeReplyObject(reply); + count++; + if (count == 1) { + min = max = tot = latency; + avg = (double) latency; + } else { + if (latency < min) min = latency; + if (latency > max) max = latency; + tot += latency; + avg = (double) tot/count; + } + + if (config.output == OUTPUT_STANDARD) { + printf("\x1b[0G\x1b[2K"); /* Clear the line. */ + latencyModePrint(min,max,avg,count); + } else { + if (config.latency_history) { + latencyModePrint(min,max,avg,count); + } else if (mstime()-history_start > config.interval) { + latencyModePrint(min,max,avg,count); + exit(0); + } + } + + if (config.latency_history && mstime()-history_start > history_interval) + { + printf(" -- %.2f seconds range\n", (float)(mstime()-history_start)/1000); + history_start = mstime(); + min = max = tot = count = 0; + } + usleep(LATENCY_SAMPLE_RATE * 1000); + } +} + +/*------------------------------------------------------------------------------ + * Latency distribution mode -- requires 256 colors xterm + *--------------------------------------------------------------------------- */ + +#define LATENCY_DIST_DEFAULT_INTERVAL 1000 /* milliseconds. */ + +/* Structure to store samples distribution. 
*/ +struct distsamples { + long long max; /* Max latency to fit into this interval (usec). */ + long long count; /* Number of samples in this interval. */ + int character; /* Associated character in visualization. */ +}; + +/* Helper function for latencyDistMode(). Performs the spectrum visualization + * of the collected samples targeting an xterm 256 terminal. + * + * Takes an array of distsamples structures, ordered from smaller to bigger + * 'max' value. Last sample max must be 0, to mean that it olds all the + * samples greater than the previous one, and is also the stop sentinel. + * + * "tot' is the total number of samples in the different buckets, so it + * is the SUM(samples[i].count) for i to 0 up to the max sample. + * + * As a side effect the function sets all the buckets count to 0. */ +void showLatencyDistSamples(struct distsamples *samples, long long tot) { + int j; + + /* We convert samples into an index inside the palette + * proportional to the percentage a given bucket represents. + * This way intensity of the different parts of the spectrum + * don't change relative to the number of requests, which avoids to + * pollute the visualization with non-latency related info. */ + printf("\033[38;5;0m"); /* Set foreground color to black. */ + for (j = 0; ; j++) { + int coloridx = + ceil((double) samples[j].count / tot * (spectrum_palette_size-1)); + int color = spectrum_palette[coloridx]; + printf("\033[48;5;%dm%c", (int)color, samples[j].character); + samples[j].count = 0; + if (samples[j].max == 0) break; /* Last sample. */ + } + printf("\033[0m\n"); + fflush(stdout); +} + +/* Show the legend: different buckets values and colors meaning, so + * that the spectrum is more easily readable. */ +void showLatencyDistLegend(void) { + int j; + + printf("---------------------------------------------\n"); + printf(". 
- * # .01 .125 .25 .5 milliseconds\n"); + printf("1,2,3,...,9 from 1 to 9 milliseconds\n"); + printf("A,B,C,D,E 10,20,30,40,50 milliseconds\n"); + printf("F,G,H,I,J .1,.2,.3,.4,.5 seconds\n"); + printf("K,L,M,N,O,P,Q,? 1,2,4,8,16,30,60,>60 seconds\n"); + printf("From 0 to 100%%: "); + for (j = 0; j < spectrum_palette_size; j++) { + printf("\033[48;5;%dm ", spectrum_palette[j]); + } + printf("\033[0m\n"); + printf("---------------------------------------------\n"); +} + +static void latencyDistMode(void) { + redisReply *reply; + long long start, latency, count = 0; + long long history_interval = + config.interval ? config.interval/1000 : + LATENCY_DIST_DEFAULT_INTERVAL; + long long history_start = ustime(); + int j, outputs = 0; + + struct distsamples samples[] = { + /* We use a mostly logarithmic scale, with certain linear intervals + * which are more interesting than others, like 1-10 milliseconds + * range. */ + {10,0,'.'}, /* 0.01 ms */ + {125,0,'-'}, /* 0.125 ms */ + {250,0,'*'}, /* 0.25 ms */ + {500,0,'#'}, /* 0.5 ms */ + {1000,0,'1'}, /* 1 ms */ + {2000,0,'2'}, /* 2 ms */ + {3000,0,'3'}, /* 3 ms */ + {4000,0,'4'}, /* 4 ms */ + {5000,0,'5'}, /* 5 ms */ + {6000,0,'6'}, /* 6 ms */ + {7000,0,'7'}, /* 7 ms */ + {8000,0,'8'}, /* 8 ms */ + {9000,0,'9'}, /* 9 ms */ + {10000,0,'A'}, /* 10 ms */ + {20000,0,'B'}, /* 20 ms */ + {30000,0,'C'}, /* 30 ms */ + {40000,0,'D'}, /* 40 ms */ + {50000,0,'E'}, /* 50 ms */ + {100000,0,'F'}, /* 0.1 s */ + {200000,0,'G'}, /* 0.2 s */ + {300000,0,'H'}, /* 0.3 s */ + {400000,0,'I'}, /* 0.4 s */ + {500000,0,'J'}, /* 0.5 s */ + {1000000,0,'K'}, /* 1 s */ + {2000000,0,'L'}, /* 2 s */ + {4000000,0,'M'}, /* 4 s */ + {8000000,0,'N'}, /* 8 s */ + {16000000,0,'O'}, /* 16 s */ + {30000000,0,'P'}, /* 30 s */ + {60000000,0,'Q'}, /* 1 minute */ + {0,0,'?'}, /* > 1 minute */ + }; + + if (!context) exit(1); + while(1) { + start = ustime(); + reply = reconnectingRedisCommand(context,"PING"); + if (reply == NULL) { + fprintf(stderr,"\nI/O error\n"); + 
exit(1); + } + latency = ustime()-start; + freeReplyObject(reply); + count++; + + /* Populate the relevant bucket. */ + for (j = 0; ; j++) { + if (samples[j].max == 0 || latency <= samples[j].max) { + samples[j].count++; + break; + } + } + + /* From time to time show the spectrum. */ + if (count && (ustime()-history_start)/1000 > history_interval) { + if ((outputs++ % 20) == 0) + showLatencyDistLegend(); + showLatencyDistSamples(samples,count); + history_start = ustime(); + count = 0; + } + usleep(LATENCY_SAMPLE_RATE * 1000); + } +} + +/*------------------------------------------------------------------------------ + * Slave mode + *--------------------------------------------------------------------------- */ + +#define RDB_EOF_MARK_SIZE 40 + +void sendReplconf(const char* arg1, const char* arg2) { + fprintf(stderr, "sending REPLCONF %s %s\n", arg1, arg2); + redisReply *reply = redisCommand(context, "REPLCONF %s %s", arg1, arg2); + + /* Handle any error conditions */ + if(reply == NULL) { + fprintf(stderr, "\nI/O error\n"); + exit(1); + } else if(reply->type == REDIS_REPLY_ERROR) { + fprintf(stderr, "REPLCONF %s error: %s\n", arg1, reply->str); + /* non fatal, old versions may not support it */ + } + freeReplyObject(reply); +} + +void sendCapa() { + sendReplconf("capa", "eof"); +} + +void sendRdbOnly(void) { + sendReplconf("rdb-only", "1"); +} + +/* Read raw bytes through a redisContext. The read operation is not greedy + * and may not fill the buffer entirely. + */ +static ssize_t readConn(redisContext *c, char *buf, size_t len) +{ + return c->funcs->read(c, buf, len); +} + +/* Sends SYNC and reads the number of bytes in the payload. Used both by + * slaveMode() and getRDB(). + * returns 0 in case an EOF marker is used. */ +unsigned long long sendSync(redisContext *c, char *out_eof) { + /* To start we need to send the SYNC command and return the payload. 
+ * The hiredis client lib does not understand this part of the protocol + * and we don't want to mess with its buffers, so everything is performed + * using direct low-level I/O. */ + char buf[4096], *p; + ssize_t nread; + + /* Send the SYNC command. */ + if (cliWriteConn(c, "SYNC\r\n", 6) != 6) { + fprintf(stderr,"Error writing to master\n"); + exit(1); + } + + /* Read $\r\n, making sure to read just up to "\n" */ + p = buf; + while(1) { + nread = readConn(c,p,1); + if (nread <= 0) { + fprintf(stderr,"Error reading bulk length while SYNCing\n"); + exit(1); + } + if (*p == '\n' && p != buf) break; + if (*p != '\n') p++; + } + *p = '\0'; + if (buf[0] == '-') { + fprintf(stderr, "SYNC with master failed: %s\n", buf); + exit(1); + } + if (strncmp(buf+1,"EOF:",4) == 0 && strlen(buf+5) >= RDB_EOF_MARK_SIZE) { + memcpy(out_eof, buf+5, RDB_EOF_MARK_SIZE); + return 0; + } + return strtoull(buf+1,NULL,10); +} + +static void slaveMode(void) { + static char eofmark[RDB_EOF_MARK_SIZE]; + static char lastbytes[RDB_EOF_MARK_SIZE]; + static int usemark = 0; + unsigned long long payload = sendSync(context,eofmark); + char buf[1024]; + int original_output = config.output; + + if (payload == 0) { + payload = ULLONG_MAX; + memset(lastbytes,0,RDB_EOF_MARK_SIZE); + usemark = 1; + fprintf(stderr,"SYNC with master, discarding " + "bytes of bulk transfer until EOF marker...\n"); + } else { + fprintf(stderr,"SYNC with master, discarding %llu " + "bytes of bulk transfer...\n", payload); + } + + + /* Discard the payload. */ + while(payload) { + ssize_t nread; + + nread = readConn(context,buf,(payload > sizeof(buf)) ? 
sizeof(buf) : payload); + if (nread <= 0) { + fprintf(stderr,"Error reading RDB payload while SYNCing\n"); + exit(1); + } + payload -= nread; + + if (usemark) { + /* Update the last bytes array, and check if it matches our delimiter.*/ + if (nread >= RDB_EOF_MARK_SIZE) { + memcpy(lastbytes,buf+nread-RDB_EOF_MARK_SIZE,RDB_EOF_MARK_SIZE); + } else { + int rem = RDB_EOF_MARK_SIZE-nread; + memmove(lastbytes,lastbytes+nread,rem); + memcpy(lastbytes+rem,buf,nread); + } + if (memcmp(lastbytes,eofmark,RDB_EOF_MARK_SIZE) == 0) + break; + } + } + + if (usemark) { + unsigned long long offset = ULLONG_MAX - payload; + fprintf(stderr,"SYNC done after %llu bytes. Logging commands from master.\n", offset); + /* put the slave online */ + sleep(1); + sendReplconf("ACK", "0"); + } else + fprintf(stderr,"SYNC done. Logging commands from master.\n"); + + /* Now we can use hiredis to read the incoming protocol. */ + config.output = OUTPUT_CSV; + while (cliReadReply(0) == REDIS_OK); + config.output = original_output; +} + +/*------------------------------------------------------------------------------ + * RDB transfer mode + *--------------------------------------------------------------------------- */ + +/* This function implements --rdb, so it uses the replication protocol in order + * to fetch the RDB file from a remote server. 
*/ +static void getRDB(clusterManagerNode *node) { + int fd; + redisContext *s; + char *filename; + if (node != NULL) { + assert(node->context); + s = node->context; + filename = clusterManagerGetNodeRDBFilename(node); + } else { + s = context; + filename = config.rdb_filename; + } + static char eofmark[RDB_EOF_MARK_SIZE]; + static char lastbytes[RDB_EOF_MARK_SIZE]; + static int usemark = 0; + unsigned long long payload = sendSync(s, eofmark); + char buf[4096]; + + if (payload == 0) { + payload = ULLONG_MAX; + memset(lastbytes,0,RDB_EOF_MARK_SIZE); + usemark = 1; + fprintf(stderr,"SYNC sent to master, writing bytes of bulk transfer " + "until EOF marker to '%s'\n", filename); + } else { + fprintf(stderr,"SYNC sent to master, writing %llu bytes to '%s'\n", + payload, filename); + } + + int write_to_stdout = !strcmp(filename,"-"); + /* Write to file. */ + if (write_to_stdout) { + fd = STDOUT_FILENO; + } else { + fd = open(filename, O_CREAT|O_WRONLY, 0644); + if (fd == -1) { + fprintf(stderr, "Error opening '%s': %s\n", filename, + strerror(errno)); + exit(1); + } + } + + while(payload) { + ssize_t nread, nwritten; + + nread = readConn(s,buf,(payload > sizeof(buf)) ? sizeof(buf) : payload); + if (nread <= 0) { + fprintf(stderr,"I/O Error reading RDB payload from socket\n"); + exit(1); + } + nwritten = write(fd, buf, nread); + if (nwritten != nread) { + fprintf(stderr,"Error writing data to file: %s\n", + (nwritten == -1) ? 
strerror(errno) : "short write"); + exit(1); + } + payload -= nread; + + if (usemark) { + /* Update the last bytes array, and check if it matches our delimiter.*/ + if (nread >= RDB_EOF_MARK_SIZE) { + memcpy(lastbytes,buf+nread-RDB_EOF_MARK_SIZE,RDB_EOF_MARK_SIZE); + } else { + int rem = RDB_EOF_MARK_SIZE-nread; + memmove(lastbytes,lastbytes+nread,rem); + memcpy(lastbytes+rem,buf,nread); + } + if (memcmp(lastbytes,eofmark,RDB_EOF_MARK_SIZE) == 0) + break; + } + } + if (usemark) { + payload = ULLONG_MAX - payload - RDB_EOF_MARK_SIZE; + if (!write_to_stdout && ftruncate(fd, payload) == -1) + fprintf(stderr,"ftruncate failed: %s.\n", strerror(errno)); + fprintf(stderr,"Transfer finished with success after %llu bytes\n", payload); + } else { + fprintf(stderr,"Transfer finished with success.\n"); + } + redisFree(s); /* Close the connection ASAP as fsync() may take time. */ + if (node) + node->context = NULL; + if (!write_to_stdout && fsync(fd) == -1) { + fprintf(stderr,"Fail to fsync '%s': %s\n", filename, strerror(errno)); + exit(1); + } + close(fd); + if (node) { + sdsfree(filename); + return; + } + exit(0); +} + +/*------------------------------------------------------------------------------ + * Bulk import (pipe) mode + *--------------------------------------------------------------------------- */ + +#define PIPEMODE_WRITE_LOOP_MAX_BYTES (128*1024) +static void pipeMode(void) { + long long errors = 0, replies = 0, obuf_len = 0, obuf_pos = 0; + char obuf[1024*16]; /* Output buffer */ + char aneterr[ANET_ERR_LEN]; + redisReply *reply; + int eof = 0; /* True once we consumed all the standard input. */ + int done = 0; + char magic[20]; /* Special reply we recognize. */ + time_t last_read_time = time(NULL); + + srand(time(NULL)); + + /* Use non blocking I/O. 
*/ + if (anetNonBlock(aneterr,context->fd) == ANET_ERR) { + fprintf(stderr, "Can't set the socket in non blocking mode: %s\n", + aneterr); + exit(1); + } + + context->flags &= ~REDIS_BLOCK; + + /* Transfer raw protocol and read replies from the server at the same + * time. */ + while(!done) { + int mask = AE_READABLE; + + if (!eof || obuf_len != 0) mask |= AE_WRITABLE; + mask = aeWait(context->fd,mask,1000); + + /* Handle the readable state: we can read replies from the server. */ + if (mask & AE_READABLE) { + int read_error = 0; + + do { + if (!read_error && redisBufferRead(context) == REDIS_ERR) { + read_error = 1; + } + + reply = NULL; + if (redisGetReply(context, (void **) &reply) == REDIS_ERR) { + fprintf(stderr, "Error reading replies from server\n"); + exit(1); + } + if (reply) { + last_read_time = time(NULL); + if (reply->type == REDIS_REPLY_ERROR) { + fprintf(stderr,"%s\n", reply->str); + errors++; + } else if (eof && reply->type == REDIS_REPLY_STRING && + reply->len == 20) { + /* Check if this is the reply to our final ECHO + * command. If so everything was received + * from the server. */ + if (memcmp(reply->str,magic,20) == 0) { + printf("Last reply received from server.\n"); + done = 1; + replies--; + } + } + replies++; + freeReplyObject(reply); + } + } while(reply); + + /* Abort on read errors. We abort here because it is important + * to consume replies even after a read error: this way we can + * show a potential problem to the user. */ + if (read_error) exit(1); + } + + /* Handle the writable state: we can send protocol to the server. */ + if (mask & AE_WRITABLE) { + ssize_t loop_nwritten = 0; + + while(1) { + /* Transfer current buffer to server. 
*/ + if (obuf_len != 0) { + ssize_t nwritten = cliWriteConn(context,obuf+obuf_pos,obuf_len); + + if (nwritten == -1) { + if (errno != EAGAIN && errno != EINTR) { + fprintf(stderr, "Error writing to the server: %s\n", + strerror(errno)); + exit(1); + } else { + nwritten = 0; + } + } + obuf_len -= nwritten; + obuf_pos += nwritten; + loop_nwritten += nwritten; + if (obuf_len != 0) break; /* Can't accept more data. */ + } + if (context->err) { + fprintf(stderr, "Server I/O Error: %s\n", context->errstr); + exit(1); + } + /* If buffer is empty, load from stdin. */ + if (obuf_len == 0 && !eof) { + ssize_t nread = read(STDIN_FILENO,obuf,sizeof(obuf)); + + if (nread == 0) { + /* The ECHO sequence starts with a "\r\n" so that if there + * is garbage in the protocol we read from stdin, the ECHO + * will likely still be properly formatted. + * CRLF is ignored by Redis, so it has no effects. */ + char echo[] = + "\r\n*2\r\n$4\r\nECHO\r\n$20\r\n01234567890123456789\r\n"; + int j; + + eof = 1; + /* Everything transferred, so we queue a special + * ECHO command that we can match in the replies + * to make sure everything was read from the server. */ + for (j = 0; j < 20; j++) + magic[j] = rand() & 0xff; + memcpy(echo+21,magic,20); + memcpy(obuf,echo,sizeof(echo)-1); + obuf_len = sizeof(echo)-1; + obuf_pos = 0; + printf("All data transferred. Waiting for the last reply...\n"); + } else if (nread == -1) { + fprintf(stderr, "Error reading from stdin: %s\n", + strerror(errno)); + exit(1); + } else { + obuf_len = nread; + obuf_pos = 0; + } + } + if ((obuf_len == 0 && eof) || + loop_nwritten > PIPEMODE_WRITE_LOOP_MAX_BYTES) break; + } + } + + /* Handle timeout, that is, we reached EOF, and we are not getting + * replies from the server for a few seconds, nor the final ECHO is + * received. 
*/ + if (eof && config.pipe_timeout > 0 && + time(NULL)-last_read_time > config.pipe_timeout) + { + fprintf(stderr,"No replies for %d seconds: exiting.\n", + config.pipe_timeout); + errors++; + break; + } + } + printf("errors: %lld, replies: %lld\n", errors, replies); + if (errors) + exit(1); + else + exit(0); +} + +/*------------------------------------------------------------------------------ + * Find big keys + *--------------------------------------------------------------------------- */ + +redisReply *sendScan(unsigned long long *it) { + redisReply *reply; + + if (config.pattern) + reply = redisCommand(context, "SCAN %llu MATCH %b", + *it, config.pattern, sdslen(config.pattern)); + else + reply = redisCommand(context,"SCAN %llu",*it); + + /* Handle any error conditions */ + if(reply == NULL) { + fprintf(stderr, "\nI/O error\n"); + exit(1); + } else if(reply->type == REDIS_REPLY_ERROR) { + fprintf(stderr, "SCAN error: %s\n", reply->str); + exit(1); + } else if(reply->type != REDIS_REPLY_ARRAY) { + fprintf(stderr, "Non ARRAY response from SCAN!\n"); + exit(1); + } else if(reply->elements != 2) { + fprintf(stderr, "Invalid element count from SCAN!\n"); + exit(1); + } + + /* Validate our types are correct */ + assert(reply->element[0]->type == REDIS_REPLY_STRING); + assert(reply->element[1]->type == REDIS_REPLY_ARRAY); + + /* Update iterator */ + *it = strtoull(reply->element[0]->str, NULL, 10); + + return reply; +} + +int getDbSize(void) { + redisReply *reply; + int size; + + reply = redisCommand(context, "DBSIZE"); + + if (reply == NULL) { + fprintf(stderr, "\nI/O error\n"); + exit(1); + } else if (reply->type == REDIS_REPLY_ERROR) { + fprintf(stderr, "Couldn't determine DBSIZE: %s\n", reply->str); + exit(1); + } else if (reply->type != REDIS_REPLY_INTEGER) { + fprintf(stderr, "Non INTEGER response from DBSIZE!\n"); + exit(1); + } + + /* Grab the number of keys and free our reply */ + size = reply->integer; + freeReplyObject(reply); + + return size; +} + 
+typeinfo type_string = { "string", "STRLEN", "bytes" }; +typeinfo type_list = { "list", "LLEN", "items" }; +typeinfo type_set = { "set", "SCARD", "members" }; +typeinfo type_hash = { "hash", "HLEN", "fields" }; +typeinfo type_zset = { "zset", "ZCARD", "members" }; +typeinfo type_stream = { "stream", "XLEN", "entries" }; +typeinfo type_other = { "other", NULL, "?" }; + +void type_free(void* priv_data, void* val) { + typeinfo *info = val; + UNUSED(priv_data); + if (info->biggest_key) + sdsfree(info->biggest_key); + sdsfree(info->name); + zfree(info); +} + +void getKeySizes(redisReply *keys, typeinfo **types, + unsigned long long *sizes, int memkeys, + unsigned memkeys_samples) +{ + redisReply *reply; + unsigned int i; + + /* Pipeline size commands */ + for(i=0;ielements;i++) { + /* Skip keys that disappeared between SCAN and TYPE (or unknown types when not in memkeys mode) */ + if(!types[i] || (!types[i]->sizecmd && !memkeys)) + continue; + + if (!memkeys) { + const char* argv[] = {types[i]->sizecmd, keys->element[i]->str}; + size_t lens[] = {strlen(types[i]->sizecmd), keys->element[i]->len}; + redisAppendCommandArgv(context, 2, argv, lens); + } else if (memkeys_samples==0) { + const char* argv[] = {"MEMORY", "USAGE", keys->element[i]->str}; + size_t lens[] = {6, 5, keys->element[i]->len}; + redisAppendCommandArgv(context, 3, argv, lens); + } else { + sds samplesstr = sdsfromlonglong(memkeys_samples); + const char* argv[] = {"MEMORY", "USAGE", keys->element[i]->str, "SAMPLES", samplesstr}; + size_t lens[] = {6, 5, keys->element[i]->len, 7, sdslen(samplesstr)}; + redisAppendCommandArgv(context, 5, argv, lens); + sdsfree(samplesstr); + } + } + + /* Retrieve sizes */ + for(i=0;ielements;i++) { + /* Skip keys that disappeared between SCAN and TYPE (or unknown types when not in memkeys mode) */ + if(!types[i] || (!types[i]->sizecmd && !memkeys)) { + sizes[i] = 0; + continue; + } + + /* Retrieve size */ + if(redisGetReply(context, (void**)&reply)!=REDIS_OK) { + 
fprintf(stderr, "Error getting size for key '%s' (%d: %s)\n", + keys->element[i]->str, context->err, context->errstr); + exit(1); + } else if(reply->type != REDIS_REPLY_INTEGER) { + /* Theoretically the key could have been removed and + * added as a different type between TYPE and SIZE */ + fprintf(stderr, + "Warning: %s on '%s' failed (may have changed type)\n", + !memkeys? types[i]->sizecmd: "MEMORY USAGE", + keys->element[i]->str); + sizes[i] = 0; + } else { + sizes[i] = reply->integer; + } + + freeReplyObject(reply); + } +} + +static void getKeyFreqs(redisReply *keys, unsigned long long *freqs) { + redisReply *reply; + unsigned int i; + + /* Pipeline OBJECT freq commands */ + for(i=0;ielements;i++) { + const char* argv[] = {"OBJECT", "FREQ", keys->element[i]->str}; + size_t lens[] = {6, 4, keys->element[i]->len}; + redisAppendCommandArgv(context, 3, argv, lens); + } + + /* Retrieve freqs */ + for(i=0;ielements;i++) { + if(redisGetReply(context, (void**)&reply)!=REDIS_OK) { + sds keyname = sdscatrepr(sdsempty(), keys->element[i]->str, keys->element[i]->len); + fprintf(stderr, "Error getting freq for key '%s' (%d: %s)\n", + keyname, context->err, context->errstr); + sdsfree(keyname); + exit(1); + } else if(reply->type != REDIS_REPLY_INTEGER) { + if(reply->type == REDIS_REPLY_ERROR) { + fprintf(stderr, "Error: %s\n", reply->str); + exit(1); + } else { + sds keyname = sdscatrepr(sdsempty(), keys->element[i]->str, keys->element[i]->len); + fprintf(stderr, "Warning: OBJECT freq on '%s' failed (may have been deleted)\n", keyname); + sdsfree(keyname); + freqs[i] = 0; + } + } else { + freqs[i] = reply->integer; + } + freeReplyObject(reply); + } +} + +#define HOTKEYS_SAMPLE 16 +static void findHotKeys(void) { + redisReply *keys, *reply; + unsigned long long counters[HOTKEYS_SAMPLE] = {0}; + sds hotkeys[HOTKEYS_SAMPLE] = {NULL}; + unsigned long long sampled = 0, total_keys, *freqs = NULL, it = 0; + unsigned int arrsize = 0, i, k; + double pct; + + /* Total keys pre 
scanning */ + total_keys = getDbSize(); + + /* Status message */ + printf("\n# Scanning the entire keyspace to find hot keys as well as\n"); + printf("# average sizes per key type. You can use -i 0.1 to sleep 0.1 sec\n"); + printf("# per 100 SCAN commands (not usually needed).\n\n"); + + /* SCAN loop */ + do { + /* Calculate approximate percentage completion */ + pct = 100 * (double)sampled/total_keys; + + /* Grab some keys and point to the keys array */ + reply = sendScan(&it); + keys = reply->element[1]; + + /* Reallocate our freqs array if we need to */ + if(keys->elements > arrsize) { + freqs = zrealloc(freqs, sizeof(unsigned long long)*keys->elements, MALLOC_LOCAL); + + if(!freqs) { + fprintf(stderr, "Failed to allocate storage for keys!\n"); + exit(1); + } + + arrsize = keys->elements; + } + + getKeyFreqs(keys, freqs); + + /* Now update our stats */ + for(i=0;ielements;i++) { + sampled++; + /* Update overall progress */ + if(sampled % 1000000 == 0) { + printf("[%05.2f%%] Sampled %llu keys so far\n", pct, sampled); + } + + /* Use eviction pool here */ + k = 0; + while (k < HOTKEYS_SAMPLE && freqs[i] > counters[k]) k++; + if (k == 0) continue; + k--; + if (k == 0 || counters[k] == 0) { + sdsfree(hotkeys[k]); + } else { + sdsfree(hotkeys[0]); + memmove(counters,counters+1,sizeof(counters[0])*k); + memmove(hotkeys,hotkeys+1,sizeof(hotkeys[0])*k); + } + counters[k] = freqs[i]; + hotkeys[k] = sdscatrepr(sdsempty(), keys->element[i]->str, keys->element[i]->len); + printf( + "[%05.2f%%] Hot key '%s' found so far with counter %llu\n", + pct, hotkeys[k], freqs[i]); + } + + /* Sleep if we've been directed to do so */ + if(sampled && (sampled %100) == 0 && config.interval) { + usleep(config.interval); + } + + freeReplyObject(reply); + } while(it != 0); + + if (freqs) zfree(freqs); + + /* We're done */ + printf("\n-------- summary -------\n\n"); + + printf("Sampled %llu keys in the keyspace!\n", sampled); + + for (i=1; i<= HOTKEYS_SAMPLE; i++) { + k = HOTKEYS_SAMPLE - i; 
+ if(counters[k]>0) { + printf("hot key found with counter: %llu\tkeyname: %s\n", counters[k], hotkeys[k]); + sdsfree(hotkeys[k]); + } + } + + exit(0); +} + +/*------------------------------------------------------------------------------ + * Stats mode + *--------------------------------------------------------------------------- */ + +/* Return the specified INFO field from the INFO command output "info". + * A new buffer is allocated for the result, that needs to be free'd. + * If the field is not found NULL is returned. */ +static char *getInfoField(char *info, char *field) { + char *p = strstr(info,field); + char *n1, *n2; + char *result; + + if (!p) return NULL; + p += strlen(field)+1; + n1 = strchr(p,'\r'); + n2 = strchr(p,','); + if (n2 && n2 < n1) n1 = n2; + result = zmalloc(sizeof(char)*(n1-p)+1, MALLOC_LOCAL); + memcpy(result,p,(n1-p)); + result[n1-p] = '\0'; + return result; +} + +/* Like the above function but automatically convert the result into + * a long. On error (missing field) LONG_MIN is returned. */ +static long getLongInfoField(char *info, char *field) { + char *value = getInfoField(info,field); + long l; + + if (!value) return LONG_MIN; + l = strtol(value,NULL,10); + zfree(value); + return l; +} + +/* Convert number of bytes into a human readable string of the form: + * 100B, 2G, 100M, 4K, and so forth. 
*/ +void bytesToHuman(char *s, long long n, size_t bufsize) { + double d; + + if (n < 0) { + *s = '-'; + s++; + n = -n; + } + if (n < 1024) { + /* Bytes */ + snprintf(s,bufsize,"%lldB",n); + return; + } else if (n < (1024*1024)) { + d = (double)n/(1024); + snprintf(s,bufsize,"%.2fK",d); + } else if (n < (1024LL*1024*1024)) { + d = (double)n/(1024*1024); + snprintf(s,bufsize,"%.2fM",d); + } else if (n < (1024LL*1024*1024*1024)) { + d = (double)n/(1024LL*1024*1024); + snprintf(s,bufsize,"%.2fG",d); + } +} + +static void statMode(void) { + redisReply *reply; + long aux, requests = 0; + int i = 0; + + while(1) { + char buf[64]; + int j; + + reply = reconnectingRedisCommand(context,"INFO"); + if (reply->type == REDIS_REPLY_ERROR) { + printf("ERROR: %s\n", reply->str); + exit(1); + } + + if ((i++ % 20) == 0) { + printf( +"------- data ------ --------------------- load -------------------- - child -\n" +"keys mem clients blocked requests connections \n"); + } + + /* Keys */ + aux = 0; + for (j = 0; j < 20; j++) { + long k; + + snprintf(buf,sizeof(buf),"db%d:keys",j); + k = getLongInfoField(reply->str,buf); + if (k == LONG_MIN) continue; + aux += k; + } + snprintf(buf,sizeof(buf),"%ld",aux); + printf("%-11s",buf); + + /* Used memory */ + aux = getLongInfoField(reply->str,"used_memory"); + bytesToHuman(buf,aux,sizeof(buf)); + printf("%-8s",buf); + + /* Clients */ + aux = getLongInfoField(reply->str,"connected_clients"); + snprintf(buf,sizeof(buf),"%ld",aux); + printf(" %-8s",buf); + + /* Blocked (BLPOPPING) Clients */ + aux = getLongInfoField(reply->str,"blocked_clients"); + snprintf(buf,sizeof(buf),"%ld",aux); + printf("%-8s",buf); + + /* Requests */ + aux = getLongInfoField(reply->str,"total_commands_processed"); + snprintf(buf,sizeof(buf),"%ld (+%ld)",aux,requests == 0 ? 
0 : aux-requests); + printf("%-19s",buf); + requests = aux; + + /* Connections */ + aux = getLongInfoField(reply->str,"total_connections_received"); + snprintf(buf,sizeof(buf),"%ld",aux); + printf(" %-12s",buf); + + /* Children */ + aux = getLongInfoField(reply->str,"bgsave_in_progress"); + aux |= getLongInfoField(reply->str,"aof_rewrite_in_progress") << 1; + aux |= getLongInfoField(reply->str,"loading") << 2; + switch(aux) { + case 0: break; + case 1: + printf("SAVE"); + break; + case 2: + printf("AOF"); + break; + case 3: + printf("SAVE+AOF"); + break; + case 4: + printf("LOAD"); + break; + } + + printf("\n"); + freeReplyObject(reply); + usleep(config.interval); + } +} + +/*------------------------------------------------------------------------------ + * Scan mode + *--------------------------------------------------------------------------- */ + +static void scanMode(void) { + redisReply *reply; + unsigned long long cur = 0; + + do { + reply = sendScan(&cur); + for (unsigned int j = 0; j < reply->element[1]->elements; j++) { + if (config.output == OUTPUT_STANDARD) { + sds out = sdscatrepr(sdsempty(), reply->element[1]->element[j]->str, + reply->element[1]->element[j]->len); + printf("%s\n", out); + sdsfree(out); + } else { + printf("%s\n", reply->element[1]->element[j]->str); + } + } + freeReplyObject(reply); + } while(cur != 0); + + exit(0); +} + +/*------------------------------------------------------------------------------ + * LRU test mode + *--------------------------------------------------------------------------- */ + +/* Return an integer from min to max (both inclusive) using a power-law + * distribution, depending on the value of alpha: the greater the alpha + * the more bias towards lower values. + * + * With alpha = 6.2 the output follows the 80-20 rule where 20% of + * the returned numbers will account for 80% of the frequency. 
*/ +long long powerLawRand(long long min, long long max, double alpha) { + double pl, r; + + max += 1; + r = ((double)rand()) / RAND_MAX; + pl = pow( + ((pow(max,alpha+1) - pow(min,alpha+1))*r + pow(min,alpha+1)), + (1.0/(alpha+1))); + return (max-1-(long long)pl)+min; +} + +/* Generates a key name among a set of lru_test_sample_size keys, using + * an 80-20 distribution. */ +void LRUTestGenKey(char *buf, size_t buflen) { + snprintf(buf, buflen, "lru:%lld", + powerLawRand(1, config.lru_test_sample_size, 6.2)); +} + +#define LRU_CYCLE_PERIOD 1000 /* 1000 milliseconds. */ +#define LRU_CYCLE_PIPELINE_SIZE 250 +static void LRUTestMode(void) { + redisReply *reply; + char key[128]; + long long start_cycle; + int j; + + srand(time(NULL)^getpid()); + while(1) { + /* Perform cycles of 1 second with 50% writes and 50% reads. + * We use pipelining batching writes / reads N times per cycle in order + * to fill the target instance easily. */ + start_cycle = mstime(); + long long hits = 0, misses = 0; + while(mstime() - start_cycle < LRU_CYCLE_PERIOD) { + /* Write cycle. */ + for (j = 0; j < LRU_CYCLE_PIPELINE_SIZE; j++) { + char val[6]; + val[5] = '\0'; + for (int i = 0; i < 5; i++) val[i] = 'A'+rand()%('z'-'A'); + LRUTestGenKey(key,sizeof(key)); + redisAppendCommand(context, "SET %s %s",key,val); + } + for (j = 0; j < LRU_CYCLE_PIPELINE_SIZE; j++) + redisGetReply(context, (void**)&reply); + + /* Read cycle. */ + for (j = 0; j < LRU_CYCLE_PIPELINE_SIZE; j++) { + LRUTestGenKey(key,sizeof(key)); + redisAppendCommand(context, "GET %s",key); + } + for (j = 0; j < LRU_CYCLE_PIPELINE_SIZE; j++) { + if (redisGetReply(context, (void**)&reply) == REDIS_OK) { + switch(reply->type) { + case REDIS_REPLY_ERROR: + printf("%s\n", reply->str); + break; + case REDIS_REPLY_NIL: + misses++; + break; + default: + hits++; + break; + } + } + } + + if (context->err) { + fprintf(stderr,"I/O error during LRU test\n"); + exit(1); + } + } + /* Print stats. 
*/ + printf( + "%lld Gets/sec | Hits: %lld (%.2f%%) | Misses: %lld (%.2f%%)\n", + hits+misses, + hits, (double)hits/(hits+misses)*100, + misses, (double)misses/(hits+misses)*100); + } + exit(0); +} + +/*------------------------------------------------------------------------------ + * Intrisic latency mode. + * + * Measure max latency of a running process that does not result from + * syscalls. Basically this software should provide a hint about how much + * time the kernel leaves the process without a chance to run. + *--------------------------------------------------------------------------- */ + +/* This is just some computation the compiler can't optimize out. + * Should run in less than 100-200 microseconds even using very + * slow hardware. Runs in less than 10 microseconds in modern HW. */ +unsigned long compute_something_fast(void) { + unsigned char s[256], i, j, t; + int count = 1000, k; + unsigned long output = 0; + + for (k = 0; k < 256; k++) s[k] = k; + + i = 0; + j = 0; + while(count--) { + i++; + j = j + s[i]; + t = s[i]; + s[i] = s[j]; + s[j] = t; + output += s[(s[i]+s[j])&255]; + } + return output; +} + +static void intrinsicLatencyModeStop(int s) { + UNUSED(s); + force_cancel_loop = 1; +} + +static void intrinsicLatencyMode(void) { + long long test_end, run_time, max_latency = 0, runs = 0; + + run_time = config.intrinsic_latency_duration*1000000; + test_end = ustime() + run_time; + signal(SIGINT, intrinsicLatencyModeStop); + + while(1) { + long long start, end, latency; + + start = ustime(); + compute_something_fast(); + end = ustime(); + latency = end-start; + runs++; + if (latency <= 0) continue; + + /* Reporting */ + if (latency > max_latency) { + max_latency = latency; + printf("Max latency so far: %lld microseconds.\n", max_latency); + } + + double avg_us = (double)run_time/runs; + double avg_ns = avg_us * 1e3; + if (force_cancel_loop || end > test_end) { + printf("\n%lld total runs " + "(avg latency: " + "%.4f microseconds / %.2f nanoseconds 
per run).\n", + runs, avg_us, avg_ns); + printf("Worst run took %.0fx longer than the average latency.\n", + max_latency / avg_us); + exit(0); + } + } +} + +static sds askPassword(const char *msg) { + linenoiseMaskModeEnable(); + sds auth = linenoise(msg); + linenoiseMaskModeDisable(); + return auth; +} + +/*------------------------------------------------------------------------------ + * Program main() + *--------------------------------------------------------------------------- */ + +int main(int argc, char **argv) { + int firstarg; + struct timeval tv; + + storage_init(NULL, 0); + memset(&config.sslconfig, 0, sizeof(config.sslconfig)); + config.hostip = sdsnew("127.0.0.1"); + config.hostport = 9880; + config.hostsocket = NULL; + config.repeat = 1; + config.interval = 0; + config.dbnum = 0; + config.input_dbnum = 0; + config.interactive = 0; + config.shutdown = 0; + config.monitor_mode = 0; + config.pubsub_mode = 0; + config.latency_mode = 0; + config.latency_dist_mode = 0; + config.latency_history = 0; + config.lru_test_mode = 0; + config.lru_test_sample_size = 0; + config.cluster_mode = 0; + config.cluster_send_asking = 0; + config.slave_mode = 0; + config.getrdb_mode = 0; + config.stat_mode = 0; + config.scan_mode = 0; + config.intrinsic_latency_mode = 0; + config.pattern = NULL; + config.rdb_filename = NULL; + config.pipe_mode = 0; + config.pipe_timeout = REDIS_CLI_DEFAULT_PIPE_TIMEOUT; + config.bigkeys = 0; + config.hotkeys = 0; + config.stdinarg = 0; + config.auth = NULL; + config.askpass = 0; + config.user = NULL; + config.eval = NULL; + config.eval_ldb = 0; + config.eval_ldb_end = 0; + config.eval_ldb_sync = 0; + config.enable_ldb_on_eval = 0; + config.last_cmd_type = -1; + config.verbose = 0; + config.set_errcode = 0; + config.no_auth_warning = 0; + config.in_multi = 0; + config.force_mode = 0; + config.cluster_manager_command.name = NULL; + config.cluster_manager_command.argc = 0; + config.cluster_manager_command.argv = NULL; + 
config.cluster_manager_command.flags = 0; + config.cluster_manager_command.replicas = 0; + config.cluster_manager_command.from = NULL; + config.cluster_manager_command.to = NULL; + config.cluster_manager_command.from_user = NULL; + config.cluster_manager_command.from_pass = NULL; + config.cluster_manager_command.from_askpass = 0; + config.cluster_manager_command.weight = NULL; + config.cluster_manager_command.weight_argc = 0; + config.cluster_manager_command.slots = 0; + config.cluster_manager_command.timeout = CLUSTER_MANAGER_MIGRATE_TIMEOUT; + config.cluster_manager_command.pipeline = CLUSTER_MANAGER_MIGRATE_PIPELINE; + config.cluster_manager_command.threshold = + CLUSTER_MANAGER_REBALANCE_THRESHOLD; + config.cluster_manager_command.backup_dir = NULL; + pref.hints = 1; + + spectrum_palette = spectrum_palette_color; + spectrum_palette_size = spectrum_palette_color_size; + + if (!isatty(fileno(stdout)) && (getenv("FAKETTY") == NULL)) { + config.output = OUTPUT_RAW; + config.push_output = 0; + } else { + config.output = OUTPUT_STANDARD; + config.push_output = 1; + } + config.mb_delim = sdsnew("\n"); + config.cmd_delim = sdsnew("\n"); + + firstarg = parseOptions(argc,argv); + argc -= firstarg; + argv += firstarg; + + parseEnv(); + + if (config.askpass) { + config.auth = askPassword("Please input password: "); + } + + if (config.cluster_manager_command.from_askpass) { + config.cluster_manager_command.from_pass = askPassword( + "Please input import source node password: "); + } + +#ifdef USE_OPENSSL + if (config.tls) { + cliSecureInit(); + } +#endif + + gettimeofday(&tv, NULL); + init_genrand64(((long long) tv.tv_sec * 1000000 + tv.tv_usec) ^ getpid()); + + /* Cluster Manager mode */ + if (CLUSTER_MANAGER_MODE()) { + clusterManagerCommandProc *proc = validateClusterManagerCommand(); + if (!proc) { + exit(1); + } + clusterManagerMode(proc); + } + + /* Latency mode */ + if (config.latency_mode) { + if (cliConnect(0) == REDIS_ERR) exit(1); + latencyMode(); + } + + /* 
Latency distribution mode */ + if (config.latency_dist_mode) { + if (cliConnect(0) == REDIS_ERR) exit(1); + latencyDistMode(); + } + + /* Slave mode */ + if (config.slave_mode) { + if (cliConnect(0) == REDIS_ERR) exit(1); + sendCapa(); + slaveMode(); + } + + /* Get RDB mode. */ + if (config.getrdb_mode) { + if (cliConnect(0) == REDIS_ERR) exit(1); + sendCapa(); + sendRdbOnly(); + getRDB(NULL); + } + + /* Pipe mode */ + if (config.pipe_mode) { + if (cliConnect(0) == REDIS_ERR) exit(1); + pipeMode(); + } + + /* Find big keys */ + if (config.bigkeys) { + if (cliConnect(0) == REDIS_ERR) exit(1); + findBigKeys(0, 0); + } + + /* Find large keys */ + if (config.memkeys) { + if (cliConnect(0) == REDIS_ERR) exit(1); + findBigKeys(1, config.memkeys_samples); + } + + /* Find hot keys */ + if (config.hotkeys) { + if (cliConnect(0) == REDIS_ERR) exit(1); + findHotKeys(); + } + + /* Find hot keys */ + if (config.hotkeys) { + if (cliConnect(0) == REDIS_ERR) exit(1); + findHotKeys(); + } + + /* Stat mode */ + if (config.stat_mode) { + if (cliConnect(0) == REDIS_ERR) exit(1); + if (config.interval == 0) config.interval = 1000000; + statMode(); + } + + /* Scan mode */ + if (config.scan_mode) { + if (cliConnect(0) == REDIS_ERR) exit(1); + scanMode(); + } + + /* LRU test mode */ + if (config.lru_test_mode) { + if (cliConnect(0) == REDIS_ERR) exit(1); + LRUTestMode(); + } + + /* Intrinsic latency mode */ + if (config.intrinsic_latency_mode) intrinsicLatencyMode(); + + /* Start interactive mode when no command is provided */ + if (argc == 0 && !config.eval) { + /* Show the message of the day if we are interactive */ + if (config.output == OUTPUT_STANDARD && !config.disable_motd) { + /*enable_motd=1 will retrieve the message of today using CURL*/ + char *szMotd = fetchMOTD(1 /* cache */, 1 /* enable_motd */); + if (szMotd != NULL) { + printf("Message of the day:\n %s\n", szMotd); + sdsfree(szMotd); + } + } + + /* Ignore SIGPIPE in interactive mode to force a reconnect */ + 
signal(SIGPIPE, SIG_IGN); + + /* Note that in repl mode we don't abort on connection error. + * A new attempt will be performed for every command send. */ + cliConnect(0); + repl(); + } + + /* Otherwise, we have some arguments to execute */ + if (cliConnect(0) != REDIS_OK) exit(1); + + if (config.eval) { + return evalMode(argc,argv); + } else { + return noninteractive(argc,argv); + } +} diff --git a/src/server.cpp b/src/server.cpp new file mode 100644 index 000000000..3c9a853dd --- /dev/null +++ b/src/server.cpp @@ -0,0 +1,7849 @@ +/* + * Copyright (c) 2009-2016, Salvatore Sanfilippo + * Copyright (c) 2019 John Sully + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "server.h" +#include "monotonic.h" +#include "cluster.h" +#include "slowlog.h" +#include "bio.h" +#include "latency.h" +#include "atomicvar.h" +#include "storage.h" +#include "cron.h" +#include +#include "mt19937-64.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "aelocker.h" +#include "motd.h" +#include "t_nhash.h" +#include "readwritelock.h" +#ifdef __linux__ +#include +#include +#include +#endif + +int g_fTestMode = false; +const char *motd_url = ""; +const char *motd_cache_file = ""; + +/* Our shared "common" objects */ + +struct sharedObjectsStruct shared; + +/* Global vars that are actually used as constants. The following double + * values are used for double on-disk serialization, and are initialized + * at runtime to avoid strange compiler optimizations. 
*/ + +double R_Zero, R_PosInf, R_NegInf, R_Nan; + +/*================================= Globals ================================= */ + +/* Global vars */ +namespace GlobalHidden { +struct redisServer server; /* Server global state */ +} +redisServer *g_pserver = &GlobalHidden::server; +struct redisServerConst cserver; +thread_local struct redisServerThreadVars *serverTL = NULL; // thread local server vars +fastlock time_thread_lock("Time thread lock"); +std::condition_variable_any time_thread_cv; +int sleeping_threads = 0; +void wakeTimeThread(); + +/* Our command table. + * + * Every entry is composed of the following fields: + * + * name: A string representing the command name. + * + * function: Pointer to the C function implementing the command. + * + * arity: Number of arguments, it is possible to use -N to say >= N + * + * sflags: Command flags as string. See below for a table of flags. + * + * flags: Flags as bitmask. Computed by Redis using the 'sflags' field. + * + * get_keys_proc: An optional function to get key arguments from a command. + * This is only used when the following three fields are not + * enough to specify what arguments are keys. + * + * first_key_index: First argument that is a key + * + * last_key_index: Last argument that is a key + * + * key_step: Step to get all the keys from first to last argument. + * For instance in MSET the step is two since arguments + * are key,val,key,val,... + * + * microseconds: Microseconds of total execution time for this command. + * + * calls: Total number of calls of this command. + * + * id: Command bit identifier for ACLs or other goals. + * + * The flags, microseconds and calls fields are computed by Redis and should + * always be set to zero. + * + * Command flags are expressed using space separated strings, that are turned + * into actual flags by the populateCommandTable() function. + * + * This is the meaning of the flags: + * + * write: Write command (may modify the key space). 
+ * + * read-only: Commands just reading from keys without changing the content. + * Note that commands that don't read from the keyspace such as + * TIME, SELECT, INFO, administrative commands, and connection + * or transaction related commands (multi, exec, discard, ...) + * are not flagged as read-only commands, since they affect the + * server or the connection in other ways. + * + * use-memory: May increase memory usage once called. Don't allow if out + * of memory. + * + * admin: Administrative command, like SAVE or SHUTDOWN. + * + * pub-sub: Pub/Sub related command. + * + * no-script: Command not allowed in scripts. + * + * random: Random command. Command is not deterministic, that is, the same + * command with the same arguments, with the same key space, may + * have different results. For instance SPOP and RANDOMKEY are + * two random commands. + * + * to-sort: Sort command output array if called from script, so that the + * output is deterministic. When this flag is used (not always + * possible), then the "random" flag is not needed. + * + * ok-loading: Allow the command while loading the database. + * + * ok-stale: Allow the command while a replica has stale data but is not + * allowed to serve this data. Normally no command is accepted + * in this condition but just a few. + * + * no-monitor: Do not automatically propagate the command on MONITOR. + * + * no-slowlog: Do not automatically propagate the command to the slowlog. + * + * cluster-asking: Perform an implicit ASKING for this command, so the + * command will be accepted in cluster mode if the slot is marked + * as 'importing'. + * + * fast: Fast command: O(1) or O(log(N)) command that should never + * delay its execution as long as the kernel scheduler is giving + * us time. Note that commands that may trigger a DEL as a side + * effect (like SET) are not fast commands. 
+ * + * may-replicate: Command may produce replication traffic, but should be + * allowed under circumstances where write commands are disallowed. + * Examples include PUBLISH, which replicates pubsub messages,and + * EVAL, which may execute write commands, which are replicated, + * or may just execute read commands. A command can not be marked + * both "write" and "may-replicate" + * + * The following additional flags are only used in order to put commands + * in a specific ACL category. Commands can have multiple ACL categories. + * + * @keyspace, @read, @write, @set, @sortedset, @list, @hash, @string, @bitmap, + * @hyperloglog, @stream, @admin, @fast, @slow, @pubsub, @blocking, @dangerous, + * @connection, @transaction, @scripting, @geo, @replication. + * + * Note that: + * + * 1) The read-only flag implies the @read ACL category. + * 2) The write flag implies the @write ACL category. + * 3) The fast flag implies the @fast ACL category. + * 4) The admin flag implies the @admin and @dangerous ACL category. + * 5) The pub-sub flag implies the @pubsub ACL category. + * 6) The lack of fast flag implies the @slow ACL category. + * 7) The non obvious "keyspace" category includes the commands + * that interact with keys without having anything to do with + * specific data structures, such as: DEL, RENAME, MOVE, SELECT, + * TYPE, EXPIRE*, PEXPIRE*, TTL, PTTL, ... + */ + +struct redisCommand redisCommandTable[] = { + {"module",moduleCommand,-2, + "admin no-script", + 0,NULL,0,0,0,0,0,0}, + + {"get",getCommand,2, + "read-only fast async @string", + 0,NULL,1,1,1,0,0,0}, + + {"getex",getexCommand,-2, + "write fast @string", + 0,NULL,1,1,1,0,0,0}, + + {"getdel",getdelCommand,2, + "write fast @string", + 0,NULL,1,1,1,0,0,0}, + + /* Note that we can't flag set as fast, since it may perform an + * implicit DEL of a large key. 
*/ + {"set",setCommand,-3, + "write use-memory @string", + 0,NULL,1,1,1,0,0,0}, + + {"setnx",setnxCommand,3, + "write use-memory fast @string", + 0,NULL,1,1,1,0,0,0}, + + {"setex",setexCommand,4, + "write use-memory @string", + 0,NULL,1,1,1,0,0,0}, + + {"psetex",psetexCommand,4, + "write use-memory @string", + 0,NULL,1,1,1,0,0,0}, + + {"append",appendCommand,3, + "write use-memory fast @string", + 0,NULL,1,1,1,0,0,0}, + + {"strlen",strlenCommand,2, + "read-only fast @string", + 0,NULL,1,1,1,0,0,0}, + + {"del",delCommand,-2, + "write @keyspace", + 0,NULL,1,-1,1,0,0,0}, + + {"expdel",delCommand,-2, + "write @keyspace", + 0,NULL,1,-1,1,0,0,0}, + + {"unlink",unlinkCommand,-2, + "write fast @keyspace", + 0,NULL,1,-1,1,0,0,0}, + + {"exists",existsCommand,-2, + "read-only fast @keyspace", + 0,NULL,1,-1,1,0,0,0}, + + {"keydb.mexists",mexistsCommand,-2, + "read-only fast @keyspace", + 0,NULL,1,-1,1,0,0,0}, + + {"setbit",setbitCommand,4, + "write use-memory @bitmap", + 0,NULL,1,1,1,0,0,0}, + + {"getbit",getbitCommand,3, + "read-only fast @bitmap", + 0,NULL,1,1,1,0,0,0}, + + {"bitfield",bitfieldCommand,-2, + "write use-memory @bitmap", + 0,NULL,1,1,1,0,0,0}, + + {"bitfield_ro",bitfieldroCommand,-2, + "read-only fast @bitmap", + 0,NULL,1,1,1,0,0,0}, + + {"setrange",setrangeCommand,4, + "write use-memory @string", + 0,NULL,1,1,1,0,0,0}, + + {"getrange",getrangeCommand,4, + "read-only @string", + 0,NULL,1,1,1,0,0,0}, + + {"substr",getrangeCommand,4, + "read-only @string", + 0,NULL,1,1,1,0,0,0}, + + {"incr",incrCommand,2, + "write use-memory fast @string", + 0,NULL,1,1,1,0,0,0}, + + {"decr",decrCommand,2, + "write use-memory fast @string", + 0,NULL,1,1,1,0,0,0}, + + {"mget",mgetCommand,-2, + "read-only fast async @string", + 0,NULL,1,-1,1,0,0,0}, + + {"rpush",rpushCommand,-3, + "write use-memory fast @list", + 0,NULL,1,1,1,0,0,0}, + + {"lpush",lpushCommand,-3, + "write use-memory fast @list", + 0,NULL,1,1,1,0,0,0}, + + {"rpushx",rpushxCommand,-3, + "write use-memory fast @list", 
+ 0,NULL,1,1,1,0,0,0}, + + {"lpushx",lpushxCommand,-3, + "write use-memory fast @list", + 0,NULL,1,1,1,0,0,0}, + + {"linsert",linsertCommand,5, + "write use-memory @list", + 0,NULL,1,1,1,0,0,0}, + + {"rpop",rpopCommand,-2, + "write fast @list", + 0,NULL,1,1,1,0,0,0}, + + {"lpop",lpopCommand,-2, + "write fast @list", + 0,NULL,1,1,1,0,0,0}, + + {"brpop",brpopCommand,-3, + "write no-script @list @blocking", + 0,NULL,1,-2,1,0,0,0}, + + {"brpoplpush",brpoplpushCommand,4, + "write use-memory no-script @list @blocking", + 0,NULL,1,2,1,0,0,0}, + + {"blmove",blmoveCommand,6, + "write use-memory no-script @list @blocking", + 0,NULL,1,2,1,0,0,0}, + + {"blpop",blpopCommand,-3, + "write no-script @list @blocking", + 0,NULL,1,-2,1,0,0,0}, + + {"llen",llenCommand,2, + "read-only fast @list", + 0,NULL,1,1,1,0,0,0}, + + {"lindex",lindexCommand,3, + "read-only @list", + 0,NULL,1,1,1,0,0,0}, + + {"lset",lsetCommand,4, + "write use-memory @list", + 0,NULL,1,1,1,0,0,0}, + + {"lrange",lrangeCommand,4, + "read-only @list", + 0,NULL,1,1,1,0,0,0}, + + {"ltrim",ltrimCommand,4, + "write @list", + 0,NULL,1,1,1,0,0,0}, + + {"lpos",lposCommand,-3, + "read-only @list", + 0,NULL,1,1,1,0,0,0}, + + {"lrem",lremCommand,4, + "write @list", + 0,NULL,1,1,1,0,0,0}, + + {"rpoplpush",rpoplpushCommand,3, + "write use-memory @list", + 0,NULL,1,2,1,0,0,0}, + + {"lmove",lmoveCommand,5, + "write use-memory @list", + 0,NULL,1,2,1,0,0,0}, + + {"sadd",saddCommand,-3, + "write use-memory fast @set", + 0,NULL,1,1,1,0,0,0}, + + {"srem",sremCommand,-3, + "write fast @set", + 0,NULL,1,1,1,0,0,0}, + + {"smove",smoveCommand,4, + "write fast @set", + 0,NULL,1,2,1,0,0,0}, + + {"sismember",sismemberCommand,3, + "read-only fast @set", + 0,NULL,1,1,1,0,0,0}, + + {"smismember",smismemberCommand,-3, + "read-only fast @set", + 0,NULL,1,1,1,0,0,0}, + + {"scard",scardCommand,2, + "read-only fast @set", + 0,NULL,1,1,1,0,0,0}, + + {"spop",spopCommand,-2, + "write random fast @set", + 0,NULL,1,1,1,0,0,0}, + + 
{"srandmember",srandmemberCommand,-2, + "read-only random @set", + 0,NULL,1,1,1,0,0,0}, + + {"sinter",sinterCommand,-2, + "read-only to-sort @set", + 0,NULL,1,-1,1,0,0,0}, + + {"sinterstore",sinterstoreCommand,-3, + "write use-memory @set", + 0,NULL,1,-1,1,0,0,0}, + + {"sunion",sunionCommand,-2, + "read-only to-sort @set", + 0,NULL,1,-1,1,0,0,0}, + + {"sunionstore",sunionstoreCommand,-3, + "write use-memory @set", + 0,NULL,1,-1,1,0,0,0}, + + {"sdiff",sdiffCommand,-2, + "read-only to-sort @set", + 0,NULL,1,-1,1,0,0,0}, + + {"sdiffstore",sdiffstoreCommand,-3, + "write use-memory @set", + 0,NULL,1,-1,1,0,0,0}, + + {"smembers",sinterCommand,2, + "read-only to-sort @set", + 0,NULL,1,1,1,0,0,0}, + + {"sscan",sscanCommand,-3, + "read-only random @set", + 0,NULL,1,1,1,0,0,0}, + + {"zadd",zaddCommand,-4, + "write use-memory fast @sortedset", + 0,NULL,1,1,1,0,0,0}, + + {"zincrby",zincrbyCommand,4, + "write use-memory fast @sortedset", + 0,NULL,1,1,1,0,0,0}, + + {"zrem",zremCommand,-3, + "write fast @sortedset", + 0,NULL,1,1,1,0,0,0}, + + {"zremrangebyscore",zremrangebyscoreCommand,4, + "write @sortedset", + 0,NULL,1,1,1,0,0,0}, + + {"zremrangebyrank",zremrangebyrankCommand,4, + "write @sortedset", + 0,NULL,1,1,1,0,0,0}, + + {"zremrangebylex",zremrangebylexCommand,4, + "write @sortedset", + 0,NULL,1,1,1,0,0,0}, + + {"zunionstore",zunionstoreCommand,-4, + "write use-memory @sortedset", + 0,zunionInterDiffStoreGetKeys,1,1,1,0,0,0}, + + {"zinterstore",zinterstoreCommand,-4, + "write use-memory @sortedset", + 0,zunionInterDiffStoreGetKeys,1,1,1,0,0,0}, + + {"zdiffstore",zdiffstoreCommand,-4, + "write use-memory @sortedset", + 0,zunionInterDiffStoreGetKeys,1,1,1,0,0,0}, + + {"zunion",zunionCommand,-3, + "read-only @sortedset", + 0,zunionInterDiffGetKeys,0,0,0,0,0,0}, + + {"zinter",zinterCommand,-3, + "read-only @sortedset", + 0,zunionInterDiffGetKeys,0,0,0,0,0,0}, + + {"zdiff",zdiffCommand,-3, + "read-only @sortedset", + 0,zunionInterDiffGetKeys,0,0,0,0,0,0}, + + 
{"zrange",zrangeCommand,-4, + "read-only @sortedset", + 0,NULL,1,1,1,0,0,0}, + + {"zrangestore",zrangestoreCommand,-5, + "write use-memory @sortedset", + 0,NULL,1,2,1,0,0,0}, + + {"zrangebyscore",zrangebyscoreCommand,-4, + "read-only @sortedset", + 0,NULL,1,1,1,0,0,0}, + + {"zrevrangebyscore",zrevrangebyscoreCommand,-4, + "read-only @sortedset", + 0,NULL,1,1,1,0,0,0}, + + {"zrangebylex",zrangebylexCommand,-4, + "read-only @sortedset", + 0,NULL,1,1,1,0,0,0}, + + {"zrevrangebylex",zrevrangebylexCommand,-4, + "read-only @sortedset", + 0,NULL,1,1,1,0,0,0}, + + {"zcount",zcountCommand,4, + "read-only fast @sortedset", + 0,NULL,1,1,1,0,0,0}, + + {"zlexcount",zlexcountCommand,4, + "read-only fast @sortedset", + 0,NULL,1,1,1,0,0,0}, + + {"zrevrange",zrevrangeCommand,-4, + "read-only @sortedset", + 0,NULL,1,1,1,0,0,0}, + + {"zcard",zcardCommand,2, + "read-only fast @sortedset", + 0,NULL,1,1,1,0,0,0}, + + {"zscore",zscoreCommand,3, + "read-only fast @sortedset", + 0,NULL,1,1,1,0,0,0}, + + {"zmscore",zmscoreCommand,-3, + "read-only fast @sortedset", + 0,NULL,1,1,1,0,0,0}, + + {"zrank",zrankCommand,3, + "read-only fast @sortedset", + 0,NULL,1,1,1,0,0,0}, + + {"zrevrank",zrevrankCommand,3, + "read-only fast @sortedset", + 0,NULL,1,1,1,0,0,0}, + + {"zscan",zscanCommand,-3, + "read-only random @sortedset", + 0,NULL,1,1,1,0,0,0}, + + {"zpopmin",zpopminCommand,-2, + "write fast @sortedset", + 0,NULL,1,1,1,0,0,0}, + + {"zpopmax",zpopmaxCommand,-2, + "write fast @sortedset", + 0,NULL,1,1,1,0,0,0}, + + {"bzpopmin",bzpopminCommand,-3, + "write no-script fast @sortedset @blocking", + 0,NULL,1,-2,1,0,0,0}, + + {"bzpopmax",bzpopmaxCommand,-3, + "write no-script fast @sortedset @blocking", + 0,NULL,1,-2,1,0,0,0}, + + {"zrandmember",zrandmemberCommand,-2, + "read-only random @sortedset", + 0,NULL,1,1,1,0,0,0}, + + {"hset",hsetCommand,-4, + "write use-memory fast @hash", + 0,NULL,1,1,1,0,0,0}, + + {"hsetnx",hsetnxCommand,4, + "write use-memory fast @hash", + 0,NULL,1,1,1,0,0,0}, + + 
{"hget",hgetCommand,3, + "read-only fast @hash", + 0,NULL,1,1,1,0,0,0}, + + {"hmset",hsetCommand,-4, + "write use-memory fast @hash", + 0,NULL,1,1,1,0,0,0}, + + {"hmget",hmgetCommand,-3, + "read-only fast @hash", + 0,NULL,1,1,1,0,0,0}, + + {"hincrby",hincrbyCommand,4, + "write use-memory fast @hash", + 0,NULL,1,1,1,0,0,0}, + + {"hincrbyfloat",hincrbyfloatCommand,4, + "write use-memory fast @hash", + 0,NULL,1,1,1,0,0,0}, + + {"hdel",hdelCommand,-3, + "write fast @hash", + 0,NULL,1,1,1,0,0,0}, + + {"hlen",hlenCommand,2, + "read-only fast @hash", + 0,NULL,1,1,1,0,0,0}, + + {"hstrlen",hstrlenCommand,3, + "read-only fast @hash", + 0,NULL,1,1,1,0,0,0}, + + {"hkeys",hkeysCommand,2, + "read-only to-sort @hash", + 0,NULL,1,1,1,0,0,0}, + + {"hvals",hvalsCommand,2, + "read-only to-sort @hash", + 0,NULL,1,1,1,0,0,0}, + + {"hgetall",hgetallCommand,2, + "read-only random @hash", + 0,NULL,1,1,1,0,0,0}, + + {"hexists",hexistsCommand,3, + "read-only fast @hash", + 0,NULL,1,1,1,0,0,0}, + + {"hrandfield",hrandfieldCommand,-2, + "read-only random @hash", + 0,NULL,1,1,1,0,0,0}, + + {"hscan",hscanCommand,-3, + "read-only random @hash", + 0,NULL,1,1,1,0,0,0}, + + {"incrby",incrbyCommand,3, + "write use-memory fast @string", + 0,NULL,1,1,1,0,0,0}, + + {"decrby",decrbyCommand,3, + "write use-memory fast @string", + 0,NULL,1,1,1,0,0,0}, + + {"incrbyfloat",incrbyfloatCommand,3, + "write use-memory fast @string", + 0,NULL,1,1,1,0,0,0}, + + {"getset",getsetCommand,3, + "write use-memory fast @string", + 0,NULL,1,1,1,0,0,0}, + + {"mset",msetCommand,-3, + "write use-memory @string", + 0,NULL,1,-1,2,0,0,0}, + + {"msetnx",msetnxCommand,-3, + "write use-memory @string", + 0,NULL,1,-1,2,0,0,0}, + + {"randomkey",randomkeyCommand,1, + "read-only random @keyspace", + 0,NULL,0,0,0,0,0,0}, + + {"select",selectCommand,2, + "ok-loading fast ok-stale @keyspace", + 0,NULL,0,0,0,0,0,0}, + + {"swapdb",swapdbCommand,3, + "write fast @keyspace @dangerous", + 0,NULL,0,0,0,0,0,0}, + + {"move",moveCommand,3, + 
"write fast @keyspace", + 0,NULL,1,1,1,0,0,0}, + + {"copy",copyCommand,-3, + "write use-memory @keyspace", + 0,NULL,1,2,1,0,0,0}, + + /* Like for SET, we can't mark rename as a fast command because + * overwriting the target key may result in an implicit slow DEL. */ + {"rename",renameCommand,3, + "write @keyspace", + 0,NULL,1,2,1,0,0,0}, + + {"renamenx",renamenxCommand,3, + "write fast @keyspace", + 0,NULL,1,2,1,0,0,0}, + + {"expire",expireCommand,3, + "write fast @keyspace", + 0,NULL,1,1,1,0,0,0}, + + {"expireat",expireatCommand,3, + "write fast @keyspace", + 0,NULL,1,1,1,0,0,0}, + + {"expiremember", expireMemberCommand, -4, + "write fast @keyspace", + 0,NULL,1,1,1,0,0,0}, + + {"expirememberat", expireMemberAtCommand, 4, + "write fast @keyspace", + 0,NULL,1,1,1,0,0,0}, + + {"pexpirememberat", pexpireMemberAtCommand, 4, + "write fast @keyspace", + 0,NULL,1,1,1,0,0,0}, + + {"pexpire",pexpireCommand,3, + "write fast @keyspace", + 0,NULL,1,1,1,0,0,0}, + + {"pexpireat",pexpireatCommand,3, + "write fast @keyspace", + 0,NULL,1,1,1,0,0,0}, + + {"keys",keysCommand,2, + "read-only to-sort @keyspace @dangerous", + 0,NULL,0,0,0,0,0,0}, + + {"scan",scanCommand,-2, + "read-only random @keyspace", + 0,NULL,0,0,0,0,0,0}, + + {"dbsize",dbsizeCommand,1, + "read-only fast @keyspace", + 0,NULL,0,0,0,0,0,0}, + + {"auth",authCommand,-2, + "no-auth no-script ok-loading ok-stale fast @connection", + 0,NULL,0,0,0,0,0,0}, + + /* We don't allow PING during loading since in Redis PING is used as + * failure detection, and a loading server is considered to be + * not available. 
*/ + {"ping",pingCommand,-1, + "ok-stale ok-loading fast @connection @replication", + 0,NULL,0,0,0,0,0,0}, + + {"replping",pingCommand,-1, + "ok-stale fast @connection @replication", + 0,NULL,0,0,0,0,0,0}, + + {"echo",echoCommand,2, + "fast @connection", + 0,NULL,0,0,0,0,0,0}, + + {"save",saveCommand,1, + "admin no-script", + 0,NULL,0,0,0,0,0,0}, + + {"bgsave",bgsaveCommand,-1, + "admin no-script", + 0,NULL,0,0,0,0,0,0}, + + {"bgrewriteaof",bgrewriteaofCommand,1, + "admin no-script", + 0,NULL,0,0,0,0,0,0}, + + {"shutdown",shutdownCommand,-1, + "admin no-script ok-loading ok-stale noprop", + 0,NULL,0,0,0,0,0,0}, + + {"lastsave",lastsaveCommand,1, + "random fast ok-loading ok-stale @admin @dangerous", + 0,NULL,0,0,0,0,0,0}, + + {"type",typeCommand,2, + "read-only fast @keyspace", + 0,NULL,1,1,1,0,0,0}, + + {"multi",multiCommand,1, + "no-script fast ok-loading ok-stale @transaction", + 0,NULL,0,0,0,0,0,0}, + + {"exec",execCommand,1, + "no-script no-slowlog ok-loading ok-stale @transaction", + 0,NULL,0,0,0,0,0,0}, + + {"discard",discardCommand,1, + "no-script fast ok-loading ok-stale @transaction", + 0,NULL,0,0,0,0,0,0}, + + {"sync",syncCommand,1, + "admin no-script @replication", + 0,NULL,0,0,0,0,0,0}, + + {"psync",syncCommand,-3, + "admin no-script @replication", + 0,NULL,0,0,0,0,0,0}, + + {"replconf",replconfCommand,-1, + "admin no-script ok-loading ok-stale @replication", + 0,NULL,0,0,0,0,0,0}, + + {"flushdb",flushdbCommand,-1, + "write @keyspace @dangerous", + 0,NULL,0,0,0,0,0,0}, + + {"flushall",flushallCommand,-1, + "write @keyspace @dangerous", + 0,NULL,0,0,0,0,0,0}, + + {"sort",sortCommand,-2, + "write use-memory @list @set @sortedset @dangerous", + 0,sortGetKeys,1,1,1,0,0,0}, + + {"info",infoCommand,-1, + "ok-loading ok-stale random @dangerous", + 0,NULL,0,0,0,0,0,0}, + + {"monitor",monitorCommand,1, + "admin no-script ok-loading ok-stale", + 0,NULL,0,0,0,0,0,0}, + + {"ttl",ttlCommand,-2, + "read-only fast random @keyspace", + 0,NULL,1,1,1,0,0,0}, + + 
{"touch",touchCommand,-2, + "read-only fast @keyspace", + 0,NULL,1,-1,1,0,0,0}, + + {"pttl",pttlCommand,-2, + "read-only fast random @keyspace", + 0,NULL,1,1,1,0,0,0}, + + {"persist",persistCommand,-2, + "write fast @keyspace", + 0,NULL,1,1,1,0,0,0}, + + {"slaveof",replicaofCommand,3, + "admin no-script ok-stale", + 0,NULL,0,0,0,0,0,0}, + + {"replicaof",replicaofCommand,-3, + "admin no-script ok-stale", + 0,NULL,0,0,0,0,0,0}, + + {"role",roleCommand,1, + "ok-loading ok-stale no-script fast @dangerous", + 0,NULL,0,0,0,0,0,0}, + + {"debug",debugCommand,-2, + "admin no-script ok-loading ok-stale", + 0,NULL,0,0,0,0,0,0}, + + {"config",configCommand,-2, + "admin ok-loading ok-stale no-script", + 0,NULL,0,0,0,0,0,0}, + + {"subscribe",subscribeCommand,-2, + "pub-sub no-script ok-loading ok-stale", + 0,NULL,0,0,0,0,0,0}, + + {"unsubscribe",unsubscribeCommand,-1, + "pub-sub no-script ok-loading ok-stale", + 0,NULL,0,0,0,0,0,0}, + + {"psubscribe",psubscribeCommand,-2, + "pub-sub no-script ok-loading ok-stale", + 0,NULL,0,0,0,0,0,0}, + + {"punsubscribe",punsubscribeCommand,-1, + "pub-sub no-script ok-loading ok-stale", + 0,NULL,0,0,0,0,0,0}, + + {"publish",publishCommand,3, + "pub-sub ok-loading ok-stale fast may-replicate", + 0,NULL,0,0,0,0,0,0}, + + {"pubsub",pubsubCommand,-2, + "pub-sub ok-loading ok-stale random", + 0,NULL,0,0,0,0,0,0}, + + {"watch",watchCommand,-2, + "no-script fast ok-loading ok-stale @transaction", + 0,NULL,1,-1,1,0,0,0}, + + {"unwatch",unwatchCommand,1, + "no-script fast ok-loading ok-stale @transaction", + 0,NULL,0,0,0,0,0,0}, + + {"cluster",clusterCommand,-2, + "admin ok-stale random", + 0,NULL,0,0,0,0,0,0}, + + {"restore",restoreCommand,-4, + "write use-memory @keyspace @dangerous", + 0,NULL,1,1,1,0,0,0}, + + {"restore-asking",restoreCommand,-4, + "write use-memory cluster-asking @keyspace @dangerous", + 0,NULL,1,1,1,0,0,0}, + + {"migrate",migrateCommand,-6, + "write random @keyspace @dangerous", + 0,migrateGetKeys,3,3,1,0,0,0}, + + 
{"asking",askingCommand,1, + "fast @keyspace", + 0,NULL,0,0,0,0,0,0}, + + {"readonly",readonlyCommand,1, + "fast @keyspace", + 0,NULL,0,0,0,0,0,0}, + + {"readwrite",readwriteCommand,1, + "fast @keyspace", + 0,NULL,0,0,0,0,0,0}, + + {"dump",dumpCommand,2, + "read-only random @keyspace", + 0,NULL,1,1,1,0,0,0}, + + {"object",objectCommand,-2, + "read-only random @keyspace", + 0,NULL,2,2,1,0,0,0}, + + {"memory",memoryCommand,-2, + "random read-only", + 0,memoryGetKeys,0,0,0,0,0,0}, + + {"client",clientCommand,-2, + "admin no-script random ok-loading ok-stale @connection", + 0,NULL,0,0,0,0,0,0}, + + {"hello",helloCommand,-1, + "no-auth no-script fast ok-loading ok-stale @connection", + 0,NULL,0,0,0,0,0,0}, + + /* EVAL can modify the dataset, however it is not flagged as a write + * command since we do the check while running commands from Lua. + * + * EVAL and EVALSHA also feed monitors before the commands are executed, + * as opposed to after. + */ + {"eval",evalCommand,-3, + "no-script no-monitor may-replicate @scripting", + 0,evalGetKeys,0,0,0,0,0,0}, + + {"evalsha",evalShaCommand,-3, + "no-script no-monitor may-replicate @scripting", + 0,evalGetKeys,0,0,0,0,0,0}, + + {"slowlog",slowlogCommand,-2, + "admin random ok-loading ok-stale", + 0,NULL,0,0,0,0,0,0}, + + {"script",scriptCommand,-2, + "no-script may-replicate @scripting", + 0,NULL,0,0,0,0,0,0}, + + {"time",timeCommand,1, + "random fast ok-loading ok-stale", + 0,NULL,0,0,0,0,0,0}, + + {"bitop",bitopCommand,-4, + "write use-memory @bitmap", + 0,NULL,2,-1,1,0,0,0}, + + {"bitcount",bitcountCommand,-2, + "read-only @bitmap", + 0,NULL,1,1,1,0,0,0}, + + {"bitpos",bitposCommand,-3, + "read-only @bitmap", + 0,NULL,1,1,1,0,0,0}, + + {"wait",waitCommand,3, + "no-script @keyspace", + 0,NULL,0,0,0,0,0,0}, + + {"command",commandCommand,-1, + "ok-loading ok-stale random @connection", + 0,NULL,0,0,0,0,0,0}, + + {"geoadd",geoaddCommand,-5, + "write use-memory @geo", + 0,NULL,1,1,1,0,0,0}, + + /* GEORADIUS has store options that 
may write. */ + {"georadius",georadiusCommand,-6, + "write use-memory @geo", + 0,georadiusGetKeys,1,1,1,0,0,0}, + + {"georadius_ro",georadiusroCommand,-6, + "read-only @geo", + 0,NULL,1,1,1,0,0,0}, + + {"georadiusbymember",georadiusbymemberCommand,-5, + "write use-memory @geo", + 0,georadiusGetKeys,1,1,1,0,0,0}, + + {"georadiusbymember_ro",georadiusbymemberroCommand,-5, + "read-only @geo", + 0,NULL,1,1,1,0,0,0}, + + {"geohash",geohashCommand,-2, + "read-only @geo", + 0,NULL,1,1,1,0,0,0}, + + {"geopos",geoposCommand,-2, + "read-only @geo", + 0,NULL,1,1,1,0,0,0}, + + {"geodist",geodistCommand,-4, + "read-only @geo", + 0,NULL,1,1,1,0,0,0}, + + {"geosearch",geosearchCommand,-7, + "read-only @geo", + 0,NULL,1,1,1,0,0,0}, + + {"geosearchstore",geosearchstoreCommand,-8, + "write use-memory @geo", + 0,NULL,1,2,1,0,0,0}, + + {"pfselftest",pfselftestCommand,1, + "admin @hyperloglog", + 0,NULL,0,0,0,0,0,0}, + + {"pfadd",pfaddCommand,-2, + "write use-memory fast @hyperloglog", + 0,NULL,1,1,1,0,0,0}, + + /* Technically speaking PFCOUNT may change the key since it changes the + * final bytes in the HyperLogLog representation. However in this case + * we claim that the representation, even if accessible, is an internal + * affair, and the command is semantically read only. 
*/ + {"pfcount",pfcountCommand,-2, + "read-only may-replicate @hyperloglog", + 0,NULL,1,-1,1,0,0,0}, + + {"pfmerge",pfmergeCommand,-2, + "write use-memory @hyperloglog", + 0,NULL,1,-1,1,0,0,0}, + + /* Unlike PFCOUNT that is considered as a read-only command (although + * it changes a bit), PFDEBUG may change the entire key when converting + * from sparse to dense representation */ + {"pfdebug",pfdebugCommand,-3, + "admin write use-memory @hyperloglog", + 0,NULL,2,2,1,0,0,0}, + + {"xadd",xaddCommand,-5, + "write use-memory fast random @stream", + 0,NULL,1,1,1,0,0,0}, + + {"xrange",xrangeCommand,-4, + "read-only @stream", + 0,NULL,1,1,1,0,0,0}, + + {"xrevrange",xrevrangeCommand,-4, + "read-only @stream", + 0,NULL,1,1,1,0,0,0}, + + {"xlen",xlenCommand,2, + "read-only fast @stream", + 0,NULL,1,1,1,0,0,0}, + + {"xread",xreadCommand,-4, + "read-only @stream @blocking", + 0,xreadGetKeys,0,0,0,0,0,0}, + + {"xreadgroup",xreadCommand,-7, + "write @stream @blocking", + 0,xreadGetKeys,0,0,0,0,0,0}, + + {"xgroup",xgroupCommand,-2, + "write use-memory @stream", + 0,NULL,2,2,1,0,0,0}, + + {"xsetid",xsetidCommand,3, + "write use-memory fast @stream", + 0,NULL,1,1,1,0,0,0}, + + {"xack",xackCommand,-4, + "write fast random @stream", + 0,NULL,1,1,1,0,0,0}, + + {"xpending",xpendingCommand,-3, + "read-only random @stream", + 0,NULL,1,1,1,0,0,0}, + + {"xclaim",xclaimCommand,-6, + "write random fast @stream", + 0,NULL,1,1,1,0,0,0}, + + {"xautoclaim",xautoclaimCommand,-6, + "write random fast @stream", + 0,NULL,1,1,1,0,0,0}, + + {"xinfo",xinfoCommand,-2, + "read-only random @stream", + 0,NULL,2,2,1,0,0,0}, + + {"xdel",xdelCommand,-3, + "write fast @stream", + 0,NULL,1,1,1,0,0,0}, + + {"xtrim",xtrimCommand,-4, + "write random @stream", + 0,NULL,1,1,1,0,0,0}, + + {"post",securityWarningCommand,-1, + "ok-loading ok-stale read-only", + 0,NULL,0,0,0,0,0,0}, + + {"host:",securityWarningCommand,-1, + "ok-loading ok-stale read-only", + 0,NULL,0,0,0,0,0,0}, + + {"latency",latencyCommand,-2, + 
"admin no-script ok-loading ok-stale", + 0,NULL,0,0,0,0,0,0}, + + {"acl",aclCommand,-2, + "admin no-script ok-loading ok-stale", + 0,NULL,0,0,0,0,0,0}, + + {"rreplay",replicaReplayCommand,-3, + "read-only fast noprop ok-stale", + 0,NULL,0,0,0,0,0,0}, + + {"keydb.cron",cronCommand,-5, + "write use-memory", + 0,NULL,1,1,1,0,0,0}, + + {"keydb.hrename", hrenameCommand, 4, + "write fast @hash", + 0,NULL,0,0,0,0,0,0}, + + {"stralgo",stralgoCommand,-2, + "read-only @string", + 0,lcsGetKeys,0,0,0,0,0,0}, + + {"keydb.nhget",nhgetCommand,-2, + "read-only fast @hash", + 0,NULL,1,1,1,0,0,0}, + + {"keydb.nhset",nhsetCommand,-3, + "read-only fast @hash", + 0,NULL,1,1,1,0,0,0}, + + {"KEYDB.MVCCRESTORE",mvccrestoreCommand, 5, + "write use-memory @keyspace @dangerous", + 0,NULL,1,1,1,0,0,0}, + + {"reset",resetCommand,1, + "no-script ok-stale ok-loading fast @connection", + 0,NULL,0,0,0,0,0,0}, + + {"failover",failoverCommand,-1, + "admin no-script ok-stale", + 0,NULL,0,0,0,0,0,0}, + + {"lfence", lfenceCommand,1, + "read-only random ok-stale", + 0,NULL,0,0,0,0,0,0} +}; + +/*============================ Utility functions ============================ */ + +/* We use a private localtime implementation which is fork-safe. The logging + * function of Redis may be called from other threads. */ +extern "C" void nolocks_localtime(struct tm *tmp, time_t t, time_t tz, int dst); +extern "C" pid_t gettid(); + +void processClients(); + +/* Low level logging. To use only for very big messages, otherwise + * serverLog() is to prefer. 
*/ +#if defined(__has_feature) +# if __has_feature(thread_sanitizer) +__attribute__((no_sanitize("thread"))) +# endif +#endif +void serverLogRaw(int level, const char *msg) { + const int syslogLevelMap[] = { LOG_DEBUG, LOG_INFO, LOG_NOTICE, LOG_WARNING }; + const char *c = ".-*# "; + FILE *fp; + char buf[64]; + int rawmode = (level & LL_RAW); + int log_to_stdout = g_pserver->logfile[0] == '\0'; + + level &= 0xff; /* clear flags */ + if (level < cserver.verbosity) return; + + fp = log_to_stdout ? stdout : fopen(g_pserver->logfile,"a"); + if (!fp) return; + + if (rawmode) { + fprintf(fp,"%s",msg); + } else { + int off; + struct timeval tv; + int role_char; + pid_t pid = getpid(); + + gettimeofday(&tv,NULL); + struct tm tm; + int daylight_active; + __atomic_load(&g_pserver->daylight_active, &daylight_active, __ATOMIC_RELAXED); + nolocks_localtime(&tm,tv.tv_sec,g_pserver->timezone,daylight_active); + off = strftime(buf,sizeof(buf),"%d %b %Y %H:%M:%S.",&tm); + snprintf(buf+off,sizeof(buf)-off,"%03d",(int)tv.tv_usec/1000); + if (g_pserver->sentinel_mode) { + role_char = 'X'; /* Sentinel. */ + } else if (pid != cserver.pid) { + role_char = 'C'; /* RDB / AOF writing child. */ + } else { + role_char = (listLength(g_pserver->masters) ? 'S':'M'); /* Slave or Master. */ + } + fprintf(fp,"%d:%d:%c %s %c %s\n", + (int)getpid(),(int)gettid(),role_char, buf,c[level],msg); + } + fflush(fp); + + if (!log_to_stdout) fclose(fp); + if (g_pserver->syslog_enabled) syslog(syslogLevelMap[level], "%s", msg); +} + +/* Like serverLogRaw() but with printf-alike support. This is the function that + * is used across the code. The raw version is only used in order to dump + * the INFO output on crash. */ +#if defined(__has_feature) +# if __has_feature(thread_sanitizer) +__attribute__((no_sanitize("thread"))) +# endif +#endif +void _serverLog(int level, const char *fmt, ...) 
{ + va_list ap; + char msg[LOG_MAX_LEN]; + + va_start(ap, fmt); + vsnprintf(msg, sizeof(msg), fmt, ap); + va_end(ap); + + serverLogRaw(level,msg); +} + +/* Log a fixed message without printf-alike capabilities, in a way that is + * safe to call from a signal handler. + * + * We actually use this only for signals that are not fatal from the point + * of view of Redis. Signals that are going to kill the server anyway and + * where we need printf-alike features are served by serverLog(). */ +#if defined(__has_feature) +# if __has_feature(thread_sanitizer) +__attribute__((no_sanitize("thread"))) +# endif +#endif +void serverLogFromHandler(int level, const char *msg) { + int fd; + int log_to_stdout = g_pserver->logfile[0] == '\0'; + char buf[64]; + + if ((level&0xff) < cserver.verbosity || (log_to_stdout && cserver.daemonize)) + return; + fd = log_to_stdout ? STDOUT_FILENO : + open(g_pserver->logfile, O_APPEND|O_CREAT|O_WRONLY, 0644); + if (fd == -1) return; + ll2string(buf,sizeof(buf),getpid()); + if (write(fd,buf,strlen(buf)) == -1) goto err; + if (write(fd,":signal-handler (",17) == -1) goto err; + ll2string(buf,sizeof(buf),time(NULL)); + if (write(fd,buf,strlen(buf)) == -1) goto err; + if (write(fd,") ",2) == -1) goto err; + if (write(fd,msg,strlen(msg)) == -1) goto err; + if (write(fd,"\n",1) == -1) goto err; +err: + if (!log_to_stdout) close(fd); +} + +/* Return the UNIX time in microseconds */ +long long ustime(void) { + struct timeval tv; + long long ust; + + gettimeofday(&tv, NULL); + ust = ((long long)tv.tv_sec)*1000000; + ust += tv.tv_usec; + return ust; +} + +/* Return the UNIX time in milliseconds */ +mstime_t mstime(void) { + return ustime()/1000; +} + +/* After an RDB dump or AOF rewrite we exit from children using _exit() instead of + * exit(), because the latter may interact with the same file objects used by + * the parent process. However if we are testing the coverage normal exit() is + * used in order to obtain the right coverage information. 
*/ +void exitFromChild(int retcode) { +#ifdef COVERAGE_TEST + exit(retcode); +#else + _exit(retcode); +#endif +} + +/*====================== Hash table type implementation ==================== */ + +/* This is a hash table type that uses the SDS dynamic strings library as + * keys and redis objects as values (objects can hold SDS strings, + * lists, sets). */ + +void dictVanillaFree(void *privdata, void *val) +{ + DICT_NOTUSED(privdata); + zfree(val); +} + +void dictListDestructor(void *privdata, void *val) +{ + DICT_NOTUSED(privdata); + listRelease((list*)val); +} + +int dictSdsKeyCompare(void *privdata, const void *key1, + const void *key2) +{ + int l1,l2; + DICT_NOTUSED(privdata); + + l1 = sdslen((sds)key1); + l2 = sdslen((sds)key2); + if (l1 != l2) return 0; + return memcmp(key1, key2, l1) == 0; +} + +void dictSdsNOPDestructor(void *, void *) {} + +void dictDbKeyDestructor(void *privdata, void *key) +{ + DICT_NOTUSED(privdata); + sdsfree((sds)key); +} + +/* A case insensitive version used for the command lookup table and other + * places where case insensitive non binary-safe comparison is needed. */ +int dictSdsKeyCaseCompare(void *privdata, const void *key1, + const void *key2) +{ + DICT_NOTUSED(privdata); + + return strcasecmp((const char*)key1, (const char*)key2) == 0; +} + +void dictObjectDestructor(void *privdata, void *val) +{ + DICT_NOTUSED(privdata); + + if (val == NULL) return; /* Lazy freeing will set value to NULL. 
*/ + decrRefCount((robj*)val); +} + +void dictSdsDestructor(void *privdata, void *val) +{ + DICT_NOTUSED(privdata); + + sdsfree((sds)val); +} + +int dictObjKeyCompare(void *privdata, const void *key1, + const void *key2) +{ + const robj *o1 = (const robj*)key1, *o2 = (const robj*)key2; + return dictSdsKeyCompare(privdata,ptrFromObj(o1),ptrFromObj(o2)); +} + +uint64_t dictObjHash(const void *key) { + const robj *o = (const robj*)key; + void *ptr = ptrFromObj(o); + return dictGenHashFunction(ptr, sdslen((sds)ptr)); +} + +uint64_t dictSdsHash(const void *key) { + return dictGenHashFunction((unsigned char*)key, sdslen((char*)key)); +} + +uint64_t dictSdsCaseHash(const void *key) { + return dictGenCaseHashFunction((unsigned char*)key, sdslen((char*)key)); +} + +int dictEncObjKeyCompare(void *privdata, const void *key1, + const void *key2) +{ + robj *o1 = (robj*) key1, *o2 = (robj*) key2; + int cmp; + + if (o1->encoding == OBJ_ENCODING_INT && + o2->encoding == OBJ_ENCODING_INT) + return ptrFromObj(o1) == ptrFromObj(o2); + + /* Due to OBJ_STATIC_REFCOUNT, we avoid calling getDecodedObject() without + * good reasons, because it would incrRefCount() the object, which + * is invalid. So we check to make sure dictFind() works with static + * objects as well. 
*/ + if (o1->getrefcount() != OBJ_STATIC_REFCOUNT) o1 = getDecodedObject(o1); + if (o2->getrefcount() != OBJ_STATIC_REFCOUNT) o2 = getDecodedObject(o2); + cmp = dictSdsKeyCompare(privdata,ptrFromObj(o1),ptrFromObj(o2)); + if (o1->getrefcount() != OBJ_STATIC_REFCOUNT) decrRefCount(o1); + if (o2->getrefcount() != OBJ_STATIC_REFCOUNT) decrRefCount(o2); + return cmp; +} + +uint64_t dictEncObjHash(const void *key) { + robj *o = (robj*) key; + + if (sdsEncodedObject(o)) { + return dictGenHashFunction(ptrFromObj(o), sdslen((sds)ptrFromObj(o))); + } else if (o->encoding == OBJ_ENCODING_INT) { + char buf[32]; + int len; + + len = ll2string(buf,32,(long)ptrFromObj(o)); + return dictGenHashFunction((unsigned char*)buf, len); + } else { + serverPanic("Unknown string encoding"); + } +} + +/* Return 1 if currently we allow dict to expand. Dict may allocate huge + * memory to contain hash buckets when dict expands, that may lead redis + * rejects user's requests or evicts some keys, we can stop dict to expand + * provisionally if used memory will be over maxmemory after dict expands, + * but to guarantee the performance of redis, we still allow dict to expand + * if dict load factor exceeds HASHTABLE_MAX_LOAD_FACTOR. */ +int dictExpandAllowed(size_t moreMem, double usedRatio) { + if (usedRatio <= HASHTABLE_MAX_LOAD_FACTOR) { + return !overMaxmemoryAfterAlloc(moreMem); + } else { + return 1; + } +} + +void dictGCAsyncFree(dictAsyncRehashCtl *async); + +/* Generic hash table type where keys are Redis Objects, Values + * dummy pointers. */ +dictType objectKeyPointerValueDictType = { + dictEncObjHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictEncObjKeyCompare, /* key compare */ + dictObjectDestructor, /* key destructor */ + NULL, /* val destructor */ + NULL /* allow to expand */ +}; + +/* Like objectKeyPointerValueDictType(), but values can be destroyed, if + * not NULL, calling zfree(). 
*/ +dictType objectKeyHeapPointerValueDictType = { + dictEncObjHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictEncObjKeyCompare, /* key compare */ + dictObjectDestructor, /* key destructor */ + dictVanillaFree, /* val destructor */ + NULL /* allow to expand */ +}; + +/* Set dictionary type. Keys are SDS strings, values are not used. */ +dictType setDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + dictSdsDestructor, /* key destructor */ + NULL /* val destructor */ +}; + +/* Sorted sets hash (note: a skiplist is used in addition to the hash table) */ +dictType zsetDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + NULL, /* Note: SDS string shared & freed by skiplist */ + NULL, /* val destructor */ + NULL /* allow to expand */ +}; + +/* db->dict, keys are sds strings, vals are Redis objects. */ +dictType dbDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + dictDbKeyDestructor, /* key destructor */ + dictObjectDestructor, /* val destructor */ + dictExpandAllowed, /* allow to expand */ + dictGCAsyncFree /* async free destructor */ +}; + +dictType dbExpiresDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + NULL, /* key destructor */ + NULL, /* val destructor */ + dictExpandAllowed /* allow to expand */ + }; + +/* db->pdict, keys are sds strings, vals are Redis objects. 
*/ +dictType dbTombstoneDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + dictDbKeyDestructor, /* key destructor */ + NULL, /* val destructor */ + dictExpandAllowed /* allow to expand */ +}; + +dictType dbSnapshotDictType = { + dictSdsHash, + NULL, + NULL, + dictSdsKeyCompare, + dictSdsNOPDestructor, + dictObjectDestructor, + dictExpandAllowed /* allow to expand */ +}; + +/* g_pserver->lua_scripts sha (as sds string) -> scripts (as robj) cache. */ +dictType shaScriptObjectDictType = { + dictSdsCaseHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCaseCompare, /* key compare */ + dictSdsDestructor, /* key destructor */ + dictObjectDestructor, /* val destructor */ + NULL /* allow to expand */ +}; + +/* Command table. sds string -> command struct pointer. */ +dictType commandTableDictType = { + dictSdsCaseHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCaseCompare, /* key compare */ + dictSdsDestructor, /* key destructor */ + NULL, /* val destructor */ + NULL /* allow to expand */ +}; + +/* Hash type hash table (note that small hashes are represented with ziplists) */ +dictType hashDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + dictSdsDestructor, /* key destructor */ + dictSdsDestructor, /* val destructor */ + NULL /* allow to expand */ +}; + +/* Dict type without destructor */ +dictType sdsReplyDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + NULL, /* key destructor */ + NULL, /* val destructor */ + NULL /* allow to expand */ +}; + +/* Keylist hash table type has unencoded redis objects as keys and + * lists as values. It's used for blocking operations (BLPOP) and to + * map swapped keys to a list of clients waiting for this keys to be loaded. 
*/ +dictType keylistDictType = { + dictObjHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictObjKeyCompare, /* key compare */ + dictObjectDestructor, /* key destructor */ + dictListDestructor, /* val destructor */ + NULL /* allow to expand */ +}; + +/* Cluster nodes hash table, mapping nodes addresses 1.2.3.4:9880 to + * clusterNode structures. */ +dictType clusterNodesDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + dictSdsDestructor, /* key destructor */ + NULL, /* val destructor */ + NULL /* allow to expand */ +}; + +/* Cluster re-addition blacklist. This maps node IDs to the time + * we can re-add this node. The goal is to avoid readding a removed + * node for some time. */ +dictType clusterNodesBlackListDictType = { + dictSdsCaseHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCaseCompare, /* key compare */ + dictSdsDestructor, /* key destructor */ + NULL, /* val destructor */ + NULL /* allow to expand */ +}; + +/* Modules system dictionary type. Keys are module name, + * values are pointer to RedisModule struct. */ +dictType modulesDictType = { + dictSdsCaseHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCaseCompare, /* key compare */ + dictSdsDestructor, /* key destructor */ + NULL, /* val destructor */ + NULL /* allow to expand */ +}; + +/* Migrate cache dict type. */ +dictType migrateCacheDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + dictSdsDestructor, /* key destructor */ + NULL, /* val destructor */ + NULL /* allow to expand */ +}; + +/* Replication cached script dict (g_pserver->repl_scriptcache_dict). + * Keys are sds SHA1 strings, while values are not used at all in the current + * implementation. 
*/ +dictType replScriptCacheDictType = { + dictSdsCaseHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCaseCompare, /* key compare */ + dictSdsDestructor, /* key destructor */ + NULL, /* val destructor */ + NULL /* allow to expand */ +}; + +int htNeedsResize(dict *dict) { + long long size, used; + + size = dictSlots(dict); + used = dictSize(dict); + return (size > DICT_HT_INITIAL_SIZE && + (used*100/size < HASHTABLE_MIN_FILL)); +} + +/* If the percentage of used slots in the HT reaches HASHTABLE_MIN_FILL + * we resize the hash table to save memory */ +void tryResizeHashTables(int dbid) { + g_pserver->db[dbid]->tryResize(); +} + +/* Our hash table implementation performs rehashing incrementally while + * we write/read from the hash table. Still if the server is idle, the hash + * table will use two tables for a long time. So we try to use 1 millisecond + * of CPU time at every call of this function to perform some rehashing. + * + * The function returns the number of rehashes if some rehashing was performed, otherwise 0 + * is returned. */ +int redisDbPersistentData::incrementallyRehash() { + /* Keys dictionary */ + int result = 0; + if (dictIsRehashing(m_pdict)) + result += dictRehashMilliseconds(m_pdict,1); + if (dictIsRehashing(m_pdictTombstone)) + dictRehashMilliseconds(m_pdictTombstone,1); // don't count this + return result; /* already used our millisecond for this loop... */ +} + +/* This function is called once a background process of some kind terminates, + * as we want to avoid resizing the hash tables when there is a child in order + * to play well with copy-on-write (otherwise when a resize happens lots of + * memory pages are copied). The goal of this function is to update the ability + * for dict.c to resize the hash tables accordingly to the fact we have an + * active fork child running. 
*/ +void updateDictResizePolicy(void) { + if (!hasActiveChildProcess()) + dictEnableResize(); + else + dictDisableResize(); +} + +const char *strChildType(int type) { + switch(type) { + case CHILD_TYPE_RDB: return "RDB"; + case CHILD_TYPE_AOF: return "AOF"; + case CHILD_TYPE_LDB: return "LDB"; + case CHILD_TYPE_MODULE: return "MODULE"; + default: return "Unknown"; + } +} + +/* Return true if there are active children processes doing RDB saving, + * AOF rewriting, or some side process spawned by a loaded module. */ +int hasActiveChildProcess() { + return g_pserver->child_pid != -1; +} + +int hasActiveChildProcessOrBGSave() { + return g_pserver->FRdbSaveInProgress() || hasActiveChildProcess(); +} + +void resetChildState() { + g_pserver->child_type = CHILD_TYPE_NONE; + g_pserver->child_pid = -1; + g_pserver->stat_current_cow_bytes = 0; + g_pserver->stat_current_cow_updated = 0; + g_pserver->stat_current_save_keys_processed = 0; + g_pserver->stat_module_progress = 0; + g_pserver->stat_current_save_keys_total = 0; + updateDictResizePolicy(); + closeChildInfoPipe(); + moduleFireServerEvent(REDISMODULE_EVENT_FORK_CHILD, + REDISMODULE_SUBEVENT_FORK_CHILD_DIED, + NULL); +} + +/* Return if child type is mutual exclusive with other fork children */ +int isMutuallyExclusiveChildType(int type) { + return type == CHILD_TYPE_RDB || type == CHILD_TYPE_AOF || type == CHILD_TYPE_MODULE; +} + +/* Return true if this instance has persistence completely turned off: + * both RDB and AOF are disabled. */ +int allPersistenceDisabled(void) { + return g_pserver->saveparamslen == 0 && g_pserver->aof_state == AOF_OFF; +} + +/* ======================= Cron: called every 100 ms ======================== */ + +/* Add a sample to the operations per second array of samples. 
*/ +void trackInstantaneousMetric(int metric, long long current_reading) { + long long now = mstime(); + long long t = now - g_pserver->inst_metric[metric].last_sample_time; + long long ops = current_reading - + g_pserver->inst_metric[metric].last_sample_count; + long long ops_sec; + + ops_sec = t > 0 ? (ops*1000/t) : 0; + + g_pserver->inst_metric[metric].samples[g_pserver->inst_metric[metric].idx] = + ops_sec; + g_pserver->inst_metric[metric].idx++; + g_pserver->inst_metric[metric].idx %= STATS_METRIC_SAMPLES; + g_pserver->inst_metric[metric].last_sample_time = now; + g_pserver->inst_metric[metric].last_sample_count = current_reading; +} + +/* Return the mean of all the samples. */ +long long getInstantaneousMetric(int metric) { + int j; + long long sum = 0; + + for (j = 0; j < STATS_METRIC_SAMPLES; j++) + sum += g_pserver->inst_metric[metric].samples[j]; + return sum / STATS_METRIC_SAMPLES; +} + +/* The client query buffer is an sds.c string that can end with a lot of + * free space not used, this function reclaims space if needed. + * + * The function always returns 0 as it never terminates the client. */ +int clientsCronResizeQueryBuffer(client *c) { + AssertCorrectThread(c); + size_t querybuf_size = sdsAllocSize(c->querybuf); + time_t idletime = g_pserver->unixtime - c->lastinteraction; + + /* There are two conditions to resize the query buffer: + * 1) Query buffer is > BIG_ARG and too big for latest peak. + * 2) Query buffer is > BIG_ARG and client is idle. */ + if (querybuf_size > PROTO_MBULK_BIG_ARG && + ((querybuf_size/(c->querybuf_peak+1)) > 2 || + idletime > 2)) + { + /* Only resize the query buffer if it is actually wasting + * at least a few kbytes. */ + if (sdsavail(c->querybuf) > 1024*4) { + c->querybuf = sdsRemoveFreeSpace(c->querybuf); + } + } + /* Reset the peak again to capture the peak memory usage in the next + * cycle. 
*/ + c->querybuf_peak = 0; + + /* Clients representing masters also use a "pending query buffer" that + * is the yet not applied part of the stream we are reading. Such buffer + * also needs resizing from time to time, otherwise after a very large + * transfer (a huge value or a big MIGRATE operation) it will keep using + * a lot of memory. */ + if (c->flags & CLIENT_MASTER) { + /* There are two conditions to resize the pending query buffer: + * 1) Pending Query buffer is > LIMIT_PENDING_QUERYBUF. + * 2) Used length is smaller than pending_querybuf_size/2 */ + size_t pending_querybuf_size = sdsAllocSize(c->pending_querybuf); + if(pending_querybuf_size > LIMIT_PENDING_QUERYBUF && + sdslen(c->pending_querybuf) < (pending_querybuf_size/2)) + { + c->pending_querybuf = sdsRemoveFreeSpace(c->pending_querybuf); + } + } + return 0; +} + +SymVer parseVersion(const char *version) +{ + SymVer ver = {-1,-1,-1}; + long versions[3] = {-1,-1,-1}; + const char *start = version; + const char *end = nullptr; + + for (int iver = 0; iver < 3; ++iver) + { + end = start; + while (*end != '\0' && *end != '.') + ++end; + + if (start >= end) + return ver; + + if (!string2l(start, end - start, versions + iver)) + return ver; + if (*end != '\0') + start = end+1; + else + break; + } + ver.major = versions[0]; + ver.minor = versions[1]; + ver.build = versions[2]; + + return ver; +} + +VersionCompareResult compareVersion(SymVer *pver) +{ + SymVer symVerThis = parseVersion(KEYDB_REAL_VERSION); + // Special case, 0.0.0 is equal to any version + if ((symVerThis.major == 0 && symVerThis.minor == 0 && symVerThis.build == 0) + || (pver->major == 0 && pver->minor == 0 && pver->build == 0)) + return VersionCompareResult::EqualVersion; + + if (pver->major <= 6 && pver->minor <= 3 && pver->build <= 3) + return VersionCompareResult::IncompatibleVersion; + + for (int iver = 0; iver < 3; ++iver) + { + long verThis, verOther; + switch (iver) + { + case 0: + verThis = symVerThis.major; verOther = pver->major; 
+ break; + case 1: + verThis = symVerThis.minor; verOther = pver->minor; + break; + case 2: + verThis = symVerThis.build; verOther = pver->build; + } + + if (verThis < verOther) + return VersionCompareResult::NewerVersion; + if (verThis > verOther) + return VersionCompareResult::OlderVersion; + } + return VersionCompareResult::EqualVersion; +} + +/* This function is used in order to track clients using the biggest amount + * of memory in the latest few seconds. This way we can provide such information + * in the INFO output (clients section), without having to do an O(N) scan for + * all the clients. + * + * This is how it works. We have an array of CLIENTS_PEAK_MEM_USAGE_SLOTS slots + * where we track, for each, the biggest client output and input buffers we + * saw in that slot. Every slot correspond to one of the latest seconds, since + * the array is indexed by doing UNIXTIME % CLIENTS_PEAK_MEM_USAGE_SLOTS. + * + * When we want to know what was recently the peak memory usage, we just scan + * such few slots searching for the maximum value. */ +#define CLIENTS_PEAK_MEM_USAGE_SLOTS 8 +size_t ClientsPeakMemInput[CLIENTS_PEAK_MEM_USAGE_SLOTS] = {0}; +size_t ClientsPeakMemOutput[CLIENTS_PEAK_MEM_USAGE_SLOTS] = {0}; + +int clientsCronTrackExpansiveClients(client *c, int time_idx) { + size_t in_usage = sdsZmallocSize(c->querybuf) + c->argv_len_sum() + + (c->argv ? zmalloc_size(c->argv) : 0); + size_t out_usage = getClientOutputBufferMemoryUsage(c); + + /* Track the biggest values observed so far in this slot. */ + if (in_usage > ClientsPeakMemInput[time_idx]) ClientsPeakMemInput[time_idx] = in_usage; + if (out_usage > ClientsPeakMemOutput[time_idx]) ClientsPeakMemOutput[time_idx] = out_usage; + + return 0; /* This function never terminates the client. */ +} + +/* Iterating all the clients in getMemoryOverheadData() is too slow and + * in turn would make the INFO command too slow. 
So we perform this + * computation incrementally and track the (not instantaneous but updated + * to the second) total memory used by clients using clinetsCron() in + * a more incremental way (depending on g_pserver->hz). */ +int clientsCronTrackClientsMemUsage(client *c) { + size_t mem = 0; + int type = getClientType(c); + mem += getClientOutputBufferMemoryUsage(c); + mem += sdsZmallocSize(c->querybuf); + mem += zmalloc_size(c); + mem += c->argv_len_sum(); + if (c->argv) mem += zmalloc_size(c->argv); + /* Now that we have the memory used by the client, remove the old + * value from the old category, and add it back. */ + g_pserver->stat_clients_type_memory[c->client_cron_last_memory_type] -= + c->client_cron_last_memory_usage; + g_pserver->stat_clients_type_memory[type] += mem; + /* Remember what we added and where, to remove it next time. */ + c->client_cron_last_memory_usage = mem; + c->client_cron_last_memory_type = type; + return 0; +} + +/* Return the max samples in the memory usage of clients tracked by + * the function clientsCronTrackExpansiveClients(). */ +void getExpansiveClientsInfo(size_t *in_usage, size_t *out_usage) { + size_t i = 0, o = 0; + for (int j = 0; j < CLIENTS_PEAK_MEM_USAGE_SLOTS; j++) { + if (ClientsPeakMemInput[j] > i) i = ClientsPeakMemInput[j]; + if (ClientsPeakMemOutput[j] > o) o = ClientsPeakMemOutput[j]; + } + *in_usage = i; + *out_usage = o; +} + +int closeClientOnOverload(client *c) { + if (g_pserver->overload_closed_clients > MAX_CLIENTS_SHED_PER_PERIOD) return false; + if (!g_pserver->is_overloaded) return false; + // Don't close masters, replicas, or pub/sub clients + if (c->flags & (CLIENT_MASTER | CLIENT_SLAVE | CLIENT_PENDING_WRITE | CLIENT_PUBSUB | CLIENT_BLOCKED)) return false; + freeClient(c); + ++g_pserver->overload_closed_clients; + return true; +} + +/* This function is called by serverCron() and is used in order to perform + * operations on clients that are important to perform constantly. 
For instance + * we use this function in order to disconnect clients after a timeout, including + * clients blocked in some blocking command with a non-zero timeout. + * + * The function makes some effort to process all the clients every second, even + * if this cannot be strictly guaranteed, since serverCron() may be called with + * an actual frequency lower than g_pserver->hz in case of latency events like slow + * commands. + * + * It is very important for this function, and the functions it calls, to be + * very fast: sometimes Redis has tens of hundreds of connected clients, and the + * default g_pserver->hz value is 10, so sometimes here we need to process thousands + * of clients per second, turning this function into a source of latency. + */ +#define CLIENTS_CRON_MIN_ITERATIONS 5 +void clientsCron(int iel) { + /* Try to process at least numclients/g_pserver->hz of clients + * per call. Since normally (if there are no big latency events) this + * function is called g_pserver->hz times per second, in the average case we + * process all the clients in 1 second. */ + int numclients = listLength(g_pserver->clients); + int iterations = numclients/g_pserver->hz; + mstime_t now = mstime(); + + /* Process at least a few clients while we are at it, even if we need + * to process less than CLIENTS_CRON_MIN_ITERATIONS to meet our contract + * of processing each client once per second. */ + if (iterations < CLIENTS_CRON_MIN_ITERATIONS) + iterations = (numclients < CLIENTS_CRON_MIN_ITERATIONS) ? + numclients : CLIENTS_CRON_MIN_ITERATIONS; + + + int curr_peak_mem_usage_slot = g_pserver->unixtime % CLIENTS_PEAK_MEM_USAGE_SLOTS; + /* Always zero the next sample, so that when we switch to that second, we'll + * only register samples that are greater in that second without considering + * the history of such slot. 
+ * + * Note: our index may jump to any random position if serverCron() is not + * called for some reason with the normal frequency, for instance because + * some slow command is called taking multiple seconds to execute. In that + * case our array may end containing data which is potentially older + * than CLIENTS_PEAK_MEM_USAGE_SLOTS seconds: however this is not a problem + * since here we want just to track if "recently" there were very expansive + * clients from the POV of memory usage. */ + int zeroidx = (curr_peak_mem_usage_slot+1) % CLIENTS_PEAK_MEM_USAGE_SLOTS; + ClientsPeakMemInput[zeroidx] = 0; + ClientsPeakMemOutput[zeroidx] = 0; + + + while(listLength(g_pserver->clients) && iterations--) { + client *c; + listNode *head; + /* Rotate the list, take the current head, process. + * This way if the client must be removed from the list it's the + * first element and we don't incur into O(N) computation. */ + listRotateTailToHead(g_pserver->clients); + head = (listNode*)listFirst(g_pserver->clients); + c = (client*)listNodeValue(head); + if (c->iel == iel) + { + fastlock_lock(&c->lock); + /* The following functions do different service checks on the client. + * The protocol is that they return non-zero if the client was + * terminated. 
*/ + if (clientsCronHandleTimeout(c,now)) continue; // Client free'd so don't release the lock + if (clientsCronResizeQueryBuffer(c)) goto LContinue; + if (clientsCronTrackExpansiveClients(c, curr_peak_mem_usage_slot)) goto LContinue; + if (clientsCronTrackClientsMemUsage(c)) goto LContinue; + if (closeClientOnOutputBufferLimitReached(c, 0)) continue; // Client also free'd + if (closeClientOnOverload(c)) continue; + LContinue: + fastlock_unlock(&c->lock); + } + } + + /* Free any pending clients */ + freeClientsInAsyncFreeQueue(iel); +} + +bool expireOwnKeys() +{ + if (iAmMaster()) { + return true; + } else if (!g_pserver->fActiveReplica && (listLength(g_pserver->masters) == 1)) { + redisMaster *mi = (redisMaster*)listNodeValue(listFirst(g_pserver->masters)); + if (mi->isActive) + return true; + } + return false; +} + +int hash_spin_worker() { + auto ctl = serverTL->rehashCtl; + return dictRehashSomeAsync(ctl, 1); +} + +/* This function handles 'background' operations we are required to do + * incrementally in Redis databases, such as active key expiring, resizing, + * rehashing. */ +void databasesCron(bool fMainThread) { + serverAssert(GlobalLocksAcquired()); + + if (fMainThread) { + /* Expire keys by random sampling. Not required for slaves + * as master will synthesize DELs for us. */ + if (g_pserver->active_expire_enabled) { + if (expireOwnKeys()) { + activeExpireCycle(ACTIVE_EXPIRE_CYCLE_SLOW); + } else { + expireSlaveKeys(); + } + } + + /* Defrag keys gradually. */ + activeDefragCycle(); + } + + /* Perform hash tables rehashing if needed, but only if there are no + * other processes saving the DB on disk. Otherwise rehashing is bad + * as will cause a lot of copy-on-write of memory pages. */ + if (!hasActiveChildProcess()) { + /* We use global counters so if we stop the computation at a given + * DB we'll be able to start from the successive in the next + * cron loop iteration. 
*/ + static unsigned int resize_db = 0; + static unsigned int rehash_db = 0; + static int rehashes_per_ms; + static int async_rehashes; + int dbs_per_call = CRON_DBS_PER_CALL; + int j; + + /* Don't test more DBs than we have. */ + if (dbs_per_call > cserver.dbnum) dbs_per_call = cserver.dbnum; + + if (fMainThread) { + /* Resize */ + for (j = 0; j < dbs_per_call; j++) { + tryResizeHashTables(resize_db % cserver.dbnum); + resize_db++; + } + } + + /* Rehash */ + if (g_pserver->activerehashing) { + for (j = 0; j < dbs_per_call; j++) { + if (serverTL->rehashCtl != nullptr) { + if (!serverTL->rehashCtl->done.load(std::memory_order_relaxed)) { + aeReleaseLock(); + if (dictRehashSomeAsync(serverTL->rehashCtl, rehashes_per_ms)) { + aeAcquireLock(); + break; + } + aeAcquireLock(); + } + + if (serverTL->rehashCtl->done.load(std::memory_order_relaxed)) { + dictCompleteRehashAsync(serverTL->rehashCtl, true /*fFree*/); + serverTL->rehashCtl = nullptr; + } + } + + serverAssert(serverTL->rehashCtl == nullptr); + ::dict *dict = g_pserver->db[rehash_db]->dictUnsafeKeyOnly(); + /* Are we async rehashing? And if so is it time to re-calibrate? */ + /* The recalibration limit is a prime number to ensure balancing across threads */ + if (g_pserver->enable_async_rehash && rehashes_per_ms > 0 && async_rehashes < 131 && !cserver.active_defrag_enabled && cserver.cthreads > 1 && dictSize(dict) > 2048 && dictIsRehashing(dict) && !g_pserver->loading && aeLockContention() > 1) { + serverTL->rehashCtl = dictRehashAsyncStart(dict, rehashes_per_ms * ((1000 / g_pserver->hz) / 10)); // Estimate 10% CPU time spent in lock contention + if (serverTL->rehashCtl) + ++async_rehashes; + } + if (serverTL->rehashCtl) + break; + + // Before starting anything new, can we end the rehash of a blocked thread? 
+ while (dict->asyncdata != nullptr) { + auto asyncdata = dict->asyncdata; + if (asyncdata->done) { + dictCompleteRehashAsync(asyncdata, false /*fFree*/); // Don't free because we don't own the pointer + serverAssert(dict->asyncdata != asyncdata); + } else { + break; + } + } + + if (dict->asyncdata) + break; + + rehashes_per_ms = g_pserver->db[rehash_db]->incrementallyRehash(); + async_rehashes = 0; + if (rehashes_per_ms > 0) { + /* If the function did some work, stop here, we'll do + * more at the next cron loop. */ + if (!cserver.active_defrag_enabled) { + serverLog(LL_VERBOSE, "Calibrated rehashes per ms: %d", rehashes_per_ms); + } + break; + } else if (dict->asyncdata == nullptr) { + /* If this db didn't need rehash and we have none in flight, we'll try the next one. */ + rehash_db++; + rehash_db %= cserver.dbnum; + } + } + } + } + + if (serverTL->rehashCtl) { + setAeLockSetThreadSpinWorker(hash_spin_worker); + } else { + setAeLockSetThreadSpinWorker(nullptr); + } +} + +/* We take a cached value of the unix time in the global state because with + * virtual memory and aging there is to store the current time in objects at + * every object access, and accuracy is not needed. To access a global var is + * a lot faster than calling time(NULL). + * + * This function should be fast because it is called at every command execution + * in call(), so it is possible to decide if to update the daylight saving + * info or not using the 'update_daylight_info' argument. Normally we update + * such info only when calling this function from serverCron() but not when + * calling it from call(). */ +void updateCachedTime() { + long long t = ustime(); + __atomic_store(&g_pserver->ustime, &t, __ATOMIC_RELAXED); + t /= 1000; + __atomic_store(&g_pserver->mstime, &t, __ATOMIC_RELAXED); + t /= 1000; + g_pserver->unixtime = t; + + /* To get information about daylight saving time, we need to call + * localtime_r and cache the result. 
However calling localtime_r in this + * context is safe since we will never fork() while here, in the main + * thread. The logging function will call a thread safe version of + * localtime that has no locks. */ + struct tm tm; + time_t ut = g_pserver->unixtime; + localtime_r(&ut,&tm); + __atomic_store(&g_pserver->daylight_active, &tm.tm_isdst, __ATOMIC_RELAXED); +} + +void checkChildrenDone(void) { + int statloc = 0; + pid_t pid; + + if (g_pserver->FRdbSaveInProgress() && !cserver.fForkBgSave) + { + void *rval = nullptr; + int err = EAGAIN; + if (!g_pserver->rdbThreadVars.fDone || (err = pthread_join(g_pserver->rdbThreadVars.rdb_child_thread, &rval))) + { + if (err != EBUSY && err != EAGAIN) + serverLog(LL_WARNING, "Error joining the background RDB save thread: %s\n", strerror(errno)); + } + else + { + int exitcode = (int)reinterpret_cast(rval); + backgroundSaveDoneHandler(exitcode,g_pserver->rdbThreadVars.fRdbThreadCancel); + g_pserver->rdbThreadVars.fRdbThreadCancel = false; + g_pserver->rdbThreadVars.fDone = false; + if (exitcode == 0) receiveChildInfo(); + closeChildInfoPipe(); + } + } + else if ((pid = waitpid(-1, &statloc, WNOHANG)) != 0) { + int exitcode = WIFEXITED(statloc) ? WEXITSTATUS(statloc) : -1; + int bysignal = 0; + + if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc); + + /* sigKillChildHandler catches the signal and calls exit(), but we + * must make sure not to flag lastbgsave_status, etc incorrectly. + * We could directly terminate the child process via SIGUSR1 + * without handling it */ + if (exitcode == SERVER_CHILD_NOERROR_RETVAL) { + bysignal = SIGUSR1; + exitcode = 1; + } + + if (pid == -1) { + serverLog(LL_WARNING,"waitpid() returned an error: %s. 
" + "child_type: %s, child_pid = %d", + strerror(errno), + strChildType(g_pserver->child_type), + (int) g_pserver->child_pid); + } else if (pid == g_pserver->child_pid) { + if (g_pserver->child_type == CHILD_TYPE_RDB) { + backgroundSaveDoneHandler(exitcode, bysignal); + } else if (g_pserver->child_type == CHILD_TYPE_AOF) { + backgroundRewriteDoneHandler(exitcode, bysignal); + } else if (g_pserver->child_type == CHILD_TYPE_MODULE) { + ModuleForkDoneHandler(exitcode, bysignal); + } else { + serverPanic("Unknown child type %d for child pid %d", g_pserver->child_type, g_pserver->child_pid); + exit(1); + } + if (!bysignal && exitcode == 0) receiveChildInfo(); + resetChildState(); + } else { + if (!ldbRemoveChild(pid)) { + serverLog(LL_WARNING, + "Warning, detected child with unmatched pid: %ld", + (long) pid); + } + } + + /* start any pending forks immediately. */ + replicationStartPendingFork(); + } +} + +/* Called from serverCron and loadingCron to update cached memory metrics. */ +void cronUpdateMemoryStats() { + /* Record the max memory used since the server was started. */ + if (zmalloc_used_memory() > g_pserver->stat_peak_memory) + g_pserver->stat_peak_memory = zmalloc_used_memory(); + + run_with_period(100) { + /* Sample the RSS and other metrics here since this is a relatively slow call. + * We must sample the zmalloc_used at the same time we take the rss, otherwise + * the frag ratio calculate may be off (ratio of two samples at different times) */ + g_pserver->cron_malloc_stats.process_rss = zmalloc_get_rss(); + g_pserver->cron_malloc_stats.zmalloc_used = zmalloc_used_memory(); + /* Sampling the allocator info can be slow too. 
+ * The fragmentation ratio it'll show is potentially more accurate + * it excludes other RSS pages such as: shared libraries, LUA and other non-zmalloc + * allocations, and allocator reserved pages that can be pursed (all not actual frag) */ + zmalloc_get_allocator_info(&g_pserver->cron_malloc_stats.allocator_allocated, + &g_pserver->cron_malloc_stats.allocator_active, + &g_pserver->cron_malloc_stats.allocator_resident); + /* in case the allocator isn't providing these stats, fake them so that + * fragmentation info still shows some (inaccurate metrics) */ + if (!g_pserver->cron_malloc_stats.allocator_resident) { + /* LUA memory isn't part of zmalloc_used, but it is part of the process RSS, + * so we must deduct it in order to be able to calculate correct + * "allocator fragmentation" ratio */ + size_t lua_memory = lua_gc(g_pserver->lua,LUA_GCCOUNT,0)*1024LL; + g_pserver->cron_malloc_stats.allocator_resident = g_pserver->cron_malloc_stats.process_rss - lua_memory; + } + if (!g_pserver->cron_malloc_stats.allocator_active) + g_pserver->cron_malloc_stats.allocator_active = g_pserver->cron_malloc_stats.allocator_resident; + if (!g_pserver->cron_malloc_stats.allocator_allocated) + g_pserver->cron_malloc_stats.allocator_allocated = g_pserver->cron_malloc_stats.zmalloc_used; + + if (g_pserver->force_eviction_percent) { + g_pserver->cron_malloc_stats.sys_available = getMemAvailable(); + } + } +} + +static std::atomic s_fFlushInProgress { false }; +void flushStorageWeak() +{ + bool fExpected = false; + if (s_fFlushInProgress.compare_exchange_strong(fExpected, true /* desired */, std::memory_order_seq_cst, std::memory_order_relaxed)) + { + g_pserver->asyncworkqueue->AddWorkFunction([]{ + aeAcquireLock(); + mstime_t storage_process_latency; + latencyStartMonitor(storage_process_latency); + std::vector vecdb; + for (int idb = 0; idb < cserver.dbnum; ++idb) { + if (g_pserver->db[idb]->processChanges(true)) + vecdb.push_back(g_pserver->db[idb]); + } + 
latencyEndMonitor(storage_process_latency); + latencyAddSampleIfNeeded("storage-process-changes", storage_process_latency); + aeReleaseLock(); + + std::vector vecsnapshotFree; + vecsnapshotFree.resize(vecdb.size()); + for (size_t idb = 0; idb < vecdb.size(); ++idb) + vecdb[idb]->commitChanges(&vecsnapshotFree[idb]); + + for (size_t idb = 0; idb < vecsnapshotFree.size(); ++idb) { + if (vecsnapshotFree[idb] != nullptr) + vecdb[idb]->endSnapshotAsync(vecsnapshotFree[idb]); + } + s_fFlushInProgress = false; + }, true /* fHiPri */); + } + else + { + serverLog(LOG_INFO, "Missed storage flush due to existing flush still in flight. Consider increasing storage-weak-flush-period"); + } +} + +/* This is our timer interrupt, called g_pserver->hz times per second. + * Here is where we do a number of things that need to be done asynchronously. + * For instance: + * + * - Active expired keys collection (it is also performed in a lazy way on + * lookup). + * - Software watchdog. + * - Update some statistic. + * - Incremental rehashing of the DBs hash tables. + * - Triggering BGSAVE / AOF rewrite, and handling of terminated children. + * - Clients timeout of different kinds. + * - Replication reconnection. + * - Many more... + * + * Everything directly called here will be called g_pserver->hz times per second, + * so in order to throttle execution of things we want to do less frequently + * a macro is used: run_with_period(milliseconds) { .... } + */ + +void unblockChildThreadIfNecessary(); +int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { + int j; + UNUSED(eventLoop); + UNUSED(id); + UNUSED(clientData); + + if (g_pserver->maxmemory && g_pserver->m_pstorageFactory) + performEvictions(false); + + /* If another threads unblocked one of our clients, and this thread has been idle + then beforeSleep won't have a chance to process the unblocking. So we also + process them here in the cron job to ensure they don't starve. 
+ */ + if (listLength(g_pserver->rgthreadvar[IDX_EVENT_LOOP_MAIN].unblocked_clients)) + { + processUnblockedClients(IDX_EVENT_LOOP_MAIN); + } + + /* Software watchdog: deliver the SIGALRM that will reach the signal + * handler if we don't return here fast enough. */ + if (g_pserver->watchdog_period) watchdogScheduleSignal(g_pserver->watchdog_period); + + g_pserver->hz = g_pserver->config_hz; + /* Adapt the g_pserver->hz value to the number of configured clients. If we have + * many clients, we want to call serverCron() with an higher frequency. */ + if (g_pserver->dynamic_hz) { + while (listLength(g_pserver->clients) / g_pserver->hz > + MAX_CLIENTS_PER_CLOCK_TICK) + { + g_pserver->hz += g_pserver->hz; // *= 2 + if (g_pserver->hz > CONFIG_MAX_HZ) { + g_pserver->hz = CONFIG_MAX_HZ; + break; + } + } + } + + /* A cancelled child thread could be hung waiting for us to read from a pipe */ + unblockChildThreadIfNecessary(); + + run_with_period(100) { + long long stat_net_input_bytes, stat_net_output_bytes; + stat_net_input_bytes = g_pserver->stat_net_input_bytes.load(std::memory_order_relaxed); + stat_net_output_bytes = g_pserver->stat_net_output_bytes.load(std::memory_order_relaxed); + + long long stat_numcommands; + __atomic_load(&g_pserver->stat_numcommands, &stat_numcommands, __ATOMIC_RELAXED); + trackInstantaneousMetric(STATS_METRIC_COMMAND,stat_numcommands); + trackInstantaneousMetric(STATS_METRIC_NET_INPUT, + stat_net_input_bytes); + trackInstantaneousMetric(STATS_METRIC_NET_OUTPUT, + stat_net_output_bytes); + } + + /* We have just LRU_BITS bits per object for LRU information. + * So we use an (eventually wrapping) LRU clock. + * + * Note that even if the counter wraps it's not a big problem, + * everything will still work but some object will appear younger + * to Redis. However for this to happen a given object should never be + * touched for all the time needed to the counter to wrap, which is + * not likely. 
+ * + * Note that you can change the resolution altering the + * LRU_CLOCK_RESOLUTION define. */ + g_pserver->lruclock = getLRUClock(); + + cronUpdateMemoryStats(); + + /* We received a SIGTERM, shutting down here in a safe way, as it is + * not ok doing so inside the signal handler. */ + if (g_pserver->shutdown_asap) { + if (prepareForShutdown(SHUTDOWN_NOFLAGS) == C_OK) throw ShutdownException(); + serverLog(LL_WARNING,"SIGTERM received but errors trying to shut down the server, check the logs for more information"); + g_pserver->shutdown_asap = 0; + } + + /* Show some info about non-empty databases */ + if (cserver.verbosity <= LL_VERBOSE) { + run_with_period(5000) { + for (j = 0; j < cserver.dbnum; j++) { + long long size, used, vkeys; + + size = g_pserver->db[j]->slots(); + used = g_pserver->db[j]->size(); + vkeys = g_pserver->db[j]->expireSize(); + if (used || vkeys) { + serverLog(LL_VERBOSE,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size); + } + } + } + } + + /* Show information about connected clients */ + if (!g_pserver->sentinel_mode) { + run_with_period(5000) { + serverLog(LL_DEBUG, + "%lu clients connected (%lu replicas), %zu bytes in use", + listLength(g_pserver->clients)-listLength(g_pserver->slaves), + listLength(g_pserver->slaves), + zmalloc_used_memory()); + } + } + + /* We need to do a few operations on clients asynchronously. */ + clientsCron(IDX_EVENT_LOOP_MAIN); + + /* Handle background operations on Redis databases. */ + databasesCron(true /* fMainThread */); + + /* Start a scheduled AOF rewrite if this was requested by the user while + * a BGSAVE was in progress. */ + if (!hasActiveChildProcessOrBGSave() && + g_pserver->aof_rewrite_scheduled) + { + rewriteAppendOnlyFileBackground(); + } + + /* Check if a background saving or AOF rewrite in progress terminated. 
*/ + if (hasActiveChildProcessOrBGSave() || ldbPendingChildren()) + { + run_with_period(1000) receiveChildInfo(); + checkChildrenDone(); + } else { + /* If there is not a background saving/rewrite in progress check if + * we have to save/rewrite now. */ + for (j = 0; j < g_pserver->saveparamslen; j++) { + struct saveparam *sp = g_pserver->saveparams+j; + + /* Save if we reached the given amount of changes, + * the given amount of seconds, and if the latest bgsave was + * successful or if, in case of an error, at least + * CONFIG_BGSAVE_RETRY_DELAY seconds already elapsed. */ + if (g_pserver->dirty >= sp->changes && + g_pserver->unixtime-g_pserver->lastsave > sp->seconds && + (g_pserver->unixtime-g_pserver->lastbgsave_try > + CONFIG_BGSAVE_RETRY_DELAY || + g_pserver->lastbgsave_status == C_OK)) + { + // Ensure rehashing is complete + bool fRehashInProgress = false; + if (g_pserver->activerehashing) { + for (int idb = 0; idb < cserver.dbnum && !fRehashInProgress; ++idb) { + if (g_pserver->db[idb]->FRehashing()) + fRehashInProgress = true; + } + } + + if (!fRehashInProgress) { + serverLog(LL_NOTICE,"%d changes in %d seconds. Saving...", + sp->changes, (int)sp->seconds); + rdbSaveInfo rsi, *rsiptr; + rsiptr = rdbPopulateSaveInfo(&rsi); + rdbSaveBackground(rsiptr); + } + break; + } + } + + /* Trigger an AOF rewrite if needed. */ + if (g_pserver->aof_state == AOF_ON && + !hasActiveChildProcessOrBGSave() && + g_pserver->aof_rewrite_perc && + g_pserver->aof_current_size > g_pserver->aof_rewrite_min_size) + { + long long base = g_pserver->aof_rewrite_base_size ? + g_pserver->aof_rewrite_base_size : 1; + long long growth = (g_pserver->aof_current_size*100/base) - 100; + if (growth >= g_pserver->aof_rewrite_perc) { + serverLog(LL_NOTICE,"Starting automatic rewriting of AOF on %lld%% growth",growth); + rewriteAppendOnlyFileBackground(); + } + } + } + /* Just for the sake of defensive programming, to avoid forgeting to + * call this function when need. 
*/ + updateDictResizePolicy(); + + + /* AOF postponed flush: Try at every cron cycle if the slow fsync + * completed. */ + if (g_pserver->aof_state == AOF_ON && g_pserver->aof_flush_postponed_start) + flushAppendOnlyFile(0); + + /* AOF write errors: in this case we have a buffer to flush as well and + * clear the AOF error in case of success to make the DB writable again, + * however to try every second is enough in case of 'hz' is set to + * a higher frequency. */ + run_with_period(1000) { + if (g_pserver->aof_state == AOF_ON && g_pserver->aof_last_write_status == C_ERR) + flushAppendOnlyFile(0); + } + + /* Clear the paused clients state if needed. */ + checkClientPauseTimeoutAndReturnIfPaused(); + + /* Replication cron function -- used to reconnect to master, + * detect transfer failures, start background RDB transfers and so forth. + * + * If Redis is trying to failover then run the replication cron faster so + * progress on the handshake happens more quickly. */ + if (g_pserver->failover_state != NO_FAILOVER) { + run_with_period(100) replicationCron(); + } else { + run_with_period(1000) replicationCron(); + } + + /* Run the Redis Cluster cron. */ + run_with_period(100) { + if (g_pserver->cluster_enabled) clusterCron(); + } + + /* Run the Sentinel timer if we are in sentinel mode. */ + if (g_pserver->sentinel_mode) sentinelTimer(); + + /* Cleanup expired MIGRATE cached sockets. 
*/ + run_with_period(1000) { + migrateCloseTimedoutSockets(); + } + + /* Check for CPU Overload */ + run_with_period(10'000) { + g_pserver->is_overloaded = false; + g_pserver->overload_closed_clients = 0; + static clock_t last = 0; + if (g_pserver->overload_protect_threshold > 0) { + clock_t cur = clock(); + double perc = static_cast(cur - last) / (CLOCKS_PER_SEC*10); + perc /= cserver.cthreads; + perc *= 100.0; + serverLog(LL_WARNING, "CPU Used: %.2f", perc); + if (perc > g_pserver->overload_protect_threshold) { + serverLog(LL_WARNING, "\tWARNING: CPU overload detected."); + g_pserver->is_overloaded = true; + } + last = cur; + } + } + + /* Tune the fastlock to CPU load */ + run_with_period(30000) { + /* Tune the fastlock to CPU load */ + fastlock_auto_adjust_waits(); + } + + /* Reload the TLS cert if neccessary. This effectively rotates the + * cert if a change has been made on disk, but the futriix server hasn't + * been notified. */ + run_with_period(1000){ + tlsReload(); + } + + /* Resize tracking keys table if needed. This is also done at every + * command execution, but we want to be sure that if the last command + * executed changes the value via CONFIG SET, the server will perform + * the operation even if completely idle. */ + if (g_pserver->tracking_clients) trackingLimitUsedSlots(); + + /* Start a scheduled BGSAVE if the corresponding flag is set. This is + * useful when we are forced to postpone a BGSAVE because an AOF + * rewrite is in progress. + * + * Note: this code must be after the replicationCron() call above so + * make sure when refactoring this file to keep this order. This is useful + * because we want to give priority to RDB savings for replication. 
*/ + if (!hasActiveChildProcessOrBGSave() && + g_pserver->rdb_bgsave_scheduled && + (g_pserver->unixtime-g_pserver->lastbgsave_try > CONFIG_BGSAVE_RETRY_DELAY || + g_pserver->lastbgsave_status == C_OK)) + { + rdbSaveInfo rsi, *rsiptr; + rsiptr = rdbPopulateSaveInfo(&rsi); + if (rdbSaveBackground(rsiptr) == C_OK) + g_pserver->rdb_bgsave_scheduled = 0; + } + + if (cserver.storage_memory_model == STORAGE_WRITEBACK && g_pserver->m_pstorageFactory && !g_pserver->loading) { + run_with_period(g_pserver->storage_flush_period) { + flushStorageWeak(); + } + } + + /* Fire the cron loop modules event. */ + RedisModuleCronLoopV1 ei = {REDISMODULE_CRON_LOOP_VERSION,g_pserver->hz}; + moduleFireServerEvent(REDISMODULE_EVENT_CRON_LOOP, + 0, + &ei); + + + /* CRON functions may trigger async writes, so do this last */ + ProcessPendingAsyncWrites(); + + // Measure lock contention from a different thread to be more accurate + g_pserver->asyncworkqueue->AddWorkFunction([]{ + g_pserver->rglockSamples[g_pserver->ilockRingHead] = (uint16_t)aeLockContention(); + ++g_pserver->ilockRingHead; + if (g_pserver->ilockRingHead >= redisServer::s_lockContentionSamples) + g_pserver->ilockRingHead = 0; + }); + + run_with_period(10) { + if (!g_pserver->garbageCollector.empty()) { + // Server threads don't free the GC, but if we don't have a + // a bgsave or some other async task then we'll hold onto the + // data for too long + g_pserver->asyncworkqueue->AddWorkFunction([]{ + auto epoch = g_pserver->garbageCollector.startEpoch(); + g_pserver->garbageCollector.endEpoch(epoch); + }); + } + } + + if (g_pserver->soft_shutdown) { + /* Loop through our clients list and see if there are any active clients */ + listIter li; + listNode *ln; + listRewind(g_pserver->clients, &li); + bool fActiveClient = false; + while ((ln = listNext(&li)) && !fActiveClient) { + client *c = (client*)listNodeValue(ln); + if (c->flags & CLIENT_IGNORE_SOFT_SHUTDOWN) + continue; + fActiveClient = true; + } + if (!fActiveClient) { + 
if (prepareForShutdown(SHUTDOWN_NOFLAGS) == C_OK) { + serverLog(LL_WARNING, "All active clients have disconnected while a soft shutdown is pending. Shutting down now."); + throw ShutdownException(); + } + } + } + + g_pserver->cronloops++; + return 1000/g_pserver->hz; +} + +// serverCron for worker threads other than the main thread +int serverCronLite(struct aeEventLoop *eventLoop, long long id, void *clientData) +{ + UNUSED(id); + UNUSED(clientData); + + if (g_pserver->maxmemory && g_pserver->m_pstorageFactory) + performEvictions(false); + + int iel = ielFromEventLoop(eventLoop); + serverAssert(iel != IDX_EVENT_LOOP_MAIN); + + /* If another threads unblocked one of our clients, and this thread has been idle + then beforeSleep won't have a chance to process the unblocking. So we also + process them here in the cron job to ensure they don't starve. + */ + if (listLength(g_pserver->rgthreadvar[iel].unblocked_clients)) + { + processUnblockedClients(iel); + } + + /* Handle background operations on Redis databases. */ + databasesCron(false /* fMainThread */); + + /* Unpause clients if enough time has elapsed */ + checkClientPauseTimeoutAndReturnIfPaused(); + + ProcessPendingAsyncWrites(); // A bug but leave for now, events should clean up after themselves + clientsCron(iel); + + return 1000/g_pserver->hz; +} + +extern "C" void asyncFreeDictTable(dictEntry **de) +{ + if (de == nullptr || serverTL == nullptr || serverTL->gcEpoch.isReset()) { + zfree(de); + } else { + g_pserver->garbageCollector.enqueueCPtr(serverTL->gcEpoch, de); + } +} + +void blockingOperationStarts() { + if(!g_pserver->blocking_op_nesting++){ + __atomic_load(&g_pserver->mstime, &g_pserver->blocked_last_cron, __ATOMIC_ACQUIRE); + } +} + +void blockingOperationEnds() { + if(!(--g_pserver->blocking_op_nesting)){ + g_pserver->blocked_last_cron = 0; + } +} + +/* This function fill in the role of serverCron during RDB or AOF loading, and + * also during blocked scripts. 
+ * It attempts to do its duties at a similar rate as the configured g_pserver->hz, + * and updates cronloops variable so that similarly to serverCron, the + * run_with_period can be used. */ +void whileBlockedCron() { + /* Here we may want to perform some cron jobs (normally done g_pserver->hz times + * per second). */ + + /* Since this function depends on a call to blockingOperationStarts, let's + * make sure it was done. */ + serverAssert(g_pserver->blocked_last_cron); + + /* In case we where called too soon, leave right away. This way one time + * jobs after the loop below don't need an if. and we don't bother to start + * latency monitor if this function is called too often. */ + if (g_pserver->blocked_last_cron >= g_pserver->mstime) + return; + + mstime_t latency; + latencyStartMonitor(latency); + + /* In some cases we may be called with big intervals, so we may need to do + * extra work here. This is because some of the functions in serverCron rely + * on the fact that it is performed every 10 ms or so. For instance, if + * activeDefragCycle needs to utilize 25% cpu, it will utilize 2.5ms, so we + * need to call it multiple times. */ + long hz_ms = 1000/g_pserver->hz; + while (g_pserver->blocked_last_cron < g_pserver->mstime) { + + /* Defrag keys gradually. */ + activeDefragCycle(); + + g_pserver->blocked_last_cron += hz_ms; + + /* Increment cronloop so that run_with_period works. */ + g_pserver->cronloops++; + } + + /* Other cron jobs do not need to be done in a loop. No need to check + * g_pserver->blocked_last_cron since we have an early exit at the top. 
*/ + + /* Update memory stats during loading (excluding blocked scripts) */ + if (g_pserver->loading) cronUpdateMemoryStats(); + + latencyEndMonitor(latency); + latencyAddSampleIfNeeded("while-blocked-cron",latency); +} + +extern __thread int ProcessingEventsWhileBlocked; + +/* This function gets called every time Redis is entering the + * main loop of the event driven library, that is, before to sleep + * for ready file descriptors. + * + * Note: This function is (currently) called from two functions: + * 1. aeMain - The main server loop + * 2. processEventsWhileBlocked - Process clients during RDB/AOF load + * + * If it was called from processEventsWhileBlocked we don't want + * to perform all actions (For example, we don't want to expire + * keys), but we do need to perform some actions. + * + * The most important is freeClientsInAsyncFreeQueue but we also + * call some other low-risk functions. */ +void beforeSleep(struct aeEventLoop *eventLoop) { + AeLocker locker; + int iel = ielFromEventLoop(eventLoop); + + tlsProcessPendingData(); + + locker.arm(); + + /* end any snapshots created by fast async commands */ + for (int idb = 0; idb < cserver.dbnum; ++idb) { + if (serverTL->rgdbSnapshot[idb] != nullptr && serverTL->rgdbSnapshot[idb]->FStale()) { + g_pserver->db[idb]->endSnapshot(serverTL->rgdbSnapshot[idb]); + serverTL->rgdbSnapshot[idb] = nullptr; + } + } + + size_t zmalloc_used = zmalloc_used_memory(); + if (zmalloc_used > g_pserver->stat_peak_memory) + g_pserver->stat_peak_memory = zmalloc_used; + + serverAssert(g_pserver->repl_batch_offStart < 0); + + runAndPropogateToReplicas(processClients); + + /* Just call a subset of vital functions in case we are re-entering + * the event loop from processEventsWhileBlocked(). Note that in this + * case we keep track of the number of events we are processing, since + * processEventsWhileBlocked() wants to stop ASAP if there are no longer + * events to handle. 
*/ + if (ProcessingEventsWhileBlocked) { + uint64_t processed = 0; + int aof_state = g_pserver->aof_state; + locker.disarm(); + processed += handleClientsWithPendingWrites(iel, aof_state); + locker.arm(); + processed += freeClientsInAsyncFreeQueue(iel); + g_pserver->events_processed_while_blocked += processed; + return; + } + + /* Handle precise timeouts of blocked clients. */ + handleBlockedClientsTimeout(); + + /* If tls still has pending unread data don't sleep at all. */ + aeSetDontWait(eventLoop, tlsHasPendingData()); + + /* Call the Redis Cluster before sleep function. Note that this function + * may change the state of Redis Cluster (from ok to fail or vice versa), + * so it's a good idea to call it before serving the unblocked clients + * later in this function. */ + if (g_pserver->cluster_enabled) clusterBeforeSleep(); + + /* Run a fast expire cycle (the called function will return + * ASAP if a fast cycle is not needed). */ + if (g_pserver->active_expire_enabled && (listLength(g_pserver->masters) == 0 || g_pserver->fActiveReplica)) + activeExpireCycle(ACTIVE_EXPIRE_CYCLE_FAST); + + /* Unblock all the clients blocked for synchronous replication + * in WAIT. */ + if (listLength(g_pserver->clients_waiting_acks)) + processClientsWaitingReplicas(); + + /* Check if there are clients unblocked by modules that implement + * blocking commands. */ + if (moduleCount()) moduleHandleBlockedClients(ielFromEventLoop(eventLoop)); + + /* Try to process pending commands for clients that were just unblocked. */ + if (listLength(g_pserver->rgthreadvar[iel].unblocked_clients)) + { + processUnblockedClients(iel); + } + + /* Send all the slaves an ACK request if at least one client blocked + * during the previous event loop iteration. Note that we do this after + * processUnblockedClients(), so if there are multiple pipelined WAITs + * and the just unblocked WAIT gets blocked again, we don't have to wait + * a server cron cycle in absence of other event loop events. See #6623. 
+ * + * We also don't send the ACKs while clients are paused, since it can + * increment the replication backlog, they'll be sent after the pause + * if we are still the master. */ + if (g_pserver->get_ack_from_slaves && !checkClientPauseTimeoutAndReturnIfPaused()) { + robj *argv[3]; + + argv[0] = shared.replconf; + argv[1] = shared.getack; + argv[2] = shared.special_asterick; /* Not used argument. */ + replicationFeedSlaves(g_pserver->slaves, g_pserver->replicaseldb, argv, 3); + g_pserver->get_ack_from_slaves = 0; + } + + /* We may have recieved updates from clients about their current offset. NOTE: + * this can't be done where the ACK is recieved since failover will disconnect + * our clients. */ + if (iel == IDX_EVENT_LOOP_MAIN) + updateFailoverStatus(); + + /* Send the invalidation messages to clients participating to the + * client side caching protocol in broadcasting (BCAST) mode. */ + trackingBroadcastInvalidationMessages(); + + /* Write the AOF buffer on disk */ + if (g_pserver->aof_state == AOF_ON) + flushAppendOnlyFile(0); + + static thread_local bool fFirstRun = true; + // note: we also copy the DB pointer in case a DB swap is done while the lock is released + std::vector vecdb; // note we cache the database pointer in case a dbswap is done while the lock is released + if (cserver.storage_memory_model == STORAGE_WRITETHROUGH && !g_pserver->loading) + { + if (!fFirstRun) { + mstime_t storage_process_latency; + latencyStartMonitor(storage_process_latency); + for (int idb = 0; idb < cserver.dbnum; ++idb) { + if (g_pserver->db[idb]->processChanges(false)) + vecdb.push_back(g_pserver->db[idb]); + } + latencyEndMonitor(storage_process_latency); + latencyAddSampleIfNeeded("storage-process-changes", storage_process_latency); + } else { + fFirstRun = false; + } + } + + int aof_state = g_pserver->aof_state; + + mstime_t commit_latency; + latencyStartMonitor(commit_latency); + if (g_pserver->m_pstorageFactory != nullptr) + { + locker.disarm(); + for (redisDb *db : 
vecdb) + db->commitChanges(); + locker.arm(); + } + latencyEndMonitor(commit_latency); + latencyAddSampleIfNeeded("storage-commit", commit_latency); + + /* We try to handle writes at the end so we don't have to reacquire the lock, + but if there is a pending async close we need to ensure the writes happen + first so perform it here */ + bool fSentReplies = false; + + std::unique_lock ul(g_lockasyncfree); + if (listLength(g_pserver->clients_to_close)) { + ul.unlock(); + locker.disarm(); + handleClientsWithPendingWrites(iel, aof_state); + locker.arm(); + fSentReplies = true; + } else { + ul.unlock(); + } + + if (!serverTL->gcEpoch.isReset()) + g_pserver->garbageCollector.endEpoch(serverTL->gcEpoch, true /*fNoFree*/); + serverTL->gcEpoch.reset(); + + /* Close clients that need to be closed asynchronous */ + freeClientsInAsyncFreeQueue(iel); + + if (!serverTL->gcEpoch.isReset()) + g_pserver->garbageCollector.endEpoch(serverTL->gcEpoch, true /*fNoFree*/); + serverTL->gcEpoch.reset(); + + /* Try to process blocked clients every once in while. Example: A module + * calls RM_SignalKeyAsReady from within a timer callback (So we don't + * visit processCommand() at all). */ + handleClientsBlockedOnKeys(); + + /* Before we are going to sleep, let the threads access the dataset by + * releasing the GIL. Redis main thread will not touch anything at this + * time. 
*/ + serverAssert(g_pserver->repl_batch_offStart < 0); + locker.disarm(); + if (!fSentReplies) + handleClientsWithPendingWrites(iel, aof_state); + + aeThreadOffline(); + // Scope lock_guard + { + std::unique_lock lock(time_thread_lock); + sleeping_threads++; + serverAssert(sleeping_threads <= cserver.cthreads); + } + + if (!g_pserver->garbageCollector.empty()) { + // Server threads don't free the GC, but if we don't have a + // a bgsave or some other async task then we'll hold onto the + // data for too long + g_pserver->asyncworkqueue->AddWorkFunction([]{ + auto epoch = g_pserver->garbageCollector.startEpoch(); + g_pserver->garbageCollector.endEpoch(epoch); + }, true /*fHiPri*/); + } + + /* Determine whether the modules are enabled before sleeping, and use that result + both here, and after wakeup to avoid double acquire or release of the GIL */ + serverTL->modulesEnabledThisAeLoop = !!moduleCount(); + if (serverTL->modulesEnabledThisAeLoop) moduleReleaseGIL(TRUE /*fServerThread*/); + + /* Do NOT add anything below moduleReleaseGIL !!! */ +} + +/* This function is called immediately after the event loop multiplexing + * API returned, and the control is going to soon return to Redis by invoking + * the different events callbacks. */ +void afterSleep(struct aeEventLoop *eventLoop) { + UNUSED(eventLoop); + /* Do NOT add anything above moduleAcquireGIL !!! */ + + /* Aquire the modules GIL so that their threads won't touch anything. 
+ Don't check here that modules are enabled, rather use the result from beforeSleep + Otherwise you may double acquire the GIL and cause deadlocks in the module */ + if (!ProcessingEventsWhileBlocked) { + if (serverTL->modulesEnabledThisAeLoop) moduleAcquireGIL(TRUE /*fServerThread*/); + aeThreadOnline(); + wakeTimeThread(); + + serverAssert(serverTL->gcEpoch.isReset()); + serverTL->gcEpoch = g_pserver->garbageCollector.startEpoch(); + + aeAcquireLock(); + for (int idb = 0; idb < cserver.dbnum; ++idb) + g_pserver->db[idb]->trackChanges(false); + aeReleaseLock(); + + serverTL->disable_async_commands = false; + } +} + +/* =========================== Server initialization ======================== */ + +void createSharedObjects(void) { + int j; + + /* Shared command responses */ + shared.crlf = makeObjectShared(createObject(OBJ_STRING,sdsnew("\r\n"))); + shared.ok = makeObjectShared(createObject(OBJ_STRING,sdsnew("+OK\r\n"))); + shared.emptybulk = makeObjectShared(createObject(OBJ_STRING,sdsnew("$0\r\n\r\n"))); + shared.czero = makeObjectShared(createObject(OBJ_STRING,sdsnew(":0\r\n"))); + shared.cone = makeObjectShared(createObject(OBJ_STRING,sdsnew(":1\r\n"))); + shared.emptyarray = makeObjectShared(createObject(OBJ_STRING,sdsnew("*0\r\n"))); + shared.pong = makeObjectShared(createObject(OBJ_STRING,sdsnew("+PONG\r\n"))); + shared.queued = makeObjectShared(createObject(OBJ_STRING,sdsnew("+QUEUED\r\n"))); + shared.emptyscan = makeObjectShared(createObject(OBJ_STRING,sdsnew("*2\r\n$1\r\n0\r\n*0\r\n"))); + shared.space = makeObjectShared(createObject(OBJ_STRING,sdsnew(" "))); + shared.colon = makeObjectShared(createObject(OBJ_STRING,sdsnew(":"))); + shared.plus = makeObjectShared(createObject(OBJ_STRING,sdsnew("+"))); + shared.nullbulk = makeObjectShared(createObject(OBJ_STRING,sdsnew("$0\r\n\r\n"))); + + /* Shared command error responses */ + shared.wrongtypeerr = makeObjectShared(createObject(OBJ_STRING,sdsnew( + "-WRONGTYPE Operation against a key holding the wrong 
kind of value\r\n"))); + shared.err = makeObjectShared(createObject(OBJ_STRING,sdsnew("-ERR\r\n"))); + shared.nokeyerr = makeObjectShared(createObject(OBJ_STRING,sdsnew( + "-ERR no such key\r\n"))); + shared.syntaxerr = makeObjectShared(createObject(OBJ_STRING,sdsnew( + "-ERR syntax error\r\n"))); + shared.sameobjecterr = makeObjectShared(createObject(OBJ_STRING,sdsnew( + "-ERR source and destination objects are the same\r\n"))); + shared.outofrangeerr = makeObjectShared(createObject(OBJ_STRING,sdsnew( + "-ERR index out of range\r\n"))); + shared.noscripterr = makeObjectShared(createObject(OBJ_STRING,sdsnew( + "-NOSCRIPT No matching script. Please use EVAL.\r\n"))); + shared.loadingerr = makeObjectShared(createObject(OBJ_STRING,sdsnew( + "-LOADING KeyDB is loading the dataset in memory\r\n"))); + shared.slowscripterr = makeObjectShared(createObject(OBJ_STRING,sdsnew( + "-BUSY KeyDB is busy running a script. You can only call SCRIPT KILL or SHUTDOWN NOSAVE.\r\n"))); + shared.masterdownerr = makeObjectShared(createObject(OBJ_STRING,sdsnew( + "-MASTERDOWN Link with MASTER is down and replica-serve-stale-data is set to 'no'.\r\n"))); + shared.bgsaveerr = makeObjectShared(createObject(OBJ_STRING,sdsnew( + "-MISCONF KeyDB is configured to save RDB snapshots, but it is currently not able to persist on disk. Commands that may modify the data set are disabled, because this instance is configured to report errors during writes if RDB snapshotting fails (stop-writes-on-bgsave-error option). 
Please check the KeyDB logs for details about the RDB error.\r\n"))); + shared.roslaveerr = makeObjectShared(createObject(OBJ_STRING,sdsnew( + "-READONLY You can't write against a read only replica.\r\n"))); + shared.noautherr = makeObjectShared(createObject(OBJ_STRING,sdsnew( + "-NOAUTH Authentication required.\r\n"))); + shared.oomerr = makeObjectShared(createObject(OBJ_STRING,sdsnew( + "-OOM command not allowed when used memory > 'maxmemory'.\r\n"))); + shared.execaborterr = makeObjectShared(createObject(OBJ_STRING,sdsnew( + "-EXECABORT Transaction discarded because of previous errors.\r\n"))); + shared.noreplicaserr = makeObjectShared(createObject(OBJ_STRING,sdsnew( + "-NOREPLICAS Not enough good replicas to write.\r\n"))); + shared.busykeyerr = makeObjectShared(createObject(OBJ_STRING,sdsnew( + "-BUSYKEY Target key name already exists.\r\n"))); + + + /* The shared NULL depends on the protocol version. */ + shared.null[0] = NULL; + shared.null[1] = NULL; + shared.null[2] = makeObjectShared(createObject(OBJ_STRING,sdsnew("$-1\r\n"))); + shared.null[3] = makeObjectShared(createObject(OBJ_STRING,sdsnew("_\r\n"))); + + shared.nullarray[0] = NULL; + shared.nullarray[1] = NULL; + shared.nullarray[2] = makeObjectShared(createObject(OBJ_STRING,sdsnew("*-1\r\n"))); + shared.nullarray[3] = makeObjectShared(createObject(OBJ_STRING,sdsnew("_\r\n"))); + + shared.emptymap[0] = NULL; + shared.emptymap[1] = NULL; + shared.emptymap[2] = createObject(OBJ_STRING,sdsnew("*0\r\n")); + shared.emptymap[3] = createObject(OBJ_STRING,sdsnew("%0\r\n")); + + shared.emptyset[0] = NULL; + shared.emptyset[1] = NULL; + shared.emptyset[2] = createObject(OBJ_STRING,sdsnew("*0\r\n")); + shared.emptyset[3] = createObject(OBJ_STRING,sdsnew("~0\r\n")); + + for (j = 0; j < PROTO_SHARED_SELECT_CMDS; j++) { + char dictid_str[64]; + int dictid_len; + + dictid_len = ll2string(dictid_str,sizeof(dictid_str),j); + shared.select[j] = makeObjectShared(createObject(OBJ_STRING, + sdscatprintf(sdsempty(), + 
"*2\r\n$6\r\nSELECT\r\n$%d\r\n%s\r\n", + dictid_len, dictid_str))); + } + shared.messagebulk = makeObjectShared("$7\r\nmessage\r\n",13); + shared.pmessagebulk = makeObjectShared("$8\r\npmessage\r\n",14); + shared.subscribebulk = makeObjectShared("$9\r\nsubscribe\r\n",15); + shared.unsubscribebulk = makeObjectShared("$11\r\nunsubscribe\r\n",18); + shared.psubscribebulk = makeObjectShared("$10\r\npsubscribe\r\n",17); + shared.punsubscribebulk = makeObjectShared("$12\r\npunsubscribe\r\n",19); + + /* Shared command names */ + shared.del = makeObjectShared("DEL",3); + shared.unlink = makeObjectShared("UNLINK",6); + shared.rpop = makeObjectShared("RPOP",4); + shared.lpop = makeObjectShared("LPOP",4); + shared.lpush = makeObjectShared("LPUSH",5); + shared.rpoplpush = makeObjectShared("RPOPLPUSH",9); + shared.lmove = makeObjectShared("LMOVE",5); + shared.blmove = makeObjectShared("BLMOVE",6); + shared.zpopmin = makeObjectShared("ZPOPMIN",7); + shared.zpopmax = makeObjectShared("ZPOPMAX",7); + shared.multi = makeObjectShared("MULTI",5); + shared.exec = makeObjectShared("EXEC",4); + shared.hset = makeObjectShared("HSET",4); + shared.srem = makeObjectShared("SREM",4); + shared.xgroup = makeObjectShared("XGROUP",6); + shared.xclaim = makeObjectShared("XCLAIM",6); + shared.script = makeObjectShared("SCRIPT",6); + shared.replconf = makeObjectShared("REPLCONF",8); + shared.pexpireat = makeObjectShared("PEXPIREAT",9); + shared.pexpire = makeObjectShared("PEXPIRE",7); + shared.persist = makeObjectShared("PERSIST",7); + shared.set = makeObjectShared("SET",3); + shared.eval = makeObjectShared("EVAL",4); + + /* Shared command argument */ + shared.left = makeObjectShared("left",4); + shared.right = makeObjectShared("right",5); + shared.pxat = makeObjectShared("PXAT", 4); + shared.px = makeObjectShared("PX",2); + shared.time = makeObjectShared("TIME",4); + shared.retrycount = makeObjectShared("RETRYCOUNT",10); + shared.force = makeObjectShared("FORCE",5); + shared.justid = 
makeObjectShared("JUSTID",6); + shared.lastid = makeObjectShared("LASTID",6); + shared.default_username = makeObjectShared("default",7); + shared.ping = makeObjectShared("ping",4); + shared.replping = makeObjectShared("replping", 8); + shared.setid = makeObjectShared("SETID",5); + shared.keepttl = makeObjectShared("KEEPTTL",7); + shared.load = makeObjectShared("LOAD",4); + shared.createconsumer = makeObjectShared("CREATECONSUMER",14); + shared.getack = makeObjectShared("GETACK",6); + shared.special_asterick = makeObjectShared("*",1); + shared.special_equals = makeObjectShared("=",1); + shared.redacted = makeObjectShared("(redacted)",10); + + /* KeyDB Specific */ + shared.hdel = makeObjectShared(createStringObject("HDEL", 4)); + shared.zrem = makeObjectShared(createStringObject("ZREM", 4)); + shared.mvccrestore = makeObjectShared(createStringObject("KEYDB.MVCCRESTORE", 17)); + shared.pexpirememberat = makeObjectShared(createStringObject("PEXPIREMEMBERAT",15)); + + for (j = 0; j < OBJ_SHARED_INTEGERS; j++) { + shared.integers[j] = + makeObjectShared(createObject(OBJ_STRING,(void*)(long)j)); + shared.integers[j]->encoding = OBJ_ENCODING_INT; + } + for (j = 0; j < OBJ_SHARED_BULKHDR_LEN; j++) { + shared.mbulkhdr[j] = makeObjectShared(createObject(OBJ_STRING, + sdscatprintf(sdsempty(),"*%d\r\n",j))); + shared.bulkhdr[j] = makeObjectShared(createObject(OBJ_STRING, + sdscatprintf(sdsempty(),"$%d\r\n",j))); + } + /* The following two shared objects, minstring and maxstrings, are not + * actually used for their value but as a special object meaning + * respectively the minimum possible string and the maximum possible + * string in string comparisons for the ZRANGEBYLEX command. 
*/ + shared.minstring = sdsnew("minstring"); + shared.maxstring = sdsnew("maxstring"); +} + +void initMasterInfo(redisMaster *master) +{ + if (cserver.default_masterauth) + master->masterauth = sdsdup(cserver.default_masterauth); + else + master->masterauth = NULL; + + if (cserver.default_masteruser) + master->masteruser = zstrdup(cserver.default_masteruser); + else + master->masteruser = NULL; + + master->masterport = 9880; + master->master = NULL; + master->cached_master = NULL; + master->master_initial_offset = -1; + + master->isActive = false; + + master->repl_state = REPL_STATE_NONE; + master->repl_down_since = 0; /* Never connected, repl is down since EVER. */ + master->mvccLastSync = 0; +} + +void initServerConfig(void) { + int j; + + updateCachedTime(); + getRandomHexChars(g_pserver->runid,CONFIG_RUN_ID_SIZE); + g_pserver->runid[CONFIG_RUN_ID_SIZE] = '\0'; + changeReplicationId(); + clearReplicationId2(); + g_pserver->hz = CONFIG_DEFAULT_HZ; /* Initialize it ASAP, even if it may get + updated later after loading the config. + This value may be used before the server + is initialized. */ + g_pserver->clients = listCreate(); + g_pserver->slaves = listCreate(); + g_pserver->monitors = listCreate(); + g_pserver->clients_timeout_table = raxNew(); + g_pserver->events_processed_while_blocked = 0; + g_pserver->timezone = getTimeZone(); /* Initialized by tzset(). 
*/ + cserver.configfile = NULL; + cserver.executable = NULL; + g_pserver->hz = g_pserver->config_hz = CONFIG_DEFAULT_HZ; + g_pserver->bindaddr_count = 0; + g_pserver->unixsocket = NULL; + g_pserver->unixsocketperm = CONFIG_DEFAULT_UNIX_SOCKET_PERM; + g_pserver->sofd = -1; + g_pserver->active_expire_enabled = 1; + cserver.skip_checksum_validation = 0; + g_pserver->saveparams = NULL; + g_pserver->loading = 0; + g_pserver->loading_rdb_used_mem = 0; + g_pserver->logfile = zstrdup(CONFIG_DEFAULT_LOGFILE); + g_pserver->syslog_facility = LOG_LOCAL0; + cserver.supervised = 0; + cserver.supervised_mode = SUPERVISED_NONE; + g_pserver->aof_state = AOF_OFF; + g_pserver->aof_rewrite_base_size = 0; + g_pserver->aof_rewrite_scheduled = 0; + g_pserver->aof_flush_sleep = 0; + g_pserver->aof_last_fsync = time(NULL); + atomicSet(g_pserver->aof_bio_fsync_status,C_OK); + g_pserver->aof_rewrite_time_last = -1; + g_pserver->aof_rewrite_time_start = -1; + g_pserver->aof_lastbgrewrite_status = C_OK; + g_pserver->aof_delayed_fsync = 0; + g_pserver->aof_fd = -1; + g_pserver->aof_selected_db = -1; /* Make sure the first time will not match */ + g_pserver->aof_flush_postponed_start = 0; + cserver.pidfile = NULL; + g_pserver->rdb_filename = NULL; + g_pserver->rdb_s3bucketpath = NULL; + g_pserver->active_defrag_running = 0; + g_pserver->notify_keyspace_events = 0; + g_pserver->blocked_clients = 0; + memset(g_pserver->blocked_clients_by_type,0, + sizeof(g_pserver->blocked_clients_by_type)); + g_pserver->shutdown_asap = 0; + g_pserver->cluster_enabled = 0; + g_pserver->cluster_configfile = zstrdup(CONFIG_DEFAULT_CLUSTER_CONFIG_FILE); + g_pserver->migrate_cached_sockets = dictCreate(&migrateCacheDictType,NULL); + g_pserver->next_client_id = 1; /* Client IDs, start from 1 .*/ + + g_pserver->lruclock = getLRUClock(); + resetServerSaveParams(); + + appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */ + appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */ + 
appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */ + + /* Replication related */ + g_pserver->masters = listCreate(); + g_pserver->enable_multimaster = CONFIG_DEFAULT_ENABLE_MULTIMASTER; + g_pserver->repl_syncio_timeout = CONFIG_REPL_SYNCIO_TIMEOUT; + g_pserver->master_repl_offset = 0; + g_pserver->repl_lowest_off.store(-1, std::memory_order_seq_cst); + + /* Replication partial resync backlog */ + g_pserver->repl_backlog = NULL; + g_pserver->repl_backlog_histlen = 0; + g_pserver->repl_backlog_idx = 0; + g_pserver->repl_backlog_off = 0; + g_pserver->repl_no_slaves_since = time(NULL); + + /* Failover related */ + g_pserver->failover_end_time = 0; + g_pserver->force_failover = 0; + g_pserver->target_replica_host = NULL; + g_pserver->target_replica_port = 0; + g_pserver->failover_state = NO_FAILOVER; + + /* Client output buffer limits */ + for (j = 0; j < CLIENT_TYPE_OBUF_COUNT; j++) + cserver.client_obuf_limits[j] = clientBufferLimitsDefaults[j]; + + /* Linux OOM Score config */ + for (j = 0; j < CONFIG_OOM_COUNT; j++) + g_pserver->oom_score_adj_values[j] = configOOMScoreAdjValuesDefaults[j]; + + /* Double constants initialization */ + R_Zero = 0.0; + R_PosInf = 1.0/R_Zero; + R_NegInf = -1.0/R_Zero; + R_Nan = R_Zero/R_Zero; + + /* Command table -- we initialize it here as it is part of the + * initial configuration, since command names may be changed via + * futriix.conf using the rename-command directive. 
*/ + g_pserver->commands = dictCreate(&commandTableDictType,NULL); + g_pserver->orig_commands = dictCreate(&commandTableDictType,NULL); + populateCommandTable(); + cserver.delCommand = lookupCommandByCString("del"); + cserver.multiCommand = lookupCommandByCString("multi"); + cserver.lpushCommand = lookupCommandByCString("lpush"); + cserver.lpopCommand = lookupCommandByCString("lpop"); + cserver.rpopCommand = lookupCommandByCString("rpop"); + cserver.zpopminCommand = lookupCommandByCString("zpopmin"); + cserver.zpopmaxCommand = lookupCommandByCString("zpopmax"); + cserver.sremCommand = lookupCommandByCString("srem"); + cserver.execCommand = lookupCommandByCString("exec"); + cserver.expireCommand = lookupCommandByCString("expire"); + cserver.pexpireCommand = lookupCommandByCString("pexpire"); + cserver.xclaimCommand = lookupCommandByCString("xclaim"); + cserver.xgroupCommand = lookupCommandByCString("xgroup"); + cserver.rreplayCommand = lookupCommandByCString("rreplay"); + cserver.rpoplpushCommand = lookupCommandByCString("rpoplpush"); + cserver.hdelCommand = lookupCommandByCString("hdel"); + cserver.zremCommand = lookupCommandByCString("zrem"); + cserver.lmoveCommand = lookupCommandByCString("lmove"); + + /* Debugging */ + g_pserver->watchdog_period = 0; + + /* By default we want scripts to be always replicated by effects + * (single commands executed by the script), and not by sending the + * script to the replica / AOF. This is the new way starting from + * Redis 5. However it is possible to revert it via futriix.conf. 
*/ + g_pserver->lua_always_replicate_commands = 1; + + /* Multithreading */ + cserver.cthreads = CONFIG_DEFAULT_THREADS; + cserver.fThreadAffinity = CONFIG_DEFAULT_THREAD_AFFINITY; + + // This will get dereferenced before the second stage init where we have the true db count + // so make sure its zero and initialized + g_pserver->db = (redisDb**)zcalloc(sizeof(redisDb*)*std::max(cserver.dbnum, 1), MALLOC_LOCAL); + + cserver.threadAffinityOffset = 0; + + /* Client Pause related */ + g_pserver->client_pause_type = CLIENT_PAUSE_OFF; + g_pserver->client_pause_end_time = 0; + initConfigValues(); +} + +extern char **environ; + +/* Restart the server, executing the same executable that started this + * instance, with the same arguments and configuration file. + * + * The function is designed to directly call execve() so that the new + * server instance will retain the PID of the previous one. + * + * The list of flags, that may be bitwise ORed together, alter the + * behavior of this function: + * + * RESTART_SERVER_NONE No flags. + * RESTART_SERVER_GRACEFULLY Do a proper shutdown before restarting. + * RESTART_SERVER_CONFIG_REWRITE Rewrite the config file before restarting. + * + * On success the function does not return, because the process turns into + * a different process. On error C_ERR is returned. */ +int restartServer(int flags, mstime_t delay) { + int j; + + /* Check if we still have accesses to the executable that started this + * server instance. */ + if (access(cserver.executable,X_OK) == -1) { + serverLog(LL_WARNING,"Can't restart: this process has no " + "permissions to execute %s", cserver.executable); + return C_ERR; + } + + /* Config rewriting. */ + if (flags & RESTART_SERVER_CONFIG_REWRITE && + cserver.configfile && + rewriteConfig(cserver.configfile, 0) == -1) + { + serverLog(LL_WARNING,"Can't restart: configuration rewrite process " + "failed"); + return C_ERR; + } + + /* Perform a proper shutdown. 
*/ + if (flags & RESTART_SERVER_GRACEFULLY && + prepareForShutdown(SHUTDOWN_NOFLAGS) != C_OK) + { + serverLog(LL_WARNING,"Can't restart: error preparing for shutdown"); + return C_ERR; + } + + /* Close all file descriptors, with the exception of stdin, stdout, strerr + * which are useful if we restart a Redis server which is not daemonized. */ + for (j = 3; j < (int)g_pserver->maxclients + 1024; j++) { + /* Test the descriptor validity before closing it, otherwise + * Valgrind issues a warning on close(). */ + if (fcntl(j,F_GETFD) != -1) + { + /* This user to just close() here, but sanitizers detected that as an FD race. + The race doesn't matter since we're about to call exec() however we want + to cut down on noise, so instead we ask the kernel to close when we call + exec(), and only do it ourselves if that fails. */ + if (fcntl(j, F_SETFD, FD_CLOEXEC) == -1) + { + close(j); // failed to set close on exec, close here + } + } + } + + if (flags & RESTART_SERVER_GRACEFULLY) { + if (g_pserver->m_pstorageFactory) { + for (int idb = 0; idb < cserver.dbnum; ++idb) { + g_pserver->db[idb]->storageProviderDelete(); + } + delete g_pserver->metadataDb; + } + } + + /* Execute the server with the original command line. */ + if (delay) usleep(delay*1000); + zfree(cserver.exec_argv[0]); + cserver.exec_argv[0] = zstrdup(cserver.executable); + execve(cserver.executable,cserver.exec_argv,environ); + + /* If an error occurred here, there is nothing we can do, but exit. */ + _exit(1); + + return C_ERR; /* Never reached. */ +} + +static void readOOMScoreAdj(void) { +#ifdef HAVE_PROC_OOM_SCORE_ADJ + char buf[64]; + int fd = open("/proc/self/oom_score_adj", O_RDONLY); + + if (fd < 0) return; + if (read(fd, buf, sizeof(buf)) > 0) + g_pserver->oom_score_adj_base = atoi(buf); + close(fd); +#endif +} + +/* This function will configure the current process's oom_score_adj according + * to user specified configuration. This is currently implemented on Linux + * only. 
+ * + * A process_class value of -1 implies OOM_CONFIG_MASTER or OOM_CONFIG_REPLICA, + * depending on current role. + */ +int setOOMScoreAdj(int process_class) { + + if (g_pserver->oom_score_adj == OOM_SCORE_ADJ_NO) return C_OK; + if (process_class == -1) + process_class = (listLength(g_pserver->masters) ? CONFIG_OOM_REPLICA : CONFIG_OOM_MASTER); + + serverAssert(process_class >= 0 && process_class < CONFIG_OOM_COUNT); + +#ifdef HAVE_PROC_OOM_SCORE_ADJ + int fd; + int val; + char buf[64]; + + val = g_pserver->oom_score_adj_values[process_class]; + if (g_pserver->oom_score_adj == OOM_SCORE_RELATIVE) + val += g_pserver->oom_score_adj_base; + if (val > 1000) val = 1000; + if (val < -1000) val = -1000; + + snprintf(buf, sizeof(buf) - 1, "%d\n", val); + + fd = open("/proc/self/oom_score_adj", O_WRONLY); + if (fd < 0 || write(fd, buf, strlen(buf)) < 0) { + serverLog(LL_WARNING, "Unable to write oom_score_adj: %s", strerror(errno)); + if (fd != -1) close(fd); + return C_ERR; + } + + close(fd); + return C_OK; +#else + /* Unsupported */ + return C_ERR; +#endif +} + +/* This function will try to raise the max number of open files accordingly to + * the configured max number of clients. It also reserves a number of file + * descriptors (CONFIG_MIN_RESERVED_FDS) for extra operations of + * persistence, listening sockets, log files and so forth. + * + * If it will not be possible to set the limit accordingly to the configured + * max number of clients, the function will do the reverse setting + * g_pserver->maxclients to the value that we can actually handle. 
*/ +void adjustOpenFilesLimit(void) { + rlim_t maxfiles = g_pserver->maxclients+CONFIG_MIN_RESERVED_FDS; + if (g_pserver->m_pstorageFactory) + maxfiles += g_pserver->m_pstorageFactory->filedsRequired(); + struct rlimit limit; + + if (getrlimit(RLIMIT_NOFILE,&limit) == -1) { + serverLog(LL_WARNING,"Unable to obtain the current NOFILE limit (%s), assuming 1024 and setting the max clients configuration accordingly.", + strerror(errno)); + g_pserver->maxclients = 1024-CONFIG_MIN_RESERVED_FDS; + } else { + rlim_t oldlimit = limit.rlim_cur; + + /* Set the max number of files if the current limit is not enough + * for our needs. */ + if (oldlimit < maxfiles) { + rlim_t bestlimit; + int setrlimit_error = 0; + + /* Try to set the file limit to match 'maxfiles' or at least + * to the higher value supported less than maxfiles. */ + bestlimit = maxfiles; + while(bestlimit > oldlimit) { + rlim_t decr_step = 16; + + limit.rlim_cur = bestlimit; + limit.rlim_max = bestlimit; + if (setrlimit(RLIMIT_NOFILE,&limit) != -1) break; + setrlimit_error = errno; + + /* We failed to set file limit to 'bestlimit'. Try with a + * smaller limit decrementing by a few FDs per iteration. */ + if (bestlimit < decr_step) break; + bestlimit -= decr_step; + } + + /* Assume that the limit we get initially is still valid if + * our last try was even lower. */ + if (bestlimit < oldlimit) bestlimit = oldlimit; + + if (bestlimit < maxfiles) { + unsigned int old_maxclients = g_pserver->maxclients; + g_pserver->maxclients = bestlimit-CONFIG_MIN_RESERVED_FDS; + /* maxclients is unsigned so may overflow: in order + * to check if maxclients is now logically less than 1 + * we test indirectly via bestlimit. */ + if (bestlimit <= CONFIG_MIN_RESERVED_FDS) { + serverLog(LL_WARNING,"Your current 'ulimit -n' " + "of %llu is not enough for the server to start. " + "Please increase your open file limit to at least " + "%llu. 
Exiting.", + (unsigned long long) oldlimit, + (unsigned long long) maxfiles); + exit(1); + } + serverLog(LL_WARNING,"You requested maxclients of %d " + "requiring at least %llu max file descriptors.", + old_maxclients, + (unsigned long long) maxfiles); + serverLog(LL_WARNING,"Server can't set maximum open files " + "to %llu because of OS error: %s.", + (unsigned long long) maxfiles, strerror(setrlimit_error)); + serverLog(LL_WARNING,"Current maximum open files is %llu. " + "maxclients has been reduced to %d to compensate for " + "low ulimit. " + "If you need higher maxclients increase 'ulimit -n'.", + (unsigned long long) bestlimit, g_pserver->maxclients); + } else { + serverLog(LL_NOTICE,"Increased maximum number of open files " + "to %llu (it was originally set to %llu).", + (unsigned long long) maxfiles, + (unsigned long long) oldlimit); + } + } + } +} + +/* Check that g_pserver->tcp_backlog can be actually enforced in Linux according + * to the value of /proc/sys/net/core/somaxconn, or warn about it. */ +void checkTcpBacklogSettings(void) { +#ifdef HAVE_PROC_SOMAXCONN + FILE *fp = fopen("/proc/sys/net/core/somaxconn","r"); + char buf[1024]; + if (!fp) return; + if (fgets(buf,sizeof(buf),fp) != NULL) { + int somaxconn = atoi(buf); + if (somaxconn > 0 && somaxconn < g_pserver->tcp_backlog) { + serverLog(LL_WARNING,"WARNING: The TCP backlog setting of %d cannot be enforced because /proc/sys/net/core/somaxconn is set to the lower value of %d.", g_pserver->tcp_backlog, somaxconn); + } + } + fclose(fp); +#endif +} + +void closeSocketListeners(socketFds *sfd) { + int j; + + for (j = 0; j < sfd->count; j++) { + if (sfd->fd[j] == -1) continue; + + aeDeleteFileEvent(serverTL->el, sfd->fd[j], AE_READABLE); + close(sfd->fd[j]); + } + + sfd->count = 0; +} + +/* Create an event handler for accepting new connections in TCP or TLS domain sockets. 
+ * This works atomically for all socket fds */ +int createSocketAcceptHandler(socketFds *sfd, aeFileProc *accept_handler) { + int j; + + for (j = 0; j < sfd->count; j++) { + if (aeCreateFileEvent(serverTL->el, sfd->fd[j], AE_READABLE, accept_handler,NULL) == AE_ERR) { + /* Rollback */ + for (j = j-1; j >= 0; j--) aeDeleteFileEvent(serverTL->el, sfd->fd[j], AE_READABLE); + return C_ERR; + } + } + return C_OK; +} + +/* Initialize a set of file descriptors to listen to the specified 'port' + * binding the addresses specified in the Redis server configuration. + * + * The listening file descriptors are stored in the integer array 'fds' + * and their number is set in '*count'. + * + * The addresses to bind are specified in the global g_pserver->bindaddr array + * and their number is g_pserver->bindaddr_count. If the server configuration + * contains no specific addresses to bind, this function will try to + * bind * (all addresses) for both the IPv4 and IPv6 protocols. + * + * On success the function returns C_OK. + * + * On error the function returns C_ERR. For the function to be on + * error, at least one of the g_pserver->bindaddr addresses was + * impossible to bind, or no bind addresses were specified in the server + * configuration but the function is not able to bind * for at least + * one of the IPv4 or IPv6 protocols. */ +int listenToPort(int port, socketFds *sfd, int fReusePort, int fFirstListen) { + int j; + const char **bindaddr = (const char**)g_pserver->bindaddr; + int bindaddr_count = g_pserver->bindaddr_count; + const char *default_bindaddr[2] = {"*", "-::*"}; + + /* Force binding of 0.0.0.0 if no bind address is specified. */ + if (g_pserver->bindaddr_count == 0) { + bindaddr_count = 2; + bindaddr = default_bindaddr; + } + + for (j = 0; j < bindaddr_count; j++) { + const char* addr = bindaddr[j]; + int optional = *addr == '-'; + if (optional) addr++; + if (strchr(addr,':')) { + /* Bind IPv6 address. 
*/ + sfd->fd[sfd->count] = anetTcp6Server(serverTL->neterr,port,addr,g_pserver->tcp_backlog,fReusePort,fFirstListen); + } else { + /* Bind IPv4 address. */ + sfd->fd[sfd->count] = anetTcpServer(serverTL->neterr,port,addr,g_pserver->tcp_backlog,fReusePort,fFirstListen); + } + if (sfd->fd[sfd->count] == ANET_ERR) { + int net_errno = errno; + serverLog(LL_WARNING, + "Warning: Could not create server TCP listening socket %s:%d: %s", + addr, port, serverTL->neterr); + if (net_errno == EADDRNOTAVAIL && optional) + continue; + if (net_errno == ENOPROTOOPT || net_errno == EPROTONOSUPPORT || + net_errno == ESOCKTNOSUPPORT || net_errno == EPFNOSUPPORT || + net_errno == EAFNOSUPPORT) + continue; + + /* Rollback successful listens before exiting */ + closeSocketListeners(sfd); + return C_ERR; + } + anetNonBlock(NULL,sfd->fd[sfd->count]); + anetCloexec(sfd->fd[sfd->count]); + sfd->count++; + } + return C_OK; +} + +/* Resets the stats that we expose via INFO or other means that we want + * to reset via CONFIG RESETSTAT. The function is also used in order to + * initialize these fields in initServer() at server startup. 
*/
void resetServerStats(void) {
    /* Command and connection counters. */
    g_pserver->stat_numcommands = 0;
    g_pserver->stat_numconnections = 0;
    g_pserver->stat_rejected_conn = 0;

    /* Expiration and eviction. */
    g_pserver->stat_expiredkeys = 0;
    g_pserver->stat_expired_stale_perc = 0;
    g_pserver->stat_expired_time_cap_reached_count = 0;
    g_pserver->stat_expire_cycle_time_used = 0;
    g_pserver->stat_evictedkeys = 0;

    /* Keyspace lookups. */
    g_pserver->stat_keyspace_misses = 0;
    g_pserver->stat_keyspace_hits = 0;

    /* Active defragmentation. */
    g_pserver->stat_active_defrag_hits = 0;
    g_pserver->stat_active_defrag_misses = 0;
    g_pserver->stat_active_defrag_key_hits = 0;
    g_pserver->stat_active_defrag_key_misses = 0;
    g_pserver->stat_active_defrag_scanned = 0;

    /* Forks. */
    g_pserver->stat_fork_time = 0;
    g_pserver->stat_fork_rate = 0;
    g_pserver->stat_total_forks = 0;

    /* Replication syncs. */
    g_pserver->stat_sync_full = 0;
    g_pserver->stat_sync_partial_ok = 0;
    g_pserver->stat_sync_partial_err = 0;

    /* I/O events. */
    g_pserver->stat_total_reads_processed = 0;
    g_pserver->stat_total_writes_processed = 0;

    /* Instantaneous metrics: restart sampling from an empty window. */
    for (int imetric = 0; imetric < STATS_METRIC_COUNT; imetric++) {
        auto &metric = g_pserver->inst_metric[imetric];
        metric.idx = 0;
        metric.last_sample_time = mstime();
        metric.last_sample_count = 0;
        memset(metric.samples,0,sizeof(metric.samples));
    }

    /* Network traffic and error replies. */
    g_pserver->stat_net_input_bytes = 0;
    g_pserver->stat_net_output_bytes = 0;
    g_pserver->stat_unexpected_error_replies = 0;
    for (int ithread = 0; ithread < cserver.cthreads; ++ithread)
        g_pserver->rgthreadvar[ithread].stat_total_error_replies = 0;

    g_pserver->stat_dump_payload_sanitizations = 0;
    g_pserver->aof_delayed_fsync = 0;
}

/* Make the thread killable at any time, so that kill threads functions
 * can work reliably (default cancelability type is PTHREAD_CANCEL_DEFERRED).
 * Needed for pthread_cancel used by the fast memory test used by the crash report.
*/ +void makeThreadKillable(void) { + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); + pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); +} + +static void initNetworkingThread(int iel, int fReusePort) +{ + /* Open the TCP listening socket for the user commands. */ + if (fReusePort || (iel == IDX_EVENT_LOOP_MAIN)) + { + if (g_pserver->port != 0 && + listenToPort(g_pserver->port,&g_pserver->rgthreadvar[iel].ipfd, fReusePort, (iel == IDX_EVENT_LOOP_MAIN)) == C_ERR) { + serverLog(LL_WARNING, "Failed listening on port %u (TCP), aborting.", g_pserver->port); + exit(1); + } + if (g_pserver->tls_port != 0 && + listenToPort(g_pserver->tls_port,&g_pserver->rgthreadvar[iel].tlsfd, fReusePort, (iel == IDX_EVENT_LOOP_MAIN)) == C_ERR) { + serverLog(LL_WARNING, "Failed listening on port %u (TLS), aborting.", g_pserver->port); + exit(1); + } + } + else + { + // We use the main threads file descriptors + memcpy(&g_pserver->rgthreadvar[iel].ipfd, &g_pserver->rgthreadvar[IDX_EVENT_LOOP_MAIN].ipfd, sizeof(socketFds)); + g_pserver->rgthreadvar[iel].ipfd.count = g_pserver->rgthreadvar[IDX_EVENT_LOOP_MAIN].ipfd.count; + } + + /* Create an event handler for accepting new connections in TCP */ + for (int j = 0; j < g_pserver->rgthreadvar[iel].ipfd.count; j++) { + if (aeCreateFileEvent(g_pserver->rgthreadvar[iel].el, g_pserver->rgthreadvar[iel].ipfd.fd[j], AE_READABLE|AE_READ_THREADSAFE, + acceptTcpHandler,NULL) == AE_ERR) + { + serverPanic( + "Unrecoverable error creating g_pserver->ipfd file event."); + } + } + + makeThreadKillable(); + + for (int j = 0; j < g_pserver->rgthreadvar[iel].tlsfd.count; j++) { + if (aeCreateFileEvent(g_pserver->rgthreadvar[iel].el, g_pserver->rgthreadvar[iel].tlsfd.fd[j], AE_READABLE|AE_READ_THREADSAFE, + acceptTLSHandler,NULL) == AE_ERR) + { + serverPanic( + "Unrecoverable error creating g_pserver->tlsfd file event."); + } + } +} + +static void initNetworking(int fReusePort) +{ + // We only initialize the main thread here, since RDB load is a 
special case that processes + // clients before our server threads are launched. + initNetworkingThread(IDX_EVENT_LOOP_MAIN, fReusePort); + + /* Open the listening Unix domain socket. */ + if (g_pserver->unixsocket != NULL) { + unlink(g_pserver->unixsocket); /* don't care if this fails */ + g_pserver->sofd = anetUnixServer(serverTL->neterr,g_pserver->unixsocket, + g_pserver->unixsocketperm, g_pserver->tcp_backlog); + if (g_pserver->sofd == ANET_ERR) { + serverLog(LL_WARNING, "Opening Unix socket: %s", serverTL->neterr); + exit(1); + } + anetNonBlock(NULL,g_pserver->sofd); + } + + /* Abort if there are no listening sockets at all. */ + if (g_pserver->rgthreadvar[IDX_EVENT_LOOP_MAIN].ipfd.count == 0 && g_pserver->rgthreadvar[IDX_EVENT_LOOP_MAIN].tlsfd.count == 0 && g_pserver->sofd < 0) { + serverLog(LL_WARNING, "Configured to not listen anywhere, exiting."); + exit(1); + } + + if (g_pserver->sofd > 0 && aeCreateFileEvent(g_pserver->rgthreadvar[IDX_EVENT_LOOP_MAIN].el,g_pserver->sofd,AE_READABLE|AE_READ_THREADSAFE, + acceptUnixHandler,NULL) == AE_ERR) serverPanic("Unrecoverable error creating g_pserver->sofd file event."); +} + +static void initServerThread(struct redisServerThreadVars *pvar, int fMain) +{ + pvar->unblocked_clients = listCreate(); + pvar->clients_pending_asyncwrite = listCreate(); + pvar->ipfd.count = 0; + pvar->tlsfd.count = 0; + pvar->cclients = 0; + pvar->in_eval = 0; + pvar->in_exec = 0; + pvar->el = aeCreateEventLoop(g_pserver->maxclients+CONFIG_FDSET_INCR); + pvar->current_client = nullptr; + pvar->fRetrySetAofEvent = false; + if (pvar->el == NULL) { + serverLog(LL_WARNING, + "Failed creating the event loop. 
Error message: '%s'", + strerror(errno)); + exit(1); + } + aeSetBeforeSleepProc(pvar->el, beforeSleep, AE_SLEEP_THREADSAFE); + aeSetAfterSleepProc(pvar->el, afterSleep, AE_SLEEP_THREADSAFE); + + fastlock_init(&pvar->lockPendingWrite, "lockPendingWrite"); + + if (!fMain) + { + if (aeCreateTimeEvent(pvar->el, 1, serverCronLite, NULL, NULL) == AE_ERR) { + serverPanic("Can't create event loop timers."); + exit(1); + } + } + + /* Register a readable event for the pipe used to awake the event loop + * when a blocked client in a module needs attention. */ + if (aeCreateFileEvent(pvar->el, g_pserver->module_blocked_pipe[0], AE_READABLE, + moduleBlockedClientPipeReadable,NULL) == AE_ERR) { + serverPanic( + "Error registering the readable event for the module " + "blocked clients subsystem."); + } +} + +void initServer(void) { + signal(SIGHUP, SIG_IGN); + signal(SIGPIPE, SIG_IGN); + setupSignalHandlers(); + makeThreadKillable(); + + zfree(g_pserver->db); // initServerConfig created a dummy array, free that now + g_pserver->db = (redisDb**)zmalloc(sizeof(redisDb*)*cserver.dbnum, MALLOC_LOCAL); + + /* Create the Redis databases, and initialize other internal state. */ + if (g_pserver->m_pstorageFactory == nullptr) { + for (int j = 0; j < cserver.dbnum; j++) { + g_pserver->db[j] = new (MALLOC_LOCAL) redisDb(); + g_pserver->db[j]->initialize(j); + } + } else { + // Read FLASH metadata and load the appropriate storage dbid into each databse index, as each DB index can have different storage dbid mapped due to the swapdb command. 
+ g_pserver->metadataDb = g_pserver->m_pstorageFactory->createMetadataDb(); + for (int idb = 0; idb < cserver.dbnum; ++idb) + { + int storage_dbid = idb; + std::string dbid_key = "db-" + std::to_string(idb); + g_pserver->metadataDb->retrieve(dbid_key.c_str(), dbid_key.length(), [&](const char *, size_t, const void *data, size_t){ + storage_dbid = *(int*)data; + }); + + g_pserver->db[idb] = new (MALLOC_LOCAL) redisDb(); + g_pserver->db[idb]->initialize(idb, storage_dbid); + } + } + + for (int i = 0; i < MAX_EVENT_LOOPS; ++i) + { + g_pserver->rgthreadvar[i].rgdbSnapshot = (const redisDbPersistentDataSnapshot**)zcalloc(sizeof(redisDbPersistentDataSnapshot*)*cserver.dbnum, MALLOC_LOCAL); + serverAssert(g_pserver->rgthreadvar[i].rgdbSnapshot != nullptr); + } + g_pserver->modulethreadvar.rgdbSnapshot = (const redisDbPersistentDataSnapshot**)zcalloc(sizeof(redisDbPersistentDataSnapshot*)*cserver.dbnum, MALLOC_LOCAL); + serverAssert(g_pserver->modulethreadvar.rgdbSnapshot != nullptr); + + serverAssert(g_pserver->rgthreadvar[0].rgdbSnapshot != nullptr); + + /* Fixup Master Client Database */ + listIter li; + listNode *ln; + listRewind(g_pserver->masters, &li); + while ((ln = listNext(&li))) + { + redisMaster *mi = (redisMaster*)listNodeValue(ln); + serverAssert(mi->master == nullptr); + if (mi->cached_master != nullptr) + selectDb(mi->cached_master, 0); + } + + g_pserver->aof_state = g_pserver->aof_enabled ? AOF_ON : AOF_OFF; + g_pserver->hz = g_pserver->config_hz; + cserver.pid = getpid(); + g_pserver->in_fork_child = CHILD_TYPE_NONE; + cserver.main_thread_id = pthread_self(); + g_pserver->errors = raxNew(); + g_pserver->clients_index = raxNew(); + g_pserver->clients_to_close = listCreate(); + g_pserver->replicaseldb = -1; /* Force to emit the first SELECT command. 
*/ + g_pserver->ready_keys = listCreate(); + g_pserver->clients_waiting_acks = listCreate(); + g_pserver->get_ack_from_slaves = 0; + cserver.system_memory_size = zmalloc_get_memory_size(); + g_pserver->paused_clients = listCreate(); + g_pserver->events_processed_while_blocked = 0; + g_pserver->blocked_last_cron = 0; + g_pserver->replication_allowed = 1; + g_pserver->blocking_op_nesting = 0; + g_pserver->rdb_pipe_read = -1; + g_pserver->client_pause_type = CLIENT_PAUSE_OFF; + + + if ((g_pserver->tls_port || g_pserver->tls_replication || g_pserver->tls_cluster) + && tlsConfigure(&g_pserver->tls_ctx_config) == C_ERR) { + serverLog(LL_WARNING, "Failed to configure TLS. Check logs for more info."); + exit(1); + } + + createSharedObjects(); + adjustOpenFilesLimit(); + const char *clk_msg = monotonicInit(); + serverLog(LL_NOTICE, "monotonic clock: %s", clk_msg); + + evictionPoolAlloc(); /* Initialize the LRU keys pool. */ + g_pserver->pubsub_channels = dictCreate(&keylistDictType,NULL); + g_pserver->pubsub_patterns = dictCreate(&keylistDictType,NULL); + g_pserver->cronloops = 0; + g_pserver->child_pid = -1; + g_pserver->child_type = CHILD_TYPE_NONE; + g_pserver->rdbThreadVars.fRdbThreadCancel = false; + g_pserver->rdb_child_type = RDB_CHILD_TYPE_NONE; + g_pserver->rdb_pipe_conns = NULL; + g_pserver->rdb_pipe_numconns = 0; + g_pserver->rdb_pipe_numconns_writing = 0; + g_pserver->rdb_pipe_buff = NULL; + g_pserver->rdb_pipe_bufflen = 0; + g_pserver->rdb_bgsave_scheduled = 0; + g_pserver->child_info_pipe[0] = -1; + g_pserver->child_info_pipe[1] = -1; + g_pserver->child_info_nread = 0; + aofRewriteBufferReset(); + g_pserver->aof_buf = sdsempty(); + g_pserver->lastsave = time(NULL); /* At startup we consider the DB saved. */ + g_pserver->lastbgsave_try = 0; /* At startup we never tried to BGSAVE. 
*/ + g_pserver->rdb_save_time_last = -1; + g_pserver->rdb_save_time_start = -1; + g_pserver->dirty = 0; + resetServerStats(); + /* A few stats we don't want to reset: server startup time, and peak mem. */ + cserver.stat_starttime = time(NULL); + g_pserver->stat_peak_memory = 0; + g_pserver->stat_current_cow_bytes = 0; + g_pserver->stat_current_cow_updated = 0; + g_pserver->stat_current_save_keys_processed = 0; + g_pserver->stat_current_save_keys_total = 0; + g_pserver->stat_rdb_cow_bytes = 0; + g_pserver->stat_aof_cow_bytes = 0; + g_pserver->stat_module_cow_bytes = 0; + g_pserver->stat_module_progress = 0; + for (int j = 0; j < CLIENT_TYPE_COUNT; j++) + g_pserver->stat_clients_type_memory[j] = 0; + g_pserver->cron_malloc_stats.zmalloc_used = 0; + g_pserver->cron_malloc_stats.process_rss = 0; + g_pserver->cron_malloc_stats.allocator_allocated = 0; + g_pserver->cron_malloc_stats.allocator_active = 0; + g_pserver->cron_malloc_stats.allocator_resident = 0; + g_pserver->cron_malloc_stats.sys_available = 0; + g_pserver->cron_malloc_stats.sys_total = g_pserver->force_eviction_percent ? getMemTotal() : 0; + g_pserver->lastbgsave_status = C_OK; + g_pserver->aof_last_write_status = C_OK; + g_pserver->aof_last_write_errno = 0; + g_pserver->repl_good_slaves_count = 0; + + g_pserver->mvcc_tstamp = 0; + + + /* Create the timer callback, this is our way to process many background + * operations incrementally, like clients timeout, eviction of unaccessed + * expired keys and so forth. */ + if (aeCreateTimeEvent(g_pserver->rgthreadvar[IDX_EVENT_LOOP_MAIN].el, 1, serverCron, NULL, NULL) == AE_ERR) { + serverPanic("Can't create event loop timers."); + exit(1); + } + + /* Open the AOF file if needed. 
*/ + if (g_pserver->aof_state == AOF_ON) { + g_pserver->aof_fd = open(g_pserver->aof_filename, + O_WRONLY|O_APPEND|O_CREAT,0644); + if (g_pserver->aof_fd == -1) { + serverLog(LL_WARNING, "Can't open the append-only file: %s", + strerror(errno)); + exit(1); + } + } + + /* 32 bit instances are limited to 4GB of address space, so if there is + * no explicit limit in the user provided configuration we set a limit + * at 3 GB using maxmemory with 'noeviction' policy'. This avoids + * useless crashes of the Redis instance for out of memory. */ + if (sizeof(void*) == 4 && g_pserver->maxmemory == 0) { + serverLog(LL_WARNING,"Warning: 32 bit instance detected but no memory limit set. Setting 3 GB maxmemory limit with 'noeviction' policy now."); + g_pserver->maxmemory = 3072LL*(1024*1024); /* 3 GB */ + g_pserver->maxmemory_policy = MAXMEMORY_NO_EVICTION; + } + + /* Generate UUID */ + static_assert(sizeof(uuid_t) == sizeof(cserver.uuid), "UUIDs are standardized at 16-bytes"); + uuid_generate((unsigned char*)cserver.uuid); + + if (g_pserver->cluster_enabled) clusterInit(); + replicationScriptCacheInit(); + scriptingInit(1); + slowlogInit(); + latencyMonitorInit(); + + if (g_pserver->m_pstorageFactory) { + if (g_pserver->metadataDb) { + g_pserver->metadataDb->retrieve("repl-id", 7, [&](const char *, size_t, const void *data, size_t cb){ + if (cb == sizeof(g_pserver->replid)) { + memcpy(g_pserver->replid, data, cb); + } + }); + g_pserver->metadataDb->retrieve("repl-offset", 11, [&](const char *, size_t, const void *data, size_t cb){ + if (cb == sizeof(g_pserver->master_repl_offset)) { + g_pserver->master_repl_offset = *(long long*)data; + } + }); + + int repl_stream_db = -1; + g_pserver->metadataDb->retrieve("repl-stream-db", 14, [&](const char *, size_t, const void *data, size_t){ + repl_stream_db = *(int*)data; + }); + + /* !!! AFTER THIS POINT WE CAN NO LONGER READ FROM THE META DB AS IT WILL BE OVERWRITTEN !!! 
*/ + // replicationCacheMasterUsingMyself triggers the overwrite + + listIter li; + listNode *ln; + listRewind(g_pserver->masters, &li); + while ((ln = listNext(&li))) + { + redisMaster *mi = (redisMaster*)listNodeValue(ln); + /* If we are a replica, create a cached master from this + * information, in order to allow partial resynchronizations + * with masters. */ + replicationCacheMasterUsingMyself(mi); + selectDb(mi->cached_master, repl_stream_db); + } + } + } + + saveMasterStatusToStorage(false); // eliminate the repl-offset field + + /* Initialize ACL default password if it exists */ + ACLUpdateDefaultUserPassword(g_pserver->requirepass); +} + +/* Some steps in server initialization need to be done last (after modules + * are loaded). + * Specifically, creation of threads due to a race bug in ld.so, in which + * Thread Local Storage initialization collides with dlopen call. + * see: https://sourceware.org/bugzilla/show_bug.cgi?id=19329 */ +void InitServerLast() { + + /* We have to initialize storage providers after the cluster has been initialized */ + moduleFireServerEvent(REDISMODULE_EVENT_LOADING, REDISMODULE_SUBEVENT_LOADING_FLASH_START, NULL); + for (int idb = 0; idb < cserver.dbnum; ++idb) + { + g_pserver->db[idb]->storageProviderInitialize(); + } + moduleFireServerEvent(REDISMODULE_EVENT_LOADING, REDISMODULE_SUBEVENT_LOADING_ENDED, NULL); + + bioInit(); + set_jemalloc_bg_thread(cserver.jemalloc_bg_thread); + g_pserver->initial_memory_usage = zmalloc_used_memory(); + + g_pserver->asyncworkqueue = new (MALLOC_LOCAL) AsyncWorkQueue(cserver.cthreads); + + // Allocate the repl backlog + +} + +/* Parse the flags string description 'strflags' and set them to the + * command 'c'. If the flags are all valid C_OK is returned, otherwise + * C_ERR is returned (yet the recognized flags are set in the command). 
*/ +int populateCommandTableParseFlags(struct redisCommand *c, const char *strflags) { + int argc; + sds *argv; + + /* Split the line into arguments for processing. */ + argv = sdssplitargs(strflags,&argc); + if (argv == NULL) return C_ERR; + + for (int j = 0; j < argc; j++) { + char *flag = argv[j]; + if (!strcasecmp(flag,"write")) { + c->flags |= CMD_WRITE|CMD_CATEGORY_WRITE; + } else if (!strcasecmp(flag,"read-only")) { + c->flags |= CMD_READONLY|CMD_CATEGORY_READ; + } else if (!strcasecmp(flag,"use-memory")) { + c->flags |= CMD_DENYOOM; + } else if (!strcasecmp(flag,"admin")) { + c->flags |= CMD_ADMIN|CMD_CATEGORY_ADMIN|CMD_CATEGORY_DANGEROUS; + } else if (!strcasecmp(flag,"pub-sub")) { + c->flags |= CMD_PUBSUB|CMD_CATEGORY_PUBSUB; + } else if (!strcasecmp(flag,"no-script")) { + c->flags |= CMD_NOSCRIPT; + } else if (!strcasecmp(flag,"random")) { + c->flags |= CMD_RANDOM; + } else if (!strcasecmp(flag,"to-sort")) { + c->flags |= CMD_SORT_FOR_SCRIPT; + } else if (!strcasecmp(flag,"ok-loading")) { + c->flags |= CMD_LOADING; + } else if (!strcasecmp(flag,"ok-stale")) { + c->flags |= CMD_STALE; + } else if (!strcasecmp(flag,"no-monitor")) { + c->flags |= CMD_SKIP_MONITOR; + } else if (!strcasecmp(flag,"no-slowlog")) { + c->flags |= CMD_SKIP_SLOWLOG; + } else if (!strcasecmp(flag,"cluster-asking")) { + c->flags |= CMD_ASKING; + } else if (!strcasecmp(flag,"fast")) { + c->flags |= CMD_FAST | CMD_CATEGORY_FAST; + } else if (!strcasecmp(flag,"noprop")) { + c->flags |= CMD_SKIP_PROPOGATE; + } else if (!strcasecmp(flag,"no-auth")) { + c->flags |= CMD_NO_AUTH; + } else if (!strcasecmp(flag,"may-replicate")) { + c->flags |= CMD_MAY_REPLICATE; + } else if (!strcasecmp(flag,"async")) { + c->flags |= CMD_ASYNC_OK; + } else { + /* Parse ACL categories here if the flag name starts with @. 
*/ + uint64_t catflag; + if (flag[0] == '@' && + (catflag = ACLGetCommandCategoryFlagByName(flag+1)) != 0) + { + c->flags |= catflag; + } else { + sdsfreesplitres(argv,argc); + return C_ERR; + } + } + } + /* If it's not @fast is @slow in this binary world. */ + if (!(c->flags & CMD_CATEGORY_FAST)) c->flags |= CMD_CATEGORY_SLOW; + + sdsfreesplitres(argv,argc); + return C_OK; +} + +/* Populates the KeyDB Command Table starting from the hard coded list + * we have on top of server.cpp file. */ +void populateCommandTable(void) { + int j; + int numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand); + + for (j = 0; j < numcommands; j++) { + struct redisCommand *c = redisCommandTable+j; + int retval1, retval2; + + /* Translate the command string flags description into an actual + * set of flags. */ + if (populateCommandTableParseFlags(c,c->sflags) == C_ERR) + serverPanic("Unsupported command flag"); + + c->id = ACLGetCommandID(c->name); /* Assign the ID used for ACL. */ + retval1 = dictAdd(g_pserver->commands, sdsnew(c->name), c); + /* Populate an additional dictionary that will be unaffected + * by rename-command statements in futriix.conf. 
*/ + retval2 = dictAdd(g_pserver->orig_commands, sdsnew(c->name), c); + serverAssert(retval1 == DICT_OK && retval2 == DICT_OK); + } +} + +void resetCommandTableStats(void) { + struct redisCommand *c; + dictEntry *de; + dictIterator *di; + + di = dictGetSafeIterator(g_pserver->commands); + while((de = dictNext(di)) != NULL) { + c = (struct redisCommand *) dictGetVal(de); + c->microseconds = 0; + c->calls = 0; + c->rejected_calls = 0; + c->failed_calls = 0; + } + dictReleaseIterator(di); + +} + +static void zfree_noconst(void *p) { + zfree(p); +} + +void fuzzOutOfMemoryHandler(size_t allocation_size) { + serverLog(LL_WARNING,"Out Of Memory allocating %zu bytes!", + allocation_size); + exit(EXIT_FAILURE); // don't crash because it causes false positives +} + +void resetErrorTableStats(void) { + raxFreeWithCallback(g_pserver->errors, zfree_noconst); + g_pserver->errors = raxNew(); +} + +/* ========================== Redis OP Array API ============================ */ + +void redisOpArrayInit(redisOpArray *oa) { + oa->ops = NULL; + oa->numops = 0; +} + +int redisOpArrayAppend(redisOpArray *oa, struct redisCommand *cmd, int dbid, + robj **argv, int argc, int target) +{ + redisOp *op; + + oa->ops = (redisOp*)zrealloc(oa->ops,sizeof(redisOp)*(oa->numops+1), MALLOC_LOCAL); + op = oa->ops+oa->numops; + op->cmd = cmd; + op->dbid = dbid; + op->argv = argv; + op->argc = argc; + op->target = target; + oa->numops++; + return oa->numops; +} + +void redisOpArrayFree(redisOpArray *oa) { + while(oa->numops) { + int j; + redisOp *op; + + oa->numops--; + op = oa->ops+oa->numops; + for (j = 0; j < op->argc; j++) + decrRefCount(op->argv[j]); + zfree(op->argv); + } + zfree(oa->ops); + oa->ops = NULL; +} + +/* ====================== Commands lookup and execution ===================== */ + +struct redisCommand *lookupCommand(sds name) { + return (struct redisCommand*)dictFetchValue(g_pserver->commands, name); +} + +struct redisCommand *lookupCommandByCString(const char *s) { + struct 
redisCommand *cmd; + sds name = sdsnew(s); + + cmd = (struct redisCommand*)dictFetchValue(g_pserver->commands, name); + sdsfree(name); + return cmd; +} + +/* Lookup the command in the current table, if not found also check in + * the original table containing the original command names unaffected by + * futriix.conf rename-command statement. + * + * This is used by functions rewriting the argument vector such as + * rewriteClientCommandVector() in order to set client->cmd pointer + * correctly even if the command was renamed. */ +struct redisCommand *lookupCommandOrOriginal(sds name) { + struct redisCommand *cmd = (struct redisCommand*)dictFetchValue(g_pserver->commands, name); + + if (!cmd) cmd = (struct redisCommand*)dictFetchValue(g_pserver->orig_commands,name); + return cmd; +} + +/* Propagate the specified command (in the context of the specified database id) + * to AOF and Slaves. + * + * flags are an xor between: + * + PROPAGATE_NONE (no propagation of command at all) + * + PROPAGATE_AOF (propagate into the AOF file if is enabled) + * + PROPAGATE_REPL (propagate into the replication link) + * + * This should not be used inside commands implementation since it will not + * wrap the resulting commands in MULTI/EXEC. Use instead alsoPropagate(), + * preventCommandPropagation(), forceCommandPropagation(). + * + * However for functions that need to (also) propagate out of the context of a + * command execution, for example when serving a blocked client, you + * want to use propagate(). + */ +void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, + int flags) +{ + serverAssert(GlobalLocksAcquired()); + if (!g_pserver->replication_allowed) + return; + + /* Propagate a MULTI request once we encounter the first command which + * is a write command. + * This way we'll deliver the MULTI/..../EXEC block as a whole and + * both the AOF and the replication link will have the same consistency + * and atomicity guarantees. 
*/ + if (serverTL->in_exec && !serverTL->propagate_in_transaction) + execCommandPropagateMulti(dbid); + + /* This needs to be unreachable since the dataset should be fixed during + * client pause, otherwise data may be lossed during a failover. */ + serverAssert(!(areClientsPaused() && !serverTL->client_pause_in_transaction)); + + if (g_pserver->aof_state != AOF_OFF && flags & PROPAGATE_AOF) + feedAppendOnlyFile(cmd,dbid,argv,argc); + if (flags & PROPAGATE_REPL) + replicationFeedSlaves(g_pserver->slaves,dbid,argv,argc); +} + +/* Used inside commands to schedule the propagation of additional commands + * after the current command is propagated to AOF / Replication. + * + * 'cmd' must be a pointer to the Redis command to replicate, dbid is the + * database ID the command should be propagated into. + * Arguments of the command to propagate are passed as an array of redis + * objects pointers of len 'argc', using the 'argv' vector. + * + * The function does not take a reference to the passed 'argv' vector, + * so it is up to the caller to release the passed argv (but it is usually + * stack allocated). The function automatically increments ref count of + * passed objects, so the caller does not need to. */ +void alsoPropagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, + int target) +{ + robj **argvcopy; + int j; + + if (g_pserver->loading) return; /* No propagation during loading. */ + + argvcopy = (robj**)zmalloc(sizeof(robj*)*argc, MALLOC_LOCAL); + for (j = 0; j < argc; j++) { + argvcopy[j] = argv[j]; + incrRefCount(argv[j]); + } + redisOpArrayAppend(&g_pserver->also_propagate,cmd,dbid,argvcopy,argc,target); +} + +/* It is possible to call the function forceCommandPropagation() inside a + * Redis command implementation in order to to force the propagation of a + * specific command execution into AOF / Replication. 
*/ +void forceCommandPropagation(client *c, int flags) { + serverAssert(c->cmd->flags & (CMD_WRITE | CMD_MAY_REPLICATE)); + if (flags & PROPAGATE_REPL) c->flags |= CLIENT_FORCE_REPL; + if (flags & PROPAGATE_AOF) c->flags |= CLIENT_FORCE_AOF; +} + +/* Avoid that the executed command is propagated at all. This way we + * are free to just propagate what we want using the alsoPropagate() + * API. */ +void preventCommandPropagation(client *c) { + c->flags |= CLIENT_PREVENT_PROP; +} + +/* AOF specific version of preventCommandPropagation(). */ +void preventCommandAOF(client *c) { + c->flags |= CLIENT_PREVENT_AOF_PROP; +} + +/* Replication specific version of preventCommandPropagation(). */ +void preventCommandReplication(client *c) { + c->flags |= CLIENT_PREVENT_REPL_PROP; +} + +/* Log the last command a client executed into the slowlog. */ +void slowlogPushCurrentCommand(client *c, struct redisCommand *cmd, ustime_t duration) { + /* Some commands may contain sensitive data that should not be available in the slowlog. */ + if (cmd->flags & CMD_SKIP_SLOWLOG) + return; + + /* If command argument vector was rewritten, use the original + * arguments. */ + robj **argv = c->original_argv ? c->original_argv : c->argv; + int argc = c->original_argv ? c->original_argc : c->argc; + slowlogPushEntryIfNeeded(c,argv,argc,duration); +} + +/* Call() is the core of Redis execution of a command. + * + * The following flags can be passed: + * CMD_CALL_NONE No flags. + * CMD_CALL_SLOWLOG Check command speed and log in the slow log if needed. + * CMD_CALL_STATS Populate command stats. + * CMD_CALL_PROPAGATE_AOF Append command to AOF if it modified the dataset + * or if the client flags are forcing propagation. + * CMD_CALL_PROPAGATE_REPL Send command to slaves if it modified the dataset + * or if the client flags are forcing propagation. + * CMD_CALL_PROPAGATE Alias for PROPAGATE_AOF|PROPAGATE_REPL. + * CMD_CALL_FULL Alias for SLOWLOG|STATS|PROPAGATE. 
+ * + * The exact propagation behavior depends on the client flags. + * Specifically: + * + * 1. If the client flags CLIENT_FORCE_AOF or CLIENT_FORCE_REPL are set + * and assuming the corresponding CMD_CALL_PROPAGATE_AOF/REPL is set + * in the call flags, then the command is propagated even if the + * dataset was not affected by the command. + * 2. If the client flags CLIENT_PREVENT_REPL_PROP or CLIENT_PREVENT_AOF_PROP + * are set, the propagation into AOF or to slaves is not performed even + * if the command modified the dataset. + * + * Note that regardless of the client flags, if CMD_CALL_PROPAGATE_AOF + * or CMD_CALL_PROPAGATE_REPL are not set, then respectively AOF or + * slaves propagation will never occur. + * + * Client flags are modified by the implementation of a given command + * using the following API: + * + * forceCommandPropagation(client *c, int flags); + * preventCommandPropagation(client *c); + * preventCommandAOF(client *c); + * preventCommandReplication(client *c); + * + */ +void call(client *c, int flags) { + long long dirty; + monotime call_timer; + int client_old_flags = c->flags; + struct redisCommand *real_cmd = c->cmd; + serverAssert(((flags & CMD_CALL_ASYNC) && (c->cmd->flags & CMD_READONLY)) || GlobalLocksAcquired()); + + /* We need to transfer async writes before a client's repl state gets changed. Otherwise + we won't be able to propogate them correctly. */ + if (c->cmd->flags & CMD_CATEGORY_REPLICATION) { + flushReplBacklogToClients(); + ProcessPendingAsyncWrites(); + } + + /* Initialization: clear the flags that must be set by the command on + * demand, and initialize the array for additional commands propagation. */ + c->flags &= ~(CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP); + redisOpArray prev_also_propagate; + if (!(flags & CMD_CALL_ASYNC)) { + prev_also_propagate = g_pserver->also_propagate; + redisOpArrayInit(&g_pserver->also_propagate); + } + + /* Call the command. 
*/ + dirty = g_pserver->dirty; + serverTL->prev_err_count = serverTL->stat_total_error_replies; + g_pserver->fixed_time_expire++; + incrementMvccTstamp(); + elapsedStart(&call_timer); + try { + c->cmd->proc(c); + } catch (robj_roptr o) { + addReply(c, o); + } catch (robj *o) { + addReply(c, o); + } catch (const char *sz) { + addReplyError(c, sz); + } + serverTL->commandsExecuted++; + const long duration = elapsedUs(call_timer); + c->duration = duration; + if (flags & CMD_CALL_ASYNC) + dirty = 0; // dirty is bogus in this case as there's no synchronization + else + dirty = g_pserver->dirty-dirty; + if (dirty < 0) dirty = 0; + + if (dirty) + c->mvccCheckpoint = getMvccTstamp(); + + /* Update failed command calls if required. + * We leverage a static variable (prev_err_count) to retain + * the counter across nested function calls and avoid logging + * the same error twice. */ + if ((serverTL->stat_total_error_replies - serverTL->prev_err_count) > 0) { + real_cmd->failed_calls++; + } + + /* After executing command, we will close the client after writing entire + * reply if it is set 'CLIENT_CLOSE_AFTER_COMMAND' flag. */ + if (c->flags & CLIENT_CLOSE_AFTER_COMMAND) { + c->flags &= ~CLIENT_CLOSE_AFTER_COMMAND; + c->flags |= CLIENT_CLOSE_AFTER_REPLY; + } + + /* When EVAL is called loading the AOF we don't want commands called + * from Lua to go into the slowlog or to populate statistics. */ + if (g_pserver->loading && c->flags & CLIENT_LUA) + flags &= ~(CMD_CALL_SLOWLOG | CMD_CALL_STATS); + + /* If the caller is Lua, we want to force the EVAL caller to propagate + * the script if the command flag or client flag are forcing the + * propagation. 
*/ + if (c->flags & CLIENT_LUA && g_pserver->lua_caller) { + if (c->flags & CLIENT_FORCE_REPL) + g_pserver->lua_caller->flags |= CLIENT_FORCE_REPL; + if (c->flags & CLIENT_FORCE_AOF) + g_pserver->lua_caller->flags |= CLIENT_FORCE_AOF; + } + + /* Note: the code below uses the real command that was executed + * c->cmd and c->lastcmd may be different, in case of MULTI-EXEC or + * re-written commands such as EXPIRE, GEOADD, etc. */ + + /* Record the latency this command induced on the main thread. + * unless instructed by the caller not to log. (happens when processing + * a MULTI-EXEC from inside an AOF). */ + if (flags & CMD_CALL_SLOWLOG) { + const char *latency_event = (real_cmd->flags & CMD_FAST) ? + "fast-command" : "command"; + latencyAddSampleIfNeeded(latency_event,duration/1000); + } + + /* Log the command into the Slow log if needed. + * If the client is blocked we will handle slowlog when it is unblocked. */ + if ((flags & CMD_CALL_SLOWLOG) && !(c->flags & CLIENT_BLOCKED)) { + if (duration >= g_pserver->slowlog_log_slower_than) { + AeLocker locker; + locker.arm(c); + slowlogPushCurrentCommand(c, real_cmd, duration); + } + } + + /* Send the command to clients in MONITOR mode if applicable. + * Administrative commands are considered too dangerous to be shown. */ + if (!(c->cmd->flags & (CMD_SKIP_MONITOR|CMD_ADMIN))) { + robj **argv = c->original_argv ? c->original_argv : c->argv; + int argc = c->original_argv ? c->original_argc : c->argc; + replicationFeedMonitors(c,g_pserver->monitors,c->db->id,argv,argc); + } + + /* Clear the original argv. + * If the client is blocked we will handle slowlog when it is unblocked. */ + if (!(c->flags & CLIENT_BLOCKED)) + freeClientOriginalArgv(c); + + /* populate the per-command statistics that we show in INFO commandstats. 
*/ + if (flags & CMD_CALL_STATS) { + __atomic_fetch_add(&real_cmd->microseconds, duration, __ATOMIC_RELAXED); + __atomic_fetch_add(&real_cmd->calls, 1, __ATOMIC_RELAXED); + } + + /* Propagate the command into the AOF and replication link */ + if (flags & CMD_CALL_PROPAGATE && + (c->flags & CLIENT_PREVENT_PROP) != CLIENT_PREVENT_PROP) + { + int propagate_flags = PROPAGATE_NONE; + + /* Check if the command operated changes in the data set. If so + * set for replication / AOF propagation. */ + if (dirty) propagate_flags |= (PROPAGATE_AOF|PROPAGATE_REPL); + + /* If the client forced AOF / replication of the command, set + * the flags regardless of the command effects on the data set. */ + if (c->flags & CLIENT_FORCE_REPL) propagate_flags |= PROPAGATE_REPL; + if (c->flags & CLIENT_FORCE_AOF) propagate_flags |= PROPAGATE_AOF; + + /* However prevent AOF / replication propagation if the command + * implementation called preventCommandPropagation() or similar, + * or if we don't have the call() flags to do so. */ + if (c->flags & CLIENT_PREVENT_REPL_PROP || + !(flags & CMD_CALL_PROPAGATE_REPL)) + propagate_flags &= ~PROPAGATE_REPL; + if (c->flags & CLIENT_PREVENT_AOF_PROP || + !(flags & CMD_CALL_PROPAGATE_AOF)) + propagate_flags &= ~PROPAGATE_AOF; + + if ((c->cmd->flags & CMD_SKIP_PROPOGATE) && g_pserver->fActiveReplica) + propagate_flags &= ~PROPAGATE_REPL; + + /* Call propagate() only if at least one of AOF / replication + * propagation is needed. Note that modules commands handle replication + * in an explicit way, so we never replicate them automatically. */ + if (propagate_flags != PROPAGATE_NONE && !(c->cmd->flags & CMD_MODULE)) + propagate(c->cmd,c->db->id,c->argv,c->argc,propagate_flags); + } + + /* Restore the old replication flags, since call() can be executed + * recursively. 
*/ + c->flags &= ~(CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP); + c->flags |= client_old_flags & + (CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP); + + if (!(flags & CMD_CALL_ASYNC)) { + /* Handle the alsoPropagate() API to handle commands that want to propagate + * multiple separated commands. Note that alsoPropagate() is not affected + * by CLIENT_PREVENT_PROP flag. */ + if (g_pserver->also_propagate.numops) { + int j; + redisOp *rop; + + if (flags & CMD_CALL_PROPAGATE) { + bool multi_emitted = false; + /* Wrap the commands in g_pserver->also_propagate array, + * but don't wrap it if we are already in MULTI context, + * in case the nested MULTI/EXEC. + * + * And if the array contains only one command, no need to + * wrap it, since the single command is atomic. */ + if (g_pserver->also_propagate.numops > 1 && + !(c->cmd->flags & CMD_MODULE) && + !(c->flags & CLIENT_MULTI) && + !(flags & CMD_CALL_NOWRAP)) + { + execCommandPropagateMulti(c->db->id); + multi_emitted = true; + } + + for (j = 0; j < g_pserver->also_propagate.numops; j++) { + rop = &g_pserver->also_propagate.ops[j]; + int target = rop->target; + /* Whatever the command wish is, we honor the call() flags. */ + if (!(flags&CMD_CALL_PROPAGATE_AOF)) target &= ~PROPAGATE_AOF; + if (!(flags&CMD_CALL_PROPAGATE_REPL)) target &= ~PROPAGATE_REPL; + if (target) + propagate(rop->cmd,rop->dbid,rop->argv,rop->argc,target); + } + + if (multi_emitted) { + execCommandPropagateExec(c->db->id); + } + } + redisOpArrayFree(&g_pserver->also_propagate); + } + + g_pserver->also_propagate = prev_also_propagate; + } + + /* Client pause takes effect after a transaction has finished. This needs + * to be located after everything is propagated. */ + if (!serverTL->in_exec && serverTL->client_pause_in_transaction) { + serverTL->client_pause_in_transaction = 0; + } + + /* If the client has keys tracking enabled for client side caching, + * make sure to remember the keys it fetched via this command. 
*/ + if (c->cmd->flags & CMD_READONLY) { + client *caller = (c->flags & CLIENT_LUA && g_pserver->lua_caller) ? + g_pserver->lua_caller : c; + if (caller->flags & CLIENT_TRACKING && + !(caller->flags & CLIENT_TRACKING_BCAST)) + { + trackingRememberKeys(caller); + } + } + + __atomic_fetch_add(&g_pserver->stat_numcommands, 1, __ATOMIC_RELAXED); + serverTL->fixed_time_expire--; + serverTL->prev_err_count = serverTL->stat_total_error_replies; + + if (!(flags & CMD_CALL_ASYNC)) { + /* Record peak memory after each command and before the eviction that runs + * before the next command. */ + size_t zmalloc_used = zmalloc_used_memory(); + if (zmalloc_used > g_pserver->stat_peak_memory) + g_pserver->stat_peak_memory = zmalloc_used; + } +} + +/* Used when a command that is ready for execution needs to be rejected, due to + * varios pre-execution checks. it returns the appropriate error to the client. + * If there's a transaction is flags it as dirty, and if the command is EXEC, + * it aborts the transaction. + * Note: 'reply' is expected to end with \r\n */ +void rejectCommand(client *c, robj *reply, int severity = ERR_CRITICAL) { + flagTransaction(c); + if (c->cmd) c->cmd->rejected_calls++; + if (c->cmd && c->cmd->proc == execCommand) { + execCommandAbort(c, szFromObj(reply)); + } + else { + /* using addReplyError* rather than addReply so that the error can be logged. */ + addReplyErrorObject(c, reply, severity); + } +} + +void lfenceCommand(client *c) { + c->mvccCheckpoint = getMvccTstamp(); + addReply(c, shared.ok); +} + +void rejectCommandFormat(client *c, const char *fmt, ...) { + if (c->cmd) c->cmd->rejected_calls++; + flagTransaction(c); + va_list ap; + va_start(ap,fmt); + sds s = sdscatvprintf(sdsempty(),fmt,ap); + va_end(ap); + /* Make sure there are no newlines in the string, otherwise invalid protocol + * is emitted (The args come from the user, they may contain any character). 
*/ + sdsmapchars(s, "\r\n", " ", 2); + if (c->cmd && c->cmd->proc == execCommand) { + execCommandAbort(c, s); + sdsfree(s); + } else { + /* The following frees 's'. */ + addReplyErrorSds(c, s); + } +} + +/* Returns 1 for commands that may have key names in their arguments, but have + * no pre-determined key positions. */ +static int cmdHasMovableKeys(struct redisCommand *cmd) { + return (cmd->getkeys_proc && !(cmd->flags & CMD_MODULE)) || + cmd->flags & CMD_MODULE_GETKEYS; +} + +/* If this function gets called we already read a whole + * command, arguments are in the client argv/argc fields. + * processCommand() execute the command or prepare the + * server for a bulk read from the client. + * + * If C_OK is returned the client is still alive and valid and + * other operations can be performed by the caller. Otherwise + * if C_ERR is returned the client was destroyed (i.e. after QUIT). */ +int processCommand(client *c, int callFlags) { + AssertCorrectThread(c); + serverAssert((callFlags & CMD_CALL_ASYNC) || GlobalLocksAcquired()); + if (!g_pserver->lua_timedout) { + /* Both EXEC and EVAL call call() directly so there should be + * no way in_exec or in_eval or propagate_in_transaction is 1. + * That is unless lua_timedout, in which case client may run + * some commands. Also possible that some other thread set + * propagate_in_transaction if this is an async command. */ + serverAssert(!serverTL->propagate_in_transaction); + serverAssert(!serverTL->in_exec); + serverAssert(!serverTL->in_eval); + } + + if (moduleHasCommandFilters()) + { + moduleCallCommandFilters(c); + } + + /* The QUIT command is handled separately. Normal command procs will + * go through checking for replication and QUIT will cause trouble + * when FORCE_REPLICATION is enabled and would be implemented in + * a regular command proc. 
*/ + if (!strcasecmp((const char*)ptrFromObj(c->argv[0]),"quit")) { + addReply(c,shared.ok); + c->flags |= CLIENT_CLOSE_AFTER_REPLY; + return C_ERR; + } + + /* Now lookup the command and check ASAP about trivial error conditions + * such as wrong arity, bad command name and so forth. */ + c->cmd = c->lastcmd = lookupCommand((sds)ptrFromObj(c->argv[0])); + if (!c->cmd) { + sds args = sdsempty(); + int i; + for (i=1; i < c->argc && sdslen(args) < 128; i++) + args = sdscatprintf(args, "`%.*s`, ", 128-(int)sdslen(args), (char*)ptrFromObj(c->argv[i])); + rejectCommandFormat(c,"unknown command `%s`, with args beginning with: %s", + (char*)ptrFromObj(c->argv[0]), args); + sdsfree(args); + return C_OK; + } else if ((c->cmd->arity > 0 && c->cmd->arity != c->argc) || + (c->argc < -c->cmd->arity)) { + rejectCommandFormat(c,"wrong number of arguments for '%s' command", + c->cmd->name); + return C_OK; + } + + int is_read_command = (c->cmd->flags & CMD_READONLY) || + (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_READONLY)); + int is_write_command = (c->cmd->flags & CMD_WRITE) || + (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_WRITE)); + int is_denyoom_command = (c->cmd->flags & CMD_DENYOOM) || + (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_DENYOOM)); + int is_denystale_command = !(c->cmd->flags & CMD_STALE) || + (c->cmd->proc == execCommand && (c->mstate.cmd_inv_flags & CMD_STALE)); + int is_denyloading_command = !(c->cmd->flags & CMD_LOADING) || + (c->cmd->proc == execCommand && (c->mstate.cmd_inv_flags & CMD_LOADING)); + int is_may_replicate_command = (c->cmd->flags & (CMD_WRITE | CMD_MAY_REPLICATE)) || + (c->cmd->proc == execCommand && (c->mstate.cmd_flags & (CMD_WRITE | CMD_MAY_REPLICATE))); + + if (authRequired(c)) { + /* AUTH and HELLO and no auth commands are valid even in + * non-authenticated state. 
*/ + if (!(c->cmd->flags & CMD_NO_AUTH)) { + rejectCommand(c,shared.noautherr); + return C_OK; + } + } + + /* Check if the user can run this command according to the current + * ACLs. */ + int acl_errpos; + int acl_retval = ACLCheckAllPerm(c,&acl_errpos); + if (acl_retval != ACL_OK) { + addACLLogEntry(c,acl_retval,acl_errpos,NULL); + switch (acl_retval) { + case ACL_DENIED_CMD: + rejectCommandFormat(c, + "-NOPERM this user has no permissions to run " + "the '%s' command or its subcommand", c->cmd->name); + break; + case ACL_DENIED_KEY: + rejectCommandFormat(c, + "-NOPERM this user has no permissions to access " + "one of the keys used as arguments"); + break; + case ACL_DENIED_CHANNEL: + rejectCommandFormat(c, + "-NOPERM this user has no permissions to access " + "one of the channels used as arguments"); + break; + default: + rejectCommandFormat(c, "no permission"); + break; + } + return C_OK; + } + + /* If cluster is enabled perform the cluster redirection here. + * However we don't perform the redirection if: + * 1) The sender of this command is our master. + * 2) The command has no key arguments. */ + if (g_pserver->cluster_enabled && + !(c->flags & CLIENT_MASTER) && + !(c->flags & CLIENT_LUA && + g_pserver->lua_caller->flags & CLIENT_MASTER) && + !(!cmdHasMovableKeys(c->cmd) && c->cmd->firstkey == 0 && + c->cmd->proc != execCommand)) + { + int hashslot; + int error_code; + clusterNode *n = getNodeByQuery(c,c->cmd,c->argv,c->argc, + &hashslot,&error_code); + if (n == NULL || n != g_pserver->cluster->myself) { + if (c->cmd->proc == execCommand) { + discardTransaction(c); + } else { + flagTransaction(c); + } + clusterRedirectClient(c,n,hashslot,error_code); + c->cmd->rejected_calls++; + return C_OK; + } + } + + /* Handle the maxmemory directive. 
+ * + * Note that we do not want to reclaim memory if we are here re-entering + * the event loop since there is a busy Lua script running in timeout + * condition, to avoid mixing the propagation of scripts with the + * propagation of DELs due to eviction. */ + if (g_pserver->maxmemory && !g_pserver->lua_timedout && !(callFlags & CMD_CALL_ASYNC)) { + int out_of_memory = (performEvictions(false /*fPreSnapshot*/) == EVICT_FAIL); + /* freeMemoryIfNeeded may flush replica output buffers. This may result + * into a replica, that may be the active client, to be freed. */ + if (serverTL->current_client == NULL) return C_ERR; + + int reject_cmd_on_oom = is_denyoom_command; + /* If client is in MULTI/EXEC context, queuing may consume an unlimited + * amount of memory, so we want to stop that. + * However, we never want to reject DISCARD, or even EXEC (unless it + * contains denied commands, in which case is_denyoom_command is already + * set. */ + if (c->flags & CLIENT_MULTI && + c->cmd->proc != execCommand && + c->cmd->proc != discardCommand && + c->cmd->proc != resetCommand) { + reject_cmd_on_oom = 1; + } + + if (out_of_memory && reject_cmd_on_oom) { + rejectCommand(c, shared.oomerr); + return C_OK; + } + + /* Save out_of_memory result at script start, otherwise if we check OOM + * until first write within script, memory used by lua stack and + * arguments might interfere. */ + if (c->cmd->proc == evalCommand || c->cmd->proc == evalShaCommand) { + g_pserver->lua_oom = out_of_memory; + } + } + + /* Make sure to use a reasonable amount of memory for client side + * caching metadata. */ + if (g_pserver->tracking_clients) trackingLimitUsedSlots(); + + + /* Don't accept write commands if there are problems persisting on disk + * and if this is a master instance. 
*/ + int deny_write_type = writeCommandsDeniedByDiskError(); + if (deny_write_type != DISK_ERROR_TYPE_NONE && + listLength(g_pserver->masters) == 0 && + (is_write_command ||c->cmd->proc == pingCommand)) + { + if (deny_write_type == DISK_ERROR_TYPE_RDB) + rejectCommand(c, shared.bgsaveerr); + else + rejectCommandFormat(c, + "-MISCONF Errors writing to the AOF file: %s", + strerror(g_pserver->aof_last_write_errno)); + return C_OK; + } + + /* Don't accept write commands if there are not enough good slaves and + * user configured the min-slaves-to-write option. */ + if (listLength(g_pserver->masters) == 0 && + g_pserver->repl_min_slaves_to_write && + g_pserver->repl_min_slaves_max_lag && + is_write_command && + g_pserver->repl_good_slaves_count < g_pserver->repl_min_slaves_to_write) + { + rejectCommand(c, shared.noreplicaserr); + return C_OK; + } + + /* Don't accept write commands if this is a read only replica. But + * accept write commands if this is our master. */ + if (listLength(g_pserver->masters) && g_pserver->repl_slave_ro && + !(c->flags & CLIENT_MASTER) && + is_write_command) + { + rejectCommand(c, shared.roslaveerr); + return C_OK; + } + + /* Only allow a subset of commands in the context of Pub/Sub if the + * connection is in RESP2 mode. With RESP3 there are no limits. */ + if ((c->flags & CLIENT_PUBSUB && c->resp == 2) && + c->cmd->proc != pingCommand && + c->cmd->proc != subscribeCommand && + c->cmd->proc != unsubscribeCommand && + c->cmd->proc != psubscribeCommand && + c->cmd->proc != punsubscribeCommand && + c->cmd->proc != resetCommand) { + rejectCommandFormat(c, + "Can't execute '%s': only (P)SUBSCRIBE / " + "(P)UNSUBSCRIBE / PING / QUIT / RESET are allowed in this context", + c->cmd->name); + return C_OK; + } + + if (listLength(g_pserver->masters)) + { + /* Only allow commands with flag "t", such as INFO, SLAVEOF and so on, + * when replica-serve-stale-data is no and we are a replica with a broken + * link with master. 
*/ + if (FBrokenLinkToMaster() && + g_pserver->repl_serve_stale_data == 0 && + is_denystale_command && + !(g_pserver->fActiveReplica && c->cmd->proc == syncCommand) + && !FInReplicaReplay()) + { + rejectCommand(c, shared.masterdownerr); + return C_OK; + } + } + + /* Loading DB? Return an error if the command has not the + * CMD_LOADING flag. */ + if (g_pserver->loading && is_denyloading_command) { + /* Active Replicas can execute read only commands, and optionally write commands */ + if (!(g_pserver->loading == LOADING_REPLICATION && g_pserver->fActiveReplica && ((c->cmd->flags & CMD_READONLY) || g_pserver->fWriteDuringActiveLoad))) + { + rejectCommand(c, shared.loadingerr, ERR_WARNING); + return C_OK; + } + } + + /* Lua script too slow? Only allow a limited number of commands. + * Note that we need to allow the transactions commands, otherwise clients + * sending a transaction with pipelining without error checking, may have + * the MULTI plus a few initial commands refused, then the timeout + * condition resolves, and the bottom-half of the transaction gets + * executed, see Github PR #7022. */ + if (g_pserver->lua_timedout && + c->cmd->proc != authCommand && + c->cmd->proc != helloCommand && + c->cmd->proc != replconfCommand && + c->cmd->proc != multiCommand && + c->cmd->proc != discardCommand && + c->cmd->proc != watchCommand && + c->cmd->proc != unwatchCommand && + c->cmd->proc != resetCommand && + !(c->cmd->proc == shutdownCommand && + c->argc == 2 && + tolower(((char*)ptrFromObj(c->argv[1]))[0]) == 'n') && + !(c->cmd->proc == scriptCommand && + c->argc == 2 && + tolower(((char*)ptrFromObj(c->argv[1]))[0]) == 'k')) + { + rejectCommand(c, shared.slowscripterr); + return C_OK; + } + + /* Prevent a replica from sending commands that access the keyspace. + * The main objective here is to prevent abuse of client pause check + * from which replicas are exempt. 
*/ + if ((c->flags & CLIENT_SLAVE) && (is_may_replicate_command || is_write_command || is_read_command)) { + rejectCommandFormat(c, "Replica can't interract with the keyspace"); + return C_OK; + } + + /* If the server is paused, block the client until + * the pause has ended. Replicas are never paused. */ + if (!(c->flags & CLIENT_SLAVE) && + ((g_pserver->client_pause_type == CLIENT_PAUSE_ALL) || + (g_pserver->client_pause_type == CLIENT_PAUSE_WRITE && is_may_replicate_command))) + { + c->bpop.timeout = 0; + blockClient(c,BLOCKED_PAUSE); + return C_OK; + } + + /* Exec the command */ + if (c->flags & CLIENT_MULTI && + c->cmd->proc != execCommand && c->cmd->proc != discardCommand && + c->cmd->proc != multiCommand && c->cmd->proc != watchCommand && + c->cmd->proc != resetCommand) + { + queueMultiCommand(c); + addReply(c,shared.queued); + } else { + /* If the command was replication or admin related we *must* flush our buffers first. This is in case + something happens which would modify what we would send to replicas */ + if (c->cmd->flags & (CMD_MODULE | CMD_ADMIN)) + flushReplBacklogToClients(); + + if (c->flags & CLIENT_AUDIT_LOGGING){ + getKeysResult result = GETKEYS_RESULT_INIT; + int numkeys = getKeysFromCommand(c->cmd, c->argv, c->argc, &result); + int *keyindex = result.keys; + + sds str = sdsempty(); + for (int j = 0; j < numkeys; j++) { + str = sdscatsds(str, (sds)ptrFromObj(c->argv[keyindex[j]])); + str = sdscat(str, " "); + } + + if (numkeys > 0) + { + serverLog(LL_NOTICE, "Audit Log: %s, cmd %s, keys: %s", c->fprint, c->cmd->name, str); + } else { + serverLog(LL_NOTICE, "Audit Log: %s, cmd %s", c->fprint, c->cmd->name); + } + sdsfree(str); + } + + call(c,callFlags); + c->woff = g_pserver->master_repl_offset; + + if (c->cmd->flags & (CMD_MODULE | CMD_ADMIN)) + flushReplBacklogToClients(); + + if (listLength(g_pserver->ready_keys)) + handleClientsBlockedOnKeys(); + } + + return C_OK; +} + +bool client::postFunction(std::function fn, bool fLock) { + 
this->casyncOpsPending++; + return aePostFunction(g_pserver->rgthreadvar[this->iel].el, [this, fn]{ + std::lock_guardlock)> lock(this->lock); + fn(this); + --casyncOpsPending; + }, fLock) == AE_OK; +} + +std::vector clientArgs(client *c) { + std::vector args; + for (int j = 0; j < c->argc; j++) { + args.push_back(robj_sharedptr(c->argv[j])); + } + return args; +} + +bool client::asyncCommand(std::function &)> &&mainFn, + std::function &&postFn) +{ + serverAssert(FCorrectThread(this)); + if (serverTL->in_eval) + return false; // we cannot block clients in EVAL + const redisDbPersistentDataSnapshot *snapshot = nullptr; + if (!(this->flags & (CLIENT_MULTI | CLIENT_BLOCKED))) + snapshot = this->db->createSnapshot(this->mvccCheckpoint, false /* fOptional */); + if (snapshot == nullptr) { + return false; + } + aeEventLoop *el = serverTL->el; + blockClient(this, BLOCKED_ASYNC); + g_pserver->asyncworkqueue->AddWorkFunction([el, this, mainFn, postFn, snapshot] { + std::vector args = clientArgs(this); + aePostFunction(el, [this, mainFn, postFn, snapshot, args] { + aeReleaseLock(); + std::unique_locklock)> lock(this->lock); + AeLocker locker; + locker.arm(this); + unblockClient(this); + mainFn(snapshot, args); + locker.disarm(); + lock.unlock(); + if (postFn) + postFn(snapshot); + this->db->endSnapshotAsync(snapshot); + aeAcquireLock(); + }); + }); + return true; +} + +/* ====================== Error lookup and execution ===================== */ + +void incrementErrorCount(const char *fullerr, size_t namelen) { + struct redisError *error = (struct redisError*)raxFind(g_pserver->errors,(unsigned char*)fullerr,namelen); + if (error == raxNotFound) { + error = (struct redisError*)zmalloc(sizeof(*error)); + error->count = 0; + raxInsert(g_pserver->errors,(unsigned char*)fullerr,namelen,error,NULL); + } + error->count++; +} + +/*================================== Shutdown =============================== */ + +/* Close listening sockets. 
Also unlink the unix domain socket if + * unlink_unix_socket is non-zero. */ +void closeListeningSockets(int unlink_unix_socket) { + int j; + + for (int iel = 0; iel < cserver.cthreads; ++iel) + { + for (j = 0; j < g_pserver->rgthreadvar[iel].ipfd.count; j++) + close(g_pserver->rgthreadvar[iel].ipfd.fd[j]); + for (j = 0; j < g_pserver->rgthreadvar[iel].tlsfd.count; j++) + close(g_pserver->rgthreadvar[iel].tlsfd.fd[j]); + } + if (g_pserver->sofd != -1) close(g_pserver->sofd); + if (g_pserver->cluster_enabled) + for (j = 0; j < g_pserver->cfd.count; j++) close(g_pserver->cfd.fd[j]); + if (unlink_unix_socket && g_pserver->unixsocket) { + serverLog(LL_NOTICE,"Removing the unix socket file."); + unlink(g_pserver->unixsocket); /* don't care if this fails */ + } +} + +int prepareForShutdown(int flags) { + /* When SHUTDOWN is called while the server is loading a dataset in + * memory we need to make sure no attempt is performed to save + * the dataset on shutdown (otherwise it could overwrite the current DB + * with half-read data). + * + * Also when in Sentinel mode clear the SAVE flag and force NOSAVE. */ + if (g_pserver->loading || g_pserver->sentinel_mode) + flags = (flags & ~SHUTDOWN_SAVE) | SHUTDOWN_NOSAVE; + + int save = flags & SHUTDOWN_SAVE; + int nosave = flags & SHUTDOWN_NOSAVE; + + serverLog(LL_WARNING,"User requested shutdown..."); + if (cserver.supervised_mode == SUPERVISED_SYSTEMD) + redisCommunicateSystemd("STOPPING=1\n"); + + /* Kill all the Lua debugger forked sessions. */ + ldbKillForkedSessions(); + + /* Kill the saving child if there is a background saving in progress. + We want to avoid race conditions, for instance our saving child may + overwrite the synchronous saving did by SHUTDOWN. */ + if (g_pserver->FRdbSaveInProgress()) { + serverLog(LL_WARNING,"There is a child saving an .rdb. 
Killing it!"); + killRDBChild(); + /* Note that, in killRDBChild normally has backgroundSaveDoneHandler + * doing it's cleanup, but in this case this code will not be reached, + * so we need to call rdbRemoveTempFile which will close fd(in order + * to unlink file actully) in background thread. + * The temp rdb file fd may won't be closed when redis exits quickly, + * but OS will close this fd when process exits. */ + rdbRemoveTempFile(g_pserver->rdbThreadVars.tmpfileNum, 0); + } + + /* Kill module child if there is one. */ + if (g_pserver->child_type == CHILD_TYPE_MODULE) { + serverLog(LL_WARNING,"There is a module fork child. Killing it!"); + TerminateModuleForkChild(g_pserver->child_pid,0); + } + + if (g_pserver->aof_state != AOF_OFF) { + /* Kill the AOF saving child as the AOF we already have may be longer + * but contains the full dataset anyway. */ + if (g_pserver->child_type == CHILD_TYPE_AOF) { + /* If we have AOF enabled but haven't written the AOF yet, don't + * shutdown or else the dataset will be lost. */ + if (g_pserver->aof_state == AOF_WAIT_REWRITE) { + serverLog(LL_WARNING, "Writing initial AOF, can't exit."); + return C_ERR; + } + serverLog(LL_WARNING, + "There is a child rewriting the AOF. Killing it!"); + killAppendOnlyChild(); + } + /* Append only file: flush buffers and fsync() the AOF at exit */ + serverLog(LL_NOTICE,"Calling fsync() on the AOF file."); + flushAppendOnlyFile(1); + if (redis_fsync(g_pserver->aof_fd) == -1) { + serverLog(LL_WARNING,"Fail to fsync the AOF file: %s.", + strerror(errno)); + } + } + + /* Create a new RDB file before exiting. */ + if ((g_pserver->saveparamslen > 0 && !nosave) || save) { + serverLog(LL_NOTICE,"Saving the final RDB snapshot before exiting."); + if (cserver.supervised_mode == SUPERVISED_SYSTEMD) + redisCommunicateSystemd("STATUS=Saving the final RDB snapshot\n"); + /* Snapshotting. 
Perform a SYNC SAVE and exit */ + rdbSaveInfo rsi, *rsiptr; + rsiptr = rdbPopulateSaveInfo(&rsi); + if (rdbSave(nullptr, rsiptr) != C_OK) { + /* Ooops.. error saving! The best we can do is to continue + * operating. Note that if there was a background saving process, + * in the next cron() Redis will be notified that the background + * saving aborted, handling special stuff like slaves pending for + * synchronization... */ + serverLog(LL_WARNING,"Error trying to save the DB, can't exit."); + if (cserver.supervised_mode == SUPERVISED_SYSTEMD) + redisCommunicateSystemd("STATUS=Error trying to save the DB, can't exit.\n"); + return C_ERR; + } + + // Also Dump To FLASH if Applicable + for (int idb = 0; idb < cserver.dbnum; ++idb) { + if (g_pserver->db[idb]->processChanges(false)) + g_pserver->db[idb]->commitChanges(); + } + saveMasterStatusToStorage(true); + } + + /* Fire the shutdown modules event. */ + moduleFireServerEvent(REDISMODULE_EVENT_SHUTDOWN,0,NULL); + + /* Remove the pid file if possible and needed. */ + if (cserver.daemonize || cserver.pidfile) { + serverLog(LL_NOTICE,"Removing the pid file."); + unlink(cserver.pidfile); + } + + if (g_pserver->repl_batch_idxStart >= 0) { + flushReplBacklogToClients(); + g_pserver->repl_batch_offStart = -1; + g_pserver->repl_batch_idxStart = -1; + } + + /* Best effort flush of replica output buffers, so that we hopefully + * send them pending writes. */ + flushSlavesOutputBuffers(); + g_pserver->repl_batch_idxStart = -1; + g_pserver->repl_batch_offStart = -1; + + /* Close the listening sockets. Apparently this allows faster restarts. 
*/ + closeListeningSockets(1); + + if (g_pserver->asyncworkqueue) + { + aeReleaseLock(); + g_pserver->asyncworkqueue->shutdown(); + aeAcquireLock(); + } + + for (int iel = 0; iel < cserver.cthreads; ++iel) + { + aePostFunction(g_pserver->rgthreadvar[iel].el, [iel]{ + g_pserver->rgthreadvar[iel].el->stop = 1; + }); + } + + serverLog(LL_WARNING,"%s is now ready to exit, bye bye...", + g_pserver->sentinel_mode ? "Sentinel" : "futriix"); + + return C_OK; +} + +/*================================== Commands =============================== */ + +/* Sometimes Redis cannot accept write commands because there is a persistence + * error with the RDB or AOF file, and Redis is configured in order to stop + * accepting writes in such situation. This function returns if such a + * condition is active, and the type of the condition. + * + * Function return values: + * + * DISK_ERROR_TYPE_NONE: No problems, we can accept writes. + * DISK_ERROR_TYPE_AOF: Don't accept writes: AOF errors. + * DISK_ERROR_TYPE_RDB: Don't accept writes: RDB errors. + */ +int writeCommandsDeniedByDiskError(void) { + if (g_pserver->stop_writes_on_bgsave_err && + g_pserver->saveparamslen > 0 && + g_pserver->lastbgsave_status == C_ERR) + { + return DISK_ERROR_TYPE_RDB; + } else if (g_pserver->aof_state != AOF_OFF) { + if (g_pserver->aof_last_write_status == C_ERR) { + return DISK_ERROR_TYPE_AOF; + } + /* AOF fsync error. */ + int aof_bio_fsync_status; + atomicGet(g_pserver->aof_bio_fsync_status,aof_bio_fsync_status); + if (aof_bio_fsync_status == C_ERR) { + atomicGet(g_pserver->aof_bio_fsync_errno,g_pserver->aof_last_write_errno); + return DISK_ERROR_TYPE_AOF; + } + } + + return DISK_ERROR_TYPE_NONE; +} + +/* The PING command. It works in a different way if the client is in + * in Pub/Sub mode. */ +void pingCommand(client *c) { + /* The command takes zero or one arguments. 
*/ + if (c->argc > 2) { + addReplyErrorFormat(c,"wrong number of arguments for '%s' command", + c->cmd->name); + return; + } + + if (g_pserver->soft_shutdown && !(c->flags & CLIENT_IGNORE_SOFT_SHUTDOWN)) { + addReplyError(c, "-SHUTDOWN PENDING"); + return; + } + + if (c->flags & CLIENT_PUBSUB && c->resp == 2) { + addReply(c,shared.mbulkhdr[2]); + addReplyBulkCBuffer(c,"pong",4); + if (c->argc == 1) + addReplyBulkCBuffer(c,"",0); + else + addReplyBulk(c,c->argv[1]); + } else { + if (c->argc == 1) + addReply(c,shared.pong); + else + addReplyBulk(c,c->argv[1]); + } +} + +void echoCommand(client *c) { + addReplyBulk(c,c->argv[1]); +} + +void timeCommand(client *c) { + struct timeval tv; + + /* gettimeofday() can only fail if &tv is a bad address so we + * don't check for errors. */ + gettimeofday(&tv,NULL); + addReplyArrayLen(c,2); + addReplyBulkLongLong(c,tv.tv_sec); + addReplyBulkLongLong(c,tv.tv_usec); +} + +/* Helper function for addReplyCommand() to output flags. */ +int addReplyCommandFlag(client *c, struct redisCommand *cmd, int f, const char *reply) { + if (cmd->flags & f) { + addReplyStatus(c, reply); + return 1; + } + return 0; +} + +/* Output the representation of a Redis command. Used by the COMMAND command. 
*/ +void addReplyCommand(client *c, struct redisCommand *cmd) { + if (!cmd) { + addReplyNull(c); + } else { + /* We are adding: command name, arg count, flags, first, last, offset, categories */ + addReplyArrayLen(c, 7); + addReplyBulkCString(c, cmd->name); + addReplyLongLong(c, cmd->arity); + + int flagcount = 0; + void *flaglen = addReplyDeferredLen(c); + flagcount += addReplyCommandFlag(c,cmd,CMD_WRITE, "write"); + flagcount += addReplyCommandFlag(c,cmd,CMD_READONLY, "readonly"); + flagcount += addReplyCommandFlag(c,cmd,CMD_DENYOOM, "denyoom"); + flagcount += addReplyCommandFlag(c,cmd,CMD_ADMIN, "admin"); + flagcount += addReplyCommandFlag(c,cmd,CMD_PUBSUB, "pubsub"); + flagcount += addReplyCommandFlag(c,cmd,CMD_NOSCRIPT, "noscript"); + flagcount += addReplyCommandFlag(c,cmd,CMD_RANDOM, "random"); + flagcount += addReplyCommandFlag(c,cmd,CMD_SORT_FOR_SCRIPT,"sort_for_script"); + flagcount += addReplyCommandFlag(c,cmd,CMD_LOADING, "loading"); + flagcount += addReplyCommandFlag(c,cmd,CMD_STALE, "stale"); + flagcount += addReplyCommandFlag(c,cmd,CMD_SKIP_MONITOR, "skip_monitor"); + flagcount += addReplyCommandFlag(c,cmd,CMD_SKIP_SLOWLOG, "skip_slowlog"); + flagcount += addReplyCommandFlag(c,cmd,CMD_ASKING, "asking"); + flagcount += addReplyCommandFlag(c,cmd,CMD_FAST, "fast"); + flagcount += addReplyCommandFlag(c,cmd,CMD_NO_AUTH, "no_auth"); + flagcount += addReplyCommandFlag(c,cmd,CMD_MAY_REPLICATE, "may_replicate"); + if (cmdHasMovableKeys(cmd)) { + addReplyStatus(c, "movablekeys"); + flagcount += 1; + } + setDeferredSetLen(c, flaglen, flagcount); + + addReplyLongLong(c, cmd->firstkey); + addReplyLongLong(c, cmd->lastkey); + addReplyLongLong(c, cmd->keystep); + + addReplyCommandCategories(c,cmd); + } +} + +/* COMMAND */ +void commandCommand(client *c) { + dictIterator *di; + dictEntry *de; + + if (c->argc == 2 && !strcasecmp((const char*)ptrFromObj(c->argv[1]),"help")) { + const char *help[] = { +"(no subcommand)", +" Return details about all KeyDB commands.", 
+"COUNT", +" Return the total number of commands in this KeyDB server.", +"GETKEYS ", +" Return the keys from a full KeyDB command.", +"INFO [ ...]", +" Return details about multiple KeyDB commands.", +NULL + }; + addReplyHelp(c, help); + } else if (c->argc == 1) { + addReplyArrayLen(c, dictSize(g_pserver->commands)); + di = dictGetIterator(g_pserver->commands); + while ((de = dictNext(di)) != NULL) { + addReplyCommand(c, (redisCommand*)dictGetVal(de)); + } + dictReleaseIterator(di); + } else if (!strcasecmp((const char*)ptrFromObj(c->argv[1]), "info")) { + int i; + addReplyArrayLen(c, c->argc-2); + for (i = 2; i < c->argc; i++) { + addReplyCommand(c, (redisCommand*)dictFetchValue(g_pserver->commands, ptrFromObj(c->argv[i]))); + } + } else if (!strcasecmp((const char*)ptrFromObj(c->argv[1]), "count") && c->argc == 2) { + addReplyLongLong(c, dictSize(g_pserver->commands)); + } else if (!strcasecmp((const char*)ptrFromObj(c->argv[1]),"getkeys") && c->argc >= 3) { + struct redisCommand *cmd = (redisCommand*)lookupCommand((sds)ptrFromObj(c->argv[2])); + getKeysResult result = GETKEYS_RESULT_INIT; + int j; + + if (!cmd) { + addReplyError(c,"Invalid command specified"); + return; + } else if (cmd->getkeys_proc == NULL && cmd->firstkey == 0) { + addReplyError(c,"The command has no key arguments"); + return; + } else if ((cmd->arity > 0 && cmd->arity != c->argc-2) || + ((c->argc-2) < -cmd->arity)) + { + addReplyError(c,"Invalid number of arguments specified for command"); + return; + } + + if (!getKeysFromCommand(cmd,c->argv+2,c->argc-2,&result)) { + addReplyError(c,"Invalid arguments specified for command"); + } else { + addReplyArrayLen(c,result.numkeys); + for (j = 0; j < result.numkeys; j++) addReplyBulk(c,c->argv[result.keys[j]+2]); + } + getKeysFreeResult(&result); + } else { + addReplySubcommandSyntaxError(c); + } +} + +/* Convert an amount of bytes into a human readable string in the form + * of 100B, 2G, 100M, 4K, and so forth. 
*/ +void bytesToHuman(char *s, unsigned long long n, size_t bufsize) { + double d; + + if (n < 1024) { + /* Bytes */ + snprintf(s,bufsize,"%lluB",n); + } else if (n < (1024*1024)) { + d = (double)n/(1024); + snprintf(s,bufsize,"%.2fK",d); + } else if (n < (1024LL*1024*1024)) { + d = (double)n/(1024*1024); + snprintf(s,bufsize,"%.2fM",d); + } else if (n < (1024LL*1024*1024*1024)) { + d = (double)n/(1024LL*1024*1024); + snprintf(s,bufsize,"%.2fG",d); + } else if (n < (1024LL*1024*1024*1024*1024)) { + d = (double)n/(1024LL*1024*1024*1024); + snprintf(s,bufsize,"%.2fT",d); + } else if (n < (1024LL*1024*1024*1024*1024*1024)) { + d = (double)n/(1024LL*1024*1024*1024*1024); + snprintf(s,bufsize,"%.2fP",d); + } else { + /* Let's hope we never need this */ + snprintf(s,bufsize,"%lluB",n); + } +} + +/* Characters we sanitize on INFO output to maintain expected format. */ +static char unsafe_info_chars[] = "#:\n\r"; +static char unsafe_info_chars_substs[] = "____"; /* Must be same length as above */ + +/* Returns a sanitized version of s that contains no unsafe info string chars. + * If no unsafe characters are found, simply returns s. Caller needs to + * free tmp if it is non-null on return. + */ +const char *getSafeInfoString(const char *s, size_t len, char **tmp) { + *tmp = NULL; + if (mempbrk(s, len, unsafe_info_chars,sizeof(unsafe_info_chars)-1) + == NULL) return s; + char *_new = *tmp = (char*)zmalloc(len + 1); + memcpy(_new, s, len); + _new[len] = '\0'; + return memmapchars(_new, len, unsafe_info_chars, unsafe_info_chars_substs, + sizeof(unsafe_info_chars)-1); +} + +/* Create the string returned by the INFO command. This is decoupled + * by the INFO command itself as we need to report the same information + * on memory corruption problems. 
*/ +sds genRedisInfoString(const char *section) { + sds info = sdsempty(); + time_t uptime = g_pserver->unixtime-cserver.stat_starttime; + int j; + int allsections = 0, defsections = 0, everything = 0, modules = 0; + int sections = 0; + + if (section == NULL) section = "default"; + allsections = strcasecmp(section,"all") == 0; + defsections = strcasecmp(section,"default") == 0; + everything = strcasecmp(section,"everything") == 0; + modules = strcasecmp(section,"modules") == 0; + if (everything) allsections = 1; + + /* Server */ + if (allsections || defsections || !strcasecmp(section,"server")) { + static int call_uname = 1; + static struct utsname name; + const char *mode; + const char *supervised; + + if (g_pserver->cluster_enabled) mode = "cluster"; + else if (g_pserver->sentinel_mode) mode = "sentinel"; + else mode = "standalone"; + + if (cserver.supervised) { + if (cserver.supervised_mode == SUPERVISED_UPSTART) supervised = "upstart"; + else if (cserver.supervised_mode == SUPERVISED_SYSTEMD) supervised = "systemd"; + else supervised = "unknown"; + } else { + supervised = "no"; + } + + if (sections++) info = sdscat(info,"\r\n"); + + if (call_uname) { + /* Uname can be slow and is always the same output. Cache it. 
*/ + uname(&name); + call_uname = 0; + } + + unsigned int lruclock = g_pserver->lruclock.load(); + ustime_t ustime; + __atomic_load(&g_pserver->ustime, &ustime, __ATOMIC_RELAXED); + info = sdscatfmt(info, + "# Server\r\n" + "redis_version:%s\r\n" + "redis_git_sha1:%s\r\n" + "redis_git_dirty:%i\r\n" + "redis_build_id:%s\r\n" + "redis_mode:%s\r\n" + "os:%s %s %s\r\n" + "arch_bits:%i\r\n" + "multiplexing_api:%s\r\n" + "atomicvar_api:%s\r\n" + "gcc_version:%i.%i.%i\r\n" + "process_id:%I\r\n" + "process_supervised:%s\r\n" + "run_id:%s\r\n" + "tcp_port:%i\r\n" + "server_time_usec:%I\r\n" + "uptime_in_seconds:%I\r\n" + "uptime_in_days:%I\r\n" + "hz:%i\r\n" + "configured_hz:%i\r\n" + "lru_clock:%u\r\n" + "executable:%s\r\n" + "config_file:%s\r\n" + "availability_zone:%s\r\n" + "features:%s\r\n", + KEYDB_SET_VERSION, + redisGitSHA1(), + strtol(redisGitDirty(),NULL,10) > 0, + redisBuildIdString(), + mode, + name.sysname, name.release, name.machine, + (int)sizeof(void*)*8, + aeGetApiName(), + REDIS_ATOMIC_API, +#ifdef __GNUC__ + __GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__, +#else + 0,0,0, +#endif + (int64_t) getpid(), + supervised, + g_pserver->runid, + g_pserver->port ? g_pserver->port : g_pserver->tls_port, + (int64_t)ustime, + (int64_t)uptime, + (int64_t)(uptime/(3600*24)), + g_pserver->hz.load(), + g_pserver->config_hz, + lruclock, + cserver.executable ? cserver.executable : "", + cserver.configfile ? 
cserver.configfile : "", + g_pserver->sdsAvailabilityZone, + "cluster_mget"); + } + + /* Clients */ + if (allsections || defsections || !strcasecmp(section,"clients")) { + size_t maxin, maxout; + getExpansiveClientsInfo(&maxin,&maxout); + if (sections++) info = sdscat(info,"\r\n"); + info = sdscatprintf(info, + "# Clients\r\n" + "connected_clients:%lu\r\n" + "cluster_connections:%lu\r\n" + "maxclients:%u\r\n" + "client_recent_max_input_buffer:%zu\r\n" + "client_recent_max_output_buffer:%zu\r\n" + "blocked_clients:%d\r\n" + "tracking_clients:%d\r\n" + "clients_in_timeout_table:%" PRIu64 "\r\n" + "current_client_thread:%d\r\n", + listLength(g_pserver->clients)-listLength(g_pserver->slaves), + getClusterConnectionsCount(), + g_pserver->maxclients, + maxin, maxout, + g_pserver->blocked_clients, + g_pserver->tracking_clients, + raxSize(g_pserver->clients_timeout_table), + static_cast(serverTL - g_pserver->rgthreadvar)); + for (int ithread = 0; ithread < cserver.cthreads; ++ithread) + { + info = sdscatprintf(info, + "thread_%d_clients:%d\r\n", + ithread, g_pserver->rgthreadvar[ithread].cclients); + } + } + + /* Memory */ + if (allsections || defsections || !strcasecmp(section,"memory")) { + char hmem[64]; + char peak_hmem[64]; + char total_system_hmem[64]; + char used_memory_lua_hmem[64]; + char used_memory_scripts_hmem[64]; + char used_memory_rss_hmem[64]; + char maxmemory_hmem[64]; + size_t zmalloc_used = zmalloc_used_memory(); + size_t total_system_mem = cserver.system_memory_size; + const char *evict_policy = evictPolicyToString(); + long long memory_lua = g_pserver->lua ? (long long)lua_gc(g_pserver->lua,LUA_GCCOUNT,0)*1024 : 0; + struct redisMemOverhead *mh = getMemoryOverheadData(); + char available_system_mem[64] = "unavailable"; + + /* Peak memory is updated from time to time by serverCron() so it + * may happen that the instantaneous value is slightly bigger than + * the peak value. 
This may confuse users, so we update the peak + * if found smaller than the current memory usage. */ + if (zmalloc_used > g_pserver->stat_peak_memory) + g_pserver->stat_peak_memory = zmalloc_used; + + if (g_pserver->cron_malloc_stats.sys_available) { + snprintf(available_system_mem, 64, "%lu", g_pserver->cron_malloc_stats.sys_available); + } + + bytesToHuman(hmem,zmalloc_used,sizeof(hmem)); + bytesToHuman(peak_hmem,g_pserver->stat_peak_memory,sizeof(peak_hmem)); + bytesToHuman(total_system_hmem,total_system_mem,sizeof(total_system_hmem)); + bytesToHuman(used_memory_lua_hmem,memory_lua,sizeof(used_memory_lua_hmem)); + bytesToHuman(used_memory_scripts_hmem,mh->lua_caches,sizeof(used_memory_scripts_hmem)); + bytesToHuman(used_memory_rss_hmem,g_pserver->cron_malloc_stats.process_rss,sizeof(used_memory_rss_hmem)); + bytesToHuman(maxmemory_hmem,g_pserver->maxmemory,sizeof(maxmemory_hmem)); + + if (sections++) info = sdscat(info,"\r\n"); + info = sdscatprintf(info, + "# Memory\r\n" + "used_memory:%zu\r\n" + "used_memory_human:%s\r\n" + "used_memory_rss:%zu\r\n" + "used_memory_rss_human:%s\r\n" + "used_memory_peak:%zu\r\n" + "used_memory_peak_human:%s\r\n" + "used_memory_peak_perc:%.2f%%\r\n" + "used_memory_overhead:%zu\r\n" + "used_memory_startup:%zu\r\n" + "used_memory_dataset:%zu\r\n" + "used_memory_dataset_perc:%.2f%%\r\n" + "allocator_allocated:%zu\r\n" + "allocator_active:%zu\r\n" + "allocator_resident:%zu\r\n" + "total_system_memory:%lu\r\n" + "total_system_memory_human:%s\r\n" + "used_memory_lua:%lld\r\n" + "used_memory_lua_human:%s\r\n" + "used_memory_scripts:%lld\r\n" + "used_memory_scripts_human:%s\r\n" + "number_of_cached_scripts:%lu\r\n" + "maxmemory:%lld\r\n" + "maxmemory_human:%s\r\n" + "maxmemory_policy:%s\r\n" + "allocator_frag_ratio:%.2f\r\n" + "allocator_frag_bytes:%zu\r\n" + "allocator_rss_ratio:%.2f\r\n" + "allocator_rss_bytes:%zd\r\n" + "rss_overhead_ratio:%.2f\r\n" + "rss_overhead_bytes:%zd\r\n" + "mem_fragmentation_ratio:%.2f\r\n" + 
"mem_fragmentation_bytes:%zd\r\n" + "mem_not_counted_for_evict:%zu\r\n" + "mem_replication_backlog:%zu\r\n" + "mem_clients_slaves:%zu\r\n" + "mem_clients_normal:%zu\r\n" + "mem_aof_buffer:%zu\r\n" + "mem_allocator:%s\r\n" + "active_defrag_running:%d\r\n" + "lazyfree_pending_objects:%zu\r\n" + "lazyfreed_objects:%zu\r\n" + "storage_provider:%s\r\n" + "available_system_memory:%s\r\n", + zmalloc_used, + hmem, + g_pserver->cron_malloc_stats.process_rss, + used_memory_rss_hmem, + g_pserver->stat_peak_memory, + peak_hmem, + mh->peak_perc, + mh->overhead_total, + mh->startup_allocated, + mh->dataset, + mh->dataset_perc, + g_pserver->cron_malloc_stats.allocator_allocated, + g_pserver->cron_malloc_stats.allocator_active, + g_pserver->cron_malloc_stats.allocator_resident, + (unsigned long)total_system_mem, + total_system_hmem, + memory_lua, + used_memory_lua_hmem, + (long long) mh->lua_caches, + used_memory_scripts_hmem, + dictSize(g_pserver->lua_scripts), + g_pserver->maxmemory, + maxmemory_hmem, + evict_policy, + mh->allocator_frag, + mh->allocator_frag_bytes, + mh->allocator_rss, + mh->allocator_rss_bytes, + mh->rss_extra, + mh->rss_extra_bytes, + mh->total_frag, /* This is the total RSS overhead, including + fragmentation, but not just it. This field + (and the next one) is named like that just + for backward compatibility. */ + mh->total_frag_bytes, + freeMemoryGetNotCountedMemory(), + mh->repl_backlog, + mh->clients_slaves, + mh->clients_normal, + mh->aof_buffer, + ZMALLOC_LIB, + g_pserver->active_defrag_running, + lazyfreeGetPendingObjectsCount(), + lazyfreeGetFreedObjectsCount(), + g_pserver->m_pstorageFactory ? 
g_pserver->m_pstorageFactory->name() : "none", + available_system_mem + ); + freeMemoryOverheadData(mh); + } + + /* Persistence */ + if (allsections || defsections || !strcasecmp(section,"persistence")) { + if (sections++) info = sdscat(info,"\r\n"); + double fork_perc = 0; + if (g_pserver->stat_module_progress) { + fork_perc = g_pserver->stat_module_progress * 100; + } else if (g_pserver->stat_current_save_keys_total) { + fork_perc = ((double)g_pserver->stat_current_save_keys_processed / g_pserver->stat_current_save_keys_total) * 100; + } + int aof_bio_fsync_status; + atomicGet(g_pserver->aof_bio_fsync_status,aof_bio_fsync_status); + + info = sdscatprintf(info, + "# Persistence\r\n" + "loading:%d\r\n" + "current_cow_size:%zu\r\n" + "current_cow_size_age:%lu\r\n" + "current_fork_perc:%.2f\r\n" + "current_save_keys_processed:%zu\r\n" + "current_save_keys_total:%zu\r\n" + "rdb_changes_since_last_save:%lld\r\n" + "rdb_bgsave_in_progress:%d\r\n" + "rdb_last_save_time:%jd\r\n" + "rdb_last_bgsave_status:%s\r\n" + "rdb_last_bgsave_time_sec:%jd\r\n" + "rdb_current_bgsave_time_sec:%jd\r\n" + "rdb_last_cow_size:%zu\r\n" + "aof_enabled:%d\r\n" + "aof_rewrite_in_progress:%d\r\n" + "aof_rewrite_scheduled:%d\r\n" + "aof_last_rewrite_time_sec:%jd\r\n" + "aof_current_rewrite_time_sec:%jd\r\n" + "aof_last_bgrewrite_status:%s\r\n" + "aof_last_write_status:%s\r\n" + "aof_last_cow_size:%zu\r\n" + "module_fork_in_progress:%d\r\n" + "module_fork_last_cow_size:%zu\r\n", + !!g_pserver->loading.load(std::memory_order_relaxed), /* Note: libraries expect 1 or 0 here so coerce our enum */ + g_pserver->stat_current_cow_bytes, + g_pserver->stat_current_cow_updated ? (unsigned long) elapsedMs(g_pserver->stat_current_cow_updated) / 1000 : 0, + fork_perc, + g_pserver->stat_current_save_keys_processed, + g_pserver->stat_current_save_keys_total, + g_pserver->dirty, + g_pserver->FRdbSaveInProgress(), + (intmax_t)g_pserver->lastsave, + (g_pserver->lastbgsave_status == C_OK) ? 
"ok" : "err", + (intmax_t)g_pserver->rdb_save_time_last, + (intmax_t)(g_pserver->FRdbSaveInProgress() ? + time(NULL)-g_pserver->rdb_save_time_start : -1), + g_pserver->stat_rdb_cow_bytes, + g_pserver->aof_state != AOF_OFF, + g_pserver->child_type == CHILD_TYPE_AOF, + g_pserver->aof_rewrite_scheduled, + (intmax_t)g_pserver->aof_rewrite_time_last, + (intmax_t)((g_pserver->child_type != CHILD_TYPE_AOF) ? + -1 : time(NULL)-g_pserver->aof_rewrite_time_start), + (g_pserver->aof_lastbgrewrite_status == C_OK) ? "ok" : "err", + (g_pserver->aof_last_write_status == C_OK && + aof_bio_fsync_status == C_OK) ? "ok" : "err", + g_pserver->stat_aof_cow_bytes, + g_pserver->child_type == CHILD_TYPE_MODULE, + g_pserver->stat_module_cow_bytes); + + if (g_pserver->aof_enabled) { + info = sdscatprintf(info, + "aof_current_size:%lld\r\n" + "aof_base_size:%lld\r\n" + "aof_pending_rewrite:%d\r\n" + "aof_buffer_length:%zu\r\n" + "aof_rewrite_buffer_length:%lu\r\n" + "aof_pending_bio_fsync:%llu\r\n" + "aof_delayed_fsync:%lu\r\n", + (long long) g_pserver->aof_current_size, + (long long) g_pserver->aof_rewrite_base_size, + g_pserver->aof_rewrite_scheduled, + sdslen(g_pserver->aof_buf), + aofRewriteBufferSize(), + bioPendingJobsOfType(BIO_AOF_FSYNC), + g_pserver->aof_delayed_fsync); + } + + if (g_pserver->loading) { + double perc = 0; + time_t eta, elapsed; + off_t remaining_bytes = 1; + + if (g_pserver->loading_total_bytes) { + perc = ((double)g_pserver->loading_loaded_bytes / g_pserver->loading_total_bytes) * 100; + remaining_bytes = g_pserver->loading_total_bytes - g_pserver->loading_loaded_bytes; + } else if(g_pserver->loading_rdb_used_mem) { + perc = ((double)g_pserver->loading_loaded_bytes / g_pserver->loading_rdb_used_mem) * 100; + remaining_bytes = g_pserver->loading_rdb_used_mem - g_pserver->loading_loaded_bytes; + /* used mem is only a (bad) estimation of the rdb file size, avoid going over 100% */ + if (perc > 99.99) perc = 99.99; + if (remaining_bytes < 1) remaining_bytes = 1; + } + 
+ elapsed = time(NULL)-g_pserver->loading_start_time; + if (elapsed == 0) { + eta = 1; /* A fake 1 second figure if we don't have + enough info */ + } else { + eta = (elapsed*remaining_bytes)/(g_pserver->loading_loaded_bytes+1); + } + + info = sdscatprintf(info, + "loading_start_time:%jd\r\n" + "loading_total_bytes:%llu\r\n" + "loading_rdb_used_mem:%llu\r\n" + "loading_loaded_bytes:%llu\r\n" + "loading_loaded_perc:%.2f\r\n" + "loading_eta_seconds:%jd\r\n", + (intmax_t) g_pserver->loading_start_time, + (unsigned long long) g_pserver->loading_total_bytes, + (unsigned long long) g_pserver->loading_rdb_used_mem, + (unsigned long long) g_pserver->loading_loaded_bytes, + perc, + (intmax_t)eta + ); + } + if (g_pserver->m_pstorageFactory) + { + info = sdscat(info, g_pserver->m_pstorageFactory->getInfo().get()); + } + } + + /* Stats */ + if (allsections || defsections || !strcasecmp(section,"stats")) { + double avgLockContention = 0; + for (unsigned i = 0; i < redisServer::s_lockContentionSamples; ++i) + avgLockContention += g_pserver->rglockSamples[i]; + avgLockContention /= redisServer::s_lockContentionSamples; + + long long stat_total_reads_processed, stat_total_writes_processed; + long long stat_net_input_bytes, stat_net_output_bytes; + stat_total_reads_processed = g_pserver->stat_total_reads_processed.load(std::memory_order_relaxed); + stat_total_writes_processed = g_pserver->stat_total_writes_processed.load(std::memory_order_relaxed); + stat_net_input_bytes = g_pserver->stat_net_input_bytes.load(std::memory_order_relaxed); + stat_net_output_bytes = g_pserver->stat_net_output_bytes.load(std::memory_order_relaxed); + + long long stat_total_error_replies = 0; + for (int iel = 0; iel < cserver.cthreads; ++iel) + stat_total_error_replies += g_pserver->rgthreadvar[iel].stat_total_error_replies; + + if (sections++) info = sdscat(info,"\r\n"); + info = sdscatprintf(info, + "# Stats\r\n" + "total_connections_received:%lld\r\n" + "total_commands_processed:%lld\r\n" + 
"instantaneous_ops_per_sec:%lld\r\n" + "total_net_input_bytes:%lld\r\n" + "total_net_output_bytes:%lld\r\n" + "instantaneous_input_kbps:%.2f\r\n" + "instantaneous_output_kbps:%.2f\r\n" + "rejected_connections:%lld\r\n" + "sync_full:%lld\r\n" + "sync_partial_ok:%lld\r\n" + "sync_partial_err:%lld\r\n" + "expired_keys:%lld\r\n" + "expired_stale_perc:%.2f\r\n" + "expired_time_cap_reached_count:%lld\r\n" + "expire_cycle_cpu_milliseconds:%lld\r\n" + "evicted_keys:%lld\r\n" + "keyspace_hits:%lld\r\n" + "keyspace_misses:%lld\r\n" + "pubsub_channels:%ld\r\n" + "pubsub_patterns:%lu\r\n" + "latest_fork_usec:%lld\r\n" + "total_forks:%lld\r\n" + "migrate_cached_sockets:%ld\r\n" + "slave_expires_tracked_keys:%zu\r\n" + "active_defrag_hits:%lld\r\n" + "active_defrag_misses:%lld\r\n" + "active_defrag_key_hits:%lld\r\n" + "active_defrag_key_misses:%lld\r\n" + "tracking_total_keys:%lld\r\n" + "tracking_total_items:%lld\r\n" + "tracking_total_prefixes:%lld\r\n" + "unexpected_error_replies:%lld\r\n" + "total_error_replies:%lld\r\n" + "dump_payload_sanitizations:%lld\r\n" + "total_reads_processed:%lld\r\n" + "total_writes_processed:%lld\r\n" + "instantaneous_lock_contention:%d\r\n" + "avg_lock_contention:%f\r\n" + "storage_provider_read_hits:%lld\r\n" + "storage_provider_read_misses:%lld\r\n", + g_pserver->stat_numconnections, + g_pserver->stat_numcommands, + getInstantaneousMetric(STATS_METRIC_COMMAND), + stat_net_input_bytes, + stat_net_output_bytes, + (float)getInstantaneousMetric(STATS_METRIC_NET_INPUT)/1024, + (float)getInstantaneousMetric(STATS_METRIC_NET_OUTPUT)/1024, + g_pserver->stat_rejected_conn, + g_pserver->stat_sync_full, + g_pserver->stat_sync_partial_ok, + g_pserver->stat_sync_partial_err, + g_pserver->stat_expiredkeys, + g_pserver->stat_expired_stale_perc*100, + g_pserver->stat_expired_time_cap_reached_count, + g_pserver->stat_expire_cycle_time_used/1000, + g_pserver->stat_evictedkeys, + g_pserver->stat_keyspace_hits, + g_pserver->stat_keyspace_misses, + 
dictSize(g_pserver->pubsub_channels), + dictSize(g_pserver->pubsub_patterns), + g_pserver->stat_fork_time, + g_pserver->stat_total_forks, + dictSize(g_pserver->migrate_cached_sockets), + getSlaveKeyWithExpireCount(), + g_pserver->stat_active_defrag_hits, + g_pserver->stat_active_defrag_misses, + g_pserver->stat_active_defrag_key_hits, + g_pserver->stat_active_defrag_key_misses, + (unsigned long long) trackingGetTotalKeys(), + (unsigned long long) trackingGetTotalItems(), + (unsigned long long) trackingGetTotalPrefixes(), + g_pserver->stat_unexpected_error_replies, + stat_total_error_replies, + g_pserver->stat_dump_payload_sanitizations, + stat_total_reads_processed, + stat_total_writes_processed, + aeLockContention(), + avgLockContention, + g_pserver->stat_storage_provider_read_hits, + g_pserver->stat_storage_provider_read_misses); + } + + /* Replication */ + if (allsections || defsections || !strcasecmp(section,"replication")) { + if (sections++) info = sdscat(info,"\r\n"); + info = sdscatprintf(info, + "# Replication\r\n" + "role:%s\r\n", + listLength(g_pserver->masters) == 0 ? "master" + : g_pserver->fActiveReplica ? "active-replica" : "slave"); + if (listLength(g_pserver->masters)) { + int connectedMasters = 0; + info = sdscatprintf(info, "master_global_link_status:%s\r\n", + FBrokenLinkToMaster(&connectedMasters) ? 
"down" : "up"); + + info = sdscatprintf(info, "connected_masters:%d\r\n", connectedMasters); + + int cmasters = 0; + listIter li; + listNode *ln; + listRewind(g_pserver->masters, &li); + while ((ln = listNext(&li))) + { + long long slave_repl_offset = 1; + long long slave_read_repl_offset = 1; + redisMaster *mi = (redisMaster*)listNodeValue(ln); + + if (mi->master){ + slave_repl_offset = mi->master->reploff; + slave_read_repl_offset = mi->master->read_reploff; + } else if (mi->cached_master){ + slave_repl_offset = mi->cached_master->reploff; + slave_read_repl_offset = mi->cached_master->read_reploff; + } + + char master_prefix[128] = ""; + if (cmasters != 0) { + snprintf(master_prefix, sizeof(master_prefix), "_%d", cmasters); + } + + info = sdscatprintf(info, + "master%s_host:%s\r\n" + "master%s_port:%d\r\n" + "master%s_link_status:%s\r\n" + "master%s_last_io_seconds_ago:%d\r\n" + "master%s_sync_in_progress:%d\r\n" + "slave_read_repl_offset:%lld\r\n" + "slave_repl_offset:%lld\r\n" + ,master_prefix, mi->masterhost, + master_prefix, mi->masterport, + master_prefix, (mi->repl_state == REPL_STATE_CONNECTED) ? + "up" : "down", + master_prefix, mi->master ? 
+ ((int)(g_pserver->unixtime-mi->master->lastinteraction)) : -1, + master_prefix, mi->repl_state == REPL_STATE_TRANSFER, + slave_read_repl_offset, + slave_repl_offset + ); + + if (mi->repl_state == REPL_STATE_TRANSFER) { + double perc = 0; + if (mi->repl_transfer_size) { + perc = ((double)mi->repl_transfer_read / mi->repl_transfer_size) * 100; + } + info = sdscatprintf(info, + "master%s_sync_total_bytes:%lld\r\n" + "master%s_sync_read_bytes:%lld\r\n" + "master%s_sync_left_bytes:%lld\r\n" + "master%s_sync_perc:%.2f\r\n" + "master%s_sync_last_io_seconds_ago:%d\r\n", + master_prefix, (long long) mi->repl_transfer_size, + master_prefix, (long long) mi->repl_transfer_read, + master_prefix, (long long) (mi->repl_transfer_size - mi->repl_transfer_read), + master_prefix, perc, + master_prefix, (int)(g_pserver->unixtime-mi->repl_transfer_lastio) + ); + } + + if (mi->repl_state != REPL_STATE_CONNECTED) { + info = sdscatprintf(info, + "master%s_link_down_since_seconds:%jd\r\n", + master_prefix, mi->repl_down_since ? + (intmax_t)(g_pserver->unixtime-mi->repl_down_since) : -1); + } + ++cmasters; + } + info = sdscatprintf(info, + "slave_priority:%d\r\n" + "slave_read_only:%d\r\n" + "replica_announced:%d\r\n", + g_pserver->slave_priority, + g_pserver->repl_slave_ro, + g_pserver->replica_announced); + } + + info = sdscatprintf(info, + "connected_slaves:%lu\r\n", + listLength(g_pserver->slaves)); + + /* If min-slaves-to-write is active, write the number of slaves + * currently considered 'good'. 
*/ + if (g_pserver->repl_min_slaves_to_write && + g_pserver->repl_min_slaves_max_lag) { + info = sdscatprintf(info, + "min_slaves_good_slaves:%d\r\n", + g_pserver->repl_good_slaves_count); + } + + if (listLength(g_pserver->slaves)) { + int slaveid = 0; + listNode *ln; + listIter li; + + listRewind(g_pserver->slaves,&li); + while((ln = listNext(&li))) { + client *replica = (client*)listNodeValue(ln); + const char *state = NULL; + char ip[NET_IP_STR_LEN], *slaveip = replica->slave_addr; + int port; + long lag = 0; + + if (!slaveip) { + if (connPeerToString(replica->conn,ip,sizeof(ip),&port) == -1) + continue; + slaveip = ip; + } + switch(replica->replstate) { + case SLAVE_STATE_WAIT_BGSAVE_START: + case SLAVE_STATE_WAIT_BGSAVE_END: + state = "wait_bgsave"; + break; + case SLAVE_STATE_SEND_BULK: + state = "send_bulk"; + break; + case SLAVE_STATE_ONLINE: + state = "online"; + break; + } + if (state == NULL) continue; + if (replica->replstate == SLAVE_STATE_ONLINE) + lag = time(NULL) - replica->repl_ack_time; + + info = sdscatprintf(info, + "slave%d:ip=%s,port=%d,state=%s," + "offset=%lld,lag=%ld\r\n", + slaveid,slaveip,replica->slave_listening_port,state, + (replica->repl_ack_off), lag); + slaveid++; + } + } + info = sdscatprintf(info, + "master_failover_state:%s\r\n" + "master_replid:%s\r\n" + "master_replid2:%s\r\n" + "master_repl_offset:%lld\r\n" + "second_repl_offset:%lld\r\n" + "repl_backlog_active:%d\r\n" + "repl_backlog_size:%lld\r\n" + "repl_backlog_first_byte_offset:%lld\r\n" + "repl_backlog_histlen:%lld\r\n", + getFailoverStateString(), + g_pserver->replid, + g_pserver->replid2, + g_pserver->master_repl_offset, + g_pserver->second_replid_offset, + g_pserver->repl_backlog != NULL, + g_pserver->repl_backlog_size, + g_pserver->repl_backlog_off, + g_pserver->repl_backlog_histlen); + } + + /* CPU */ + if (allsections || defsections || !strcasecmp(section,"cpu")) { + if (sections++) info = sdscat(info,"\r\n"); + + struct rusage self_ru, c_ru; + 
getrusage(RUSAGE_SELF, &self_ru); + getrusage(RUSAGE_CHILDREN, &c_ru); + info = sdscatprintf(info, + "# CPU\r\n" + "used_cpu_sys:%ld.%06ld\r\n" + "used_cpu_user:%ld.%06ld\r\n" + "used_cpu_sys_children:%ld.%06ld\r\n" + "used_cpu_user_children:%ld.%06ld\r\n" + "server_threads:%d\r\n" + "long_lock_waits:%" PRIu64 "\r\n", + (long)self_ru.ru_stime.tv_sec, (long)self_ru.ru_stime.tv_usec, + (long)self_ru.ru_utime.tv_sec, (long)self_ru.ru_utime.tv_usec, + (long)c_ru.ru_stime.tv_sec, (long)c_ru.ru_stime.tv_usec, + (long)c_ru.ru_utime.tv_sec, (long)c_ru.ru_utime.tv_usec, + cserver.cthreads, + fastlock_getlongwaitcount()); +#ifdef RUSAGE_THREAD + struct rusage m_ru; + getrusage(RUSAGE_THREAD, &m_ru); + info = sdscatprintf(info, + "used_cpu_sys_main_thread:%ld.%06ld\r\n" + "used_cpu_user_main_thread:%ld.%06ld\r\n", + (long)m_ru.ru_stime.tv_sec, (long)m_ru.ru_stime.tv_usec, + (long)m_ru.ru_utime.tv_sec, (long)m_ru.ru_utime.tv_usec); +#endif /* RUSAGE_THREAD */ + } + + /* Modules */ + if (allsections || defsections || !strcasecmp(section,"modules")) { + if (sections++) info = sdscat(info,"\r\n"); + info = sdscatprintf(info,"# Modules\r\n"); + info = genModulesInfoString(info); + } + + /* Command statistics */ + if (allsections || !strcasecmp(section,"commandstats")) { + if (sections++) info = sdscat(info,"\r\n"); + info = sdscatprintf(info, "# Commandstats\r\n"); + + struct redisCommand *c; + dictEntry *de; + dictIterator *di; + di = dictGetSafeIterator(g_pserver->commands); + while((de = dictNext(di)) != NULL) { + char *tmpsafe; + c = (struct redisCommand *) dictGetVal(de); + if (!c->calls && !c->failed_calls && !c->rejected_calls) + continue; + info = sdscatprintf(info, + "cmdstat_%s:calls=%lld,usec=%lld,usec_per_call=%.2f" + ",rejected_calls=%lld,failed_calls=%lld\r\n", + getSafeInfoString(c->name, strlen(c->name), &tmpsafe), c->calls, c->microseconds, + (c->calls == 0) ? 
0 : ((float)c->microseconds/c->calls), + c->rejected_calls, c->failed_calls); + if (tmpsafe != NULL) zfree(tmpsafe); + } + dictReleaseIterator(di); + } + /* Error statistics */ + if (allsections || defsections || !strcasecmp(section,"errorstats")) { + if (sections++) info = sdscat(info,"\r\n"); + info = sdscat(info, "# Errorstats\r\n"); + raxIterator ri; + raxStart(&ri,g_pserver->errors); + raxSeek(&ri,"^",NULL,0); + struct redisError *e; + while(raxNext(&ri)) { + char *tmpsafe; + e = (struct redisError *) ri.data; + info = sdscatprintf(info, + "errorstat_%.*s:count=%lld\r\n", + (int)ri.key_len, getSafeInfoString((char *) ri.key, ri.key_len, &tmpsafe), e->count); + if (tmpsafe != NULL) zfree(tmpsafe); + } + raxStop(&ri); + } + + /* Cluster */ + if (allsections || defsections || !strcasecmp(section,"cluster")) { + if (sections++) info = sdscat(info,"\r\n"); + info = sdscatprintf(info, + "# Cluster\r\n" + "cluster_enabled:%d\r\n", + g_pserver->cluster_enabled); + } + + /* Key space */ + if (allsections || defsections || !strcasecmp(section,"keyspace")) { + if (sections++) info = sdscat(info,"\r\n"); + info = sdscatprintf(info, "# Keyspace\r\n"); + for (j = 0; j < cserver.dbnum; j++) { + long long keys, vkeys, cachedKeys; + + keys = g_pserver->db[j]->size(); + vkeys = g_pserver->db[j]->expireSize(); + cachedKeys = g_pserver->db[j]->size(true /* fCachedOnly */); + + // Adjust TTL by the current time + mstime_t mstime; + __atomic_load(&g_pserver->mstime, &mstime, __ATOMIC_ACQUIRE); + g_pserver->db[j]->avg_ttl -= (mstime - g_pserver->db[j]->last_expire_set); + if (g_pserver->db[j]->avg_ttl < 0) + g_pserver->db[j]->avg_ttl = 0; + g_pserver->db[j]->last_expire_set = mstime; + + if (keys || vkeys) { + info = sdscatprintf(info, + "db%d:keys=%lld,expires=%lld,avg_ttl=%lld,cached_keys=%lld\r\n", + j, keys, vkeys, static_cast(g_pserver->db[j]->avg_ttl), cachedKeys); + } + } + } + + if (allsections || defsections || !strcasecmp(section,"keydb")) { + // Compute the MVCC depth + 
int mvcc_depth = 0; + for (int idb = 0; idb < cserver.dbnum; ++idb) { + mvcc_depth = std::max(mvcc_depth, g_pserver->db[idb]->snapshot_depth()); + } + + if (sections++) info = sdscat(info,"\r\n"); + info = sdscatprintf(info, + "# KeyDB\r\n" + "mvcc_depth:%d\r\n", + mvcc_depth + ); + } + + /* Get info from modules. + * if user asked for "everything" or "modules", or a specific section + * that's not found yet. */ + if (everything || modules || + (!allsections && !defsections && sections==0)) { + info = modulesCollectInfo(info, + everything || modules ? NULL: section, + 0, /* not a crash report */ + sections); + } + return info; +} + +void infoCommand(client *c) { + const char *section = c->argc == 2 ? (const char*)ptrFromObj(c->argv[1]) : "default"; + + if (c->argc > 2) { + addReplyErrorObject(c,shared.syntaxerr); + return; + } + sds info = genRedisInfoString(section); + addReplyVerbatim(c,info,sdslen(info),"txt"); + sdsfree(info); +} + +void monitorCommand(client *c) { + serverAssert(GlobalLocksAcquired()); + + if (c->flags & CLIENT_DENY_BLOCKING) { + /** + * A client that has CLIENT_DENY_BLOCKING flag on + * expects a reply per command and so can't execute MONITOR. */ + addReplyError(c, "MONITOR isn't allowed for DENY BLOCKING client"); + return; + } + + /* ignore MONITOR if already slave or in monitor mode */ + if (c->flags & CLIENT_SLAVE) return; + + c->flags |= (CLIENT_SLAVE|CLIENT_MONITOR); + listAddNodeTail(g_pserver->monitors,c); + addReply(c,shared.ok); +} + +/* =================================== Main! 
================================ */ + +int checkIgnoreWarning(const char *warning) { + int argc, j; + sds *argv = sdssplitargs(g_pserver->ignore_warnings, &argc); + if (argv == NULL) + return 0; + + for (j = 0; j < argc; j++) { + char *flag = argv[j]; + if (!strcasecmp(flag, warning)) + break; + } + sdsfreesplitres(argv,argc); + return j < argc; +} + +#ifdef __linux__ +int linuxOvercommitMemoryValue(void) { + FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r"); + char buf[64]; + + if (!fp) return -1; + if (fgets(buf,64,fp) == NULL) { + fclose(fp); + return -1; + } + fclose(fp); + + return atoi(buf); +} + +void linuxMemoryWarnings(void) { + if (linuxOvercommitMemoryValue() == 0) { + serverLog(LL_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect."); + } + if (THPIsEnabled() && THPDisable()) { + serverLog(LL_WARNING,"WARNING you have Transparent Huge Pages (THP) support enabled in your kernel. This will create latency and memory usage issues with KeyDB. To fix this issue run the command 'echo madvise > /sys/kernel/mm/transparent_hugepage/enabled' as root, and add it to your /etc/rc.local in order to retain the setting after a reboot. KeyDB must be restarted after THP is disabled (set to 'madvise' or 'never')."); + } +} + +#ifdef __arm64__ + +/* Get size in kilobytes of the Shared_Dirty pages of the calling process for the + * memory map corresponding to the provided address, or -1 on error. 
*/ +static int smapsGetSharedDirty(unsigned long addr) { + int ret, in_mapping = 0, val = -1; + unsigned long from, to; + char buf[64]; + FILE *f; + + f = fopen("/proc/self/smaps", "r"); + serverAssert(f); + + while (1) { + if (!fgets(buf, sizeof(buf), f)) + break; + + ret = sscanf(buf, "%lx-%lx", &from, &to); + if (ret == 2) + in_mapping = from <= addr && addr < to; + + if (in_mapping && !memcmp(buf, "Shared_Dirty:", 13)) { + ret = sscanf(buf, "%*s %d", &val); + serverAssert(ret == 1); + break; + } + } + + fclose(f); + return val; +} + +/* Older arm64 Linux kernels have a bug that could lead to data corruption + * during background save in certain scenarios. This function checks if the + * kernel is affected. + * The bug was fixed in commit ff1712f953e27f0b0718762ec17d0adb15c9fd0b + * titled: "arm64: pgtable: Ensure dirty bit is preserved across pte_wrprotect()" + * Return 1 if the kernel seems to be affected, and 0 otherwise. */ +int linuxMadvFreeForkBugCheck(void) { + int ret, pipefd[2]; + pid_t pid; + char *p, *q, bug_found = 0; + const long map_size = 3 * 4096; + + /* Create a memory map that's in our full control (not one used by the allocator). */ + p = (char*)mmap(NULL, map_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + serverAssert(p != MAP_FAILED); + + q = p + 4096; + + /* Split the memory map in 3 pages by setting their protection as RO|RW|RO to prevent + * Linux from merging this memory map with adjacent VMAs. */ + ret = mprotect(q, 4096, PROT_READ | PROT_WRITE); + serverAssert(!ret); + + /* Write to the page once to make it resident */ + *(volatile char*)q = 0; + + /* Tell the kernel that this page is free to be reclaimed. */ +#ifndef MADV_FREE +#define MADV_FREE 8 +#endif + ret = madvise(q, 4096, MADV_FREE); + serverAssert(!ret); + + /* Write to the page after being marked for freeing, this is supposed to take + * ownership of that page again. */ + *(volatile char*)q = 0; + + /* Create a pipe for the child to return the info to the parent. 
*/ + ret = pipe(pipefd); + serverAssert(!ret); + + /* Fork the process. */ + pid = fork(); + serverAssert(pid >= 0); + if (!pid) { + /* Child: check if the page is marked as dirty, expecing 4 (kB). + * A value of 0 means the kernel is affected by the bug. */ + if (!smapsGetSharedDirty((unsigned long)q)) + bug_found = 1; + + ret = write(pipefd[1], &bug_found, 1); + serverAssert(ret == 1); + + exit(0); + } else { + /* Read the result from the child. */ + ret = read(pipefd[0], &bug_found, 1); + serverAssert(ret == 1); + + /* Reap the child pid. */ + serverAssert(waitpid(pid, NULL, 0) == pid); + } + + /* Cleanup */ + ret = close(pipefd[0]); + serverAssert(!ret); + ret = close(pipefd[1]); + serverAssert(!ret); + ret = munmap(p, map_size); + serverAssert(!ret); + + return bug_found; +} +#endif /* __arm64__ */ +#endif /* __linux__ */ + +void createPidFile(void) { + /* If pidfile requested, but no pidfile defined, use + * default pidfile path */ + if (!cserver.pidfile) cserver.pidfile = zstrdup(CONFIG_DEFAULT_PID_FILE); + + /* Try to write the pid file in a best-effort way. */ + FILE *fp = fopen(cserver.pidfile,"w"); + if (fp) { + fprintf(fp,"%d\n",(int)getpid()); + fclose(fp); + } +} + +void daemonize(void) { + int fd; + + if (fork() != 0) exit(0); /* parent exits */ + setsid(); /* create a new session */ + + /* Every output goes to /dev/null. If Redis is daemonized but + * the 'logfile' is set to 'stdout' in the configuration file + * it will not log at all. */ + if ((fd = open("/dev/null", O_RDWR, 0)) != -1) { + dup2(fd, STDIN_FILENO); + dup2(fd, STDOUT_FILENO); + dup2(fd, STDERR_FILENO); + if (fd > STDERR_FILENO) close(fd); + } +} + +void version(void) { + printf("Futriix server v=%s sha=%s:%d malloc=%s bits=%d build=%llx\n", + KEYDB_REAL_VERSION, + redisGitSHA1(), + atoi(redisGitDirty()) > 0, + ZMALLOC_LIB, + sizeof(long) == 4 ? 
32 : 64, + (unsigned long long) redisBuildId()); + exit(0); +} + +void usage(void) { + fprintf(stderr,"Usage: ./futriix-server [/path/to/futriix.conf] [options] [-]\n"); + fprintf(stderr," ./futriix-server - (read config from stdin)\n"); + fprintf(stderr," ./futriix-server -v or --version\n"); + fprintf(stderr," ./futriix-server -h or --help\n"); + fprintf(stderr," ./futriix-server --test-memory \n\n"); + fprintf(stderr,"Examples:\n"); + fprintf(stderr," ./futriix-server (run the server with default conf)\n"); + fprintf(stderr," ./futriix-server /etc/keydb/9880.conf\n"); + fprintf(stderr," ./futriix-server --port 7777\n"); + fprintf(stderr," ./futriix-server --port 7777 --replicaof 127.0.0.1 8888\n"); + fprintf(stderr," ./futriix-server /etc/myfutriix.conf --loglevel verbose -\n"); + fprintf(stderr," ./futriix-server /etc/myfutriix.conf --loglevel verbose\n\n"); + exit(1); +} + +void redisAsciiArt(void) { +#include "asciilogo.h" + size_t bufsize = 1024*16; + char *buf = (char*)zmalloc(bufsize, MALLOC_LOCAL); + const char *mode; + + if (g_pserver->cluster_enabled) mode = "cluster"; + else if (g_pserver->sentinel_mode) mode = "sentinel"; + else mode = "standalone"; + + /* Show the ASCII logo if: log file is stdout AND stdout is a + * tty AND syslog logging is disabled. Also show logo if the user + * forced us to do so via futriix.conf. */ + int show_logo = ((!g_pserver->syslog_enabled && + g_pserver->logfile[0] == '\0' && + isatty(fileno(stdout))) || + g_pserver->always_show_logo); + + if (!show_logo) { + serverLog(LL_NOTICE, + "Running mode=%s, port=%d.", + mode, g_pserver->port ? g_pserver->port : g_pserver->tls_port + ); + } else { + sds motd = fetchMOTD(true, cserver.enable_motd); + snprintf(buf,bufsize,ascii_logo, + KEYDB_REAL_VERSION, + redisGitSHA1(), + strtol(redisGitDirty(),NULL,10) > 0, + (sizeof(long) == 8) ? "64" : "32", + mode, g_pserver->port ? g_pserver->port : g_pserver->tls_port, + (long) getpid(), + motd ? 
motd : "" + ); + if (motd) + freeMOTD(motd); + serverLogRaw(LL_NOTICE|LL_RAW,buf); + } + + zfree(buf); +} + +int changeBindAddr(sds *addrlist, int addrlist_len, bool fFirstCall) { + int i; + int result = C_OK; + + char *prev_bindaddr[CONFIG_BINDADDR_MAX]; + int prev_bindaddr_count; + + /* Close old TCP and TLS servers */ + closeSocketListeners(&serverTL->ipfd); + closeSocketListeners(&serverTL->tlsfd); + + /* Keep previous settings */ + prev_bindaddr_count = g_pserver->bindaddr_count; + memcpy(prev_bindaddr, g_pserver->bindaddr, sizeof(g_pserver->bindaddr)); + + /* Copy new settings */ + memset(g_pserver->bindaddr, 0, sizeof(g_pserver->bindaddr)); + for (i = 0; i < addrlist_len; i++) { + g_pserver->bindaddr[i] = zstrdup(addrlist[i]); + } + g_pserver->bindaddr_count = addrlist_len; + + /* Bind to the new port */ + if ((g_pserver->port != 0 && listenToPort(g_pserver->port, &serverTL->ipfd, (cserver.cthreads > 1), fFirstCall) != C_OK) || + (g_pserver->tls_port != 0 && listenToPort(g_pserver->tls_port, &serverTL->tlsfd, (cserver.cthreads > 1), fFirstCall) != C_OK)) { + serverLog(LL_WARNING, "Failed to bind, trying to restore old listening sockets."); + + /* Restore old bind addresses */ + for (i = 0; i < addrlist_len; i++) { + zfree(g_pserver->bindaddr[i]); + } + memcpy(g_pserver->bindaddr, prev_bindaddr, sizeof(g_pserver->bindaddr)); + g_pserver->bindaddr_count = prev_bindaddr_count; + + /* Re-Listen TCP and TLS */ + serverTL->ipfd.count = 0; + if (g_pserver->port != 0 && listenToPort(g_pserver->port, &serverTL->ipfd, (cserver.cthreads > 1), false) != C_OK) { + serverPanic("Failed to restore old listening sockets."); + } + + serverTL->tlsfd.count = 0; + if (g_pserver->tls_port != 0 && listenToPort(g_pserver->tls_port, &serverTL->tlsfd, (cserver.cthreads > 1), false) != C_OK) { + serverPanic("Failed to restore old listening sockets."); + } + + result = C_ERR; + } else { + /* Free old bind addresses */ + for (i = 0; i < prev_bindaddr_count; i++) { + 
zfree(prev_bindaddr[i]); + } + } + + /* Create TCP and TLS event handlers */ + if (createSocketAcceptHandler(&serverTL->ipfd, acceptTcpHandler) != C_OK) { + serverPanic("Unrecoverable error creating TCP socket accept handler."); + } + if (createSocketAcceptHandler(&serverTL->tlsfd, acceptTLSHandler) != C_OK) { + serverPanic("Unrecoverable error creating TLS socket accept handler."); + } + + if (cserver.set_proc_title && fFirstCall) redisSetProcTitle(NULL); + + return result; +} + +int changeListenPort(int port, socketFds *sfd, aeFileProc *accept_handler, bool fFirstCall) { + socketFds new_sfd = {{0}}; + + /* Just close the server if port disabled */ + if (port == 0) { + closeSocketListeners(sfd); + if (cserver.set_proc_title && fFirstCall) redisSetProcTitle(NULL); + return C_OK; + } + + /* Bind to the new port */ + if (listenToPort(port, &new_sfd, (cserver.cthreads > 1), fFirstCall) != C_OK) { + return C_ERR; + } + + /* Create event handlers */ + if (createSocketAcceptHandler(&new_sfd, accept_handler) != C_OK) { + closeSocketListeners(&new_sfd); + return C_ERR; + } + + /* Close old servers */ + closeSocketListeners(sfd); + + /* Copy new descriptors */ + sfd->count = new_sfd.count; + memcpy(sfd->fd, new_sfd.fd, sizeof(new_sfd.fd)); + + if (cserver.set_proc_title && fFirstCall) redisSetProcTitle(NULL); + + return C_OK; +} + +static void sigShutdownHandler(int sig) { + const char *msg; + + switch (sig) { + case SIGINT: + msg = "Received SIGINT scheduling shutdown..."; + break; + case SIGTERM: + msg = "Received SIGTERM scheduling shutdown..."; + break; + default: + msg = "Received shutdown signal, scheduling shutdown..."; + }; + + /* SIGINT is often delivered via Ctrl+C in an interactive session. + * If we receive the signal the second time, we interpret this as + * the user really wanting to quit ASAP without waiting to persist + * on disk. 
*/ + if ((g_pserver->shutdown_asap || g_pserver->soft_shutdown) && sig == SIGINT) { + serverLogFromHandler(LL_WARNING, "You insist... exiting now."); + rdbRemoveTempFile(g_pserver->rdbThreadVars.tmpfileNum, 1); + g_pserver->garbageCollector.shutdown(); + _Exit(1); /* Exit with an error since this was not a clean shutdown. */ + } else if (g_pserver->loading) { + serverLogFromHandler(LL_WARNING, "Received shutdown signal during loading, exiting now."); + _Exit(0); // calling dtors is undesirable, exit immediately + } + + serverLogFromHandler(LL_WARNING, msg); + if (g_pserver->config_soft_shutdown) + g_pserver->soft_shutdown = true; + else + g_pserver->shutdown_asap = 1; +} + +void setupSignalHandlers(void) { + struct sigaction act; + + /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction is used. + * Otherwise, sa_handler is used. */ + sigemptyset(&act.sa_mask); + act.sa_flags = 0; + act.sa_handler = sigShutdownHandler; + sigaction(SIGTERM, &act, NULL); + sigaction(SIGINT, &act, NULL); + + sigemptyset(&act.sa_mask); + act.sa_flags = SA_NODEFER | SA_RESETHAND | SA_SIGINFO; + act.sa_sigaction = sigsegvHandler; + if(g_pserver->crashlog_enabled) { + sigaction(SIGSEGV, &act, NULL); + sigaction(SIGBUS, &act, NULL); + sigaction(SIGFPE, &act, NULL); + sigaction(SIGILL, &act, NULL); + sigaction(SIGABRT, &act, NULL); + } + return; +} + +void removeSignalHandlers(void) { + struct sigaction act; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_NODEFER | SA_RESETHAND; + act.sa_handler = SIG_DFL; + sigaction(SIGSEGV, &act, NULL); + sigaction(SIGBUS, &act, NULL); + sigaction(SIGFPE, &act, NULL); + sigaction(SIGILL, &act, NULL); + sigaction(SIGABRT, &act, NULL); +} + +/* This is the signal handler for children process. It is currently useful + * in order to track the SIGUSR1, that we send to a child in order to terminate + * it in a clean way, without the parent detecting an error and stop + * accepting writes because of a write error condition. 
*/ +static void sigKillChildHandler(int sig) { + UNUSED(sig); + int level = g_pserver->in_fork_child == CHILD_TYPE_MODULE? LL_VERBOSE: LL_WARNING; + serverLogFromHandler(level, "Received SIGUSR1 in child, exiting now."); + exitFromChild(SERVER_CHILD_NOERROR_RETVAL); +} + +void setupChildSignalHandlers(void) { + struct sigaction act; + + /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction is used. + * Otherwise, sa_handler is used. */ + sigemptyset(&act.sa_mask); + act.sa_flags = 0; + act.sa_handler = sigKillChildHandler; + sigaction(SIGUSR1, &act, NULL); + return; +} + +/* After fork, the child process will inherit the resources + * of the parent process, e.g. fd(socket or flock) etc. + * should close the resources not used by the child process, so that if the + * parent restarts it can bind/lock despite the child possibly still running. */ +void closeChildUnusedResourceAfterFork() { + closeListeningSockets(0); + if (g_pserver->cluster_enabled && g_pserver->cluster_config_file_lock_fd != -1) + close(g_pserver->cluster_config_file_lock_fd); /* don't care if this fails */ + + for (int iel = 0; iel < cserver.cthreads; ++iel) { + aeClosePipesForForkChild(g_pserver->rgthreadvar[iel].el); + } + aeClosePipesForForkChild(g_pserver->modulethreadvar.el); + + /* Clear cserver.pidfile, this is the parent pidfile which should not + * be touched (or deleted) by the child (on exit / crash) */ + zfree(cserver.pidfile); + cserver.pidfile = NULL; +} + +void executeWithoutGlobalLock(std::function func) { + serverAssert(GlobalLocksAcquired()); + + std::vector vecclients; + listIter li; + listNode *ln; + listRewind(g_pserver->clients, &li); + + // All client locks must be acquired *after* the global lock is reacquired to prevent deadlocks + // so unlock here, and save them for reacquisition later + while ((ln = listNext(&li)) != nullptr) + { + client *c = (client*)listNodeValue(ln); + if (c->lock.fOwnLock()) { + serverAssert(c->flags & CLIENT_PROTECTED || c->flags & 
CLIENT_EXECUTING_COMMAND); // If the client is not protected we have no gurantee they won't be free'd in the event loop + c->lock.unlock(); + vecclients.push_back(c); + } + } + + /* Since we're about to release our lock we need to flush the repl backlog queue */ + bool fReplBacklog = g_pserver->repl_batch_offStart >= 0; + if (fReplBacklog) { + flushReplBacklogToClients(); + g_pserver->repl_batch_idxStart = -1; + g_pserver->repl_batch_offStart = -1; + } + + aeReleaseLock(); + serverAssert(!GlobalLocksAcquired()); + try { + func(); + } + catch (...) { + // Caller expects us to be locked so fix and rethrow + AeLocker locker; + locker.arm(nullptr); + locker.release(); + for (client *c : vecclients) + c->lock.lock(); + throw; + } + + AeLocker locker; + locker.arm(nullptr); + locker.release(); + + // Restore it so the calling code is not confused + if (fReplBacklog) { + g_pserver->repl_batch_idxStart = g_pserver->repl_backlog_idx; + g_pserver->repl_batch_offStart = g_pserver->master_repl_offset; + } + + for (client *c : vecclients) + c->lock.lock(); +} + +/* purpose is one of CHILD_TYPE_ types */ +int redisFork(int purpose) { + int childpid; + long long start = ustime(); + + if (isMutuallyExclusiveChildType(purpose)) { + if (hasActiveChildProcess()) + return -1; + + openChildInfoPipe(); + } + long long startWriteLock = ustime(); + aeAcquireForkLock(); + latencyAddSampleIfNeeded("fork-lock",(ustime()-startWriteLock)/1000); + if ((childpid = fork()) == 0) { + /* Child */ + aeForkLockInChild(); + aeReleaseForkLock(); + g_pserver->in_fork_child = purpose; + setOOMScoreAdj(CONFIG_OOM_BGCHILD); + setupChildSignalHandlers(); + closeChildUnusedResourceAfterFork(); + } else { + /* Parent */ + aeReleaseForkLock(); + g_pserver->stat_total_forks++; + g_pserver->stat_fork_time = ustime()-start; + g_pserver->stat_fork_rate = (double) zmalloc_used_memory() * 1000000 / g_pserver->stat_fork_time / (1024*1024*1024); /* GB per second. 
*/ + latencyAddSampleIfNeeded("fork",g_pserver->stat_fork_time/1000); + if (childpid == -1) { + if (isMutuallyExclusiveChildType(purpose)) closeChildInfoPipe(); + return -1; + } + + /* The child_pid and child_type are only for mutual exclusive children. + * other child types should handle and store their pid's in dedicated variables. + * + * Today, we allows CHILD_TYPE_LDB to run in parallel with the other fork types: + * - it isn't used for production, so it will not make the server be less efficient + * - used for debugging, and we don't want to block it from running while other + * forks are running (like RDB and AOF) */ + if (isMutuallyExclusiveChildType(purpose)) { + g_pserver->child_pid = childpid; + g_pserver->child_type = purpose; + g_pserver->stat_current_cow_bytes = 0; + g_pserver->stat_current_cow_updated = 0; + g_pserver->stat_current_save_keys_processed = 0; + g_pserver->stat_module_progress = 0; + g_pserver->stat_current_save_keys_total = dbTotalServerKeyCount(); + } + + updateDictResizePolicy(); + moduleFireServerEvent(REDISMODULE_EVENT_FORK_CHILD, + REDISMODULE_SUBEVENT_FORK_CHILD_BORN, + NULL); + } + return childpid; +} + +void sendChildCowInfo(childInfoType info_type, const char *pname) { + sendChildInfoGeneric(info_type, 0, -1, pname); +} + +void sendChildInfo(childInfoType info_type, size_t keys, const char *pname) { + sendChildInfoGeneric(info_type, keys, -1, pname); +} + +extern "C" void memtest(size_t megabytes, int passes); + +/* Returns 1 if there is --sentinel among the arguments or if + * argv[0] contains "keydb-sentinel". */ +int checkForSentinelMode(int argc, char **argv) { + int j; + + if (strstr(argv[0],"keydb-sentinel") != NULL) return 1; + for (j = 1; j < argc; j++) + if (!strcmp(argv[j],"--sentinel")) return 1; + return 0; +} + +/* Function called at startup to load RDB or AOF file in memory. 
*/ +void loadDataFromDisk(void) { + long long start = ustime(); + + if (g_pserver->m_pstorageFactory) + { + for (int idb = 0; idb < cserver.dbnum; ++idb) + { + if (g_pserver->db[idb]->size() > 0) + { + serverLog(LL_NOTICE, "Not loading the RDB because a storage provider is set and the database is not empty"); + return; + } + } + serverLog(LL_NOTICE, "Loading the RDB even though we have a storage provider because the database is empty"); + } + + serverTL->gcEpoch = g_pserver->garbageCollector.startEpoch(); + if (g_pserver->aof_state == AOF_ON) { + if (loadAppendOnlyFile(g_pserver->aof_filename) == C_OK) + serverLog(LL_NOTICE,"DB loaded from append only file: %.3f seconds",(float)(ustime()-start)/1000000); + } else if (g_pserver->rdb_filename != NULL || g_pserver->rdb_s3bucketpath != NULL) { + rdbSaveInfo rsi; + rsi.fForceSetKey = false; + errno = 0; /* Prevent a stale value from affecting error checking */ + if (rdbLoad(&rsi,RDBFLAGS_NONE) == C_OK) { + serverLog(LL_NOTICE,"DB loaded from disk: %.3f seconds", + (float)(ustime()-start)/1000000); + + /* Restore the replication ID / offset from the RDB file. */ + if ((listLength(g_pserver->masters) || + (g_pserver->cluster_enabled && + nodeIsSlave(g_pserver->cluster->myself))) && + rsi.repl_id_is_set && + rsi.repl_offset != -1 && + /* Note that older implementations may save a repl_stream_db + * of -1 inside the RDB file in a wrong way, see more + * information in function rdbPopulateSaveInfo. 
*/ + rsi.repl_stream_db != -1) + { + memcpy(g_pserver->replid,rsi.repl_id,sizeof(g_pserver->replid)); + g_pserver->master_repl_offset = rsi.repl_offset; + if (g_pserver->repl_batch_offStart >= 0) + g_pserver->repl_batch_offStart = g_pserver->master_repl_offset; + } + updateActiveReplicaMastersFromRsi(&rsi); + if (!g_pserver->fActiveReplica && listLength(g_pserver->masters)) { + redisMaster *mi = (redisMaster*)listNodeValue(listFirst(g_pserver->masters)); + /* If we are a replica, create a cached master from this + * information, in order to allow partial resynchronizations + * with masters. */ + replicationCacheMasterUsingMyself(mi); + selectDb(mi->cached_master,rsi.repl_stream_db); + } + } else if (errno != ENOENT) { + serverLog(LL_WARNING,"Fatal error loading the DB: %s. Exiting.",strerror(errno)); + exit(1); + } + } + g_pserver->garbageCollector.endEpoch(serverTL->gcEpoch); + serverTL->gcEpoch.reset(); +} + +void redisOutOfMemoryHandler(size_t allocation_size) { + serverLog(LL_WARNING,"Out Of Memory allocating %zu bytes!", + allocation_size); + serverPanic("futriix aborting for OUT OF MEMORY. Allocating %zu bytes!", + allocation_size); +} + +/* Callback for sdstemplate on proc-title-template. See redis.conf for + * supported variables. + */ +static sds redisProcTitleGetVariable(const sds varname, void *arg) +{ + if (!strcmp(varname, "title")) { + return sdsnew((const char*)arg); + } else if (!strcmp(varname, "listen-addr")) { + if (g_pserver->port || g_pserver->tls_port) + return sdscatprintf(sdsempty(), "%s:%u", + g_pserver->bindaddr_count ? g_pserver->bindaddr[0] : "*", + g_pserver->port ? 
g_pserver->port : g_pserver->tls_port); + else + return sdscatprintf(sdsempty(), "unixsocket:%s", g_pserver->unixsocket); + } else if (!strcmp(varname, "server-mode")) { + if (g_pserver->cluster_enabled) return sdsnew("[cluster]"); + else if (g_pserver->sentinel_mode) return sdsnew("[sentinel]"); + else return sdsempty(); + } else if (!strcmp(varname, "config-file")) { + return sdsnew(cserver.configfile ? cserver.configfile : "-"); + } else if (!strcmp(varname, "port")) { + return sdscatprintf(sdsempty(), "%u", g_pserver->port); + } else if (!strcmp(varname, "tls-port")) { + return sdscatprintf(sdsempty(), "%u", g_pserver->tls_port); + } else if (!strcmp(varname, "unixsocket")) { + return sdsnew(g_pserver->unixsocket); + } else + return NULL; /* Unknown variable name */ +} + +/* Expand the specified proc-title-template string and return a newly + * allocated sds, or NULL. */ +static sds expandProcTitleTemplate(const char *_template, const char *title) { + sds res = sdstemplate(_template, redisProcTitleGetVariable, (void *) title); + if (!res) + return NULL; + return sdstrim(res, " "); +} +/* Validate the specified template, returns 1 if valid or 0 otherwise. */ +int validateProcTitleTemplate(const char *_template) { + int ok = 1; + sds res = expandProcTitleTemplate(_template, ""); + if (!res) + return 0; + if (sdslen(res) == 0) ok = 0; + sdsfree(res); + return ok; +} + +int redisSetProcTitle(const char *title) { +#ifdef USE_SETPROCTITLE + if (!title) title = cserver.exec_argv[0]; + sds proc_title = expandProcTitleTemplate(cserver.proc_title_template, title); + if (!proc_title) return C_ERR; /* Not likely, proc_title_template is validated */ + + setproctitle("%s", proc_title); + sdsfree(proc_title); +#else + UNUSED(title); +#endif + + return C_OK; +} + +void redisSetCpuAffinity(const char *cpulist) { +#ifdef USE_SETCPUAFFINITY + setcpuaffinity(cpulist); +#else + UNUSED(cpulist); +#endif +} + +/* Send a notify message to systemd. 
Returns sd_notify return code which is + * a positive number on success. */ +int redisCommunicateSystemd(const char *sd_notify_msg) { +#ifdef HAVE_LIBSYSTEMD + int ret = sd_notify(0, sd_notify_msg); + + if (ret == 0) + serverLog(LL_WARNING, "systemd supervision error: NOTIFY_SOCKET not found!"); + else if (ret < 0) + serverLog(LL_WARNING, "systemd supervision error: sd_notify: %d", ret); + return ret; +#else + UNUSED(sd_notify_msg); + return 0; +#endif +} + +/* Attempt to set up upstart supervision. Returns 1 if successful. */ +static int redisSupervisedUpstart(void) { + const char *upstart_job = getenv("UPSTART_JOB"); + + if (!upstart_job) { + serverLog(LL_WARNING, + "upstart supervision requested, but UPSTART_JOB not found!"); + return 0; + } + + serverLog(LL_NOTICE, "supervised by upstart, will stop to signal readiness."); + raise(SIGSTOP); + unsetenv("UPSTART_JOB"); + return 1; +} + +/* Attempt to set up systemd supervision. Returns 1 if successful. */ +static int redisSupervisedSystemd(void) { +#ifndef HAVE_LIBSYSTEMD + serverLog(LL_WARNING, + "systemd supervision requested or auto-detected, but Redis is compiled without libsystemd support!"); + return 0; +#else + if (redisCommunicateSystemd("STATUS=Futriix is loading...\n") <= 0) + return 0; + serverLog(LL_NOTICE, + "Supervised by systemd. 
Please make sure you set appropriate values for TimeoutStartSec and TimeoutStopSec in your service unit."); + return 1; +#endif +} + +int redisIsSupervised(int mode) { + int ret = 0; + + if (mode == SUPERVISED_AUTODETECT) { + if (getenv("UPSTART_JOB")) { + serverLog(LL_VERBOSE, "Upstart supervision detected."); + mode = SUPERVISED_UPSTART; + } else if (getenv("NOTIFY_SOCKET")) { + serverLog(LL_VERBOSE, "Systemd supervision detected."); + mode = SUPERVISED_SYSTEMD; + } + } else if (mode == SUPERVISED_UPSTART) { + return redisSupervisedUpstart(); + } else if (mode == SUPERVISED_SYSTEMD) { + serverLog(LL_WARNING, + "WARNING supervised by systemd - you MUST set appropriate values for TimeoutStartSec and TimeoutStopSec in your service unit."); + return redisCommunicateSystemd("STATUS=futriix is loading...\n"); + } + + switch (mode) { + case SUPERVISED_UPSTART: + ret = redisSupervisedUpstart(); + break; + case SUPERVISED_SYSTEMD: + ret = redisSupervisedSystemd(); + break; + default: + break; + } + + if (ret) + cserver.supervised_mode = mode; + + return ret; +} + +uint64_t getMvccTstamp() +{ + uint64_t rval; + __atomic_load(&g_pserver->mvcc_tstamp, &rval, __ATOMIC_ACQUIRE); + return rval; +} + +void incrementMvccTstamp() +{ + uint64_t msPrev; + __atomic_load(&g_pserver->mvcc_tstamp, &msPrev, __ATOMIC_ACQUIRE); + msPrev >>= MVCC_MS_SHIFT; // convert to milliseconds + + long long mst; + __atomic_load(&g_pserver->mstime, &mst, __ATOMIC_ACQUIRE); + if (msPrev >= (uint64_t)mst) // we can be greater if the count overflows + { + __atomic_fetch_add(&g_pserver->mvcc_tstamp, 1, __ATOMIC_RELEASE); + } + else + { + uint64_t val = ((uint64_t)mst) << MVCC_MS_SHIFT; + __atomic_store(&g_pserver->mvcc_tstamp, &val, __ATOMIC_RELEASE); + } +} + +void OnTerminate() +{ + /* Any uncaught exception will call std::terminate(). + We want this handled like a segfault (printing the stack trace etc). + The easiest way to achieve that is to acutally segfault, so we assert + here. 
+ */ + auto exception = std::current_exception(); + if (exception != nullptr) + { + try + { + std::rethrow_exception(exception); + } + catch (const char *szErr) + { + serverLog(LL_WARNING, "Crashing on uncaught exception: %s", szErr); + } + catch (std::string str) + { + serverLog(LL_WARNING, "Crashing on uncaught exception: %s", str.c_str()); + } + catch (...) + { + // NOP + } + } + + serverPanic("std::teminate() called"); +} + +void wakeTimeThread() { + updateCachedTime(); + aeThreadOffline(); + std::unique_lock lock(time_thread_lock); + aeThreadOnline(); + if (sleeping_threads >= cserver.cthreads) + time_thread_cv.notify_one(); + sleeping_threads--; + serverAssert(sleeping_threads >= 0); +} + +void *timeThreadMain(void*) { + timespec delay; + delay.tv_sec = 0; + delay.tv_nsec = 100; + int cycle_count = 0; + aeThreadOnline(); + while (true) { + { + aeThreadOffline(); + std::unique_lock lock(time_thread_lock); + aeThreadOnline(); + if (sleeping_threads >= cserver.cthreads) { + aeThreadOffline(); + time_thread_cv.wait(lock); + aeThreadOnline(); + cycle_count = 0; + } + } + updateCachedTime(); + if (cycle_count == MAX_CYCLES_TO_HOLD_FORK_LOCK) { + aeThreadOffline(); + aeThreadOnline(); + cycle_count = 0; + } +#if defined(__APPLE__) + nanosleep(&delay, nullptr); +#else + clock_nanosleep(CLOCK_MONOTONIC, 0, &delay, NULL); +#endif + cycle_count++; + } + aeThreadOffline(); +} + +void *workerThreadMain(void *parg) +{ + int iel = (int)((int64_t)parg); + serverLog(LL_NOTICE, "Thread %d alive.", iel); + serverTL = g_pserver->rgthreadvar+iel; // set the TLS threadsafe global + tlsInitThread(); + + if (iel != IDX_EVENT_LOOP_MAIN) + { + aeThreadOnline(); + aeAcquireLock(); + initNetworkingThread(iel, cserver.cthreads > 1); + aeReleaseLock(); + aeThreadOffline(); + } + + moduleAcquireGIL(true); // Normally afterSleep acquires this, but that won't be called on the first run + aeThreadOnline(); + aeEventLoop *el = g_pserver->rgthreadvar[iel].el; + try + { + aeMain(el); + } + catch 
(ShutdownException) + { + } + aeThreadOffline(); + moduleReleaseGIL(true); + serverAssert(!GlobalLocksAcquired()); + aeDeleteEventLoop(el); + + tlsCleanupThread(); + return NULL; +} + +static void validateConfiguration() +{ + updateMasterAuth(); + + if (cserver.cthreads > (int)std::thread::hardware_concurrency()) { + serverLog(LL_WARNING, "WARNING: server-threads is greater than this machine's core count. Truncating to %u threads", std::thread::hardware_concurrency()); + cserver.cthreads = (int)std::thread::hardware_concurrency(); + cserver.cthreads = std::max(cserver.cthreads, 1); // in case of any weird sign overflows + } + + if (g_pserver->enable_multimaster && !g_pserver->fActiveReplica) { + serverLog(LL_WARNING, "ERROR: Multi Master requires active replication to be enabled."); + serverLog(LL_WARNING, "\tfutriix will now exit. Please update your configuration file."); + exit(EXIT_FAILURE); + } + + g_pserver->repl_backlog_size = g_pserver->repl_backlog_config_size; // this is normally set in the update logic, but not on initial config +} + +int iAmMaster(void) { + return ((!g_pserver->cluster_enabled && (listLength(g_pserver->masters) == 0 || g_pserver->fActiveReplica)) || + (g_pserver->cluster_enabled && nodeIsMaster(g_pserver->cluster->myself))); +} + +bool initializeStorageProvider(const char **err); + +#ifdef REDIS_TEST +typedef int redisTestProc(int argc, char **argv, int accurate); +struct redisTest { + char *name; + redisTestProc *proc; + int failed; +} redisTests[] = { + {"ziplist", ziplistTest}, + {"quicklist", quicklistTest}, + {"intset", intsetTest}, + {"zipmap", zipmapTest}, + {"sha1test", sha1Test}, + {"util", utilTest}, + {"endianconv", endianconvTest}, + {"crc64", crc64Test}, + {"zmalloc", zmalloc_test}, + {"sds", sdsTest}, + {"dict", dictTest} +}; +redisTestProc *getTestProcByName(const char *name) { + int numtests = sizeof(redisTests)/sizeof(struct redisTest); + for (int j = 0; j < numtests; j++) { + if (!strcasecmp(name,redisTests[j].name)) { 
+ return redisTests[j].proc; + } + } + return NULL; +} +#endif + +int main(int argc, char **argv) { + struct timeval tv; + int j; + char config_from_stdin = 0; + + std::set_terminate(OnTerminate); + + { + SymVer version; + version = parseVersion(KEYDB_REAL_VERSION); + serverAssert(version.major >= 0 && version.minor >= 0 && version.build >= 0); + serverAssert(compareVersion(&version) == VersionCompareResult::EqualVersion); + } + +#ifdef USE_MEMKIND + storage_init(NULL, 0); +#endif + +#ifdef REDIS_TEST + if (argc >= 3 && !strcasecmp(argv[1], "test")) { + int accurate = 0; + for (j = 3; j < argc; j++) { + if (!strcasecmp(argv[j], "--accurate")) { + accurate = 1; + } + } + + if (!strcasecmp(argv[2], "all")) { + int numtests = sizeof(redisTests)/sizeof(struct redisTest); + for (j = 0; j < numtests; j++) { + redisTests[j].failed = (redisTests[j].proc(argc,argv,accurate) != 0); + } + + /* Report tests result */ + int failed_num = 0; + for (j = 0; j < numtests; j++) { + if (redisTests[j].failed) { + failed_num++; + printf("[failed] Test - %s\n", redisTests[j].name); + } else { + printf("[ok] Test - %s\n", redisTests[j].name); + } + } + + printf("%d tests, %d passed, %d failed\n", numtests, + numtests-failed_num, failed_num); + + return failed_num == 0 ? 0 : 1; + } else { + redisTestProc *proc = getTestProcByName(argv[2]); + if (!proc) return -1; /* test not found */ + return proc(argc,argv,accurate); + } + + return 0; + } +#endif + + /* We need to initialize our libraries, and the server configuration. */ +#ifdef INIT_SETPROCTITLE_REPLACEMENT + spt_init(argc, argv); +#endif + setlocale(LC_COLLATE,""); + tzset(); /* Populates 'timezone' global. */ + zmalloc_set_oom_handler(redisOutOfMemoryHandler); + srand(time(NULL)^getpid()); + srandom(time(NULL)^getpid()); + gettimeofday(&tv,NULL); + init_genrand64(((long long) tv.tv_sec * 1000000 + tv.tv_usec) ^ getpid()); + crc64_init(); + + /* Store umask value. 
Because umask(2) only offers a set-and-get API we have + * to reset it and restore it back. We do this early to avoid a potential + * race condition with threads that could be creating files or directories. + */ + umask(g_pserver->umask = umask(0777)); + + serverAssert(g_pserver->repl_batch_offStart < 0); + + uint8_t hashseed[16]; + getRandomHexChars((char*)hashseed,sizeof(hashseed)); + dictSetHashFunctionSeed(hashseed); + g_pserver->sentinel_mode = checkForSentinelMode(argc,argv); + initServerConfig(); + serverTL = &g_pserver->rgthreadvar[IDX_EVENT_LOOP_MAIN]; + aeThreadOnline(); + aeAcquireLock(); // We own the lock on boot + ACLInit(); /* The ACL subsystem must be initialized ASAP because the + basic networking code and client creation depends on it. */ + moduleInitModulesSystem(); + tlsInit(); + + /* Store the executable path and arguments in a safe place in order + * to be able to restart the server later. */ + cserver.executable = getAbsolutePath(argv[0]); + cserver.exec_argv = (char**)zmalloc(sizeof(char*)*(argc+1), MALLOC_LOCAL); + cserver.exec_argv[argc] = NULL; + for (j = 0; j < argc; j++) cserver.exec_argv[j] = zstrdup(argv[j]); + + /* We need to init sentinel right now as parsing the configuration file + * in sentinel mode will have the effect of populating the sentinel + * data structures with master nodes to monitor. */ + if (g_pserver->sentinel_mode) { + initSentinelConfig(); + initSentinel(); + } + + /* Check if we need to start in keydb-check-rdb/aof mode. We just execute + * the program main. However the program is part of the Redis executable + * so that we can easily execute an RDB check on loading errors. 
*/ + if (strstr(argv[0],"keydb-check-rdb") != NULL) + redis_check_rdb_main(argc,(const char**)argv,NULL); + else if (strstr(argv[0],"keydb-check-aof") != NULL) + redis_check_aof_main(argc,argv); + + if (argc >= 2) { + j = 1; /* First option to parse in argv[] */ + sds options = sdsempty(); + + /* Handle special options --help and --version */ + if (strcmp(argv[1], "-v") == 0 || + strcmp(argv[1], "--version") == 0) version(); + if (strcmp(argv[1], "--help") == 0 || + strcmp(argv[1], "-h") == 0) usage(); + if (strcmp(argv[1], "--test-memory") == 0) { + if (argc == 3) { + memtest(atoi(argv[2]),50); + exit(0); + } else { + fprintf(stderr,"Please specify the amount of memory to test in megabytes.\n"); + fprintf(stderr,"Example: ./futriix-server --test-memory 4096\n\n"); + exit(1); + } + } + /* Parse command line options + * Precedence wise, File, stdin, explicit options -- last config is the one that matters. + * + * First argument is the config file name? */ + if (argv[1][0] != '-') { + /* Replace the config file in g_pserver->exec_argv with its absolute path. */ + cserver.configfile = getAbsolutePath(argv[1]); + zfree(cserver.exec_argv[1]); + cserver.exec_argv[1] = zstrdup(cserver.configfile); + j = 2; // Skip this arg when parsing options + } + while(j < argc) { + /* Either first or last argument - Should we read config from stdin? */ + if (argv[j][0] == '-' && argv[j][1] == '\0' && (j == 1 || j == argc-1)) { + config_from_stdin = 1; + } + /* All the other options are parsed and conceptually appended to the + * configuration file. For instance --port 6380 will generate the + * string "port 6380\n" to be parsed after the actual config file + * and stdin input are parsed (if they exist). 
*/ + else if (argv[j][0] == '-' && argv[j][1] == '-') { + /* Option name */ + if (sdslen(options)) options = sdscat(options,"\n"); + options = sdscat(options,argv[j]+2); + options = sdscat(options," "); + } else { + /* Option argument */ + options = sdscatrepr(options,argv[j],strlen(argv[j])); + options = sdscat(options," "); + } + j++; + } + + loadServerConfig(cserver.configfile, config_from_stdin, options); + if (g_pserver->sentinel_mode) loadSentinelConfigFromQueue(); + sdsfree(options); + } + + if (g_pserver->syslog_enabled) { + openlog(g_pserver->syslog_ident, LOG_PID | LOG_NDELAY | LOG_NOWAIT, + g_pserver->syslog_facility); + } + + if (g_pserver->sentinel_mode) sentinelCheckConfigFile(); + + cserver.supervised = redisIsSupervised(cserver.supervised_mode); + int background = cserver.daemonize && !cserver.supervised; + if (background) daemonize(); + + serverLog(LL_WARNING, "oO0OoO0OoO0Oo Futriix is starting oO0OoO0OoO0Oo"); + serverLog(LL_WARNING, + "Futriix version=%s, bits=%d, commit=%s, modified=%d, pid=%d, just started", + KEYDB_REAL_VERSION, + (sizeof(long) == 8) ? 64 : 32, + redisGitSHA1(), + strtol(redisGitDirty(),NULL,10) > 0, + (int)getpid()); + + if (argc == 1) { + serverLog(LL_WARNING, "Warning: no config file specified, using the default config. In order to specify a config file use %s /path/to/futriix.conf", argv[0]); + } else { + serverLog(LL_WARNING, "Configuration loaded"); + } + + validateConfiguration(); + + if (!g_pserver->sentinel_mode) { + #ifdef __linux__ + linuxMemoryWarnings(); + #if defined (__arm64__) + int ret; + if ((ret = linuxMadvFreeForkBugCheck())) { + if (ret == 1) + serverLog(LL_WARNING,"WARNING Your kernel has a bug that could lead to data corruption during background save. " + "Please upgrade to the latest stable kernel."); + else + serverLog(LL_WARNING, "Failed to test the kernel for a bug that could lead to data corruption during background save. 
" + "Your system could be affected, please report this error."); + if (!checkIgnoreWarning("ARM64-COW-BUG")) { + serverLog(LL_WARNING,"KeyDB will now exit to prevent data corruption. " + "Note that it is possible to suppress this warning by setting the following config: ignore-warnings ARM64-COW-BUG"); + exit(1); + } + } + #endif /* __arm64__ */ + #endif /* __linux__ */ + } + + + const char *err; + if (!initializeStorageProvider(&err)) + { + serverLog(LL_WARNING, "Failed to initialize storage provider: %s",err); + exit(EXIT_FAILURE); + } + + for (int iel = 0; iel < cserver.cthreads; ++iel) + { + initServerThread(g_pserver->rgthreadvar+iel, iel == IDX_EVENT_LOOP_MAIN); + } + + initServerThread(&g_pserver->modulethreadvar, false); + readOOMScoreAdj(); + initServer(); + initNetworking(cserver.cthreads > 1 /* fReusePort */); + + if (background || cserver.pidfile) createPidFile(); + if (cserver.set_proc_title) redisSetProcTitle(NULL); + redisAsciiArt(); + checkTcpBacklogSettings(); + + if (!g_pserver->sentinel_mode) { + /* Things not needed when running in Sentinel mode. */ + serverLog(LL_WARNING,"Server initialized"); + moduleInitModulesSystemLast(); + moduleLoadFromQueue(); + ACLLoadUsersAtStartup(); + + // special case of FUZZING load from stdin then quit + if (argc > 1 && strstr(argv[1],"rdbfuzz-mode") != NULL) + { + zmalloc_set_oom_handler(fuzzOutOfMemoryHandler); +#ifdef __AFL_HAVE_MANUAL_CONTROL + __AFL_INIT(); +#endif + rio rdb; + rdbSaveInfo rsi; + startLoadingFile(stdin, (char*)"stdin", 0); + rioInitWithFile(&rdb,stdin); + rdbLoadRio(&rdb,0,&rsi); + stopLoading(true); + return EXIT_SUCCESS; + } + + InitServerLast(); + + try { + loadDataFromDisk(); + } catch (ShutdownException) { + _Exit(EXIT_SUCCESS); + } + + if (g_pserver->cluster_enabled) { + if (verifyClusterConfigWithData() == C_ERR) { + serverLog(LL_WARNING, + "You can't have keys in a DB different than DB 0 when in " + "Cluster mode. 
Exiting."); + exit(1); + } + } + if (g_pserver->rgthreadvar[IDX_EVENT_LOOP_MAIN].ipfd.count > 0 && g_pserver->rgthreadvar[IDX_EVENT_LOOP_MAIN].tlsfd.count > 0) + serverLog(LL_NOTICE,"Ready to accept connections"); + if (g_pserver->sofd > 0) + serverLog(LL_NOTICE,"The server is now ready to accept connections at %s", g_pserver->unixsocket); + if (cserver.supervised_mode == SUPERVISED_SYSTEMD) { + if (!listLength(g_pserver->masters)) { + redisCommunicateSystemd("STATUS=Ready to accept connections\n"); + } else { + redisCommunicateSystemd("STATUS=Ready to accept connections in read-only mode. Waiting for MASTER <-> REPLICA sync\n"); + } + redisCommunicateSystemd("READY=1\n"); + } + } else { + ACLLoadUsersAtStartup(); + InitServerLast(); + sentinelIsRunning(); + if (cserver.supervised_mode == SUPERVISED_SYSTEMD) { + redisCommunicateSystemd("STATUS=Ready to accept connections\n"); + redisCommunicateSystemd("READY=1\n"); + } + } + + if (g_pserver->rdb_filename == nullptr) + { + if (g_pserver->rdb_s3bucketpath == nullptr) + g_pserver->rdb_filename = zstrdup(CONFIG_DEFAULT_RDB_FILENAME); + else + g_pserver->repl_diskless_sync = TRUE; + } + + if (cserver.cthreads > 4) { + serverLog(LL_WARNING, "Warning: server-threads is set to %d. This is above the maximum recommend value of 4, please ensure you've verified this is actually faster on your machine.", cserver.cthreads); + } + + /* Warning the user about suspicious maxmemory setting. */ + if (g_pserver->maxmemory > 0 && g_pserver->maxmemory < 1024*1024) { + serverLog(LL_WARNING,"WARNING: You specified a maxmemory value that is less than 1MB (current value is %llu bytes). 
Are you sure this is what you really want?", g_pserver->maxmemory); + } + + redisSetCpuAffinity(g_pserver->server_cpulist); + aeReleaseLock(); //Finally we can dump the lock + aeThreadOffline(); + moduleReleaseGIL(true); + + setOOMScoreAdj(-1); + serverAssert(cserver.cthreads > 0 && cserver.cthreads <= MAX_EVENT_LOOPS); + + pthread_create(&cserver.time_thread_id, nullptr, timeThreadMain, nullptr); + if (cserver.time_thread_priority) { + struct sched_param time_thread_priority; + time_thread_priority.sched_priority = sched_get_priority_max(SCHED_FIFO); + pthread_setschedparam(cserver.time_thread_id, SCHED_FIFO, &time_thread_priority); + } + + pthread_attr_t tattr; + pthread_attr_init(&tattr); + pthread_attr_setstacksize(&tattr, 1 << 23); // 8 MB + for (int iel = 0; iel < cserver.cthreads; ++iel) + { + pthread_create(g_pserver->rgthread + iel, &tattr, workerThreadMain, (void*)((int64_t)iel)); + if (cserver.fThreadAffinity) + { +#ifdef __linux__ + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(iel + cserver.threadAffinityOffset, &cpuset); + if (pthread_setaffinity_np(g_pserver->rgthread[iel], sizeof(cpu_set_t), &cpuset) == 0) + { + serverLog(LL_NOTICE, "Binding thread %d to cpu %d", iel, iel + cserver.threadAffinityOffset + 1); + } +#else + serverLog(LL_WARNING, "CPU pinning not available on this platform"); +#endif + } + } + + /* Block SIGALRM from this thread, it should only be received on a server thread */ + sigset_t sigset; + sigemptyset(&sigset); + sigaddset(&sigset, SIGALRM); + pthread_sigmask(SIG_BLOCK, &sigset, nullptr); + + /* The main thread sleeps until all the workers are done. 
+ this is so that all worker threads are orthogonal in their startup/shutdown */ + void *pvRet; + for (int iel = 0; iel < cserver.cthreads; ++iel) + pthread_join(g_pserver->rgthread[iel], &pvRet); + + /* free our databases */ + bool fLockAcquired = aeTryAcquireLock(false); + g_pserver->shutdown_asap = true; // flag that we're in shutdown + if (!fLockAcquired) + g_fInCrash = true; // We don't actually crash right away, because we want to sync any storage providers + + saveMasterStatusToStorage(true); + for (int idb = 0; idb < cserver.dbnum; ++idb) { + g_pserver->db[idb]->storageProviderDelete(); + } + delete g_pserver->metadataDb; + + // If we couldn't acquire the global lock it means something wasn't shutdown and we'll probably deadlock + serverAssert(fLockAcquired); + + g_pserver->garbageCollector.shutdown(); + delete g_pserver->m_pstorageFactory; + + // Don't return because we don't want to run any global dtors + _Exit(EXIT_SUCCESS); + return 0; // Ensure we're well formed even though this won't get hit +} + +/* The End */