From 67065d00a16a9170658c5fe866ea1ef1823746da Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 12 Jan 2018 11:06:24 +0100 Subject: [PATCH 01/66] Cluster Manager mode --- src/redis-cli.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 108 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 372d02d97..59abd571e 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -65,6 +65,7 @@ #define REDIS_CLI_HISTFILE_DEFAULT ".rediscli_history" #define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE" #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" +#define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) /* --latency-dist palettes. */ int spectrum_palette_color_size = 19; @@ -77,6 +78,16 @@ int spectrum_palette_mono[] = {0,233,234,235,237,239,241,243,245,247,249,251,253 int *spectrum_palette; int spectrum_palette_size; +/* Cluster Manager command info */ +struct clusterManagerCommand { + char *name; + int argc; + char **argv; + int flags; + int replicas; +}; + + static redisContext *context; static struct config { char *hostip; @@ -119,8 +130,29 @@ static struct config { int eval_ldb_end; /* Lua debugging session ended. */ int enable_ldb_on_eval; /* Handle manual SCRIPT DEBUG + EVAL commands. */ int last_cmd_type; + struct clusterManagerCommand cluster_manager_command; } config; +/* Cluster Manager commands. */ +typedef int clusterManagerCommandProc(int argc, char **argv); +static struct clusterManagerCommandDef { + char *name; + clusterManagerCommandProc *proc; + int arity; +}; + +static int clusterManagerCommandCreate(int argc, char **argv) { + printf("CLUSTER: create\n"); + printf("Arguments: %d\n", argc); + printf("Replicas: %d\n", config.cluster_manager_command.replicas); + fprintf(stderr, "Not implemented yet!\n"); + return 0; +} + +struct clusterManagerCommandDef clusterManagerCommands[] = { + {"create", clusterManagerCommandCreate, -2} +}; + /* User preferences. 
*/ static struct pref { int hints; @@ -1061,6 +1093,13 @@ static redisReply *reconnectingRedisCommand(redisContext *c, const char *fmt, .. * User interface *--------------------------------------------------------------------------- */ +static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { + struct clusterManagerCommand *cmd = &config.cluster_manager_command; + cmd->name = cmdname; + cmd->argc = argc; + cmd->argv = argc ? argv : NULL; +} + static int parseOptions(int argc, char **argv) { int i; @@ -1146,6 +1185,18 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"-d") && !lastarg) { sdsfree(config.mb_delim); config.mb_delim = sdsnew(argv[++i]); + } else if (!strcmp(argv[i],"--cluster") && !lastarg) { + if (CLUSTER_MANAGER_MODE()) usage(); + char *cmd = argv[++i]; + int j = i; + for (; j < argc; j++) if (argv[j][0] == '-') break; + j--; + createClusterManagerCommand(cmd, j - i, argv + i); + i = j; + } else if (!strcmp(argv[i],"--cluster") && lastarg) { + usage(); + } else if (!strcmp(argv[i],"--cluster-replicas") && !lastarg) { + config.cluster_manager_command.replicas = atoi(argv[++i]); } else if (!strcmp(argv[i],"-v") || !strcmp(argv[i], "--version")) { sds version = cliVersion(); printf("redis-cli %s\n", version); @@ -1243,9 +1294,13 @@ static void usage(void) { " --ldb-sync-mode Like --ldb but uses the synchronous Lua debugger, in\n" " this mode the server is blocked and script changes are\n" " are not rolled back from the server memory.\n" +" --cluster [args...]\n" +" Cluster Manager command and arguments (see below).\n" " --help Output this help and exit.\n" " --version Output version and exit.\n" "\n" +"Cluster Manager Commands:\n" +"\n" "Examples:\n" " cat /etc/passwd | redis-cli -x set mypasswd\n" " redis-cli get mypasswd\n" @@ -1569,6 +1624,43 @@ static int evalMode(int argc, char **argv) { return retval; } +/*------------------------------------------------------------------------------ + * Cluster 
Manager mode + *--------------------------------------------------------------------------- */ + +static clusterManagerCommandProc *validateClusterManagerCommand(void) { + int i, commands_count = sizeof(clusterManagerCommands) / + sizeof(struct clusterManagerCommandDef); + clusterManagerCommandProc *proc = NULL; + char *cmdname = config.cluster_manager_command.name; + int argc = config.cluster_manager_command.argc; + for (i = 0; i < commands_count; i++) { + struct clusterManagerCommandDef cmddef = clusterManagerCommands[i]; + if (!strcmp(cmddef.name, cmdname)) { + if ((cmddef.arity > 0 && argc != cmddef.arity) || + (cmddef.arity < 0 && argc < (cmddef.arity * -1))) { + fprintf(stderr, "[ERR] Wrong number of arguments for " + "specified --cluster sub command\n"); + return NULL; + } + proc = cmddef.proc; + } + } + if (!proc) fprintf(stderr, "Unknown --cluster subcommand\n"); + return proc; +} + +static void clusterManagerMode(clusterManagerCommandProc *proc) { + int argc = config.cluster_manager_command.argc; + char **argv = config.cluster_manager_command.argv; + if (!proc(argc, argv)) { + sdsfree(config.hostip); + sdsfree(config.mb_delim); + exit(1); + } + exit(0); +} + /*------------------------------------------------------------------------------ * Latency and latency history modes *--------------------------------------------------------------------------- */ @@ -2861,7 +2953,11 @@ int main(int argc, char **argv) { config.eval_ldb_sync = 0; config.enable_ldb_on_eval = 0; config.last_cmd_type = -1; - + config.cluster_manager_command.name = NULL; + config.cluster_manager_command.argc = 0; + config.cluster_manager_command.argv = NULL; + config.cluster_manager_command.flags = 0; + config.cluster_manager_command.replicas = 0; pref.hints = 1; spectrum_palette = spectrum_palette_color; @@ -2877,6 +2973,17 @@ int main(int argc, char **argv) { argc -= firstarg; argv += firstarg; + /* Cluster Manager mode */ + if (CLUSTER_MANAGER_MODE()) { + clusterManagerCommandProc *proc 
= validateClusterManagerCommand(); + if (!proc) { + sdsfree(config.hostip); + sdsfree(config.mb_delim); + exit(1); + } + clusterManagerMode(proc); + } + /* Latency mode */ if (config.latency_mode) { if (cliConnect(0) == REDIS_ERR) exit(1); From e674d6f6c0513e0ba48361a43c62e5439552c7f3 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 31 Jan 2018 16:26:21 +0100 Subject: [PATCH 02/66] Cluster Manager: 'create', 'info' and 'check' commands --- src/Makefile | 2 +- src/redis-cli.c | 1297 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 1272 insertions(+), 27 deletions(-) diff --git a/src/Makefile b/src/Makefile index b896b1263..a5e0e231a 100644 --- a/src/Makefile +++ b/src/Makefile @@ -146,7 +146,7 @@ REDIS_SERVER_NAME=redis-server REDIS_SENTINEL_NAME=redis-sentinel REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.c REDIS_CLI_NAME=redis-cli -REDIS_CLI_OBJ=anet.o adlist.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o +REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o REDIS_BENCHMARK_NAME=redis-benchmark REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o zmalloc.o redis-benchmark.o REDIS_CHECK_RDB_NAME=redis-check-rdb diff --git a/src/redis-cli.c b/src/redis-cli.c index 59abd571e..ef917cca5 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -41,13 +41,15 @@ #include #include #include -#include +#include 
#include #include #include #include #include /* use sds.h from hiredis, so that only one set of sds functions will be present in the binary */ +#include "dict.h" +#include "adlist.h" #include "zmalloc.h" #include "linenoise.h" #include "help.h" @@ -65,7 +67,64 @@ #define REDIS_CLI_HISTFILE_DEFAULT ".rediscli_history" #define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE" #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" +#define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) +#define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) (nodes/(replicas + 1)) +#define CLUSTER_MANAGER_COMMAND(n,...) \ + (reconnectingRedisCommand(n->context, __VA_ARGS__)) +#define CLUSTER_MANAGER_NODE_INFO(n) (CLUSTER_MANAGER_COMMAND(n, "INFO")) + +#define CLUSTER_MANAGER_RESET_SLOTS(n) do { \ + memset(n->slots, 0, sizeof(n->slots)); \ + n->slots_count = 0; \ +} while(0) + +#define CLUSTER_MANAGER_NODEARRAY_INIT(array, alloc_len) do { \ + array->nodes = zcalloc(alloc_len * sizeof(clusterManagerNode*));\ + array->alloc = array->nodes; \ + array->len = alloc_len; \ + array->count = 0; \ +} while(0) + +#define CLUSTER_MANAGER_NODEARRAY_RESET(array) do { \ + if (array->nodes > array->alloc) { \ + array->len = array->nodes - array->alloc; \ + array->nodes = array->alloc; \ + array->count = 0; \ + int i = 0; \ + for(; i < array->len; i++) { \ + if (array->nodes[i] != NULL) array->count++;\ + } \ + } \ +} while(0) + +#define CLUSTER_MANAGER_NODEARRAY_FREE(array) zfree(array->alloc) + +#define CLUSTER_MANAGER_NODEARRAY_SHIFT(array, nodeptr) do {\ + assert(array->nodes < (array->nodes + array->len)); \ + if (*array->nodes != NULL) array->count--; \ + nodeptr = *array->nodes; \ + array->nodes++; \ + array->len--; \ +} while(0) + +#define CLUSTER_MANAGER_NODEARRAY_ADD(array, nodeptr) do { \ + assert(array->nodes < (array->nodes + array->len)); \ + assert(nodeptr != NULL); \ + array->nodes[array->count++] = nodeptr; \ +} while(0) + +#define 
CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err) \ + fprintf(stderr,"Node %s:%d replied with error:\n%s\n", n->ip, n->port, err); + +#define CLUSTER_MANAGER_FLAG_MYSELF 1 << 0 +#define CLUSTER_MANAGER_FLAG_SLAVE 1 << 1 +#define CLUSTER_MANAGER_FLAG_FRIEND 1 << 2 +#define CLUSTER_MANAGER_FLAG_NOADDR 1 << 3 +#define CLUSTER_MANAGER_FLAG_DISCONNECT 1 << 4 +#define CLUSTER_MANAGER_FLAG_FAIL 1 << 5 + +#define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 /* --latency-dist palettes. */ int spectrum_palette_color_size = 19; @@ -79,13 +138,13 @@ int *spectrum_palette; int spectrum_palette_size; /* Cluster Manager command info */ -struct clusterManagerCommand { +typedef struct clusterManagerCommand { char *name; int argc; char **argv; int flags; int replicas; -}; +} clusterManagerCommand; static redisContext *context; @@ -130,28 +189,70 @@ static struct config { int eval_ldb_end; /* Lua debugging session ended. */ int enable_ldb_on_eval; /* Handle manual SCRIPT DEBUG + EVAL commands. */ int last_cmd_type; - struct clusterManagerCommand cluster_manager_command; + clusterManagerCommand cluster_manager_command; } config; -/* Cluster Manager commands. 
*/ +/* Cluster Manager */ + +static struct clusterManager { + list *nodes; +} cluster_manager; + +typedef struct clusterManagerNode { + redisContext *context; + sds name; + char *ip; + int port; + uint64_t current_epoch; + time_t ping_sent; + time_t ping_recv; + int flags; + sds replicate; + int dirty; + uint8_t slots[CLUSTER_MANAGER_SLOTS]; + int slots_count; + list *friends; +} clusterManagerNode; + +typedef struct clusterManagerNodeArray { + clusterManagerNode **nodes; + clusterManagerNode **alloc; + int len; + int count; +} clusterManagerNodeArray; + +static clusterManagerNode *clusterManagerNewNode(char *ip, int port); +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); +static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, + char **err); +static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts); +static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err); +static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, + int ip_len, clusterManagerNode ***offending, int *offending_len); +static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, + int ip_len); +static sds clusterManagerNodeInfo(clusterManagerNode *node); +static void clusterManagerShowNodes(void); +static void clusterManagerShowInfo(void); +static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); +static void clusterManagerWaitForClusterJoin(void); +static void clusterManagerCheckCluster(int quiet); typedef int clusterManagerCommandProc(int argc, char **argv); -static struct clusterManagerCommandDef { +typedef struct clusterManagerCommandDef { char *name; clusterManagerCommandProc *proc; int arity; -}; + char *args; + char *options; +} clusterManagerCommandDef; +static int clusterManagerIsConfigConsistent(void); -static int clusterManagerCommandCreate(int argc, char **argv) { - printf("CLUSTER: create\n"); - printf("Arguments: %d\n", argc); - 
printf("Replicas: %d\n", config.cluster_manager_command.replicas); - fprintf(stderr, "Not implemented yet!\n"); - return 0; -} +/* Cluster Manager commands. */ -struct clusterManagerCommandDef clusterManagerCommands[] = { - {"create", clusterManagerCommandCreate, -2} -}; +static int clusterManagerCommandCreate(int argc, char **argv); +static int clusterManagerCommandInfo(int argc, char **argv); +static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandHelp(int argc, char **argv); /* User preferences. */ static struct pref { @@ -165,6 +266,9 @@ char *redisGitSHA1(void); char *redisGitDirty(void); static int cliConnect(int force); +static char *getInfoField(char *info, char *field); +static long getLongInfoField(char *info, char *field); + /*------------------------------------------------------------------------------ * Utility functions *--------------------------------------------------------------------------- */ @@ -317,6 +421,36 @@ static void parseRedisUri(const char *uri) { config.dbnum = atoi(curr); } +static uint64_t dictSdsHash(const void *key) { + return dictGenHashFunction((unsigned char*)key, sdslen((char*)key)); +} + +static int dictSdsKeyCompare(void *privdata, const void *key1, + const void *key2) +{ + int l1,l2; + DICT_NOTUSED(privdata); + + l1 = sdslen((sds)key1); + l2 = sdslen((sds)key2); + if (l1 != l2) return 0; + return memcmp(key1, key2, l1) == 0; +} + +static void dictSdsDestructor(void *privdata, void *val) +{ + DICT_NOTUSED(privdata); + + sdsfree(val); +} + +/* _serverAssert is needed by dict */ +void _serverAssert(const char *estr, const char *file, int line) { + fprintf(stderr, "=== ASSERTION FAILED ==="); + fprintf(stderr, "==> %s:%d '%s' is not true",file,line,estr); + *((char*)-1) = 'x'; +} + /*------------------------------------------------------------------------------ * Help functions *--------------------------------------------------------------------------- */ @@ -1094,7 +1228,7 @@ static 
redisReply *reconnectingRedisCommand(redisContext *c, const char *fmt, .. *--------------------------------------------------------------------------- */ static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { - struct clusterManagerCommand *cmd = &config.cluster_manager_command; + clusterManagerCommand *cmd = &config.cluster_manager_command; cmd->name = cmdname; cmd->argc = argc; cmd->argv = argc ? argv : NULL; @@ -1191,7 +1325,7 @@ static int parseOptions(int argc, char **argv) { int j = i; for (; j < argc; j++) if (argv[j][0] == '-') break; j--; - createClusterManagerCommand(cmd, j - i, argv + i); + createClusterManagerCommand(cmd, j - i, argv + i + 1); i = j; } else if (!strcmp(argv[i],"--cluster") && lastarg) { usage(); @@ -1300,6 +1434,7 @@ static void usage(void) { " --version Output version and exit.\n" "\n" "Cluster Manager Commands:\n" +" Use --cluster help to list all available cluster manager commands.\n" "\n" "Examples:\n" " cat /etc/passwd | redis-cli -x set mypasswd\n" @@ -1628,14 +1763,22 @@ static int evalMode(int argc, char **argv) { * Cluster Manager mode *--------------------------------------------------------------------------- */ +clusterManagerCommandDef clusterManagerCommands[] = { + {"create", clusterManagerCommandCreate, -2, "host1:port1 ... 
hostN:portN", + "cluster-replicas"}, + {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, + {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, + {"help", clusterManagerCommandHelp, 0, NULL, NULL} +}; + static clusterManagerCommandProc *validateClusterManagerCommand(void) { int i, commands_count = sizeof(clusterManagerCommands) / - sizeof(struct clusterManagerCommandDef); + sizeof(clusterManagerCommandDef); clusterManagerCommandProc *proc = NULL; char *cmdname = config.cluster_manager_command.name; int argc = config.cluster_manager_command.argc; for (i = 0; i < commands_count; i++) { - struct clusterManagerCommandDef cmddef = clusterManagerCommands[i]; + clusterManagerCommandDef cmddef = clusterManagerCommands[i]; if (!strcmp(cmddef.name, cmdname)) { if ((cmddef.arity > 0 && argc != cmddef.arity) || (cmddef.arity < 0 && argc < (cmddef.arity * -1))) { @@ -1650,15 +1793,1117 @@ static clusterManagerCommandProc *validateClusterManagerCommand(void) { return proc; } +static void freeClusterManagerNode(clusterManagerNode *node) { + if (node->context != NULL) redisFree(node->context); + if (node->friends != NULL) { + listIter li; + listNode *ln; + listRewind(node->friends,&li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *fn = ln->value; + freeClusterManagerNode(fn); + } + listRelease(node->friends); + node->friends = NULL; + } + if (node->name != NULL) sdsfree(node->name); + if (node->replicate != NULL) sdsfree(node->replicate); + if ((node->flags & CLUSTER_MANAGER_FLAG_FRIEND) && node->ip) + sdsfree(node->ip); + zfree(node); +} + +static void freeClusterManager(void) { + if (cluster_manager.nodes != NULL) { + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes,&li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + freeClusterManagerNode(n); + } + listRelease(cluster_manager.nodes); + cluster_manager.nodes = NULL; + } +} + +static clusterManagerNode *clusterManagerNewNode(char * ip, int port) 
{ + clusterManagerNode *node = zmalloc(sizeof(*node)); + node->context = NULL; + node->name = NULL; + node->ip = ip; + node->port = port; + node->current_epoch = 0; + node->ping_sent = 0; + node->ping_recv = 0; + node->flags = 0; + node->replicate = NULL; + node->dirty = 0; + node->friends = NULL; + CLUSTER_MANAGER_RESET_SLOTS(node); + return node; +} + +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { + redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); + int is_err = 0; + *err = NULL; + if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((info->len + 1) * sizeof(char)); + strcpy(*err, info->str); + } + freeReplyObject(info); + return 0; + } + int is_cluster = (int) getLongInfoField(info->str, "cluster_enabled"); + freeReplyObject(info); + return is_cluster; +} + +static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) { + redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); + int is_err = 0, is_empty = 1; + *err = NULL; + if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((info->len + 1) * sizeof(char)); + strcpy(*err, info->str); + } + is_empty = 0; + goto result; + } + if (strstr(info->str, "db0:") != NULL) { + is_empty = 0; + goto result; + } + freeReplyObject(info); + info = CLUSTER_MANAGER_COMMAND(node, "CLUSTER INFO"); + if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((info->len + 1) * sizeof(char)); + strcpy(*err, info->str); + } + is_empty = 0; + goto result; + } + long known_nodes = getLongInfoField(info->str, "cluster_known_nodes"); + is_empty = (known_nodes == 1); +result: + freeReplyObject(info); + return is_empty; +} + +static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, + int ip_len, clusterManagerNode ***offending, int *offending_len) +{ + assert(offending != NULL); + int score = 
0, i, j; + int node_len = cluster_manager.nodes->len; + *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); + clusterManagerNode **offending_p = *offending; + dictType dtype = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + NULL, /* key destructor */ + dictSdsDestructor /* val destructor */ + }; + for (i = 0; i < ip_len; i++) { + clusterManagerNodeArray *node_array = &(ipnodes[i]); + dict *related = dictCreate(&dtype, NULL); + char *ip = NULL; + for (j = 0; j < node_array->len; j++) { + clusterManagerNode *node = node_array->nodes[j]; + if (node == NULL) continue; + if (!ip) ip = node->ip; + sds types; + if (!node->replicate) { + assert(node->name != NULL); + dictEntry *entry = dictFind(related, node->name); + if (entry) types = (sds) dictGetVal(entry); + else types = sdsempty(); + types = sdscatprintf(types, "m%s", types); + dictReplace(related, node->name, types); + } else { + dictEntry *entry = dictFind(related, node->replicate); + if (entry) types = (sds) dictGetVal(entry); + else { + types = sdsempty(); + dictAdd(related, node->replicate, types); + } + sdscat(types, "s"); + } + } + dictIterator *iter = dictGetIterator(related); + dictEntry *entry; + while ((entry = dictNext(iter)) != NULL) { + sds types = (sds) dictGetVal(entry); + sds name = (sds) dictGetKey(entry); + int typeslen = sdslen(types); + if (typeslen < 2) continue; + if (types[0] == 'm') score += (10000 * (typeslen - 1)); + else score += (1 * typeslen); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->replicate == NULL) continue; + if (!strcmp(n->replicate, name) && !strcmp(n->ip, ip)) { + *(offending_p++) = n; + break; + } + } + } + if (offending_len != NULL) *offending_len = offending_p - *offending; + dictReleaseIterator(iter); + dictRelease(related); + } + return score; +} + +static void 
clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, + int ip_len) +{ + clusterManagerNode **offenders = NULL, **aux; + int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); + if (score == 0) goto cleanup; + printf(">>> Trying to optimize slaves allocation for anti-affinity\n"); + int node_len = cluster_manager.nodes->len; + int maxiter = 500 * node_len; + srand(time(NULL)); + while (maxiter > 0) { + int offending_len = 0; + if (offenders != NULL) { + zfree(offenders); + offenders = NULL; + } + score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &offenders, + &offending_len); + if (score == 0) break; + int rand_idx = rand() % offending_len; + clusterManagerNode *first = offenders[rand_idx], *second; + clusterManagerNode **other_replicas = zcalloc((node_len - 1) * + sizeof(*other_replicas)); + int other_replicas_count = 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n != first && n->replicate != NULL) + other_replicas[other_replicas_count++] = n; + } + if (other_replicas_count == 0) { + zfree(other_replicas); + break; + } + rand_idx = rand() % other_replicas_count; + second = other_replicas[rand_idx]; + char *first_master = first->replicate, + *second_master = second->replicate; + first->replicate = second_master, first->dirty = 1; + second->replicate = first_master, second->dirty = 1; + zfree(aux), aux = NULL; + int new_score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, + &aux, NULL); + if (new_score > score) { + first->replicate = first_master; + second->replicate = second_master; + } + zfree(other_replicas); + maxiter--; + } + zfree(aux), aux = NULL; + score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); + char *msg; + if (score == 0) msg = "[OK] Perfect anti-affinity obtained!"; + else if (score >= 10000) + msg = ("[WARNING] Some slaves are in the same host as their master"); 
+ else + msg=("[WARNING] Some slaves of the same master are in the same host"); + printf("%s\n", msg); +cleanup: + zfree(offenders); + zfree(aux); +} + +static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { + sds slots = sdsempty(); + int first_range_idx = -1, last_slot_idx = -1, i; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + int has_slot = node->slots[i]; + if (has_slot) { + if (first_range_idx == -1) { + if (sdslen(slots)) slots = sdscat(slots, ","); + first_range_idx = i; + slots = sdscatfmt(slots, "[%u", i); + } + last_slot_idx = i; + } else { + if (last_slot_idx >= 0) { + if (first_range_idx == last_slot_idx) + slots = sdscat(slots, "]"); + else slots = sdscatfmt(slots, "-%u]", last_slot_idx); + } + last_slot_idx = -1; + first_range_idx = -1; + } + } + if (last_slot_idx >= 0) { + if (first_range_idx == last_slot_idx) slots = sdscat(slots, "]"); + else slots = sdscatfmt(slots, "-%u]", last_slot_idx); + } + return slots; +} + +static sds clusterManagerNodeInfo(clusterManagerNode *node) { + sds info = sdsempty(); + int is_master = !(node->flags & CLUSTER_MANAGER_FLAG_SLAVE); + char *role = (is_master ? 
"M" : "S"); + sds slots = NULL; + if (node->dirty && node->replicate != NULL) + info = sdscatfmt(info, "S: %S %s:%u", node->name, node->ip, node->port); + else { + slots = clusterManagerNodeSlotsString(node); + info = sdscatfmt(info, "%s: %S %s:%u\n" + " slots:%S (%u slots) " + "", //TODO: flags string + role, node->name, node->ip, node->port, + slots, node->slots_count); + sdsfree(slots); + } + if (node->replicate != NULL) + info = sdscatfmt(info, "\n replicates %S", node->replicate); + //else if () {} //TODO: add replicas info + return info; +} + +static void clusterManagerShowNodes(void) { + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + sds info = clusterManagerNodeInfo(node); + printf("%s\n", info); + sdsfree(info); + } +} + +static void clusterManagerShowInfo(void) { + int masters = 0; + int keys = 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + if (!(node->flags & CLUSTER_MANAGER_FLAG_SLAVE)) { + if (!node->name) continue; + int replicas = 0; + int dbsize = -1; + char name[9]; + memcpy(name, node->name, 8); + name[8] = '\0'; + listIter ri; + listNode *rn; + listRewind(cluster_manager.nodes, &ri); + while ((rn = listNext(&ri)) != NULL) { + clusterManagerNode *n = rn->value; + if (n == node || !(n->flags & CLUSTER_MANAGER_FLAG_SLAVE)) + continue; + if (n->replicate && !strcmp(n->replicate, node->name)) + replicas++; + } + redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "DBSIZE"); + if (reply != NULL || reply->type == REDIS_REPLY_INTEGER) + dbsize = reply->integer; + if (dbsize < 0) { + char *err = ""; + if (reply != NULL && reply->type == REDIS_REPLY_ERROR) + err = reply->str; + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + if (reply != NULL) freeReplyObject(reply); + return; + }; + if (reply != NULL) freeReplyObject(reply); + printf("%s:%d 
(%s...) -> %d keys | %d slots | %d slaves.\n", + node->ip, node->port, name, dbsize, + node->slots_count, replicas); + masters++; + keys += dbsize; + } + } + printf("[OK] %d keys in %d masters.\n", keys, masters); + float keys_per_slot = keys / (float) CLUSTER_MANAGER_SLOTS; + printf("%.2f keys per slot on average.\n", keys_per_slot); +} + +static int clusterManagerAddSlots(clusterManagerNode *node, char**err) +{ + redisReply *reply = NULL; + void *_reply = NULL; + int is_err = 0; + int argc; + sds *argv = NULL; + size_t *argvlen = NULL; + *err = NULL; + sds cmd = sdsnew("CLUSTER ADDSLOTS "); + int i, added = 0; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + int last_slot = (i == (CLUSTER_MANAGER_SLOTS - 1)); + if (node->slots[i]) { + char *fmt = (!last_slot ? "%u " : "%u"); + cmd = sdscatfmt(cmd, fmt, i); + added++; + } + } + if (!added) goto node_cmd_err; + argv = cliSplitArgs(cmd, &argc); + if (argc == 0 || argv == NULL) goto node_cmd_err; + argvlen = zmalloc(argc*sizeof(size_t)); + for (i = 0; i < argc; i++) + argvlen[i] = sdslen(argv[i]); + redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen); + if (redisGetReply(node->context, &_reply) != REDIS_OK) goto node_cmd_err; + reply = (redisReply*) _reply; + if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + strcpy(*err, reply->str); + } + goto node_cmd_err; + } + sdsfree(cmd); + zfree(argvlen); + sdsfreesplitres(argv,argc); + freeReplyObject(reply); + return 1; +node_cmd_err: + sdsfree(cmd); + zfree(argvlen); + if (argv != NULL) sdsfreesplitres(argv,argc); + if (reply != NULL) freeReplyObject(reply); + return 0; +} + +static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { + if (!node->dirty) return 0; + redisReply *reply = NULL; + int is_err = 0; + *err = NULL; + if (node->replicate != NULL) { + reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER REPLICATE %s", + 
node->replicate); + if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + strcpy(*err, reply->str); + } + goto node_cmd_err; + } + } else { + int added = clusterManagerAddSlots(node, err); + if (!added || *err != NULL) goto node_cmd_err; + } + node->dirty = 0; + freeReplyObject(reply); + return 1; +node_cmd_err: + freeReplyObject(reply); + return 0; +} + +static void clusterManagerWaitForClusterJoin(void) { + printf("Waiting for the cluster to join\n"); + while(!clusterManagerIsConfigConsistent()) { + printf("."); + fflush(stdout); + sleep(1); + } + printf("\n"); +} + +static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, + char **err) +{ + redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); + int is_err = 0; + *err = NULL; + if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + strcpy(*err, reply->str); + } + goto node_cmd_err; + } + int getfriends = (opts & CLUSTER_MANAGER_OPT_GETFRIENDS); + char *lines = reply->str, *p, *line; + while ((p = strstr(lines, "\n")) != NULL) { + *p = '\0'; + line = lines; + lines = p + 1; + char *name = NULL, *addr = NULL, *flags = NULL, *master_id = NULL, + *ping_sent = NULL, *ping_recv = NULL, *config_epoch = NULL, + *link_status = NULL; + int i = 0; + while ((p = strchr(line, ' ')) != NULL) { + *p = '\0'; + char *token = line; + line = p + 1; + switch(i++){ + case 0: name = token; break; + case 1: addr = token; break; + case 2: flags = token; break; + case 3: master_id = token; break; + case 4: ping_sent = token; break; + case 5: ping_recv = token; break; + case 6: config_epoch = token; break; + case 7: link_status = token; break; + } + if (i == 8) break; // Slots + } + if (!flags) goto node_cmd_err; + int myself = (strstr(flags, "myself") != NULL); + if (strstr(flags, "noaddr") != NULL) + 
node->flags |= CLUSTER_MANAGER_FLAG_NOADDR; + if (strstr(flags, "disconnected") != NULL) + node->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT; + if (strstr(flags, "fail") != NULL) + node->flags |= CLUSTER_MANAGER_FLAG_FAIL; + clusterManagerNode *currentNode = NULL; + if (myself) { + node->flags |= CLUSTER_MANAGER_FLAG_MYSELF; + currentNode = node; + CLUSTER_MANAGER_RESET_SLOTS(node); + if (i == 8) { + int remaining = strlen(line); + //TODO: just while(remaining) && assign p inside the block + while ((p = strchr(line, ' ')) != NULL || remaining) { + if (p == NULL) p = line + remaining; + remaining -= (p - line); + + char *slotsdef = line; + *p = '\0'; + if (remaining) line = p + 1; + else line = p; + if (slotsdef[0] == '[') { + //TODO: migrating/importing + } else if ((p = strchr(slotsdef, '-')) != NULL) { + int start, stop; + *p = '\0'; + start = atoi(slotsdef); + stop = atoi(p + 1); + node->slots_count += (stop - (start - 1)); + while (start <= stop) node->slots[start++] = 1; + } else if (p > slotsdef) { + node->slots[atoi(slotsdef)] = 1; + node->slots_count++; + } + } + } + node->dirty = 0; + } else if (!getfriends) { + if (!(node->flags & CLUSTER_MANAGER_FLAG_MYSELF)) continue; + else break; + } else { + if (addr == NULL) { + // TODO: find a better err message + fprintf(stderr, "Error: invalid CLUSTER NODES reply\n"); + goto node_cmd_err; + } + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c == NULL) { + fprintf(stderr, "Error: invalid CLUSTER NODES reply\n"); + goto node_cmd_err; + } + *c = '\0'; + int port = atoi(++c); + currentNode = clusterManagerNewNode(sdsnew(addr), port); + currentNode->flags |= CLUSTER_MANAGER_FLAG_FRIEND; + if (node->friends == NULL) node->friends = listCreate(); + listAddNodeTail(node->friends, currentNode); + } + if (name != NULL) currentNode->name = sdsnew(name); + if (strstr(flags, "slave") != NULL) { + currentNode->flags |= CLUSTER_MANAGER_FLAG_SLAVE; + if (master_id != NULL) 
currentNode->replicate = sdsnew(master_id); + } + if (config_epoch != NULL) + currentNode->current_epoch = atoll(config_epoch); + if (ping_sent != NULL) currentNode->ping_sent = atoll(ping_sent); + if (ping_recv != NULL) currentNode->ping_recv = atoll(ping_recv); + if (!getfriends && myself) break; + } + freeReplyObject(reply); + return 1; +node_cmd_err: + freeReplyObject(reply); + return 0; +} + +static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { + if (node->context == NULL) + node->context = redisConnect(node->ip, node->port); + if (node->context->err) { + fprintf(stderr,"Could not connect to Redis at "); + fprintf(stderr,"%s:%d: %s\n", node->ip, node->port, + node->context->errstr); + freeClusterManagerNode(node); + return 0; + } + opts |= CLUSTER_MANAGER_OPT_GETFRIENDS; + char *e = NULL; + if (!clusterManagerNodeIsCluster(node, &e)) { + char *msg = (e ? e : "is not configured as a cluster node."); + fprintf(stderr, "[ERR] Node %s:%d %s\n", node->ip, node->port, msg); + if (e) zfree(e); + freeClusterManagerNode(node); + return 0; + } + e = NULL; + if (!clusterManagerNodeLoadInfo(node, opts, &e)) { + if (e) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, e); + zfree(e); + } + freeClusterManagerNode(node); + return 0; + } + cluster_manager.nodes = listCreate(); + listAddNodeTail(cluster_manager.nodes, node); + if (node->friends != NULL) { + listIter li; + listNode *ln; + listRewind(node->friends, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *friend = ln->value; + if (!friend->ip || !friend->port) continue; + if (!friend->context) + friend->context = redisConnect(friend->ip, friend->port); + if (friend->context->err) continue; + e = NULL; + if (clusterManagerNodeLoadInfo(friend, 0, &e)) { + if (friend->flags & (CLUSTER_MANAGER_FLAG_NOADDR | + CLUSTER_MANAGER_FLAG_DISCONNECT | + CLUSTER_MANAGER_FLAG_FAIL)) continue; + listAddNodeTail(cluster_manager.nodes, friend); + + } else fprintf(stderr,"[ERR] Unable to load info 
for node %s:%d\n", + friend->ip, friend->port); + } + listRelease(node->friends); + node->friends = NULL; + } + return 1; +} + +int clusterManagerSlotCompare(const void *slot1, const void *slot2) { + const char **i1 = (const char **)slot1; + const char **i2 = (const char **)slot2; + return strcmp(*i1, *i2); +} + +static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { + sds signature = NULL; + int node_count = 0, i = 0, name_len = 0; + redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); + if (reply == NULL || reply->type == REDIS_REPLY_ERROR) + goto cleanup; + char *lines = reply->str, *p, *line; + char **node_configs = NULL; + while ((p = strstr(lines, "\n")) != NULL) { + i = 0; + *p = '\0'; + line = lines; + lines = p + 1; + char *nodename = NULL; + int tot_size = 0; + while ((p = strchr(line, ' ')) != NULL) { + *p = '\0'; + char *token = line; + line = p + 1; + if (i == 0) { + nodename = token; + tot_size = p - token; + name_len = tot_size; + } else if (i == 8) break; + i++; + } + if (i != 8) continue; + if (nodename == NULL) continue; + int remaining = strlen(line); + if (remaining == 0) continue; + char **slots = NULL; + int c = 0; + //TODO: just while(remaining) && assign p inside the block + while ((p = strchr(line, ' ')) != NULL || remaining) { + if (p == NULL) p = line + remaining; + int size = (p - line); + remaining -= size; + tot_size += size; + char *slotsdef = line; + *p = '\0'; + if (remaining) line = p + 1; + else line = p; + if (slotsdef[0] != '[') { + c++; + slots = zrealloc(slots, (c * sizeof(char *))); + slots[c - 1] = slotsdef; + } + } + if (c > 0) { + if (c > 1) + qsort(slots, c, sizeof(char *), clusterManagerSlotCompare); + node_count++; + node_configs = + zrealloc(node_configs, (node_count * sizeof(char *))); + tot_size += (sizeof(char) * (c - 1)); + char *cfg = zmalloc((sizeof(char) * tot_size) + 1); + memcpy(cfg, nodename, name_len); + char *sp = cfg + name_len; + *(sp++) = ':'; + for (i = 0; i < c; i++) { + 
if (i > 0) *(sp++) = '|'; + int slen = strlen(slots[i]); + memcpy(sp, slots[i], slen); + sp += slen; + } + *(sp++) = '\0'; + node_configs[node_count - 1] = cfg; + } + zfree(slots); + } + if (node_count > 0) { + if (node_count > 1) { + qsort(node_configs, node_count, sizeof(char *), + clusterManagerSlotCompare); + } + signature = sdsempty(); + for (i = 0; i < node_count; i++) { + if (i > 0) signature = sdscatprintf(signature, "%c", '|'); + signature = sdscatfmt(signature, "%s", node_configs[i]); + } + } +cleanup: + if (reply != NULL) freeReplyObject(reply); + for (i = 0; i < node_count; i++) zfree(node_configs[i]); + zfree(node_configs); + return signature; +} + +static int clusterManagerIsConfigConsistent(void) { + if (cluster_manager.nodes == NULL) return 0; + int consistent = 0; + sds first_cfg = NULL; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + sds cfg = clusterManagerGetConfigSignature(node); + if (cfg == NULL) { + consistent = 0; + break; + } + if (first_cfg == NULL) first_cfg = cfg; + else { + consistent = !sdscmp(first_cfg, cfg); + sdsfree(cfg); + if (!consistent) break; + } + } + if (first_cfg != NULL) sdsfree(first_cfg); + return consistent; +} + +static void clusterManagerCheckCluster(int quiet) { + listNode *ln = listFirst(cluster_manager.nodes); + if (!ln) return; + clusterManagerNode *node = ln->value; + printf(">>> Performing Cluster Check (using node %s:%d)\n", + node->ip, node->port); + if (!quiet) clusterManagerShowNodes(); + if (!clusterManagerIsConfigConsistent()) + printf("[ERR] Nodes don't agree about configuration!\n"); //TODO: in redis-trib this error is added to @errors array + else + printf("[OK] All nodes agree about slots configuration.\n"); + //TODO:check_open_slots + //TODO:check_slots_coverage +} + static void clusterManagerMode(clusterManagerCommandProc *proc) { int argc = config.cluster_manager_command.argc; char **argv = 
config.cluster_manager_command.argv; - if (!proc(argc, argv)) { - sdsfree(config.hostip); - sdsfree(config.mb_delim); - exit(1); - } + cluster_manager.nodes = NULL; + if (!proc(argc, argv)) goto cluster_manager_err; + freeClusterManager(); exit(0); +cluster_manager_err: + freeClusterManager(); + sdsfree(config.hostip); + sdsfree(config.mb_delim); + exit(1); +} + +/* Cluster Manager Commands */ + +static int clusterManagerCommandCreate(int argc, char **argv) { + printf("Cluster Manager: Creating Cluster\n"); + int i, j; + cluster_manager.nodes = listCreate(); + for (i = 0; i < argc; i++) { + char *addr = argv[i]; + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c == NULL) { + fprintf(stderr, "Invalid address format: %s\n", addr); + return 0; + } + *c = '\0'; + char *ip = addr; + int port = atoi(++c); + clusterManagerNode *node = clusterManagerNewNode(ip, port); + node->context = redisConnect(ip, port); + if (node->context->err) { + fprintf(stderr,"Could not connect to Redis at "); + fprintf(stderr,"%s:%d: %s\n", ip, port, node->context->errstr); + freeClusterManagerNode(node); + return 0; + } + char *err = NULL; + if (!clusterManagerNodeIsCluster(node, &err)) { + char *msg = (err ? err : "is not configured as a cluster node."); + fprintf(stderr, "[ERR] Node %s:%d %s\n", ip, port, msg); + if (err) zfree(err); + freeClusterManagerNode(node); + return 0; + } + err = NULL; + if (!clusterManagerNodeLoadInfo(node, 0, &err)) { + if (err) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + zfree(err); + } + freeClusterManagerNode(node); + return 0; + } + err = NULL; + if (!clusterManagerNodeIsEmpty(node, &err)) { + char *msg; + if (err) msg = err; + else { + msg = " is not empty. 
Either the node already knows other " + "nodes (check with CLUSTER NODES) or contains some " + "key in database 0."; + } + fprintf(stderr, "[ERR] Node %s:%d %s\n", ip, port, msg); + if (err) zfree(err); + freeClusterManagerNode(node); + return 0; + } + listAddNodeTail(cluster_manager.nodes, node); + } + int node_len = cluster_manager.nodes->len; + int replicas = config.cluster_manager_command.replicas; + int masters_count = CLUSTER_MANAGER_MASTERS_COUNT(node_len, replicas); + if (masters_count < 3) { + fprintf(stderr, + "*** ERROR: Invalid configuration for cluster creation.\n"); + fprintf(stderr, + "*** Redis Cluster requires at least 3 master nodes.\n"); + fprintf(stderr, + "*** This is not possible with %d nodes and %d replicas per node.", + node_len, replicas); + fprintf(stderr, "\n*** At least %d nodes are required.\n", + (3 * (replicas + 1))); + return 0; + } + printf(">>> Performing hash slots allocation on %d nodes...\n", node_len); + int interleaved_len = 0, ips_len = 0; + clusterManagerNode **interleaved = zcalloc(node_len*sizeof(**interleaved)); + char **ips = zcalloc(node_len * sizeof(char*)); + clusterManagerNodeArray *ip_nodes = zcalloc(node_len * sizeof(*ip_nodes)); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + int found = 0; + for (i = 0; i < ips_len; i++) { + char *ip = ips[i]; + if (!strcmp(ip, n->ip)) { + found = 1; + break; + } + } + if (!found) { + ips[ips_len++] = n->ip; + } + clusterManagerNodeArray *node_array = &(ip_nodes[i]); + if (node_array->nodes == NULL) + CLUSTER_MANAGER_NODEARRAY_INIT(node_array, node_len); + CLUSTER_MANAGER_NODEARRAY_ADD(node_array, n); + } + while (interleaved_len < node_len) { + for (i = 0; i < ips_len; i++) { + clusterManagerNodeArray *node_array = &(ip_nodes[i]); + if (node_array->count > 0) { + clusterManagerNode *n; + CLUSTER_MANAGER_NODEARRAY_SHIFT(node_array, n); + interleaved[interleaved_len++] = n; + 
} + } + } + clusterManagerNode **masters = interleaved; + interleaved += masters_count; + interleaved_len -= masters_count; + float slots_per_node = CLUSTER_MANAGER_SLOTS / (float) masters_count; + long first = 0; + float cursor = 0.0f; + for (i = 0; i < masters_count; i++) { + clusterManagerNode *master = masters[i]; + long last = lround(cursor + slots_per_node - 1); + if (last > CLUSTER_MANAGER_SLOTS || i == (masters_count - 1)) + last = CLUSTER_MANAGER_SLOTS - 1; + if (last < first) last = first; + printf("Master[%d] -> Slots %lu - %lu\n", i, first, last); + master->slots_count = 0; + for (j = first; j <= last; j++) { + master->slots[j] = 1; + master->slots_count++; + } + master->dirty = 1; + first = last + 1; + cursor += slots_per_node; + } + + int assign_unused = 0, available_count = interleaved_len; +assign_replicas: + for (i = 0; i < masters_count; i++) { + clusterManagerNode *master = masters[i]; + int assigned_replicas = 0; + while (assigned_replicas < replicas) { + if (available_count == 0) break; + clusterManagerNode *found = NULL, *slave = NULL; + int firstNodeIdx = -1; + for (j = 0; j < interleaved_len; j++) { + clusterManagerNode *n = interleaved[j]; + if (n == NULL) continue; + if (strcmp(n->ip, master->ip)) { + found = n; + interleaved[j] = NULL; + break; + } + if (firstNodeIdx < 0) firstNodeIdx = j; + } + if (found) slave = found; + else if (firstNodeIdx >= 0) { + slave = interleaved[firstNodeIdx]; + interleaved_len -= (interleaved - (interleaved + firstNodeIdx)); + interleaved += (firstNodeIdx + 1); + } + if (slave != NULL) { + assigned_replicas++; + available_count--; + slave->replicate = sdsnew(master->name); + slave->dirty = 1; + } else break; + printf("Adding replica %s:%d to %s:%d\n", slave->ip, slave->port, + master->ip, master->port); + if (assign_unused) break; + } + } + if (!assign_unused && available_count > 0) { + assign_unused = 1; + printf("Adding extra replicas...\n"); + goto assign_replicas; + } + for (i = 0; i < ips_len; i++) { + 
clusterManagerNodeArray *node_array = ip_nodes + i; + CLUSTER_MANAGER_NODEARRAY_RESET(node_array); + } + clusterManagerOptimizeAntiAffinity(ip_nodes, ips_len); + clusterManagerShowNodes(); + printf("Can I set the above configuration? %s", "(type 'yes' to accept): "); + fflush(stdout); + char buf[4]; + int nread = read(fileno(stdin),buf,4); + buf[3] = '\0'; + if (nread != 0 && !strcmp("yes", buf)) { + printf("\nFlushing configuration!\n"); + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + char *err = NULL; + int flushed = clusterManagerFlushNodeConfig(node, &err); + if (!flushed && node->dirty && !node->replicate) { + if (err != NULL) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + zfree(err); + } + goto cmd_err; + } + } + printf(">>> Nodes configuration updated\n"); + printf(">>> Assign a different config epoch to each node\n"); + int config_epoch = 1; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + redisReply *reply = NULL; + reply = CLUSTER_MANAGER_COMMAND(node, + "cluster set-config-epoch %d", + config_epoch++); + if (reply != NULL) freeReplyObject(reply); + } + printf(">>> Sending CLUSTER MEET messages to join the cluster\n"); + clusterManagerNode *first = NULL; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + if (first == NULL) { + first = node; + continue; + } + redisReply *reply = NULL; + reply = CLUSTER_MANAGER_COMMAND(node, "cluster meet %s %d", + first->ip, first->port); + if (reply != NULL) freeReplyObject(reply); + } + // Give one second for the join to start, in order to avoid that + // waiting for cluster join will find all the nodes agree about + // the config as they are still empty with unassigned slots. + sleep(1); + clusterManagerWaitForClusterJoin(); + // Useful for the replicas //TODO: create a function for this? 
+ listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + if (!node->dirty) continue; + char *err = NULL; + int flushed = clusterManagerFlushNodeConfig(node, &err); + if (!flushed && !node->replicate) { + if (err != NULL) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + zfree(err); + } + goto cmd_err; + } + } + // Reset Nodes + listRewind(cluster_manager.nodes, &li); + clusterManagerNode *first_node = NULL; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + if (!first_node) first_node = node; + else freeClusterManagerNode(node); + } + listEmpty(cluster_manager.nodes); + if (!clusterManagerLoadInfoFromNode(first_node, 0)) goto cmd_err; //TODO: msg? + clusterManagerCheckCluster(0); + } + /* Free everything */ + zfree(masters); + zfree(ips); + for (i = 0; i < node_len; i++) { + clusterManagerNodeArray *node_array = ip_nodes + i; + CLUSTER_MANAGER_NODEARRAY_FREE(node_array); + } + zfree(ip_nodes); + return 1; +cmd_err: + zfree(masters); + zfree(ips); + for (i = 0; i < node_len; i++) { + clusterManagerNodeArray *node_array = ip_nodes + i; + CLUSTER_MANAGER_NODEARRAY_FREE(node_array); + } + zfree(ip_nodes); + return 0; +} + +static int clusterManagerCommandInfo(int argc, char **argv) { + int port = 0; + char *ip = NULL; + if (argc == 1) { + char *addr = argv[0]; + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c != NULL) { + *c = '\0'; + ip = addr; + port = atoi(++c); + } else goto invalid_args; + } else { + ip = argv[0]; + port = atoi(argv[1]); + } + if (!ip || !port) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + clusterManagerShowInfo(); + return 1; +invalid_args: + fprintf(stderr, "Invalid arguments: you need to pass either a valid " + "address (ie. 120.0.0.1:7000) or space separated IP " + "and port (ie. 
120.0.0.1 7000)\n"); + return 0; +} + +static int clusterManagerCommandCheck(int argc, char **argv) { + int port = 0; + char *ip = NULL; + if (argc == 1) { + char *addr = argv[0]; + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c != NULL) { + *c = '\0'; + ip = addr; + port = atoi(++c); + } else goto invalid_args; + } else { + ip = argv[0]; + port = atoi(argv[1]); + } + if (!ip || !port) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + clusterManagerShowInfo(); + clusterManagerCheckCluster(0); + return 1; +invalid_args: + fprintf(stderr, "Invalid arguments: you need to pass either a valid " + "address (ie. 120.0.0.1:7000) or space separated IP " + "and port (ie. 120.0.0.1 7000)\n"); + return 0; +} + +static int clusterManagerCommandHelp(int argc, char **argv) { + UNUSED(argc); + UNUSED(argv); + int commands_count = sizeof(clusterManagerCommands) / + sizeof(clusterManagerCommandDef); + int i = 0, j; + fprintf(stderr, "Cluster Manager Commands:\n"); + for (; i < commands_count; i++) { + clusterManagerCommandDef *def = &(clusterManagerCommands[i]); + int namelen = strlen(def->name), padlen = 15 - namelen; + fprintf(stderr, " %s", def->name); + for (j = 0; j < padlen; j++) fprintf(stderr, " "); + fprintf(stderr, "%s\n", (def->args ? 
def->args : "")); + //TODO: if (def->options) + } + return 0; } /*------------------------------------------------------------------------------ From 8d343fa60b9049d5fc2d514ea0d5002320143419 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 31 Jan 2018 17:57:16 +0100 Subject: [PATCH 03/66] Added check for open slots (clusterManagerCheckCluster) --- src/redis-cli.c | 162 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 143 insertions(+), 19 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index ef917cca5..456751f58 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -74,6 +74,13 @@ (reconnectingRedisCommand(n->context, __VA_ARGS__)) #define CLUSTER_MANAGER_NODE_INFO(n) (CLUSTER_MANAGER_COMMAND(n, "INFO")) +#define CLUSTER_MANAGER_ERROR(err) do { \ + if (cluster_manager.errors == NULL) \ + cluster_manager.errors = listCreate(); \ + listAddNodeTail(cluster_manager.errors, err); \ + fprintf(stderr, "%s\n", (char *) err); \ +} while(0) + #define CLUSTER_MANAGER_RESET_SLOTS(n) do { \ memset(n->slots, 0, sizeof(n->slots)); \ n->slots_count = 0; \ @@ -137,7 +144,14 @@ int spectrum_palette_mono[] = {0,233,234,235,237,239,241,243,245,247,249,251,253 int *spectrum_palette; int spectrum_palette_size; -/* Cluster Manager command info */ +/* Dict Helpers */ + +static uint64_t dictSdsHash(const void *key); +static int dictSdsKeyCompare(void *privdata, const void *key1, + const void *key2); +static void dictSdsDestructor(void *privdata, void *val); + +/* Cluster Manager Command Info */ typedef struct clusterManagerCommand { char *name; int argc; @@ -196,6 +210,7 @@ static struct config { static struct clusterManager { list *nodes; + list *errors; } cluster_manager; typedef struct clusterManagerNode { @@ -212,6 +227,10 @@ typedef struct clusterManagerNode { uint8_t slots[CLUSTER_MANAGER_SLOTS]; int slots_count; list *friends; + sds *migrating; + sds *importing; + int migrating_count; + int importing_count; } clusterManagerNode; typedef struct 
clusterManagerNodeArray { @@ -221,6 +240,15 @@ typedef struct clusterManagerNodeArray { int count; } clusterManagerNodeArray; +static dictType clusterManagerDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + NULL, /* key destructor */ + dictSdsDestructor /* val destructor */ +}; + static clusterManagerNode *clusterManagerNewNode(char *ip, int port); static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, @@ -1810,13 +1838,22 @@ static void freeClusterManagerNode(clusterManagerNode *node) { if (node->replicate != NULL) sdsfree(node->replicate); if ((node->flags & CLUSTER_MANAGER_FLAG_FRIEND) && node->ip) sdsfree(node->ip); + int i; + if (node->migrating != NULL) { + for (i = 0; i < node->migrating_count; i++) sdsfree(node->migrating[i]); + zfree(node->migrating); + } + if (node->importing != NULL) { + for (i = 0; i < node->importing_count; i++) sdsfree(node->importing[i]); + zfree(node->importing); + } zfree(node); } static void freeClusterManager(void) { + listIter li; + listNode *ln; if (cluster_manager.nodes != NULL) { - listIter li; - listNode *ln; listRewind(cluster_manager.nodes,&li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; @@ -1825,9 +1862,18 @@ static void freeClusterManager(void) { listRelease(cluster_manager.nodes); cluster_manager.nodes = NULL; } + if (cluster_manager.errors != NULL) { + listRewind(cluster_manager.errors,&li); + while ((ln = listNext(&li)) != NULL) { + sds err = ln->value; + sdsfree(err); + } + listRelease(cluster_manager.errors); + cluster_manager.errors = NULL; + } } -static clusterManagerNode *clusterManagerNewNode(char * ip, int port) { +static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { clusterManagerNode *node = zmalloc(sizeof(*node)); node->context = NULL; node->name = NULL; @@ -1840,6 +1886,10 @@ static 
clusterManagerNode *clusterManagerNewNode(char * ip, int port) { node->replicate = NULL; node->dirty = 0; node->friends = NULL; + node->migrating = NULL; + node->importing = NULL; + node->migrating_count = 0; + node->importing_count = 0; CLUSTER_MANAGER_RESET_SLOTS(node); return node; } @@ -1902,17 +1952,9 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, int node_len = cluster_manager.nodes->len; *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); clusterManagerNode **offending_p = *offending; - dictType dtype = { - dictSdsHash, /* hash function */ - NULL, /* key dup */ - NULL, /* val dup */ - dictSdsKeyCompare, /* key compare */ - NULL, /* key destructor */ - dictSdsDestructor /* val destructor */ - }; for (i = 0; i < ip_len; i++) { clusterManagerNodeArray *node_array = &(ipnodes[i]); - dict *related = dictCreate(&dtype, NULL); + dict *related = dictCreate(&clusterManagerDictType, NULL); char *ip = NULL; for (j = 0; j < node_array->len; j++) { clusterManagerNode *node = node_array->nodes[j]; @@ -2291,7 +2333,32 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (remaining) line = p + 1; else line = p; if (slotsdef[0] == '[') { - //TODO: migrating/importing + slotsdef++; + if ((p = strstr(slotsdef, "->-"))) { // Migrating + *p = '\0'; + p += 3; + sds slot = sdsnew(slotsdef); + sds dst = sdsnew(p); + node->migrating_count += 2; + node->migrating = zrealloc(node->migrating, + (node->migrating_count * sizeof(sds))); + node->migrating[node->migrating_count - 2] = + slot; + node->migrating[node->migrating_count - 1] = + dst; + } else if ((p = strstr(slotsdef, "-<-"))) {//Importing + *p = '\0'; + p += 3; + sds slot = sdsnew(slotsdef); + sds src = sdsnew(p); + node->importing_count += 2; + node->importing = zrealloc(node->importing, + (node->importing_count * sizeof(sds))); + node->importing[node->importing_count - 2] = + slot; + node->importing[node->importing_count - 1] = + src; + } } else if ((p = 
strchr(slotsdef, '-')) != NULL) { int start, stop; *p = '\0'; @@ -2529,11 +2596,68 @@ static void clusterManagerCheckCluster(int quiet) { printf(">>> Performing Cluster Check (using node %s:%d)\n", node->ip, node->port); if (!quiet) clusterManagerShowNodes(); - if (!clusterManagerIsConfigConsistent()) - printf("[ERR] Nodes don't agree about configuration!\n"); //TODO: in redis-trib this error is added to @errors array - else - printf("[OK] All nodes agree about slots configuration.\n"); - //TODO:check_open_slots + if (!clusterManagerIsConfigConsistent()) { + sds err = sdsnew("[ERR] Nodes don't agree about configuration!"); + CLUSTER_MANAGER_ERROR(err); + } else printf("[OK] All nodes agree about slots configuration.\n"); + // Check open slots + listIter li; + listRewind(cluster_manager.nodes, &li); + int i; + dict *open_slots = NULL; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->migrating != NULL) { + if (open_slots == NULL) + open_slots = dictCreate(&clusterManagerDictType, NULL); + sds errstr = sdsempty(); + errstr = sdscatprintf(errstr, + "[WARNING] Node %s:%d has slots in " + "migrating state ", + n->ip, + n->port); + for (i = 0; i < n->migrating_count; i += 2) { + sds slot = n->migrating[i]; + dictAdd(open_slots, slot, n->migrating[i + 1]); + char *fmt = (i > 0 ? ",%S" : "%S"); + errstr = sdscatfmt(errstr, fmt, slot); + } + errstr = sdscat(errstr, "."); + CLUSTER_MANAGER_ERROR(errstr); + } + if (n->importing != NULL) { + if (open_slots == NULL) + open_slots = dictCreate(&clusterManagerDictType, NULL); + sds errstr = sdsempty(); + errstr = sdscatprintf(errstr, + "[WARNING] Node %s:%d has slots in " + "importing state ", + n->ip, + n->port); + for (i = 0; i < n->importing_count; i += 2) { + sds slot = n->importing[i]; + dictAdd(open_slots, slot, n->importing[i + 1]); + char *fmt = (i > 0 ? 
",%S" : "%S"); + errstr = sdscatfmt(errstr, fmt, slot); + } + errstr = sdscat(errstr, "."); + CLUSTER_MANAGER_ERROR(errstr); + } + } + if (open_slots != NULL) { + dictIterator *iter = dictGetIterator(open_slots); + dictEntry *entry; + sds errstr = sdsnew("[WARNING] The following slots are open: "); + i = 0; + while ((entry = dictNext(iter)) != NULL) { + sds slot = (sds) dictGetKey(entry); + char *fmt = (i++ > 0 ? ",%S" : "%S"); + errstr = sdscatfmt(errstr, fmt, slot); + } + fprintf(stderr, "%s.\n", (char *) errstr); + sdsfree(errstr); + dictRelease(open_slots); + } //TODO:check_slots_coverage } From 7769aa0f98939df685348d28b3ddab87be09baa1 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 31 Jan 2018 19:25:02 +0100 Subject: [PATCH 04/66] - Cluster Manager: fixed various memory leaks - Cluster Manager: fixed flags assignment in clusterManagerNodeLoadInfo --- src/redis-cli.c | 54 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 456751f58..4c30067b3 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2310,12 +2310,6 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, } if (!flags) goto node_cmd_err; int myself = (strstr(flags, "myself") != NULL); - if (strstr(flags, "noaddr") != NULL) - node->flags |= CLUSTER_MANAGER_FLAG_NOADDR; - if (strstr(flags, "disconnected") != NULL) - node->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT; - if (strstr(flags, "fail") != NULL) - node->flags |= CLUSTER_MANAGER_FLAG_FAIL; clusterManagerNode *currentNode = NULL; if (myself) { node->flags |= CLUSTER_MANAGER_FLAG_MYSELF; @@ -2396,10 +2390,22 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (node->friends == NULL) node->friends = listCreate(); listAddNodeTail(node->friends, currentNode); } - if (name != NULL) currentNode->name = sdsnew(name); + if (name != NULL) { + if (currentNode->name) sdsfree(currentNode->name); + 
currentNode->name = sdsnew(name); + } + if (strstr(flags, "noaddr") != NULL) + currentNode->flags |= CLUSTER_MANAGER_FLAG_NOADDR; + if (strstr(flags, "disconnected") != NULL) + currentNode->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT; + if (strstr(flags, "fail") != NULL) + currentNode->flags |= CLUSTER_MANAGER_FLAG_FAIL; if (strstr(flags, "slave") != NULL) { currentNode->flags |= CLUSTER_MANAGER_FLAG_SLAVE; - if (master_id != NULL) currentNode->replicate = sdsnew(master_id); + if (master_id != NULL) { + if (currentNode->replicate) sdsfree(currentNode->replicate); + currentNode->replicate = sdsnew(master_id); + } } if (config_epoch != NULL) currentNode->current_epoch = atoll(config_epoch); @@ -2442,27 +2448,39 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { freeClusterManagerNode(node); return 0; } + listIter li; + listNode *ln; + if (cluster_manager.nodes != NULL) { + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) + freeClusterManagerNode((clusterManagerNode *) ln->value); + listRelease(cluster_manager.nodes); + } cluster_manager.nodes = listCreate(); listAddNodeTail(cluster_manager.nodes, node); if (node->friends != NULL) { - listIter li; - listNode *ln; listRewind(node->friends, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *friend = ln->value; - if (!friend->ip || !friend->port) continue; + if (!friend->ip || !friend->port) goto invalid_friend; if (!friend->context) friend->context = redisConnect(friend->ip, friend->port); - if (friend->context->err) continue; + if (friend->context->err) goto invalid_friend; e = NULL; if (clusterManagerNodeLoadInfo(friend, 0, &e)) { if (friend->flags & (CLUSTER_MANAGER_FLAG_NOADDR | CLUSTER_MANAGER_FLAG_DISCONNECT | - CLUSTER_MANAGER_FLAG_FAIL)) continue; + CLUSTER_MANAGER_FLAG_FAIL)) + goto invalid_friend; listAddNodeTail(cluster_manager.nodes, friend); - - } else fprintf(stderr,"[ERR] Unable to load info for node %s:%d\n", - friend->ip, 
friend->port); + } else { + fprintf(stderr,"[ERR] Unable to load info for node %s:%d\n", + friend->ip, friend->port); + goto invalid_friend; + } + continue; +invalid_friend: + freeClusterManagerNode(friend); } listRelease(node->friends); node->friends = NULL; @@ -2601,6 +2619,7 @@ static void clusterManagerCheckCluster(int quiet) { CLUSTER_MANAGER_ERROR(err); } else printf("[OK] All nodes agree about slots configuration.\n"); // Check open slots + printf(">>> Check for open slots...\n"); listIter li; listRewind(cluster_manager.nodes, &li); int i; @@ -2836,6 +2855,7 @@ assign_replicas: if (slave != NULL) { assigned_replicas++; available_count--; + if (slave->replicate) sdsfree(slave->replicate); slave->replicate = sdsnew(master->name); slave->dirty = 1; } else break; @@ -2873,7 +2893,7 @@ assign_replicas: zfree(err); } goto cmd_err; - } + } else if (err != NULL) zfree(err); } printf(">>> Nodes configuration updated\n"); printf(">>> Assign a different config epoch to each node\n"); From 6b13b265e402987c56756b532d234a7a36433ae3 Mon Sep 17 00:00:00 2001 From: artix Date: Thu, 1 Feb 2018 17:43:36 +0100 Subject: [PATCH 05/66] Cluster Manager: slots coverage check. 
--- src/redis-cli.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 4c30067b3..51eb137e8 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2607,6 +2607,24 @@ static int clusterManagerIsConfigConsistent(void) { return consistent; } +static int clusterManagerGetCoveredSlots(char *all_slots) { + if (cluster_manager.nodes == NULL) return 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + int totslots = 0, i; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + if (node->slots[i] && !all_slots[i]) { + all_slots[i] = 1; + totslots++; + } + } + } + return totslots; +} + static void clusterManagerCheckCluster(int quiet) { listNode *ln = listFirst(cluster_manager.nodes); if (!ln) return; @@ -2677,7 +2695,19 @@ static void clusterManagerCheckCluster(int quiet) { sdsfree(errstr); dictRelease(open_slots); } - //TODO:check_slots_coverage + printf(">>> Check slots coverage...\n"); + char slots[CLUSTER_MANAGER_SLOTS]; + memset(slots, 0, CLUSTER_MANAGER_SLOTS); + int coverage = clusterManagerGetCoveredSlots(slots); + if (coverage == CLUSTER_MANAGER_SLOTS) + printf("[OK] All %d slots covered.\n", CLUSTER_MANAGER_SLOTS); + else { + sds err = sdsempty(); + err = sdscatprintf(err, "[ERR] Not all %d slots are " + "covered by nodes.\n", + CLUSTER_MANAGER_SLOTS); + CLUSTER_MANAGER_ERROR(err); + } } static void clusterManagerMode(clusterManagerCommandProc *proc) { From c6e8eae7aedc01b96aa591b6a8db374657605a5f Mon Sep 17 00:00:00 2001 From: artix Date: Thu, 1 Feb 2018 20:09:30 +0100 Subject: [PATCH 06/66] Cluster Manager: reply error catch for MEET command --- src/redis-cli.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 51eb137e8..b5c80a5e8 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2949,7 +2949,16 @@ 
assign_replicas: redisReply *reply = NULL; reply = CLUSTER_MANAGER_COMMAND(node, "cluster meet %s %d", first->ip, first->port); - if (reply != NULL) freeReplyObject(reply); + int is_err = 0; + if (reply != NULL) { + if ((is_err = reply->type == REDIS_REPLY_ERROR)) + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, reply->str); + freeReplyObject(reply); + } else { + is_err = 1; + fprintf(stderr, "Failed to send CLUSTER MEET command.\n"); + } + if (is_err) goto cmd_err; } // Give one second for the join to start, in order to avoid that // waiting for cluster join will find all the nodes agree about From 8251a1b736c277ac3dcbfeb7107a571018c668e3 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 7 Feb 2018 11:29:25 +0100 Subject: [PATCH 07/66] Cluster Manager: cluster is considered consistent if only one node has been found --- src/redis-cli.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index b5c80a5e8..7128dd979 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2584,7 +2584,10 @@ cleanup: static int clusterManagerIsConfigConsistent(void) { if (cluster_manager.nodes == NULL) return 0; - int consistent = 0; + int consistent = (listLength(cluster_manager.nodes) <= 1); + // If the Cluster has only one node, it's always consistent + // Does it make sense? 
+ if (consistent) return 1; sds first_cfg = NULL; listIter li; listNode *ln; From bb958098f4213800d236569a487ea6a5fa6ed479 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 7 Feb 2018 12:02:56 +0100 Subject: [PATCH 08/66] ClusterManager: added replicas count to clusterManagerNode --- src/redis-cli.c | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 7128dd979..de7ba2511 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -223,9 +223,11 @@ typedef struct clusterManagerNode { time_t ping_recv; int flags; sds replicate; + list replicas; int dirty; uint8_t slots[CLUSTER_MANAGER_SLOTS]; int slots_count; + int replicas_count; list *friends; sds *migrating; sds *importing; @@ -250,6 +252,7 @@ static dictType clusterManagerDictType = { }; static clusterManagerNode *clusterManagerNewNode(char *ip, int port); +static clusterManagerNode *clusterManagerNodeByName(const char *name); static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err); @@ -265,6 +268,7 @@ static void clusterManagerShowInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); static void clusterManagerWaitForClusterJoin(void); static void clusterManagerCheckCluster(int quiet); + typedef int clusterManagerCommandProc(int argc, char **argv); typedef struct clusterManagerCommandDef { char *name; @@ -1890,10 +1894,31 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { node->importing = NULL; node->migrating_count = 0; node->importing_count = 0; + node->replicas_count = 0; CLUSTER_MANAGER_RESET_SLOTS(node); return node; } +static clusterManagerNode *clusterManagerNodeByName(const char *name) { + if (cluster_manager.nodes == NULL) return NULL; + clusterManagerNode *found = NULL; + sds lcname = sdsempty(); + lcname = sdscpy(lcname, name); + sdstolower(lcname); 
+ listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->name && !sdscmp(n->name, lcname)) { + found = n; + break; + } + } + sdsfree(lcname); + return found; +} + static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); int is_err = 0; @@ -2119,7 +2144,9 @@ static sds clusterManagerNodeInfo(clusterManagerNode *node) { } if (node->replicate != NULL) info = sdscatfmt(info, "\n replicates %S", node->replicate); - //else if () {} //TODO: add replicas info + else if (node->replicas_count) + info = sdscatfmt(info, "\n %U additional replica(s)", + node->replicas_count); return info; } @@ -2485,6 +2512,18 @@ invalid_friend: listRelease(node->friends); node->friends = NULL; } + // Count replicas for each node + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->replicate != NULL) { + clusterManagerNode *master = clusterManagerNodeByName(n->replicate); + if (master == NULL) { + printf("*** WARNING: %s:%d claims to be slave of unknown " + "node ID %s.\n", n->ip, n->port, n->replicate); + } else master->replicas_count++; + } + } return 1; } From 273ba954854df74c80c974a4e1de6f4c1e1cec71 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 9 Feb 2018 13:02:37 +0100 Subject: [PATCH 09/66] Cluster Manager: CLUSTER_MANAGER_NODE_CONNECT macro --- src/redis-cli.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index de7ba2511..fd3bdf988 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -70,6 +70,8 @@ #define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) #define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) (nodes/(replicas + 1)) +#define CLUSTER_MANAGER_NODE_CONNECT(n) \ + (n->context = redisConnect(n->ip, n->port)); 
#define CLUSTER_MANAGER_COMMAND(n,...) \ (reconnectingRedisCommand(n->context, __VA_ARGS__)) #define CLUSTER_MANAGER_NODE_INFO(n) (CLUSTER_MANAGER_COMMAND(n, "INFO")) @@ -2449,7 +2451,7 @@ node_cmd_err: static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { if (node->context == NULL) - node->context = redisConnect(node->ip, node->port); + CLUSTER_MANAGER_NODE_CONNECT(node); if (node->context->err) { fprintf(stderr,"Could not connect to Redis at "); fprintf(stderr,"%s:%d: %s\n", node->ip, node->port, @@ -2491,7 +2493,7 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { clusterManagerNode *friend = ln->value; if (!friend->ip || !friend->port) goto invalid_friend; if (!friend->context) - friend->context = redisConnect(friend->ip, friend->port); + CLUSTER_MANAGER_NODE_CONNECT(friend); if (friend->context->err) goto invalid_friend; e = NULL; if (clusterManagerNodeLoadInfo(friend, 0, &e)) { @@ -2785,7 +2787,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { char *ip = addr; int port = atoi(++c); clusterManagerNode *node = clusterManagerNewNode(ip, port); - node->context = redisConnect(ip, port); + CLUSTER_MANAGER_NODE_CONNECT(node); if (node->context->err) { fprintf(stderr,"Could not connect to Redis at "); fprintf(stderr,"%s:%d: %s\n", ip, port, node->context->errstr); From 46465a79420aeb3d43d3056875f93447e05003c8 Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 13 Feb 2018 12:00:06 +0100 Subject: [PATCH 10/66] Cluster Manager: 'call' command. 
--- src/redis-cli.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/src/redis-cli.c b/src/redis-cli.c index fd3bdf988..308bd08c6 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -286,6 +286,7 @@ static int clusterManagerIsConfigConsistent(void); static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); /* User preferences. */ @@ -1802,6 +1803,8 @@ clusterManagerCommandDef clusterManagerCommands[] = { "cluster-replicas"}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, + {"call", clusterManagerCommandCall, -2, + "host:port command arg arg .. arg", NULL}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} }; @@ -2449,6 +2452,11 @@ node_cmd_err: return 0; } +/* Retrieves info about the cluster using argument 'node' as the starting + * point. All nodes will be loaded inside the cluster_manager.nodes list. + * Warning: if something goes wrong, it will free the starting node before + * returning 0. 
*/ + static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { if (node->context == NULL) CLUSTER_MANAGER_NODE_CONNECT(node); @@ -3115,6 +3123,56 @@ invalid_args: return 0; } +static int clusterManagerCommandCall(int argc, char **argv) { + int port = 0; + char *ip = NULL; + char *addr = argv[0]; + char *c = strrchr(addr, '@'); + int i; + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c != NULL) { + *c = '\0'; + ip = addr; + port = atoi(++c); + } else { + fprintf(stderr, + "Invalid arguments: first agrumnt must be host:port.\n"); + return 0; + } + clusterManagerNode *refnode = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + argc--; + argv++; + size_t *argvlen = zmalloc(argc*sizeof(size_t)); + printf(">>> Calling"); + for (i = 0; i < argc; i++) { + argvlen[i] = strlen(argv[i]); + printf(" %s", argv[i]); + } + printf("\n"); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (!n->context) CLUSTER_MANAGER_NODE_CONNECT(n); + redisReply *reply = NULL; + redisAppendCommandArgv(n->context, argc, (const char **) argv, argvlen); + int status = redisGetReply(n->context, (void **)(&reply)); + if (status != REDIS_OK || reply == NULL ) + printf("%s:%d: Failed!\n", n->ip, n->port); //TODO: better message? 
+ else { + sds formatted_reply = cliFormatReplyTTY(reply, ""); + printf("%s:%d: %s\n", n->ip, n->port, (char *) formatted_reply); + sdsfree(formatted_reply); + } + if (reply != NULL) freeReplyObject(reply); + } + zfree(argvlen); + return 1; +} + static int clusterManagerCommandHelp(int argc, char **argv) { UNUSED(argc); UNUSED(argv); From b0e77e6afaaffc1c638c66b1360d63600a8a351d Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 14 Feb 2018 17:54:46 +0100 Subject: [PATCH 11/66] Cluster Manager: improved cleanup/error handling in various functions --- src/redis-cli.c | 101 +++++++++++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 45 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 308bd08c6..280e6c9e3 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2220,7 +2220,7 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) { redisReply *reply = NULL; void *_reply = NULL; - int is_err = 0; + int is_err = 0, success = 1; int argc; sds *argv = NULL; size_t *argvlen = NULL; @@ -2235,39 +2235,44 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) added++; } } - if (!added) goto node_cmd_err; + if (!added) { + success = 0; + goto cleanup; + } argv = cliSplitArgs(cmd, &argc); - if (argc == 0 || argv == NULL) goto node_cmd_err; + if (argc == 0 || argv == NULL) { + success = 0; + goto cleanup; + } argvlen = zmalloc(argc*sizeof(size_t)); for (i = 0; i < argc; i++) argvlen[i] = sdslen(argv[i]); redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen); - if (redisGetReply(node->context, &_reply) != REDIS_OK) goto node_cmd_err; + if (redisGetReply(node->context, &_reply) != REDIS_OK) { + success = 1; + goto cleanup; + } reply = (redisReply*) _reply; if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { if (is_err && err != NULL) { *err = zmalloc((reply->len + 1) * sizeof(char)); strcpy(*err, reply->str); } - goto node_cmd_err; + success = 0; + goto cleanup; } - 
sdsfree(cmd); - zfree(argvlen); - sdsfreesplitres(argv,argc); - freeReplyObject(reply); - return 1; -node_cmd_err: +cleanup: sdsfree(cmd); zfree(argvlen); if (argv != NULL) sdsfreesplitres(argv,argc); if (reply != NULL) freeReplyObject(reply); - return 0; + return success; } static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { if (!node->dirty) return 0; redisReply *reply = NULL; - int is_err = 0; + int is_err = 0, success = 1; *err = NULL; if (node->replicate != NULL) { reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER REPLICATE %s", @@ -2277,18 +2282,20 @@ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { *err = zmalloc((reply->len + 1) * sizeof(char)); strcpy(*err, reply->str); } - goto node_cmd_err; + success = 0; + goto cleanup; } } else { int added = clusterManagerAddSlots(node, err); - if (!added || *err != NULL) goto node_cmd_err; + if (!added || *err != NULL) { + success = 0; + goto cleanup; + } } node->dirty = 0; - freeReplyObject(reply); - return 1; -node_cmd_err: - freeReplyObject(reply); - return 0; +cleanup: + if (reply != NULL) freeReplyObject(reply); + return success; } static void clusterManagerWaitForClusterJoin(void) { @@ -2305,14 +2312,15 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err) { redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); - int is_err = 0; + int is_err = 0, success = 1; *err = NULL; if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { if (is_err && err != NULL) { *err = zmalloc((reply->len + 1) * sizeof(char)); strcpy(*err, reply->str); } - goto node_cmd_err; + success = 0; + goto cleanup; } int getfriends = (opts & CLUSTER_MANAGER_OPT_GETFRIENDS); char *lines = reply->str, *p, *line; @@ -2340,7 +2348,10 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, } if (i == 8) break; // Slots } - if (!flags) goto node_cmd_err; + if (!flags) { + success = 0; + goto cleanup; + } int 
myself = (strstr(flags, "myself") != NULL); clusterManagerNode *currentNode = NULL; if (myself) { @@ -2406,14 +2417,16 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (addr == NULL) { // TODO: find a better err message fprintf(stderr, "Error: invalid CLUSTER NODES reply\n"); - goto node_cmd_err; + success = 0; + goto cleanup; } char *c = strrchr(addr, '@'); if (c != NULL) *c = '\0'; c = strrchr(addr, ':'); if (c == NULL) { fprintf(stderr, "Error: invalid CLUSTER NODES reply\n"); - goto node_cmd_err; + success = 0; + goto cleanup; } *c = '\0'; int port = atoi(++c); @@ -2445,11 +2458,9 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (ping_recv != NULL) currentNode->ping_recv = atoll(ping_recv); if (!getfriends && myself) break; } - freeReplyObject(reply); - return 1; -node_cmd_err: - freeReplyObject(reply); - return 0; +cleanup: + if (reply) freeReplyObject(reply); + return success; } /* Retrieves info about the cluster using argument 'node' as the starting @@ -2780,7 +2791,7 @@ cluster_manager_err: static int clusterManagerCommandCreate(int argc, char **argv) { printf("Cluster Manager: Creating Cluster\n"); - int i, j; + int i, j, success = 1; cluster_manager.nodes = listCreate(); for (i = 0; i < argc; i++) { char *addr = argv[i]; @@ -2974,7 +2985,8 @@ assign_replicas: CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); zfree(err); } - goto cmd_err; + success = 0; + goto cleanup; } else if (err != NULL) zfree(err); } printf(">>> Nodes configuration updated\n"); @@ -3010,7 +3022,10 @@ assign_replicas: is_err = 1; fprintf(stderr, "Failed to send CLUSTER MEET command.\n"); } - if (is_err) goto cmd_err; + if (is_err) { + success = 0; + goto cleanup; + } } // Give one second for the join to start, in order to avoid that // waiting for cluster join will find all the nodes agree about @@ -3029,7 +3044,8 @@ assign_replicas: CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); zfree(err); } - goto cmd_err; + success = 0; + 
goto cleanup; } } // Reset Nodes @@ -3041,9 +3057,13 @@ assign_replicas: else freeClusterManagerNode(node); } listEmpty(cluster_manager.nodes); - if (!clusterManagerLoadInfoFromNode(first_node, 0)) goto cmd_err; //TODO: msg? + if (!clusterManagerLoadInfoFromNode(first_node, 0)) { + success = 0; + goto cleanup; //TODO: msg? + } clusterManagerCheckCluster(0); } +cleanup: /* Free everything */ zfree(masters); zfree(ips); @@ -3052,16 +3072,7 @@ assign_replicas: CLUSTER_MANAGER_NODEARRAY_FREE(node_array); } zfree(ip_nodes); - return 1; -cmd_err: - zfree(masters); - zfree(ips); - for (i = 0; i < node_len; i++) { - clusterManagerNodeArray *node_array = ip_nodes + i; - CLUSTER_MANAGER_NODEARRAY_FREE(node_array); - } - zfree(ip_nodes); - return 0; + return success; } static int clusterManagerCommandInfo(int argc, char **argv) { From 2e64e25ee1815d3a945d813690471ad3925c3a1a Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 14 Feb 2018 19:29:28 +0100 Subject: [PATCH 12/66] Cluster Manager: colorized output --- src/redis-cli.c | 130 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 95 insertions(+), 35 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 280e6c9e3..6ea44f83f 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -67,6 +67,7 @@ #define REDIS_CLI_HISTFILE_DEFAULT ".rediscli_history" #define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE" #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" + #define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) #define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) (nodes/(replicas + 1)) @@ -80,7 +81,7 @@ if (cluster_manager.errors == NULL) \ cluster_manager.errors = listCreate(); \ listAddNodeTail(cluster_manager.errors, err); \ - fprintf(stderr, "%s\n", (char *) err); \ + clusterManagerLogErr("%s\n", (char *) err); \ } while(0) #define CLUSTER_MANAGER_RESET_SLOTS(n) do { \ @@ -124,7 +125,20 @@ } while(0) #define CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err) \ - 
fprintf(stderr,"Node %s:%d replied with error:\n%s\n", n->ip, n->port, err); + clusterManagerLogErr("Node %s:%d replied with error:\n%s\n", \ + n->ip, n->port, err); + +#define clusterManagerLogInfo(...) \ + clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_INFO,__VA_ARGS__) + +#define clusterManagerLogErr(...) \ + clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_ERR,__VA_ARGS__) + +#define clusterManagerLogWarn(...) \ + clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_WARN,__VA_ARGS__) + +#define clusterManagerLogOk(...) \ + clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_SUCCESS,__VA_ARGS__) #define CLUSTER_MANAGER_FLAG_MYSELF 1 << 0 #define CLUSTER_MANAGER_FLAG_SLAVE 1 << 1 @@ -133,7 +147,22 @@ #define CLUSTER_MANAGER_FLAG_DISCONNECT 1 << 4 #define CLUSTER_MANAGER_FLAG_FAIL 1 << 5 -#define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 +#define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0 +#define CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1 +#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 + +#define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 + +#define CLUSTER_MANAGER_LOG_LVL_INFO 1 +#define CLUSTER_MANAGER_LOG_LVL_WARN 2 +#define CLUSTER_MANAGER_LOG_LVL_ERR 3 +#define CLUSTER_MANAGER_LOG_LVL_SUCCESS 4 + +#define LOG_COLOR_BOLD "29;1m" +#define LOG_COLOR_RED "31;1m" +#define LOG_COLOR_GREEN "32;1m" +#define LOG_COLOR_YELLOW "33;1m" +#define LOG_COLOR_RESET "0m" /* --latency-dist palettes. */ int spectrum_palette_color_size = 19; @@ -270,6 +299,7 @@ static void clusterManagerShowInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); static void clusterManagerWaitForClusterJoin(void); static void clusterManagerCheckCluster(int quiet); +static void clusterManagerLog(int level, const char* fmt, ...); typedef int clusterManagerCommandProc(int argc, char **argv); typedef struct clusterManagerCommandDef { @@ -1267,6 +1297,7 @@ static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { cmd->name = cmdname; cmd->argc = argc; cmd->argv = argc ? 
argv : NULL; + if (isColorTerm()) cmd->flags |= CLUSTER_MANAGER_CMD_FLAG_COLOR; } static int parseOptions(int argc, char **argv) { @@ -2042,7 +2073,8 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, clusterManagerNode **offenders = NULL, **aux; int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); if (score == 0) goto cleanup; - printf(">>> Trying to optimize slaves allocation for anti-affinity\n"); + clusterManagerLogInfo(">>> Trying to optimize slaves allocation " + "for anti-affinity\n"); int node_len = cluster_manager.nodes->len; int maxiter = 500 * node_len; srand(time(NULL)); @@ -2091,12 +2123,15 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, zfree(aux), aux = NULL; score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); char *msg; - if (score == 0) msg = "[OK] Perfect anti-affinity obtained!"; + int perfect = (score == 0); + int log_level = (perfect ? CLUSTER_MANAGER_LOG_LVL_SUCCESS : + CLUSTER_MANAGER_LOG_LVL_WARN); + if (perfect) msg = "[OK] Perfect anti-affinity obtained!"; else if (score >= 10000) msg = ("[WARNING] Some slaves are in the same host as their master"); else msg=("[WARNING] Some slaves of the same master are in the same host"); - printf("%s\n", msg); + clusterManagerLog(log_level, "%s\n", msg); cleanup: zfree(offenders); zfree(aux); @@ -2211,7 +2246,7 @@ static void clusterManagerShowInfo(void) { keys += dbsize; } } - printf("[OK] %d keys in %d masters.\n", keys, masters); + clusterManagerLogOk("[OK] %d keys in %d masters.\n", keys, masters); float keys_per_slot = keys / (float) CLUSTER_MANAGER_SLOTS; printf("%.2f keys per slot on average.\n", keys_per_slot); } @@ -2482,7 +2517,7 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { char *e = NULL; if (!clusterManagerNodeIsCluster(node, &e)) { char *msg = (e ? 
e : "is not configured as a cluster node."); - fprintf(stderr, "[ERR] Node %s:%d %s\n", node->ip, node->port, msg); + clusterManagerLogErr("[ERR] Node %s:%d %s\n",node->ip,node->port,msg); if (e) zfree(e); freeClusterManagerNode(node); return 0; @@ -2522,8 +2557,9 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { goto invalid_friend; listAddNodeTail(cluster_manager.nodes, friend); } else { - fprintf(stderr,"[ERR] Unable to load info for node %s:%d\n", - friend->ip, friend->port); + clusterManagerLogErr("[ERR] Unable to load info for " + "node %s:%d\n", + friend->ip, friend->port); goto invalid_friend; } continue; @@ -2692,15 +2728,18 @@ static void clusterManagerCheckCluster(int quiet) { listNode *ln = listFirst(cluster_manager.nodes); if (!ln) return; clusterManagerNode *node = ln->value; - printf(">>> Performing Cluster Check (using node %s:%d)\n", - node->ip, node->port); + clusterManagerLogInfo(">>> Performing Cluster Check (using node %s:%d)\n", + node->ip, node->port); if (!quiet) clusterManagerShowNodes(); if (!clusterManagerIsConfigConsistent()) { sds err = sdsnew("[ERR] Nodes don't agree about configuration!"); CLUSTER_MANAGER_ERROR(err); - } else printf("[OK] All nodes agree about slots configuration.\n"); + } else { + clusterManagerLogOk("[OK] All nodes agree about slots " + "configuration.\n"); + } // Check open slots - printf(">>> Check for open slots...\n"); + clusterManagerLogInfo(">>> Check for open slots...\n"); listIter li; listRewind(cluster_manager.nodes, &li); int i; @@ -2754,17 +2793,18 @@ static void clusterManagerCheckCluster(int quiet) { char *fmt = (i++ > 0 ? 
",%S" : "%S"); errstr = sdscatfmt(errstr, fmt, slot); } - fprintf(stderr, "%s.\n", (char *) errstr); + clusterManagerLogErr("%s.\n", (char *) errstr); sdsfree(errstr); dictRelease(open_slots); } - printf(">>> Check slots coverage...\n"); + clusterManagerLogInfo(">>> Check slots coverage...\n"); char slots[CLUSTER_MANAGER_SLOTS]; memset(slots, 0, CLUSTER_MANAGER_SLOTS); int coverage = clusterManagerGetCoveredSlots(slots); - if (coverage == CLUSTER_MANAGER_SLOTS) - printf("[OK] All %d slots covered.\n", CLUSTER_MANAGER_SLOTS); - else { + if (coverage == CLUSTER_MANAGER_SLOTS) { + clusterManagerLogOk("[OK] All %d slots covered.\n", + CLUSTER_MANAGER_SLOTS); + } else { sds err = sdsempty(); err = sdscatprintf(err, "[ERR] Not all %d slots are " "covered by nodes.\n", @@ -2773,6 +2813,26 @@ static void clusterManagerCheckCluster(int quiet) { } } +static void clusterManagerLog(int level, const char* fmt, ...) { + int use_colors = + (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COLOR); + if (use_colors) { + printf("\033["); + switch (level) { + case CLUSTER_MANAGER_LOG_LVL_INFO: printf(LOG_COLOR_BOLD); break; + case CLUSTER_MANAGER_LOG_LVL_WARN: printf(LOG_COLOR_YELLOW); break; + case CLUSTER_MANAGER_LOG_LVL_ERR: printf(LOG_COLOR_RED); break; + case CLUSTER_MANAGER_LOG_LVL_SUCCESS: printf(LOG_COLOR_GREEN); break; + default: printf(LOG_COLOR_RESET); break; + } + } + va_list ap; + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); + if (use_colors) printf("\033[" LOG_COLOR_RESET); +} + static void clusterManagerMode(clusterManagerCommandProc *proc) { int argc = config.cluster_manager_command.argc; char **argv = config.cluster_manager_command.argv; @@ -2790,7 +2850,6 @@ cluster_manager_err: /* Cluster Manager Commands */ static int clusterManagerCommandCreate(int argc, char **argv) { - printf("Cluster Manager: Creating Cluster\n"); int i, j, success = 1; cluster_manager.nodes = listCreate(); for (i = 0; i < argc; i++) { @@ -2816,7 +2875,7 @@ static int 
clusterManagerCommandCreate(int argc, char **argv) { char *err = NULL; if (!clusterManagerNodeIsCluster(node, &err)) { char *msg = (err ? err : "is not configured as a cluster node."); - fprintf(stderr, "[ERR] Node %s:%d %s\n", ip, port, msg); + clusterManagerLogErr("[ERR] Node %s:%d %s\n", ip, port, msg); if (err) zfree(err); freeClusterManagerNode(node); return 0; @@ -2835,11 +2894,11 @@ static int clusterManagerCommandCreate(int argc, char **argv) { char *msg; if (err) msg = err; else { - msg = " is not empty. Either the node already knows other " + msg = "is not empty. Either the node already knows other " "nodes (check with CLUSTER NODES) or contains some " "key in database 0."; } - fprintf(stderr, "[ERR] Node %s:%d %s\n", ip, port, msg); + clusterManagerLogErr("[ERR] Node %s:%d %s\n", ip, port, msg); if (err) zfree(err); freeClusterManagerNode(node); return 0; @@ -2850,18 +2909,17 @@ static int clusterManagerCommandCreate(int argc, char **argv) { int replicas = config.cluster_manager_command.replicas; int masters_count = CLUSTER_MANAGER_MASTERS_COUNT(node_len, replicas); if (masters_count < 3) { - fprintf(stderr, - "*** ERROR: Invalid configuration for cluster creation.\n"); - fprintf(stderr, - "*** Redis Cluster requires at least 3 master nodes.\n"); - fprintf(stderr, + clusterManagerLogErr( + "*** ERROR: Invalid configuration for cluster creation.\n" + "*** Redis Cluster requires at least 3 master nodes.\n" "*** This is not possible with %d nodes and %d replicas per node.", node_len, replicas); - fprintf(stderr, "\n*** At least %d nodes are required.\n", - (3 * (replicas + 1))); + clusterManagerLogErr("\n*** At least %d nodes are required.\n", + 3 * (replicas + 1)); return 0; } - printf(">>> Performing hash slots allocation on %d nodes...\n", node_len); + clusterManagerLogInfo(">>> Performing hash slots allocation " + "on %d nodes...\n", node_len); int interleaved_len = 0, ips_len = 0; clusterManagerNode **interleaved = 
zcalloc(node_len*sizeof(**interleaved)); char **ips = zcalloc(node_len * sizeof(char*)); @@ -2989,8 +3047,9 @@ assign_replicas: goto cleanup; } else if (err != NULL) zfree(err); } - printf(">>> Nodes configuration updated\n"); - printf(">>> Assign a different config epoch to each node\n"); + clusterManagerLogInfo(">>> Nodes configuration updated\n"); + clusterManagerLogInfo(">>> Assign a different config epoch to " + "each node\n"); int config_epoch = 1; listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { @@ -3001,7 +3060,8 @@ assign_replicas: config_epoch++); if (reply != NULL) freeReplyObject(reply); } - printf(">>> Sending CLUSTER MEET messages to join the cluster\n"); + clusterManagerLogInfo(">>> Sending CLUSTER MEET messages to join " + "the cluster\n"); clusterManagerNode *first = NULL; listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { @@ -3156,7 +3216,7 @@ static int clusterManagerCommandCall(int argc, char **argv) { argc--; argv++; size_t *argvlen = zmalloc(argc*sizeof(size_t)); - printf(">>> Calling"); + clusterManagerLogInfo(">>> Calling"); for (i = 0; i < argc; i++) { argvlen[i] = strlen(argv[i]); printf(" %s", argv[i]); From 4a89b1b7c5597ecc5b8057524aba1696fce7c71b Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 20 Feb 2018 12:01:13 +0100 Subject: [PATCH 13/66] - Fixed bug in clusterManagerGetAntiAffinityScore - Code improvements --- src/redis-cli.c | 57 ++++++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 6ea44f83f..b222f5a88 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -505,7 +505,6 @@ static int dictSdsKeyCompare(void *privdata, const void *key1, static void dictSdsDestructor(void *privdata, void *val) { DICT_NOTUSED(privdata); - sdsfree(val); } @@ -2008,11 +2007,13 @@ result: static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, int ip_len, clusterManagerNode 
***offending, int *offending_len) { - assert(offending != NULL); int score = 0, i, j; int node_len = cluster_manager.nodes->len; - *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); - clusterManagerNode **offending_p = *offending; + clusterManagerNode **offending_p = NULL; + if (offending != NULL) { + *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); + offending_p = *offending; + } for (i = 0; i < ip_len; i++) { clusterManagerNodeArray *node_array = &(ipnodes[i]); dict *related = dictCreate(&clusterManagerDictType, NULL); @@ -2021,23 +2022,21 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, clusterManagerNode *node = node_array->nodes[j]; if (node == NULL) continue; if (!ip) ip = node->ip; - sds types; - if (!node->replicate) { - assert(node->name != NULL); - dictEntry *entry = dictFind(related, node->name); - if (entry) types = (sds) dictGetVal(entry); - else types = sdsempty(); - types = sdscatprintf(types, "m%s", types); - dictReplace(related, node->name, types); - } else { - dictEntry *entry = dictFind(related, node->replicate); - if (entry) types = (sds) dictGetVal(entry); - else { - types = sdsempty(); - dictAdd(related, node->replicate, types); - } - sdscat(types, "s"); + sds types, otypes; + // We always use the Master ID as key + sds key = (!node->replicate ? node->name : node->replicate); + assert(key != NULL); + dictEntry *entry = dictFind(related, key); + if (entry) otypes = (sds) dictGetVal(entry); + else { + otypes = sdsempty(); + dictAdd(related, key, otypes); } + // Master type 'm' is always set as the first character of the + // types string. 
+ if (!node->replicate) types = sdscatprintf(otypes, "m%s", otypes); + else types = sdscat(otypes, "s"); + if (types != otypes) dictReplace(related, key, types); } dictIterator *iter = dictGetIterator(related); dictEntry *entry; @@ -2048,6 +2047,7 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, if (typeslen < 2) continue; if (types[0] == 'm') score += (10000 * (typeslen - 1)); else score += (1 * typeslen); + if (offending == NULL) continue; listIter li; listNode *ln; listRewind(cluster_manager.nodes, &li); @@ -2056,11 +2056,12 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, if (n->replicate == NULL) continue; if (!strcmp(n->replicate, name) && !strcmp(n->ip, ip)) { *(offending_p++) = n; + if (offending_len != NULL) (*offending_len)++; break; } } } - if (offending_len != NULL) *offending_len = offending_p - *offending; + //if (offending_len != NULL) *offending_len = offending_p - *offending; dictReleaseIterator(iter); dictRelease(related); } @@ -2070,8 +2071,8 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, int ip_len) { - clusterManagerNode **offenders = NULL, **aux; - int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); + clusterManagerNode **offenders = NULL; + int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, NULL, NULL); if (score == 0) goto cleanup; clusterManagerLogInfo(">>> Trying to optimize slaves allocation " "for anti-affinity\n"); @@ -2088,7 +2089,8 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, &offending_len); if (score == 0) break; int rand_idx = rand() % offending_len; - clusterManagerNode *first = offenders[rand_idx], *second; + clusterManagerNode *first = offenders[rand_idx], + *second = NULL; clusterManagerNode **other_replicas = zcalloc((node_len - 1) * sizeof(*other_replicas)); int 
other_replicas_count = 0; @@ -2110,9 +2112,8 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, *second_master = second->replicate; first->replicate = second_master, first->dirty = 1; second->replicate = first_master, second->dirty = 1; - zfree(aux), aux = NULL; int new_score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, - &aux, NULL); + NULL, NULL); if (new_score > score) { first->replicate = first_master; second->replicate = second_master; @@ -2120,8 +2121,7 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, zfree(other_replicas); maxiter--; } - zfree(aux), aux = NULL; - score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); + score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, NULL, NULL); char *msg; int perfect = (score == 0); int log_level = (perfect ? CLUSTER_MANAGER_LOG_LVL_SUCCESS : @@ -2134,7 +2134,6 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, clusterManagerLog(log_level, "%s\n", msg); cleanup: zfree(offenders); - zfree(aux); } static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { From 749e0935916dd59a9236eb18995a8f3218c6d839 Mon Sep 17 00:00:00 2001 From: artix Date: Thu, 22 Feb 2018 18:32:39 +0100 Subject: [PATCH 14/66] Cluster Manager: - Almost all Cluster Manager related code moved to the same section. - Many macros converted to functions - Added various comments - Little code restyling --- src/redis-cli.c | 460 ++++++++++++++++++++++++++++-------------------- 1 file changed, 271 insertions(+), 189 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index b222f5a88..b72c31cff 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -75,54 +75,8 @@ (n->context = redisConnect(n->ip, n->port)); #define CLUSTER_MANAGER_COMMAND(n,...) 
\ (reconnectingRedisCommand(n->context, __VA_ARGS__)) -#define CLUSTER_MANAGER_NODE_INFO(n) (CLUSTER_MANAGER_COMMAND(n, "INFO")) -#define CLUSTER_MANAGER_ERROR(err) do { \ - if (cluster_manager.errors == NULL) \ - cluster_manager.errors = listCreate(); \ - listAddNodeTail(cluster_manager.errors, err); \ - clusterManagerLogErr("%s\n", (char *) err); \ -} while(0) - -#define CLUSTER_MANAGER_RESET_SLOTS(n) do { \ - memset(n->slots, 0, sizeof(n->slots)); \ - n->slots_count = 0; \ -} while(0) - -#define CLUSTER_MANAGER_NODEARRAY_INIT(array, alloc_len) do { \ - array->nodes = zcalloc(alloc_len * sizeof(clusterManagerNode*));\ - array->alloc = array->nodes; \ - array->len = alloc_len; \ - array->count = 0; \ -} while(0) - -#define CLUSTER_MANAGER_NODEARRAY_RESET(array) do { \ - if (array->nodes > array->alloc) { \ - array->len = array->nodes - array->alloc; \ - array->nodes = array->alloc; \ - array->count = 0; \ - int i = 0; \ - for(; i < array->len; i++) { \ - if (array->nodes[i] != NULL) array->count++;\ - } \ - } \ -} while(0) - -#define CLUSTER_MANAGER_NODEARRAY_FREE(array) zfree(array->alloc) - -#define CLUSTER_MANAGER_NODEARRAY_SHIFT(array, nodeptr) do {\ - assert(array->nodes < (array->nodes + array->len)); \ - if (*array->nodes != NULL) array->count--; \ - nodeptr = *array->nodes; \ - array->nodes++; \ - array->len--; \ -} while(0) - -#define CLUSTER_MANAGER_NODEARRAY_ADD(array, nodeptr) do { \ - assert(array->nodes < (array->nodes + array->len)); \ - assert(nodeptr != NULL); \ - array->nodes[array->count++] = nodeptr; \ -} while(0) +#define CLUSTER_MANAGER_NODE_ARRAY_FREE(array) zfree(array->alloc) #define CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err) \ clusterManagerLogErr("Node %s:%d replied with error:\n%s\n", \ @@ -190,6 +144,7 @@ typedef struct clusterManagerCommand { int flags; int replicas; } clusterManagerCommand; +static void createClusterManagerCommand(char *cmdname, int argc, char **argv); static redisContext *context; @@ -237,88 +192,6 @@ static struct 
config { clusterManagerCommand cluster_manager_command; } config; -/* Cluster Manager */ - -static struct clusterManager { - list *nodes; - list *errors; -} cluster_manager; - -typedef struct clusterManagerNode { - redisContext *context; - sds name; - char *ip; - int port; - uint64_t current_epoch; - time_t ping_sent; - time_t ping_recv; - int flags; - sds replicate; - list replicas; - int dirty; - uint8_t slots[CLUSTER_MANAGER_SLOTS]; - int slots_count; - int replicas_count; - list *friends; - sds *migrating; - sds *importing; - int migrating_count; - int importing_count; -} clusterManagerNode; - -typedef struct clusterManagerNodeArray { - clusterManagerNode **nodes; - clusterManagerNode **alloc; - int len; - int count; -} clusterManagerNodeArray; - -static dictType clusterManagerDictType = { - dictSdsHash, /* hash function */ - NULL, /* key dup */ - NULL, /* val dup */ - dictSdsKeyCompare, /* key compare */ - NULL, /* key destructor */ - dictSdsDestructor /* val destructor */ -}; - -static clusterManagerNode *clusterManagerNewNode(char *ip, int port); -static clusterManagerNode *clusterManagerNodeByName(const char *name); -static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); -static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, - char **err); -static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts); -static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err); -static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, - int ip_len, clusterManagerNode ***offending, int *offending_len); -static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, - int ip_len); -static sds clusterManagerNodeInfo(clusterManagerNode *node); -static void clusterManagerShowNodes(void); -static void clusterManagerShowInfo(void); -static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); -static void clusterManagerWaitForClusterJoin(void); 
-static void clusterManagerCheckCluster(int quiet); -static void clusterManagerLog(int level, const char* fmt, ...); - -typedef int clusterManagerCommandProc(int argc, char **argv); -typedef struct clusterManagerCommandDef { - char *name; - clusterManagerCommandProc *proc; - int arity; - char *args; - char *options; -} clusterManagerCommandDef; -static int clusterManagerIsConfigConsistent(void); - -/* Cluster Manager commands. */ - -static int clusterManagerCommandCreate(int argc, char **argv); -static int clusterManagerCommandInfo(int argc, char **argv); -static int clusterManagerCommandCheck(int argc, char **argv); -static int clusterManagerCommandCall(int argc, char **argv); -static int clusterManagerCommandHelp(int argc, char **argv); - /* User preferences. */ static struct pref { int hints; @@ -1291,14 +1164,6 @@ static redisReply *reconnectingRedisCommand(redisContext *c, const char *fmt, .. * User interface *--------------------------------------------------------------------------- */ -static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { - clusterManagerCommand *cmd = &config.cluster_manager_command; - cmd->name = cmdname; - cmd->argc = argc; - cmd->argv = argc ? argv : NULL; - if (isColorTerm()) cmd->flags |= CLUSTER_MANAGER_CMD_FLAG_COLOR; -} - static int parseOptions(int argc, char **argv) { int i; @@ -1828,6 +1693,100 @@ static int evalMode(int argc, char **argv) { * Cluster Manager mode *--------------------------------------------------------------------------- */ +/* The Cluster Manager global structure */ +static struct clusterManager { + list *nodes; /* List of nodes in the configuration. 
*/ + list *errors; +} cluster_manager; + +typedef struct clusterManagerNode { + redisContext *context; + sds name; + char *ip; + int port; + uint64_t current_epoch; + time_t ping_sent; + time_t ping_recv; + int flags; + sds replicate; /* Master ID if node is a slave */ + list replicas; + int dirty; /* Node has changes that can be flushed */ + uint8_t slots[CLUSTER_MANAGER_SLOTS]; + int slots_count; + int replicas_count; + list *friends; + sds *migrating; + sds *importing; + int migrating_count; + int importing_count; +} clusterManagerNode; + +/* Data structure used to represent a sequence of nodes. */ +typedef struct clusterManagerNodeArray { + clusterManagerNode **nodes; /* Actual nodes array */ + clusterManagerNode **alloc; /* Pointer to the allocated memory */ + int len; /* Actual length of the array */ + int count; /* Non-NULL nodes count */ +} clusterManagerNodeArray; + +static dictType clusterManagerDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + NULL, /* key destructor */ + dictSdsDestructor /* val destructor */ +}; + +typedef int clusterManagerCommandProc(int argc, char **argv); + +/* Cluster Manager helper functions */ + +static clusterManagerNode *clusterManagerNewNode(char *ip, int port); +static clusterManagerNode *clusterManagerNodeByName(const char *name); +static void clusterManagerNodeResetSlots(clusterManagerNode *node); +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); +static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, + char **err); +static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts); +static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err); +static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, + int ip_count, clusterManagerNode ***offending, int *offending_len); +static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray 
*ipnodes, + int ip_count); +static sds clusterManagerNodeInfo(clusterManagerNode *node); +static void clusterManagerShowNodes(void); +static void clusterManagerShowInfo(void); +static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); +static void clusterManagerWaitForClusterJoin(void); +static void clusterManagerCheckCluster(int quiet); +static void clusterManagerLog(int level, const char* fmt, ...); +static int clusterManagerIsConfigConsistent(void); +static void clusterManagerOnError(sds err); +static void clusterManagerNodeArrayInit(clusterManagerNodeArray *array, + int len); +static void clusterManagerNodeArrayReset(clusterManagerNodeArray *array); +static void clusterManagerNodeArrayShift(clusterManagerNodeArray *array, + clusterManagerNode **nodeptr); +static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, + clusterManagerNode *node); + +/* Cluster Manager commands. */ + +static int clusterManagerCommandCreate(int argc, char **argv); +static int clusterManagerCommandInfo(int argc, char **argv); +static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandCall(int argc, char **argv); +static int clusterManagerCommandHelp(int argc, char **argv); + +typedef struct clusterManagerCommandDef { + char *name; + clusterManagerCommandProc *proc; + int arity; + char *args; + char *options; +} clusterManagerCommandDef; + clusterManagerCommandDef clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", "cluster-replicas"}, @@ -1838,6 +1797,16 @@ clusterManagerCommandDef clusterManagerCommands[] = { {"help", clusterManagerCommandHelp, 0, NULL, NULL} }; + +static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { + clusterManagerCommand *cmd = &config.cluster_manager_command; + cmd->name = cmdname; + cmd->argc = argc; + cmd->argv = argc ? 
argv : NULL; + if (isColorTerm()) cmd->flags |= CLUSTER_MANAGER_CMD_FLAG_COLOR; +} + + static clusterManagerCommandProc *validateClusterManagerCommand(void) { int i, commands_count = sizeof(clusterManagerCommands) / sizeof(clusterManagerCommandDef); @@ -1930,7 +1899,7 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { node->migrating_count = 0; node->importing_count = 0; node->replicas_count = 0; - CLUSTER_MANAGER_RESET_SLOTS(node); + clusterManagerNodeResetSlots(node); return node; } @@ -1954,41 +1923,49 @@ static clusterManagerNode *clusterManagerNodeByName(const char *name) { return found; } -static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { - redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); - int is_err = 0; - *err = NULL; - if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { +static void clusterManagerNodeResetSlots(clusterManagerNode *node) { + memset(node->slots, 0, sizeof(node->slots)); + node->slots_count = 0; +} + +static redisReply *clusterManagerGetNodeRedisInfo(clusterManagerNode *node, + char **err) +{ + redisReply *info = CLUSTER_MANAGER_COMMAND(node, "INFO"); + if (err != NULL) *err = NULL; + if (info == NULL) return NULL; + if (info->type == REDIS_REPLY_ERROR) { + if (err != NULL) { *err = zmalloc((info->len + 1) * sizeof(char)); strcpy(*err, info->str); } freeReplyObject(info); - return 0; + return NULL; } + return info; +} + +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { + redisReply *info = clusterManagerGetNodeRedisInfo(node, err); + if (info == NULL) return 0; int is_cluster = (int) getLongInfoField(info->str, "cluster_enabled"); freeReplyObject(info); return is_cluster; } +/* Checks whether the node is empty. 
Node is considered not-empty if it has + * some key or if it already knows other nodes */ static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) { - redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); + redisReply *info = clusterManagerGetNodeRedisInfo(node, err); int is_err = 0, is_empty = 1; - *err = NULL; - if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { - *err = zmalloc((info->len + 1) * sizeof(char)); - strcpy(*err, info->str); - } - is_empty = 0; - goto result; - } + if (info == NULL) return 0; if (strstr(info->str, "db0:") != NULL) { is_empty = 0; goto result; } freeReplyObject(info); info = CLUSTER_MANAGER_COMMAND(node, "CLUSTER INFO"); + if (err != NULL) *err = NULL; if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { if (is_err && err != NULL) { *err = zmalloc((info->len + 1) * sizeof(char)); @@ -2004,8 +1981,37 @@ result: return is_empty; } +/* Return the anti-affinity score, which is a measure of the amount of + * violations of anti-affinity in the current cluster layout, that is, how + * badly the masters and slaves are distributed in the different IP + * addresses so that slaves of the same master are not in the master + * host and are also in different hosts. + * + * The score is calculated as follows: + * + * SAME_AS_MASTER = 10000 * each slave in the same IP of its master. + * SAME_AS_SLAVE = 1 * each slave having the same IP as another slave + of the same master. + * FINAL_SCORE = SAME_AS_MASTER + SAME_AS_SLAVE + * + * So a greater score means a worse anti-affinity level, while zero + * means perfect anti-affinity. + * + * The anti affinity optimizator will try to get a score as low as + * possible. Since we do not want to sacrifice the fact that slaves should + * not be in the same host as the master, we assign 10000 times the score + * to this violation, so that we'll optimize for the second factor only + * if it does not impact the first one. 
+ * + * The ipnodes argument is an array of clusterManagerNodeArray, one for + * each IP, while ip_count is the total number of IPs in the configuration. + * + * The function returns the above score, and the list of + * offending slaves can be stored into the 'offending' argument, + * so that the optimizer can try changing the configuration of the + * slaves violating the anti-affinity goals. */ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, - int ip_len, clusterManagerNode ***offending, int *offending_len) + int ip_count, clusterManagerNode ***offending, int *offending_len) { int score = 0, i, j; int node_len = cluster_manager.nodes->len; @@ -2014,7 +2020,10 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); offending_p = *offending; } - for (i = 0; i < ip_len; i++) { + /* For each set of nodes in the same host, split by + * related nodes (masters and slaves which are involved in + * replication of each other) */ + for (i = 0; i < ip_count; i++) { clusterManagerNodeArray *node_array = &(ipnodes[i]); dict *related = dictCreate(&clusterManagerDictType, NULL); char *ip = NULL; @@ -2038,6 +2047,8 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, else types = sdscat(otypes, "s"); if (types != otypes) dictReplace(related, key, types); } + /* Now it's trivial to check, for each related group having the + * same host, what is their local score. */ dictIterator *iter = dictGetIterator(related); dictEntry *entry; while ((entry = dictNext(iter)) != NULL) { @@ -2048,6 +2059,7 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, if (types[0] == 'm') score += (10000 * (typeslen - 1)); else score += (1 * typeslen); if (offending == NULL) continue; + /* Populate the list of offending nodes. 
*/ listIter li; listNode *ln; listRewind(cluster_manager.nodes, &li); @@ -2069,15 +2081,16 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, } static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, - int ip_len) + int ip_count) { clusterManagerNode **offenders = NULL; - int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, NULL, NULL); + int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_count, + NULL, NULL); if (score == 0) goto cleanup; clusterManagerLogInfo(">>> Trying to optimize slaves allocation " "for anti-affinity\n"); int node_len = cluster_manager.nodes->len; - int maxiter = 500 * node_len; + int maxiter = 500 * node_len; // Effort is proportional to cluster size... srand(time(NULL)); while (maxiter > 0) { int offending_len = 0; @@ -2085,9 +2098,14 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, zfree(offenders); offenders = NULL; } - score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &offenders, + score = clusterManagerGetAntiAffinityScore(ipnodes, + ip_count, + &offenders, &offending_len); - if (score == 0) break; + if (score == 0) break; // Optimal anti affinity reached + /* We'll try to randomly swap a slave's assigned master causing + * an affinity problem with another random slave, to see if we + * can improve the affinity. */ int rand_idx = rand() % offending_len; clusterManagerNode *first = offenders[rand_idx], *second = NULL; @@ -2112,8 +2130,12 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, *second_master = second->replicate; first->replicate = second_master, first->dirty = 1; second->replicate = first_master, second->dirty = 1; - int new_score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, + int new_score = clusterManagerGetAntiAffinityScore(ipnodes, + ip_count, NULL, NULL); + /* If the change actually makes thing worse, revert. 
Otherwise + * leave as it is because the best solution may need a few + * combined swaps. */ if (new_score > score) { first->replicate = first_master; second->replicate = second_master; @@ -2121,7 +2143,7 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, zfree(other_replicas); maxiter--; } - score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, NULL, NULL); + score = clusterManagerGetAntiAffinityScore(ipnodes, ip_count, NULL, NULL); char *msg; int perfect = (score == 0); int log_level = (perfect ? CLUSTER_MANAGER_LOG_LVL_SUCCESS : @@ -2136,6 +2158,7 @@ cleanup: zfree(offenders); } +/* Return a representable string of the node's slots */ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { sds slots = sdsempty(); int first_range_idx = -1, last_slot_idx = -1, i; @@ -2303,11 +2326,13 @@ cleanup: return success; } +/* Flush the dirty node configuration by calling replicate for slaves or + * adding the slots for masters. */ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { if (!node->dirty) return 0; redisReply *reply = NULL; int is_err = 0, success = 1; - *err = NULL; + if (err != NULL) *err = NULL; if (node->replicate != NULL) { reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER REPLICATE %s", node->replicate); @@ -2317,14 +2342,15 @@ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { strcpy(*err, reply->str); } success = 0; + /* If the cluster has not joined yet it is possible that + * the slave does not know the master node yet. So on errors + * we return ASAP leaving the dirty flag set, to flush the + * config later. 
*/ goto cleanup; } } else { int added = clusterManagerAddSlots(node, err); - if (!added || *err != NULL) { - success = 0; - goto cleanup; - } + if (!added || *err != NULL) success = 0; } node->dirty = 0; cleanup: @@ -2342,6 +2368,11 @@ static void clusterManagerWaitForClusterJoin(void) { printf("\n"); } +/* Load node's cluster configuration by calling "CLUSTER NODES" command. + * Node's configuration (name, replicate, slots, ...) is then updated. + * If CLUSTER_MANAGER_OPT_GETFRIENDS flag is set into 'opts' argument, + * and node already knows other nodes, the node's friends list is populated + * with the other nodes info. */ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err) { @@ -2391,7 +2422,7 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (myself) { node->flags |= CLUSTER_MANAGER_FLAG_MYSELF; currentNode = node; - CLUSTER_MANAGER_RESET_SLOTS(node); + clusterManagerNodeResetSlots(node); if (i == 8) { int remaining = strlen(line); //TODO: just while(remaining) && assign p inside the block @@ -2501,7 +2532,6 @@ cleanup: * point. All nodes will be loaded inside the cluster_manager.nodes list. * Warning: if something goes wrong, it will free the starting node before * returning 0. */ - static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { if (node->context == NULL) CLUSTER_MANAGER_NODE_CONNECT(node); @@ -2681,7 +2711,6 @@ static int clusterManagerIsConfigConsistent(void) { if (cluster_manager.nodes == NULL) return 0; int consistent = (listLength(cluster_manager.nodes) <= 1); // If the Cluster has only one node, it's always consistent - // Does it make sense? 
if (consistent) return 1; sds first_cfg = NULL; listIter li; @@ -2705,6 +2734,13 @@ static int clusterManagerIsConfigConsistent(void) { return consistent; } +static void clusterManagerOnError(sds err) { + if (cluster_manager.errors == NULL) + cluster_manager.errors = listCreate(); + listAddNodeTail(cluster_manager.errors, err); + clusterManagerLogErr("%s\n", (char *) err); +} + static int clusterManagerGetCoveredSlots(char *all_slots) { if (cluster_manager.nodes == NULL) return 0; listIter li; @@ -2732,7 +2768,7 @@ static void clusterManagerCheckCluster(int quiet) { if (!quiet) clusterManagerShowNodes(); if (!clusterManagerIsConfigConsistent()) { sds err = sdsnew("[ERR] Nodes don't agree about configuration!"); - CLUSTER_MANAGER_ERROR(err); + clusterManagerOnError(err); } else { clusterManagerLogOk("[OK] All nodes agree about slots " "configuration.\n"); @@ -2761,7 +2797,7 @@ static void clusterManagerCheckCluster(int quiet) { errstr = sdscatfmt(errstr, fmt, slot); } errstr = sdscat(errstr, "."); - CLUSTER_MANAGER_ERROR(errstr); + clusterManagerOnError(errstr); } if (n->importing != NULL) { if (open_slots == NULL) @@ -2779,7 +2815,7 @@ static void clusterManagerCheckCluster(int quiet) { errstr = sdscatfmt(errstr, fmt, slot); } errstr = sdscat(errstr, "."); - CLUSTER_MANAGER_ERROR(errstr); + clusterManagerOnError(errstr); } } if (open_slots != NULL) { @@ -2808,7 +2844,7 @@ static void clusterManagerCheckCluster(int quiet) { err = sdscatprintf(err, "[ERR] Not all %d slots are " "covered by nodes.\n", CLUSTER_MANAGER_SLOTS); - CLUSTER_MANAGER_ERROR(err); + clusterManagerOnError(err); } } @@ -2832,6 +2868,53 @@ static void clusterManagerLog(int level, const char* fmt, ...) 
{ if (use_colors) printf("\033[" LOG_COLOR_RESET); } +static void clusterManagerNodeArrayInit(clusterManagerNodeArray *array, + int alloc_len) +{ + array->nodes = zcalloc(alloc_len * sizeof(clusterManagerNode*)); + array->alloc = array->nodes; + array->len = alloc_len; + array->count = 0; +} + +/* Reset array->nodes to the original array allocation and re-count non-NULL + * nodes. */ +static void clusterManagerNodeArrayReset(clusterManagerNodeArray *array) { + if (array->nodes > array->alloc) { + array->len = array->nodes - array->alloc; + array->nodes = array->alloc; + array->count = 0; + int i = 0; + for(; i < array->len; i++) { + if (array->nodes[i] != NULL) array->count++; + } + } +} + +/* Shift array->nodes and store the shifted node into 'nodeptr'. */ +static void clusterManagerNodeArrayShift(clusterManagerNodeArray *array, + clusterManagerNode **nodeptr) +{ + assert(array->nodes < (array->nodes + array->len)); + /* If the first node to be shifted is not NULL, decrement count. */ + if (*array->nodes != NULL) array->count--; + /* Store the first node to be shifted into 'nodeptr'. */ + *nodeptr = *array->nodes; + /* Shift the nodes array and decrement length. 
*/ + array->nodes++; + array->len--; +} + +static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, + clusterManagerNode *node) +{ + assert(array->nodes < (array->nodes + array->len)); + assert(node != NULL); + assert(array->count < array->len); + array->nodes[array->count++] = node; +} + +/* Execute redis-cli in Cluster Manager mode */ static void clusterManagerMode(clusterManagerCommandProc *proc) { int argc = config.cluster_manager_command.argc; char **argv = config.cluster_manager_command.argv; @@ -2919,7 +3002,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { } clusterManagerLogInfo(">>> Performing hash slots allocation " "on %d nodes...\n", node_len); - int interleaved_len = 0, ips_len = 0; + int interleaved_len = 0, ip_count = 0; clusterManagerNode **interleaved = zcalloc(node_len*sizeof(**interleaved)); char **ips = zcalloc(node_len * sizeof(char*)); clusterManagerNodeArray *ip_nodes = zcalloc(node_len * sizeof(*ip_nodes)); @@ -2929,7 +3012,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; int found = 0; - for (i = 0; i < ips_len; i++) { + for (i = 0; i < ip_count; i++) { char *ip = ips[i]; if (!strcmp(ip, n->ip)) { found = 1; @@ -2937,19 +3020,19 @@ static int clusterManagerCommandCreate(int argc, char **argv) { } } if (!found) { - ips[ips_len++] = n->ip; + ips[ip_count++] = n->ip; } clusterManagerNodeArray *node_array = &(ip_nodes[i]); if (node_array->nodes == NULL) - CLUSTER_MANAGER_NODEARRAY_INIT(node_array, node_len); - CLUSTER_MANAGER_NODEARRAY_ADD(node_array, n); + clusterManagerNodeArrayInit(node_array, node_len); + clusterManagerNodeArrayAdd(node_array, n); } while (interleaved_len < node_len) { - for (i = 0; i < ips_len; i++) { + for (i = 0; i < ip_count; i++) { clusterManagerNodeArray *node_array = &(ip_nodes[i]); if (node_array->count > 0) { - clusterManagerNode *n; - CLUSTER_MANAGER_NODEARRAY_SHIFT(node_array, n); + 
clusterManagerNode *n = NULL; + clusterManagerNodeArrayShift(node_array, &n); interleaved[interleaved_len++] = n; } } @@ -3019,11 +3102,11 @@ assign_replicas: printf("Adding extra replicas...\n"); goto assign_replicas; } - for (i = 0; i < ips_len; i++) { + for (i = 0; i < ip_count; i++) { clusterManagerNodeArray *node_array = ip_nodes + i; - CLUSTER_MANAGER_NODEARRAY_RESET(node_array); + clusterManagerNodeArrayReset(node_array); } - clusterManagerOptimizeAntiAffinity(ip_nodes, ips_len); + clusterManagerOptimizeAntiAffinity(ip_nodes, ip_count); clusterManagerShowNodes(); printf("Can I set the above configuration? %s", "(type 'yes' to accept): "); fflush(stdout); @@ -3031,7 +3114,6 @@ assign_replicas: int nread = read(fileno(stdin),buf,4); buf[3] = '\0'; if (nread != 0 && !strcmp("yes", buf)) { - printf("\nFlushing configuration!\n"); listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; @@ -3128,7 +3210,7 @@ cleanup: zfree(ips); for (i = 0; i < node_len; i++) { clusterManagerNodeArray *node_array = ip_nodes + i; - CLUSTER_MANAGER_NODEARRAY_FREE(node_array); + CLUSTER_MANAGER_NODE_ARRAY_FREE(node_array); } zfree(ip_nodes); return success; From 66548863a47d6191eaf9fe97d59529ee30f82cf1 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 28 Feb 2018 10:44:11 +0100 Subject: [PATCH 15/66] Cluster Manager: reshard command, fixed slots parsing bug and other minor bugs. 
--- src/redis-cli.c | 655 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 593 insertions(+), 62 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index b72c31cff..68ae7cfa6 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -69,6 +69,13 @@ #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" #define CLUSTER_MANAGER_SLOTS 16384 +#define CLUSTER_MANAGER_MIGRATE_TIMEOUT 60000 +#define CLUSTER_MANAGER_MIGRATE_PIPELINE 10 + +#define CLUSTER_MANAGER_INVALID_HOST_ARG \ + "Invalid arguments: you need to pass either a valid " \ + "address (ie. 120.0.0.1:7000) or space separated IP " \ + "and port (ie. 120.0.0.1 7000)\n" #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) #define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) (nodes/(replicas + 1)) #define CLUSTER_MANAGER_NODE_CONNECT(n) \ @@ -103,9 +110,14 @@ #define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0 #define CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1 +#define CLUSTER_MANAGER_CMD_FLAG_YES 1 << 2 #define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 #define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 +#define CLUSTER_MANAGER_OPT_COLD 1 << 1 +#define CLUSTER_MANAGER_OPT_UPDATE 1 << 2 +#define CLUSTER_MANAGER_OPT_QUIET 1 << 6 +#define CLUSTER_MANAGER_OPT_VERBOSE 1 << 7 #define CLUSTER_MANAGER_LOG_LVL_INFO 1 #define CLUSTER_MANAGER_LOG_LVL_WARN 2 @@ -143,6 +155,11 @@ typedef struct clusterManagerCommand { char **argv; int flags; int replicas; + char *from; + char *to; + int slots; + int timeout; + int pipeline; } clusterManagerCommand; static void createClusterManagerCommand(char *cmdname, int argc, char **argv); @@ -1261,6 +1278,19 @@ static int parseOptions(int argc, char **argv) { usage(); } else if (!strcmp(argv[i],"--cluster-replicas") && !lastarg) { config.cluster_manager_command.replicas = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-from") && !lastarg) { + config.cluster_manager_command.from = argv[++i]; + } else if (!strcmp(argv[i],"--cluster-to") && !lastarg) { + 
config.cluster_manager_command.to = argv[++i]; + } else if (!strcmp(argv[i],"--cluster-slots") && !lastarg) { + config.cluster_manager_command.slots = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-timeout") && !lastarg) { + config.cluster_manager_command.timeout = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-pipeline") && !lastarg) { + config.cluster_manager_command.pipeline = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-yes")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_YES; } else if (!strcmp(argv[i],"-v") || !strcmp(argv[i], "--version")) { sds version = cliVersion(); printf("redis-cli %s\n", version); @@ -1358,7 +1388,7 @@ static void usage(void) { " --ldb-sync-mode Like --ldb but uses the synchronous Lua debugger, in\n" " this mode the server is blocked and script changes are\n" " are not rolled back from the server memory.\n" -" --cluster [args...]\n" +" --cluster [args...] [opts...]\n" " Cluster Manager command and arguments (see below).\n" " --help Output this help and exit.\n" " --version Output version and exit.\n" @@ -1729,6 +1759,12 @@ typedef struct clusterManagerNodeArray { int count; /* Non-NULL nodes count */ } clusterManagerNodeArray; +/* Used for reshard table. 
*/ +typedef struct clusterManagerReshardTableItem { + clusterManagerNode *source; + int slot; +} clusterManagerReshardTableItem; + static dictType clusterManagerDictType = { dictSdsHash, /* hash function */ NULL, /* key dup */ @@ -1754,7 +1790,7 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, int ip_count, clusterManagerNode ***offending, int *offending_len); static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, int ip_count); -static sds clusterManagerNodeInfo(clusterManagerNode *node); +static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent); static void clusterManagerShowNodes(void); static void clusterManagerShowInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); @@ -1776,6 +1812,7 @@ static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandReshard(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); @@ -1789,9 +1826,11 @@ typedef struct clusterManagerCommandDef { clusterManagerCommandDef clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", - "cluster-replicas"}, - {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, + "replicas "}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, + {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, + {"reshard", clusterManagerCommandReshard, -1, "host:port", + "from ,to ,slots ,yes,timeout ,pipeline "}, {"call", clusterManagerCommandCall, -2, "host:port command arg arg .. 
arg", NULL}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} @@ -1829,6 +1868,38 @@ static clusterManagerCommandProc *validateClusterManagerCommand(void) { return proc; } +/* Get host ip and port from command arguments. If only one argument has + * been provided it must be in the form of 'ip:port', elsewhere + * the first argument must be the ip and the second one the port. + * If host and port can be detected, it returns 1 and it stores host and + * port into variables referenced by'ip_ptr' and 'port_ptr' pointers, + * elsewhere it returns 0. */ +static int getClusterHostFromCmdArgs(int argc, char **argv, + char **ip_ptr, int *port_ptr) { + int port = 0; + char *ip = NULL; + if (argc == 1) { + char *addr = argv[0]; + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c != NULL) { + *c = '\0'; + ip = addr; + port = atoi(++c); + } else return 0; + } else { + ip = argv[0]; + port = atoi(argv[1]); + } + if (!ip || !port) return 0; + else { + *ip_ptr = ip; + *port_ptr = port; + } + return 1; +} + static void freeClusterManagerNode(clusterManagerNode *node) { if (node->context != NULL) redisFree(node->context); if (node->friends != NULL) { @@ -2188,8 +2259,12 @@ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { return slots; } -static sds clusterManagerNodeInfo(clusterManagerNode *node) { +static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent) { sds info = sdsempty(); + sds spaces = sdsempty(); + int i; + for (i = 0; i < indent; i++) spaces = sdscat(spaces, " "); + if (indent) info = sdscat(info, spaces); int is_master = !(node->flags & CLUSTER_MANAGER_FLAG_SLAVE); char *role = (is_master ? 
"M" : "S"); sds slots = NULL; @@ -2198,17 +2273,18 @@ static sds clusterManagerNodeInfo(clusterManagerNode *node) { else { slots = clusterManagerNodeSlotsString(node); info = sdscatfmt(info, "%s: %S %s:%u\n" - " slots:%S (%u slots) " + "%s slots:%S (%u slots) " "", //TODO: flags string - role, node->name, node->ip, node->port, + role, node->name, node->ip, node->port, spaces, slots, node->slots_count); sdsfree(slots); } if (node->replicate != NULL) - info = sdscatfmt(info, "\n replicates %S", node->replicate); + info = sdscatfmt(info, "\n%s replicates %S", spaces, node->replicate); else if (node->replicas_count) - info = sdscatfmt(info, "\n %U additional replica(s)", - node->replicas_count); + info = sdscatfmt(info, "\n%s %U additional replica(s)", + spaces, node->replicas_count); + sdsfree(spaces); return info; } @@ -2218,7 +2294,7 @@ static void clusterManagerShowNodes(void) { listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; - sds info = clusterManagerNodeInfo(node); + sds info = clusterManagerNodeInfo(node, 0); printf("%s\n", info); sdsfree(info); } @@ -2306,7 +2382,7 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) argvlen[i] = sdslen(argv[i]); redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen); if (redisGetReply(node->context, &_reply) != REDIS_OK) { - success = 1; + success = 0; goto cleanup; } reply = (redisReply*) _reply; @@ -2326,6 +2402,193 @@ cleanup: return success; } +/* Set slot status to "importing" or "migrating" */ +static int clusterManagerSetSlot(clusterManagerNode *node1, + clusterManagerNode *node2, + int slot, const char *mode, char **err) { + redisReply *reply = CLUSTER_MANAGER_COMMAND(node1, "CLUSTER " + "SETSLOT %d %s %s", + slot, mode, + (char *) node2->name); + if (err != NULL) *err = NULL; + if (!reply) return 0; + if (reply->type == REDIS_REPLY_ERROR) { + if (err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + 
strcpy(*err, reply->str); + } + return 0; + } + return 1; +} + +static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, + clusterManagerNode *target, + int slot, int timeout, + int pipeline, int verbose, + char **err) +{ + int success = 1; + while (1) { + redisReply *reply = NULL, *migrate_reply = NULL; + char **argv = NULL; + size_t *argv_len = NULL; + reply = CLUSTER_MANAGER_COMMAND(source, "CLUSTER " + "GETKEYSINSLOT %d %d", slot, + pipeline); + success = (reply != NULL); + if (!success) return 0; + if (reply->type == REDIS_REPLY_ERROR) { + success = 0; + if (err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + strcpy(*err, reply->str); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + } + goto next; + } + assert(reply->type == REDIS_REPLY_ARRAY); + size_t count = reply->elements; + if (count == 0) { + freeReplyObject(reply); + break; + } + char *dots = (verbose ? zmalloc((count+1) * sizeof(char)) : NULL); + /* Calling MIGRATE command. */ + size_t argc = count + 8; + argv = zcalloc(argc * sizeof(char *)); + argv_len = zcalloc(argc * sizeof(size_t)); + char portstr[255]; + char timeoutstr[255]; + snprintf(portstr, 10, "%d", target->port); + snprintf(timeoutstr, 10, "%d", timeout); + argv[0] = "MIGRATE"; + argv_len[0] = 7; + argv[1] = target->ip; + argv_len[1] = strlen(target->ip); + argv[2] = portstr; + argv_len[2] = strlen(portstr); + argv[3] = ""; + argv_len[3] = 0; + argv[4] = "0"; + argv_len[4] = 1; + argv[5] = timeoutstr; + argv_len[5] = strlen(timeoutstr); + argv[6] = "REPLACE"; + argv_len[6] = 7; + argv[7] = "KEYS"; + argv_len[7] = 4; + for (size_t i = 0; i < count; i++) { + redisReply *entry = reply->element[i]; + size_t idx = i + 8; + assert(entry->type == REDIS_REPLY_STRING); + argv[idx] = (char *) sdsnew(entry->str); + argv_len[idx] = entry->len; + if (verbose) dots[i] = '.'; + } + if (verbose) dots[count] = '\0'; + void *_reply = NULL; + redisAppendCommandArgv(source->context,argc, + (const char**)argv,argv_len); + 
success = (redisGetReply(source->context, &_reply) == REDIS_OK); + for (size_t i = 0; i < count; i++) sdsfree(argv[i + 8]); + if (!success) goto next; + migrate_reply = (redisReply *) _reply; + if (migrate_reply->type == REDIS_REPLY_ERROR) { + // TODO: Implement fix. + success = 0; + if (err != NULL) { + *err = zmalloc((migrate_reply->len + 1) * sizeof(char)); + strcpy(*err, migrate_reply->str); + printf("\n"); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + } + goto next; + } + if (verbose) { + printf("%s", dots); + fflush(stdout); + } +next: + if (reply != NULL) freeReplyObject(reply); + if (migrate_reply != NULL) freeReplyObject(migrate_reply); + zfree(argv); + zfree(argv_len); + if (!success) break; + } + return success; +} + +/* Move slots between source and target nodes using MIGRATE. + * + * Options: + * CLUSTER_MANAGER_OPT_VERBOSE -- Print a dot for every moved key. + * CLUSTER_MANAGER_OPT_COLD -- Move keys without opening slots / + * reconfiguring the nodes. + * CLUSTER_MANAGER_OPT_UPDATE -- Update node->slots for source/target nodes. + * CLUSTER_MANAGER_OPT_QUIET -- Don't print info messages. 
+*/ +static int clusterManagerMoveSlot(clusterManagerNode *source, + clusterManagerNode *target, + int slot, int opts, char**err) +{ + if (!(opts & CLUSTER_MANAGER_OPT_QUIET)) { + printf("Moving slot %d from %s:%d to %s:%d: ", slot, source->ip, + source->port, target->ip, target->port); + fflush(stdout); + } + if (err != NULL) *err = NULL; + int pipeline = config.cluster_manager_command.pipeline, + timeout = config.cluster_manager_command.timeout, + print_dots = (opts & CLUSTER_MANAGER_OPT_VERBOSE), + option_cold = (opts & CLUSTER_MANAGER_OPT_COLD), + success = 1; + if (!option_cold) { + success = clusterManagerSetSlot(target, source, slot, + "importing", err); + if (!success) return 0; + success = clusterManagerSetSlot(source, target, slot, + "migrating", err); + if (!success) return 0; + } + success = clusterManagerMigrateKeysInSlot(source, target, slot, timeout, + pipeline, print_dots, err); + if (!(opts & CLUSTER_MANAGER_OPT_QUIET)) printf("\n"); + if (!success) return 0; + /* Set the new node as the owner of the slot in all the known nodes. */ + if (!option_cold) { + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER " + "SETSLOT %d %s %s", + slot, "node", + target->name); + success = (r != NULL); + if (!success) return 0; + if (r->type == REDIS_REPLY_ERROR) { + success = 0; + if (err != NULL) { + *err = zmalloc((r->len + 1) * sizeof(char)); + strcpy(*err, r->str); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err); + } + } + freeReplyObject(r); + if (!success) return 0; + } + } + /* Update the node logical config */ + if (opts & CLUSTER_MANAGER_OPT_UPDATE) { + source->slots[slot] = 0; + target->slots[slot] = 1; + } + return 1; +} + /* Flush the dirty node configuration by calling replicate for slaves or * adding the slots for masters. 
*/ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { @@ -2425,20 +2688,24 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, clusterManagerNodeResetSlots(node); if (i == 8) { int remaining = strlen(line); - //TODO: just while(remaining) && assign p inside the block - while ((p = strchr(line, ' ')) != NULL || remaining) { + while (remaining > 0) { + p = strchr(line, ' '); if (p == NULL) p = line + remaining; remaining -= (p - line); char *slotsdef = line; *p = '\0'; - if (remaining) line = p + 1; - else line = p; + if (remaining) { + line = p + 1; + remaining--; + } else line = p; if (slotsdef[0] == '[') { slotsdef++; if ((p = strstr(slotsdef, "->-"))) { // Migrating *p = '\0'; p += 3; + char *closing_bracket = strchr(p, ']'); + if (closing_bracket) *closing_bracket = '\0'; sds slot = sdsnew(slotsdef); sds dst = sdsnew(p); node->migrating_count += 2; @@ -2451,6 +2718,8 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, } else if ((p = strstr(slotsdef, "-<-"))) {//Importing *p = '\0'; p += 3; + char *closing_bracket = strchr(p, ']'); + if (closing_bracket) *closing_bracket = '\0'; sds slot = sdsnew(slotsdef); sds src = sdsnew(p); node->importing_count += 2; @@ -2605,8 +2874,9 @@ invalid_friend: if (n->replicate != NULL) { clusterManagerNode *master = clusterManagerNodeByName(n->replicate); if (master == NULL) { - printf("*** WARNING: %s:%d claims to be slave of unknown " - "node ID %s.\n", n->ip, n->port, n->replicate); + clusterManagerLogWarn("*** WARNING: %s:%d claims to be " + "slave of unknown node ID %s.\n", + n->ip, n->port, n->replicate); } else master->replicas_count++; } } @@ -2619,6 +2889,12 @@ int clusterManagerSlotCompare(const void *slot1, const void *slot2) { return strcmp(*i1, *i2); } +int clusterManagerSlotCountCompareDesc(const void *n1, const void *n2) { + clusterManagerNode *node1 = *((clusterManagerNode **) n1); + clusterManagerNode *node2 = *((clusterManagerNode 
**) n2); + return node2->slots_count - node1->slots_count; +} + static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { sds signature = NULL; int node_count = 0, i = 0, name_len = 0; @@ -2651,16 +2927,18 @@ static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { if (remaining == 0) continue; char **slots = NULL; int c = 0; - //TODO: just while(remaining) && assign p inside the block - while ((p = strchr(line, ' ')) != NULL || remaining) { + while (remaining > 0) { + p = strchr(line, ' '); if (p == NULL) p = line + remaining; int size = (p - line); remaining -= size; tot_size += size; char *slotsdef = line; *p = '\0'; - if (remaining) line = p + 1; - else line = p; + if (remaining) { + line = p + 1; + remaining--; + } else line = p; if (slotsdef[0] != '[') { c++; slots = zrealloc(slots, (c * sizeof(char *))); @@ -2792,7 +3070,7 @@ static void clusterManagerCheckCluster(int quiet) { n->port); for (i = 0; i < n->migrating_count; i += 2) { sds slot = n->migrating[i]; - dictAdd(open_slots, slot, n->migrating[i + 1]); + dictAdd(open_slots, slot, sdsdup(n->migrating[i + 1])); char *fmt = (i > 0 ? ",%S" : "%S"); errstr = sdscatfmt(errstr, fmt, slot); } @@ -2810,7 +3088,7 @@ static void clusterManagerCheckCluster(int quiet) { n->port); for (i = 0; i < n->importing_count; i += 2) { sds slot = n->importing[i]; - dictAdd(open_slots, slot, n->importing[i + 1]); + dictAdd(open_slots, slot, sdsdup(n->importing[i + 1])); char *fmt = (i > 0 ? 
",%S" : "%S"); errstr = sdscatfmt(errstr, fmt, slot); } @@ -2848,6 +3126,76 @@ static void clusterManagerCheckCluster(int quiet) { } } +static clusterManagerNode *clusterNodeForResharding(char *id, + clusterManagerNode *target, + int *raise_err) +{ + clusterManagerNode *node = NULL; + const char *invalid_node_msg = "*** The specified node is not known or " + "not a master, please retry.\n"; + node = clusterManagerNodeByName(id); + *raise_err = 0; + if (!node || node->flags & CLUSTER_MANAGER_FLAG_SLAVE) { + clusterManagerLogErr(invalid_node_msg); + *raise_err = 1; + return NULL; + } else if (node != NULL && target != NULL) { + if (!strcmp(node->name, target->name)) { + clusterManagerLogErr( "*** It is not possible to use " + "the target node as " + "source node.\n"); + return NULL; + } + } + return node; +} + +static list *clusterManagerComputeReshardTable(list *sources, int numslots) { + list *moved = listCreate(); + int src_count = listLength(sources), i = 0, tot_slots = 0, j; + clusterManagerNode **sorted = zmalloc(src_count * sizeof(**sorted)); + listIter li; + listNode *ln; + listRewind(sources, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + tot_slots += node->slots_count; + sorted[i++] = node; + } + qsort(sorted, src_count, sizeof(clusterManagerNode *), + clusterManagerSlotCountCompareDesc); + for (i = 0; i < src_count; i++) { + clusterManagerNode *node = sorted[i]; + float n = ((float) numslots / tot_slots * node->slots_count); + if (i == 0) n = ceil(n); + else n = floor(n); + int max = (int) n, count = 0; + for (j = 0; j < CLUSTER_MANAGER_SLOTS; j++) { + int slot = node->slots[j]; + if (!slot) continue; + if (count >= max || (int)listLength(moved) >= numslots) break; + clusterManagerReshardTableItem *item = zmalloc(sizeof(item)); + item->source = node; + item->slot = j; + listAddNodeTail(moved, item); + count++; + } + } + zfree(sorted); + return moved; +} + +static void clusterManagerShowReshardTable(list *table) { + 
listIter li; + listNode *ln; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + clusterManagerNode *n = item->source; + printf(" Moving slot %d from %s\n", item->slot, (char *) n->name); + } +} + static void clusterManagerLog(int level, const char* fmt, ...) { int use_colors = (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COLOR); @@ -3219,59 +3567,218 @@ cleanup: static int clusterManagerCommandInfo(int argc, char **argv) { int port = 0; char *ip = NULL; - if (argc == 1) { - char *addr = argv[0]; - char *c = strrchr(addr, '@'); - if (c != NULL) *c = '\0'; - c = strrchr(addr, ':'); - if (c != NULL) { - *c = '\0'; - ip = addr; - port = atoi(++c); - } else goto invalid_args; - } else { - ip = argv[0]; - port = atoi(argv[1]); - } - if (!ip || !port) goto invalid_args; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; clusterManagerShowInfo(); return 1; invalid_args: - fprintf(stderr, "Invalid arguments: you need to pass either a valid " - "address (ie. 120.0.0.1:7000) or space separated IP " - "and port (ie. 
120.0.0.1 7000)\n"); + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); return 0; } static int clusterManagerCommandCheck(int argc, char **argv) { int port = 0; char *ip = NULL; - if (argc == 1) { - char *addr = argv[0]; - char *c = strrchr(addr, '@'); - if (c != NULL) *c = '\0'; - c = strrchr(addr, ':'); - if (c != NULL) { - *c = '\0'; - ip = addr; - port = atoi(++c); - } else goto invalid_args; - } else { - ip = argv[0]; - port = atoi(argv[1]); - } - if (!ip || !port) goto invalid_args; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; clusterManagerShowInfo(); clusterManagerCheckCluster(0); return 1; invalid_args: - fprintf(stderr, "Invalid arguments: you need to pass either a valid " - "address (ie. 120.0.0.1:7000) or space separated IP " - "and port (ie. 120.0.0.1 7000)\n"); + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + +static int clusterManagerCommandReshard(int argc, char **argv) { + int port = 0; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + clusterManagerCheckCluster(0); + if (cluster_manager.errors && listLength(cluster_manager.errors) > 0) { + fflush(stdout); + fprintf(stderr, + "*** Please fix your cluster problems before resharding\n"); + return 0; + } + int slots = config.cluster_manager_command.slots; + if (!slots) { + while (slots <= 0 || slots > CLUSTER_MANAGER_SLOTS) { + printf("How many slots do you want to move (from 1 to %d)? 
", + CLUSTER_MANAGER_SLOTS); + fflush(stdout); + char buf[6]; + int nread = read(fileno(stdin),buf,6); + if (!nread) continue; //TODO: nread < 0 + int last_idx = nread - 1; + if (buf[last_idx] != '\n') { + int ch; + while ((ch = getchar()) != '\n' && ch != EOF) {} + } + buf[last_idx] = '\0'; + slots = atoi(buf); + } + } + char buf[255]; + char *to = config.cluster_manager_command.to, + *from = config.cluster_manager_command.from; + while (to == NULL) { + printf("What is the receiving node ID? "); + fflush(stdout); + int nread = read(fileno(stdin),buf,255); + if (!nread) continue; //TODO: nread < 0 + int last_idx = nread - 1; + if (buf[last_idx] != '\n') { + int ch; + while ((ch = getchar()) != '\n' && ch != EOF) {} + } + buf[last_idx] = '\0'; + if (strlen(buf) > 0) to = buf; + } + int raise_err = 0; + clusterManagerNode *target = clusterNodeForResharding(to, NULL, &raise_err); + if (target == NULL) return 0; + list *sources = listCreate(); + list *table = NULL; + int all = 0, result = 1; + if (from == NULL) { + printf("Please enter all the source node IDs.\n"); + printf(" Type 'all' to use all the nodes as source nodes for " + "the hash slots.\n"); + printf(" Type 'done' once you entered all the source nodes IDs.\n"); + while (1) { + printf("Source node #%lu: ", listLength(sources) + 1); + fflush(stdout); + int nread = read(fileno(stdin),buf,255); + if (!nread) continue; //TODO: nread < 0 + int last_idx = nread - 1; + if (buf[last_idx] != '\n') { + int ch; + while ((ch = getchar()) != '\n' && ch != EOF) {} + } + buf[last_idx] = '\0'; + if (!strcmp(buf, "done")) break; + else if (!strcmp(buf, "all")) { + all = 1; + break; + } else { + clusterManagerNode *src = + clusterNodeForResharding(buf, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } + } + } + } else { + char *p; + while((p = strchr(from, ',')) != NULL) { + *p = '\0'; + if (!strcmp(from, "all")) { + all = 1; + break; + } else { + 
clusterManagerNode *src = + clusterNodeForResharding(from, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } + } + from = p + 1; + } + /* Check if there's still another source to process. */ + if (!all && strlen(from) > 0) { + clusterManagerNode *src = + clusterNodeForResharding(from, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } + } + } + listIter li; + listNode *ln; + if (all) { + listEmpty(sources); + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + if (!sdscmp(n->name, target->name)) continue; + listAddNodeTail(sources, n); + } + } + if (listLength(sources) == 0) { + fprintf(stderr, "*** No source nodes given, operation aborted.\n"); + result = 0; + goto cleanup; + } + printf("\nReady to move %d slots.\n", slots); + printf(" Source nodes:\n"); + listRewind(sources, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *src = ln->value; + sds info = clusterManagerNodeInfo(src, 4); + printf("%s\n", info); + sdsfree(info); + } + printf(" Destination node:\n"); + sds info = clusterManagerNodeInfo(target, 4); + printf("%s\n", info); + sdsfree(info); + table = clusterManagerComputeReshardTable(sources, slots); + printf(" Resharding plan:\n"); + clusterManagerShowReshardTable(table); + if (!(config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_YES)) + { + printf("Do you want to proceed with the proposed " + "reshard plan (yes/no)? 
"); + fflush(stdout); + char buf[4]; + int nread = read(fileno(stdin),buf,4); + buf[3] = '\0'; + if (nread <= 0 || strcmp("yes", buf) != 0) { + result = 0; + goto cleanup; + } + } + int opts = CLUSTER_MANAGER_OPT_VERBOSE; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + char *err = NULL; + result = clusterManagerMoveSlot(item->source, target, item->slot, + opts, &err); + if (!result) { + if (err != NULL) { + clusterManagerLogErr("\n%s\n", err); + zfree(err); + } + goto cleanup; + } + } +cleanup: + listRelease(sources); + if (table) { + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + zfree(item); + } + listRelease(table); + } + return result; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); return 0; } @@ -3332,13 +3839,32 @@ static int clusterManagerCommandHelp(int argc, char **argv) { sizeof(clusterManagerCommandDef); int i = 0, j; fprintf(stderr, "Cluster Manager Commands:\n"); + int padding = 15; for (; i < commands_count; i++) { clusterManagerCommandDef *def = &(clusterManagerCommands[i]); - int namelen = strlen(def->name), padlen = 15 - namelen; + int namelen = strlen(def->name), padlen = padding - namelen; fprintf(stderr, " %s", def->name); for (j = 0; j < padlen; j++) fprintf(stderr, " "); fprintf(stderr, "%s\n", (def->args ? 
def->args : "")); - //TODO: if (def->options) + if (def->options != NULL) { + int optslen = strlen(def->options); + char *p = def->options, *eos = p + optslen; + char *comma = NULL; + while ((comma = strchr(p, ',')) != NULL) { + int deflen = (int)(comma - p); + char buf[255]; + memcpy(buf, p, deflen); + buf[deflen] = '\0'; + for (j = 0; j < padding; j++) fprintf(stderr, " "); + fprintf(stderr, " --cluster-%s\n", buf); + p = comma + 1; + if (p >= eos) break; + } + if (p < eos) { + for (j = 0; j < padding; j++) fprintf(stderr, " "); + fprintf(stderr, " --cluster-%s\n", p); + } + } } return 0; } @@ -4640,6 +5166,11 @@ int main(int argc, char **argv) { config.cluster_manager_command.argv = NULL; config.cluster_manager_command.flags = 0; config.cluster_manager_command.replicas = 0; + config.cluster_manager_command.from = NULL; + config.cluster_manager_command.to = NULL; + config.cluster_manager_command.slots = 0; + config.cluster_manager_command.timeout = CLUSTER_MANAGER_MIGRATE_TIMEOUT; + config.cluster_manager_command.pipeline = CLUSTER_MANAGER_MIGRATE_PIPELINE; pref.hints = 1; spectrum_palette = spectrum_palette_color; From b15f3515f3a853f0d7f83f032fec1f155987b2ee Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 28 Feb 2018 11:49:10 +0100 Subject: [PATCH 16/66] Fixed memory write error in clusterManagerGetConfigSignature --- src/redis-cli.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 68ae7cfa6..366c36fad 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2295,7 +2295,7 @@ static void clusterManagerShowNodes(void) { while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; sds info = clusterManagerNodeInfo(node, 0); - printf("%s\n", info); + printf("%s\n", (char *) info); sdsfree(info); } } @@ -2916,8 +2916,8 @@ static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { line = p + 1; if (i == 0) { nodename = token; - tot_size = p - token; - name_len = tot_size; + 
tot_size = (p - token); + name_len = tot_size++; // Make room for ':' in tot_size } else if (i == 8) break; i++; } @@ -2951,6 +2951,7 @@ static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { node_count++; node_configs = zrealloc(node_configs, (node_count * sizeof(char *))); + /* Make room for '|' separators. */ tot_size += (sizeof(char) * (c - 1)); char *cfg = zmalloc((sizeof(char) * tot_size) + 1); memcpy(cfg, nodename, name_len); @@ -3760,7 +3761,7 @@ static int clusterManagerCommandReshard(int argc, char **argv) { opts, &err); if (!result) { if (err != NULL) { - clusterManagerLogErr("\n%s\n", err); + //clusterManagerLogErr("\n%s\n", err); zfree(err); } goto cleanup; From a00157c44f8a23bad2554380408c21f9d53b2e67 Mon Sep 17 00:00:00 2001 From: Artix Date: Wed, 28 Feb 2018 15:21:08 +0100 Subject: [PATCH 17/66] Cluster Manager: fixed some memory error --- src/redis-cli.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 366c36fad..64ec48b5d 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2412,14 +2412,19 @@ static int clusterManagerSetSlot(clusterManagerNode *node1, (char *) node2->name); if (err != NULL) *err = NULL; if (!reply) return 0; + int success = 1; if (reply->type == REDIS_REPLY_ERROR) { + success = 0; if (err != NULL) { *err = zmalloc((reply->len + 1) * sizeof(char)); strcpy(*err, reply->str); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node1, err); } - return 0; + goto cleanup; } - return 1; +cleanup: + freeReplyObject(reply); + return success; } static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, @@ -3175,7 +3180,7 @@ static list *clusterManagerComputeReshardTable(list *sources, int numslots) { int slot = node->slots[j]; if (!slot) continue; if (count >= max || (int)listLength(moved) >= numslots) break; - clusterManagerReshardTableItem *item = zmalloc(sizeof(item)); + clusterManagerReshardTableItem *item = zmalloc(sizeof(*item)); item->source = node; 
item->slot = j; listAddNodeTail(moved, item); From 422e41606b61ebb95a626bd00bf62c3f7fb03bc4 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 2 Mar 2018 17:06:50 +0100 Subject: [PATCH 18/66] ClusterManager: fixed --cluster-from 'all' parsing --- src/redis-cli.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 64ec48b5d..fe73f4a46 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -3137,12 +3137,12 @@ static clusterManagerNode *clusterNodeForResharding(char *id, int *raise_err) { clusterManagerNode *node = NULL; - const char *invalid_node_msg = "*** The specified node is not known or " - "not a master, please retry.\n"; + const char *invalid_node_msg = "*** The specified node (%s) is not known " + "or not a master, please retry.\n"; node = clusterManagerNodeByName(id); *raise_err = 0; if (!node || node->flags & CLUSTER_MANAGER_FLAG_SLAVE) { - clusterManagerLogErr(invalid_node_msg); + clusterManagerLogErr(invalid_node_msg, id); *raise_err = 1; return NULL; } else if (node != NULL && target != NULL) { @@ -3700,12 +3700,15 @@ static int clusterManagerCommandReshard(int argc, char **argv) { } /* Check if there's still another source to process. 
*/ if (!all && strlen(from) > 0) { - clusterManagerNode *src = - clusterNodeForResharding(from, target, &raise_err); - if (src != NULL) listAddNodeTail(sources, src); - else if (raise_err) { - result = 0; - goto cleanup; + if (!strcmp(from, "all")) all = 1; + if (!all) { + clusterManagerNode *src = + clusterNodeForResharding(from, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } } } } From 928640dee0631630997ff5c2cfe559b172792bdd Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 6 Mar 2018 13:06:04 +0200 Subject: [PATCH 19/66] clusterManagerAddSlots: changed the way ADDSLOTS command is built --- src/redis-cli.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index fe73f4a46..e2b1fb2f5 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2354,32 +2354,28 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) redisReply *reply = NULL; void *_reply = NULL; int is_err = 0, success = 1; - int argc; - sds *argv = NULL; - size_t *argvlen = NULL; + /* First two args are used for the command itself. */ + int argc = node->slots_count + 2; + sds *argv = zmalloc(argc * sizeof(*argv)); + size_t *argvlen = zmalloc(argc * sizeof(*argvlen)); + argv[0] = "CLUSTER"; + argv[1] = "ADDSLOTS"; + argvlen[0] = 7; + argvlen[1] = 8; *err = NULL; - sds cmd = sdsnew("CLUSTER ADDSLOTS "); - int i, added = 0; + int i, argv_idx = 2; for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { - int last_slot = (i == (CLUSTER_MANAGER_SLOTS - 1)); + if (argv_idx >= argc) break; if (node->slots[i]) { - char *fmt = (!last_slot ? 
"%u " : "%u"); - cmd = sdscatfmt(cmd, fmt, i); - added++; + argv[argv_idx] = sdsfromlonglong((long long) i); + argvlen[argv_idx] = sdslen(argv[argv_idx]); + argv_idx++; } } - if (!added) { + if (!argv_idx) { success = 0; goto cleanup; } - argv = cliSplitArgs(cmd, &argc); - if (argc == 0 || argv == NULL) { - success = 0; - goto cleanup; - } - argvlen = zmalloc(argc*sizeof(size_t)); - for (i = 0; i < argc; i++) - argvlen[i] = sdslen(argv[i]); redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen); if (redisGetReply(node->context, &_reply) != REDIS_OK) { success = 0; @@ -2395,9 +2391,11 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) goto cleanup; } cleanup: - sdsfree(cmd); zfree(argvlen); - if (argv != NULL) sdsfreesplitres(argv,argc); + if (argv != NULL) { + for (i = 2; i < argc; i++) sdsfree(argv[i]); + zfree(argv); + } if (reply != NULL) freeReplyObject(reply); return success; } From 8b39e31ec39ce65cd6f42765121fd03d279516a7 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 23 Mar 2018 16:46:43 +0100 Subject: [PATCH 20/66] Cluster Manager: rebalance command --- src/redis-cli.c | 297 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 286 insertions(+), 11 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index e2b1fb2f5..69ba39acc 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -71,6 +71,7 @@ #define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MIGRATE_TIMEOUT 60000 #define CLUSTER_MANAGER_MIGRATE_PIPELINE 10 +#define CLUSTER_MANAGER_REBALANCE_THRESHOLD 2 #define CLUSTER_MANAGER_INVALID_HOST_ARG \ "Invalid arguments: you need to pass either a valid " \ @@ -108,10 +109,13 @@ #define CLUSTER_MANAGER_FLAG_DISCONNECT 1 << 4 #define CLUSTER_MANAGER_FLAG_FAIL 1 << 5 -#define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0 -#define CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1 -#define CLUSTER_MANAGER_CMD_FLAG_YES 1 << 2 -#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 +#define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0 
+#define CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1 +#define CLUSTER_MANAGER_CMD_FLAG_YES 1 << 2 +#define CLUSTER_MANAGER_CMD_FLAG_AUTOWEIGHTS 1 << 3 +#define CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER 1 << 4 +#define CLUSTER_MANAGER_CMD_FLAG_SIMULATE 1 << 5 +#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 #define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 #define CLUSTER_MANAGER_OPT_COLD 1 << 1 @@ -157,9 +161,12 @@ typedef struct clusterManagerCommand { int replicas; char *from; char *to; + char **weight; + int weight_argc; int slots; int timeout; int pipeline; + float threshold; } clusterManagerCommand; static void createClusterManagerCommand(char *cmdname, int argc, char **argv); @@ -206,6 +213,7 @@ static struct config { int eval_ldb_end; /* Lua debugging session ended. */ int enable_ldb_on_eval; /* Handle manual SCRIPT DEBUG + EVAL commands. */ int last_cmd_type; + int verbose; clusterManagerCommand cluster_manager_command; } config; @@ -1266,6 +1274,8 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"-d") && !lastarg) { sdsfree(config.mb_delim); config.mb_delim = sdsnew(argv[++i]); + } else if (!strcmp(argv[i],"--verbose")) { + config.verbose = 1; } else if (!strcmp(argv[i],"--cluster") && !lastarg) { if (CLUSTER_MANAGER_MODE()) usage(); char *cmd = argv[++i]; @@ -1282,15 +1292,35 @@ static int parseOptions(int argc, char **argv) { config.cluster_manager_command.from = argv[++i]; } else if (!strcmp(argv[i],"--cluster-to") && !lastarg) { config.cluster_manager_command.to = argv[++i]; + } else if (!strcmp(argv[i],"--cluster-weight") && !lastarg) { + int widx = i + 1; + char **weight = argv + widx; + int wargc = 0; + for (; widx < argc; widx++) { + if (strstr(argv[widx], "--") == argv[widx]) break; + wargc++; + } + if (wargc > 0) { + config.cluster_manager_command.weight = weight; + config.cluster_manager_command.weight_argc = wargc; + } } else if (!strcmp(argv[i],"--cluster-slots") && !lastarg) { config.cluster_manager_command.slots = 
atoi(argv[++i]); } else if (!strcmp(argv[i],"--cluster-timeout") && !lastarg) { config.cluster_manager_command.timeout = atoi(argv[++i]); } else if (!strcmp(argv[i],"--cluster-pipeline") && !lastarg) { config.cluster_manager_command.pipeline = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-threshold") && !lastarg) { + config.cluster_manager_command.threshold = atof(argv[++i]); } else if (!strcmp(argv[i],"--cluster-yes")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_YES; + } else if (!strcmp(argv[i],"--cluster-simulate")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_SIMULATE; + } else if (!strcmp(argv[i],"--cluster-use-empty-masters")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; } else if (!strcmp(argv[i],"-v") || !strcmp(argv[i], "--version")) { sds version = cliVersion(); printf("redis-cli %s\n", version); @@ -1390,6 +1420,7 @@ static void usage(void) { " are not rolled back from the server memory.\n" " --cluster [args...] [opts...]\n" " Cluster Manager command and arguments (see below).\n" +" --verbose Verbose mode.\n" " --help Output this help and exit.\n" " --version Output version and exit.\n" "\n" @@ -1749,6 +1780,8 @@ typedef struct clusterManagerNode { sds *importing; int migrating_count; int importing_count; + float weight; /* Weight used by rebalance */ + int balance; /* Used by rebalance */ } clusterManagerNode; /* Data structure used to represent a sequence of nodes. 
*/ @@ -1780,6 +1813,7 @@ typedef int clusterManagerCommandProc(int argc, char **argv); static clusterManagerNode *clusterManagerNewNode(char *ip, int port); static clusterManagerNode *clusterManagerNodeByName(const char *name); +static clusterManagerNode *clusterManagerNodeByAbbreviatedName(const char *n); static void clusterManagerNodeResetSlots(clusterManagerNode *node); static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, @@ -1813,6 +1847,7 @@ static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandReshard(int argc, char **argv); +static int clusterManagerCommandRebalance(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); @@ -1831,6 +1866,9 @@ clusterManagerCommandDef clusterManagerCommands[] = { {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, {"reshard", clusterManagerCommandReshard, -1, "host:port", "from ,to ,slots ,yes,timeout ,pipeline "}, + {"rebalance", clusterManagerCommandRebalance, -1, "host:port", + "weight ,use-empty-masters," + "timeout ,simulate,pipeline ,threshold "}, {"call", clusterManagerCommandCall, -2, "host:port command arg arg .. arg", NULL}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} @@ -1970,10 +2008,13 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { node->migrating_count = 0; node->importing_count = 0; node->replicas_count = 0; + node->weight = 1.0f; + node->balance = 0; clusterManagerNodeResetSlots(node); return node; } +/* Return the node with the specified ID or NULL. 
*/ static clusterManagerNode *clusterManagerNodeByName(const char *name) { if (cluster_manager.nodes == NULL) return NULL; clusterManagerNode *found = NULL; @@ -1994,6 +2035,32 @@ static clusterManagerNode *clusterManagerNodeByName(const char *name) { return found; } +/* Like clusterManagerNodeByName but the specified name can be just the first + * part of the node ID as long as the prefix is unique across the + * cluster. + */ +static clusterManagerNode *clusterManagerNodeByAbbreviatedName(const char*name) +{ + if (cluster_manager.nodes == NULL) return NULL; + clusterManagerNode *found = NULL; + sds lcname = sdsempty(); + lcname = sdscpy(lcname, name); + sdstolower(lcname); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->name && + strstr(n->name, lcname) == n->name) { + found = n; + break; + } + } + sdsfree(lcname); + return found; +} + static void clusterManagerNodeResetSlots(clusterManagerNode *node) { memset(node->slots, 0, sizeof(node->slots)); node->slots_count = 0; @@ -2898,6 +2965,12 @@ int clusterManagerSlotCountCompareDesc(const void *n1, const void *n2) { return node2->slots_count - node1->slots_count; } +int clusterManagerCompareNodeBalance(const void *n1, const void *n2) { + clusterManagerNode *node1 = *((clusterManagerNode **) n1); + clusterManagerNode *node2 = *((clusterManagerNode **) n2); + return node1->balance - node2->balance; +} + static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { sds signature = NULL; int node_count = 0, i = 0, name_len = 0; @@ -3200,6 +3273,19 @@ static void clusterManagerShowReshardTable(list *table) { } } +static void clusterManagerReleaseReshardTable(list *table) { + if (table != NULL) { + listIter li; + listNode *ln; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + zfree(item); + } + listRelease(table); + } +} + static void 
clusterManagerLog(int level, const char* fmt, ...) { int use_colors = (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COLOR); @@ -3775,14 +3861,199 @@ static int clusterManagerCommandReshard(int argc, char **argv) { } cleanup: listRelease(sources); - if (table) { - listRewind(table, &li); - while ((ln = listNext(&li)) != NULL) { - clusterManagerReshardTableItem *item = ln->value; - zfree(item); - } - listRelease(table); + clusterManagerReleaseReshardTable(table); + return result; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + +static int clusterManagerCommandRebalance(int argc, char **argv) { + int port = 0; + char *ip = NULL; + clusterManagerNode **weightedNodes = NULL; + list *involved = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + int result = 1, i; + if (config.cluster_manager_command.weight != NULL) { + for (i = 0; i < config.cluster_manager_command.weight_argc; i++) { + char *name = config.cluster_manager_command.weight[i]; + char *p = strchr(name, '='); + if (p == NULL) { + result = 0; + goto cleanup; + } + *p = '\0'; + float w = atof(++p); + clusterManagerNode *n = clusterManagerNodeByAbbreviatedName(name); + if (n == NULL) { + clusterManagerLogErr("*** No such master node %s\n", name); + result = 0; + goto cleanup; + } + n->weight = w; + } } + float total_weight = 0; + int nodes_involved = 0; + int use_empty = config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; + + involved = listCreate(); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + /* Compute the total cluster weight. 
*/ + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + if (!use_empty && n->slots_count == 0) { + n->weight = 0; + continue; + } + total_weight += n->weight; + nodes_involved++; + listAddNodeTail(involved, n); + } + weightedNodes = zmalloc(nodes_involved * + sizeof(clusterManagerNode *)); + if (weightedNodes == NULL) goto cleanup; + /* Check cluster, only proceed if it looks sane. */ + clusterManagerCheckCluster(1); + if (cluster_manager.errors && listLength(cluster_manager.errors) > 0) { + clusterManagerLogErr("*** Please fix your cluster problems " + "before rebalancing" ); + result = 0; + goto cleanup; + } + /* Calculate the slots balance for each node. It's the number of + * slots the node should lose (if positive) or gain (if negative) + * in order to be balanced. */ + int threshold_reached = 0, total_balance = 0; + float threshold = config.cluster_manager_command.threshold; + i = 0; + listRewind(involved, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + weightedNodes[i++] = n; + int expected = (int) (((float)CLUSTER_MANAGER_SLOTS / total_weight) * + n->weight); /* truncate after scaling: weights may be fractional */ + n->balance = n->slots_count - expected; + total_balance += n->balance; + /* Compute the percentage of difference between the + * expected number of slots and the real one, to see + * if it's over the threshold specified by the user. */ + int over_threshold = 0; + if (config.cluster_manager_command.threshold > 0) { + if (n->slots_count > 0) { + float err_perc = fabs((100-(100.0*expected/n->slots_count))); + if (err_perc > threshold) over_threshold = 1; + } else if (expected > 1) { + over_threshold = 1; + } + } + if (over_threshold) threshold_reached = 1; + } + if (!threshold_reached) { + clusterManagerLogErr("*** No rebalancing needed! 
" + "All nodes are within the %.2f%% threshold.\n", + config.cluster_manager_command.threshold); + result = 0; + goto cleanup; + } + /* Because of rounding, it is possible that the balance of all nodes + * summed does not give 0. Make sure that nodes that have to provide + * slots are always matched by nodes receiving slots. */ + while (total_balance > 0) { + listRewind(involved, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->balance < 0 && total_balance > 0) { + n->balance--; + total_balance--; + } + } + } + /* Sort nodes by their slots balance. */ + qsort(weightedNodes, nodes_involved, sizeof(clusterManagerNode *), + clusterManagerCompareNodeBalance); + clusterManagerLogInfo(">>> Rebalancing across %d nodes. " + "Total weight = %.2f\n", + nodes_involved, total_weight); + if (config.verbose) { + for (i = 0; i < nodes_involved; i++) { + clusterManagerNode *n = weightedNodes[i]; + printf("%s:%d balance is %d slots\n", n->ip, n->port, n->balance); + } + } + /* Now we have at the start of the 'weightedNodes' array nodes that should get + * slots, at the end nodes that must give slots. + * We take two indexes, one at the start, and one at the end, + * incrementing or decrementing the indexes accordingly until we + * find nodes that need to get/provide slots. */ + int dst_idx = 0; + int src_idx = nodes_involved - 1; + int simulate = config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_SIMULATE; + while (dst_idx < src_idx) { + clusterManagerNode *dst = weightedNodes[dst_idx]; + clusterManagerNode *src = weightedNodes[src_idx]; + int db = abs(dst->balance); + int sb = abs(src->balance); + int numslots = (db < sb ? db : sb); + if (numslots > 0) { + printf("Moving %d slots from %s:%d to %s:%d\n", numslots, + src->ip, + src->port, + dst->ip, + dst->port); + /* Actually move the slots. 
*/ + list *lsrc = listCreate(), *table = NULL; + listAddNodeTail(lsrc, src); + table = clusterManagerComputeReshardTable(lsrc, numslots); + listRelease(lsrc); + int table_len = table ? (int) listLength(table) : 0; /* listLength() dereferences table */ + if (!table || table_len != numslots) { + clusterManagerLogErr("*** Assertion failed: Reshard table " + "!= number of slots"); + result = 0; + goto end_move; + } + if (simulate) { + for (i = 0; i < table_len; i++) printf("#"); + } else { + int opts = CLUSTER_MANAGER_OPT_QUIET | + CLUSTER_MANAGER_OPT_UPDATE; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + result = clusterManagerMoveSlot(item->source, + dst, + item->slot, + opts, NULL); + if (!result) goto end_move; + printf("#"); + fflush(stdout); + } + + } + printf("\n"); +end_move: + clusterManagerReleaseReshardTable(table); + if (!result) goto cleanup; + } + /* Update nodes balance. */ + dst->balance += numslots; + src->balance -= numslots; + if (dst->balance == 0) dst_idx++; + if (src->balance == 0) src_idx --; + } +cleanup: + if (involved != NULL) listRelease(involved); + if (weightedNodes != NULL) zfree(weightedNodes); return result; invalid_args: fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); @@ -5168,6 +5439,7 @@ int main(int argc, char **argv) { config.eval_ldb_sync = 0; config.enable_ldb_on_eval = 0; config.last_cmd_type = -1; + config.verbose = 0; config.cluster_manager_command.name = NULL; config.cluster_manager_command.argc = 0; config.cluster_manager_command.argv = NULL; @@ -5175,9 +5447,12 @@ int main(int argc, char **argv) { config.cluster_manager_command.replicas = 0; config.cluster_manager_command.from = NULL; config.cluster_manager_command.to = NULL; + config.cluster_manager_command.weight = NULL; config.cluster_manager_command.slots = 0; config.cluster_manager_command.timeout = CLUSTER_MANAGER_MIGRATE_TIMEOUT; config.cluster_manager_command.pipeline = CLUSTER_MANAGER_MIGRATE_PIPELINE; + 
config.cluster_manager_command.threshold = + CLUSTER_MANAGER_REBALANCE_THRESHOLD; pref.hints = 1; spectrum_palette = spectrum_palette_color; From 00771be4e0c05add57d7af25910be414c8e064ec Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 6 Apr 2018 18:02:40 +0200 Subject: [PATCH 21/66] Cluster Manager: fix command. --- src/redis-cli.c | 715 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 642 insertions(+), 73 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 69ba39acc..8af1130c3 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -151,6 +151,7 @@ static uint64_t dictSdsHash(const void *key); static int dictSdsKeyCompare(void *privdata, const void *key1, const void *key2); static void dictSdsDestructor(void *privdata, void *val); +static void dictListDestructor(void *privdata, void *val); /* Cluster Manager Command Info */ typedef struct clusterManagerCommand { @@ -406,6 +407,12 @@ static void dictSdsDestructor(void *privdata, void *val) sdsfree(val); } +void dictListDestructor(void *privdata, void *val) +{ + DICT_NOTUSED(privdata); + listRelease((list*)val); +} + /* _serverAssert is needed by dict */ void _serverAssert(const char *estr, const char *file, int line) { fprintf(stderr, "=== ASSERTION FAILED ==="); @@ -1446,6 +1453,15 @@ static void usage(void) { exit(1); } +static int confirmWithYes(char *msg) { + printf("%s (type 'yes' to accept): ", msg); + fflush(stdout); + char buf[4]; + int nread = read(fileno(stdin),buf,4); + buf[3] = '\0'; + return (nread != 0 && !strcmp("yes", buf)); +} + /* Turn the plain C strings into Sds strings */ static char **convertToSds(int count, char** args) { int j; @@ -1751,7 +1767,7 @@ static int evalMode(int argc, char **argv) { } /*------------------------------------------------------------------------------ - * Cluster Manager mode + * Cluster Manager *--------------------------------------------------------------------------- */ /* The Cluster Manager global structure */ @@ -1760,6 +1776,9 @@ 
static struct clusterManager { list *errors; } cluster_manager; +/* Used by clusterManagerFixSlotsCoverage */ +dict *clusterManagerUncoveredSlots = NULL; + typedef struct clusterManagerNode { redisContext *context; sds name; @@ -1776,10 +1795,12 @@ typedef struct clusterManagerNode { int slots_count; int replicas_count; list *friends; - sds *migrating; - sds *importing; - int migrating_count; - int importing_count; + sds *migrating; /* An array of sds where even strings are slots and odd + * strings are the destination node IDs. */ + sds *importing; /* An array of sds where even strings are slots and odd + * strings are the source node IDs. */ + int migrating_count; /* Length of the migrating array (migrating slots*2) */ + int importing_count; /* Length of the importing array (importing slots*2) */ float weight; /* Weight used by rebalance */ int balance; /* Used by rebalance */ } clusterManagerNode; @@ -1829,7 +1850,7 @@ static void clusterManagerShowNodes(void); static void clusterManagerShowInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); static void clusterManagerWaitForClusterJoin(void); -static void clusterManagerCheckCluster(int quiet); +static int clusterManagerCheckCluster(int quiet); static void clusterManagerLog(int level, const char* fmt, ...); static int clusterManagerIsConfigConsistent(void); static void clusterManagerOnError(sds err); @@ -1846,6 +1867,7 @@ static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandFix(int argc, char **argv); static int clusterManagerCommandReshard(int argc, char **argv); static int clusterManagerCommandRebalance(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); @@ -1863,6 +1885,7 @@ clusterManagerCommandDef 
clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", "replicas "}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, + {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, {"reshard", clusterManagerCommandReshard, -1, "host:port", "from ,to ,slots ,yes,timeout ,pipeline "}, @@ -1988,6 +2011,8 @@ static void freeClusterManager(void) { listRelease(cluster_manager.errors); cluster_manager.errors = NULL; } + if (clusterManagerUncoveredSlots != NULL) + dictRelease(clusterManagerUncoveredSlots); } static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { @@ -2013,6 +2038,38 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { clusterManagerNodeResetSlots(node); return node; } +/* Check whether reply is NULL or its type is REDIS_REPLY_ERROR. In the + * latter case, if 'err' arg is not NULL, it gets allocated with a copy + * of reply error (it's up to the caller function to free it), otherwise + * the error is directly printed. */ +static int clusterManagerCheckRedisReply(clusterManagerNode *n, + redisReply *r, char **err) +{ + int is_err = 0; + if (!r || (is_err = (r->type == REDIS_REPLY_ERROR))) { + if (is_err) { + if (err != NULL) { + *err = zmalloc((r->len + 1) * sizeof(char)); + strcpy(*err, r->str); + } else CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, r->str); + } + return 0; + } + return 1; +} + +static void clusterManagerRemoveNodeFromList(list *nodelist, + clusterManagerNode *node) { + listIter li; + listNode *ln; + listRewind(nodelist, &li); + while ((ln = listNext(&li)) != NULL) { + if (node == ln->value) { + listDelNode(nodelist, ln); + break; + } + } +} /* Return the node with the specified ID or NULL. 
*/ static clusterManagerNode *clusterManagerNodeByName(const char *name) { @@ -2470,10 +2527,10 @@ cleanup: /* Set slot status to "importing" or "migrating" */ static int clusterManagerSetSlot(clusterManagerNode *node1, clusterManagerNode *node2, - int slot, const char *mode, char **err) { + int slot, const char *status, char **err) { redisReply *reply = CLUSTER_MANAGER_COMMAND(node1, "CLUSTER " "SETSLOT %d %s %s", - slot, mode, + slot, status, (char *) node2->name); if (err != NULL) *err = NULL; if (!reply) return 0; @@ -2492,6 +2549,70 @@ cleanup: return success; } +/* Migrate keys taken from reply->elements. It returns the reply from the + * MIGRATE command, or NULL if something goes wrong. If the argument 'dots' + * is not NULL, a dot will be printed for every migrated key. */ +static redisReply *clusterManagerMigrateKeysInReply(clusterManagerNode *source, + clusterManagerNode *target, + redisReply *reply, + int replace, int timeout, + char *dots) +{ + redisReply *migrate_reply = NULL; + char **argv = NULL; + size_t *argv_len = NULL; + int c = (replace ? 
8 : 7); + size_t argc = c + reply->elements; + size_t i, offset = 6; // Keys Offset + argv = zcalloc(argc * sizeof(char *)); + argv_len = zcalloc(argc * sizeof(size_t)); + char portstr[255]; + char timeoutstr[255]; + snprintf(portstr, 10, "%d", target->port); + snprintf(timeoutstr, 10, "%d", timeout); + argv[0] = "MIGRATE"; + argv_len[0] = 7; + argv[1] = target->ip; + argv_len[1] = strlen(target->ip); + argv[2] = portstr; + argv_len[2] = strlen(portstr); + argv[3] = ""; + argv_len[3] = 0; + argv[4] = "0"; + argv_len[4] = 1; + argv[5] = timeoutstr; + argv_len[5] = strlen(timeoutstr); + if (replace) { + argv[offset] = "REPLACE"; + argv_len[offset] = 7; + offset++; + } + argv[offset] = "KEYS"; + argv_len[offset] = 4; + offset++; + for (i = 0; i < reply->elements; i++) { + redisReply *entry = reply->element[i]; + size_t idx = i + offset; + assert(entry->type == REDIS_REPLY_STRING); + argv[idx] = (char *) sdsnew(entry->str); + argv_len[idx] = entry->len; + if (dots) dots[i] = '.'; + } + if (dots) dots[reply->elements] = '\0'; + void *_reply = NULL; + redisAppendCommandArgv(source->context,argc, + (const char**)argv,argv_len); + int success = (redisGetReply(source->context, &_reply) == REDIS_OK); + for (i = 0; i < reply->elements; i++) sdsfree(argv[i + offset]); + if (!success) goto cleanup; + migrate_reply = (redisReply *) _reply; +cleanup: + zfree(argv); + zfree(argv_len); + return migrate_reply; +} + +/* Migrate all keys in the given slot from source to target.*/ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, clusterManagerNode *target, int slot, int timeout, @@ -2499,10 +2620,11 @@ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, char **err) { int success = 1; + int do_fix = (config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_FIX); while (1) { + char *dots = NULL; redisReply *reply = NULL, *migrate_reply = NULL; - char **argv = NULL; - size_t *argv_len = NULL; reply = CLUSTER_MANAGER_COMMAND(source, 
"CLUSTER " "GETKEYSINSLOT %d %d", slot, pipeline); @@ -2523,57 +2645,37 @@ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, freeReplyObject(reply); break; } - char *dots = (verbose ? zmalloc((count+1) * sizeof(char)) : NULL); + if (verbose) dots = zmalloc((count+1) * sizeof(char)); /* Calling MIGRATE command. */ - size_t argc = count + 8; - argv = zcalloc(argc * sizeof(char *)); - argv_len = zcalloc(argc * sizeof(size_t)); - char portstr[255]; - char timeoutstr[255]; - snprintf(portstr, 10, "%d", target->port); - snprintf(timeoutstr, 10, "%d", timeout); - argv[0] = "MIGRATE"; - argv_len[0] = 7; - argv[1] = target->ip; - argv_len[1] = strlen(target->ip); - argv[2] = portstr; - argv_len[2] = strlen(portstr); - argv[3] = ""; - argv_len[3] = 0; - argv[4] = "0"; - argv_len[4] = 1; - argv[5] = timeoutstr; - argv_len[5] = strlen(timeoutstr); - argv[6] = "REPLACE"; - argv_len[6] = 7; - argv[7] = "KEYS"; - argv_len[7] = 4; - for (size_t i = 0; i < count; i++) { - redisReply *entry = reply->element[i]; - size_t idx = i + 8; - assert(entry->type == REDIS_REPLY_STRING); - argv[idx] = (char *) sdsnew(entry->str); - argv_len[idx] = entry->len; - if (verbose) dots[i] = '.'; - } - if (verbose) dots[count] = '\0'; - void *_reply = NULL; - redisAppendCommandArgv(source->context,argc, - (const char**)argv,argv_len); - success = (redisGetReply(source->context, &_reply) == REDIS_OK); - for (size_t i = 0; i < count; i++) sdsfree(argv[i + 8]); - if (!success) goto next; - migrate_reply = (redisReply *) _reply; + migrate_reply = clusterManagerMigrateKeysInReply(source, target, + reply, 0, timeout, + dots); + if (migrate_reply == NULL) goto next; if (migrate_reply->type == REDIS_REPLY_ERROR) { - // TODO: Implement fix. 
- success = 0; - if (err != NULL) { - *err = zmalloc((migrate_reply->len + 1) * sizeof(char)); - strcpy(*err, migrate_reply->str); - printf("\n"); - CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + if (do_fix && strstr(migrate_reply->str, "BUSYKEY")) { + clusterManagerLogWarn("*** Target key exists. " + "Replacing it for FIX.\n"); + freeReplyObject(migrate_reply); + /* Try to migrate keys adding REPLACE option. */ + migrate_reply = clusterManagerMigrateKeysInReply(source, + target, + reply, + 1, timeout, + NULL); + success = (migrate_reply != NULL && + migrate_reply->type != REDIS_REPLY_ERROR); + } else success = 0; + if (!success) { + if (migrate_reply != NULL) { + if (err) { + *err = zmalloc((migrate_reply->len + 1) * sizeof(char)); + strcpy(*err, migrate_reply->str); + } + printf("\n"); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, migrate_reply->str); + } + goto next; } - goto next; } if (verbose) { printf("%s", dots); @@ -2582,8 +2684,7 @@ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, next: if (reply != NULL) freeReplyObject(reply); if (migrate_reply != NULL) freeReplyObject(migrate_reply); - zfree(argv); - zfree(argv_len); + if (dots) zfree(dots); if (!success) break; } return success; @@ -2729,6 +2830,7 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char *name = NULL, *addr = NULL, *flags = NULL, *master_id = NULL, *ping_sent = NULL, *ping_recv = NULL, *config_epoch = NULL, *link_status = NULL; + UNUSED(link_status); int i = 0; while ((p = strchr(line, ' ')) != NULL) { *p = '\0'; @@ -2974,11 +3076,11 @@ int clusterManagerCompareNodeBalance(const void *n1, const void *n2) { static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { sds signature = NULL; int node_count = 0, i = 0, name_len = 0; + char **node_configs = NULL; redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); if (reply == NULL || reply->type == REDIS_REPLY_ERROR) goto cleanup; char *lines = reply->str, *p, *line; - char 
**node_configs = NULL; while ((p = strstr(lines, "\n")) != NULL) { i = 0; *p = '\0'; @@ -3057,8 +3159,10 @@ static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { } cleanup: if (reply != NULL) freeReplyObject(reply); - for (i = 0; i < node_count; i++) zfree(node_configs[i]); - zfree(node_configs); + if (node_configs != NULL) { + for (i = 0; i < node_count; i++) zfree(node_configs[i]); + zfree(node_configs); + } return signature; } @@ -3114,9 +3218,453 @@ static int clusterManagerGetCoveredSlots(char *all_slots) { return totslots; } -static void clusterManagerCheckCluster(int quiet) { +static void clusterManagerPrintSlotsList(list *slots) { + listIter li; + listNode *ln; + listRewind(slots, &li); + sds first = NULL; + while ((ln = listNext(&li)) != NULL) { + sds slot = ln->value; + if (!first) first = slot; + else printf(", "); + printf("%s", slot); + } + printf("\n"); +} + +/* Return the node, among 'nodes' with the greatest number of keys + * in the specified slot. */ +static clusterManagerNode * clusterManagerGetNodeWithMostKeysInSlot(list *nodes, + int slot, + char **err) +{ + clusterManagerNode *node = NULL; + int numkeys = 0; + listIter li; + listNode *ln; + listRewind(nodes, &li); + if (err) *err = NULL; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + redisReply *r = + CLUSTER_MANAGER_COMMAND(n, "CLUSTER COUNTKEYSINSLOT %d", slot); + int success = clusterManagerCheckRedisReply(n, r, err); + if (success) { + if (r->integer > numkeys || node == NULL) { + numkeys = r->integer; + node = n; + } + } + if (r != NULL) freeReplyObject(r); + /* If the reply contains errors */ + if (!success) { + if (err != NULL && *err != NULL) + CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, *err); + node = NULL; + break; + } + } + return node; +} + +static int clusterManagerFixSlotsCoverage(char *all_slots) { + int i, fixed = 0; + list *none = NULL, *single = NULL, *multi = 
NULL; + clusterManagerLogInfo(">>> Fixing slots coverage...\n"); + printf("List of not covered slots: \n"); + int uncovered_count = 0; + sds log = sdsempty(); + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + int covered = all_slots[i]; + if (!covered) { + sds key = sdsfromlonglong((long long) i); + if (uncovered_count++ > 0) printf(","); + printf("%s", (char *) key); + list *slot_nodes = listCreate(); + sds slot_nodes_str = sdsempty(); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + redisReply *reply = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER GETKEYSINSLOT %d %d", i, 1); + if (!clusterManagerCheckRedisReply(n, reply, NULL)) { + fixed = -1; + if (reply) freeReplyObject(reply); + goto cleanup; + } + assert(reply->type == REDIS_REPLY_ARRAY); + if (reply->elements > 0) { + listAddNodeTail(slot_nodes, n); + if (listLength(slot_nodes) > 1) + slot_nodes_str = sdscat(slot_nodes_str, ", "); + slot_nodes_str = sdscatfmt(slot_nodes_str, + "%s:%u", n->ip, n->port); + } + freeReplyObject(reply); + } + log = sdscatfmt(log, "\nSlot %S has keys in %u nodes: %S", + key, listLength(slot_nodes), slot_nodes_str); + sdsfree(slot_nodes_str); + dictAdd(clusterManagerUncoveredSlots, key, slot_nodes); + } + } + printf("\n%s\n", log); + /* For every slot, take action depending on the actual condition: + * 1) No node has keys for this slot. + * 2) A single node has keys for this slot. + * 3) Multiple nodes have keys for this slot. 
*/ + none = listCreate(); + single = listCreate(); + multi = listCreate(); + dictIterator *iter = dictGetIterator(clusterManagerUncoveredSlots); + dictEntry *entry; + while ((entry = dictNext(iter)) != NULL) { + sds slot = (sds) dictGetKey(entry); + list *nodes = (list *) dictGetVal(entry); + switch (listLength(nodes)){ + case 0: listAddNodeTail(none, slot); break; + case 1: listAddNodeTail(single, slot); break; + default: listAddNodeTail(multi, slot); break; + } + } + dictReleaseIterator(iter); + + /* Handle case "1": keys in no node. */ + if (listLength(none) > 0) { + printf("The following uncovered slots have no keys " + "across the cluster:\n"); + clusterManagerPrintSlotsList(none); + if (confirmWithYes("Fix these slots by covering with a random node?")){ + srand(time(NULL)); + listIter li; + listNode *ln; + listRewind(none, &li); + while ((ln = listNext(&li)) != NULL) { + sds slot = ln->value; + long idx = (long) (rand() % listLength(cluster_manager.nodes)); + listNode *node_n = listIndex(cluster_manager.nodes, idx); + assert(node_n != NULL); + clusterManagerNode *n = node_n->value; + clusterManagerLogInfo(">>> Covering slot %s with %s:%d\n", + slot, n->ip, n->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER ADDSLOTS %s", slot); + if (!clusterManagerCheckRedisReply(n, r, NULL)) fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + fixed++; + } + } + } + + /* Handle case "2": keys only in one node. 
*/ + if (listLength(single) > 0) { + printf("The following uncovered slots have keys in just one node:\n"); + clusterManagerPrintSlotsList(single); + if (confirmWithYes("Fix these slots by covering with those nodes?")){ + listIter li; + listNode *ln; + listRewind(single, &li); + while ((ln = listNext(&li)) != NULL) { + sds slot = ln->value; + dictEntry *entry = dictFind(clusterManagerUncoveredSlots, slot); + assert(entry != NULL); + list *nodes = (list *) dictGetVal(entry); + listNode *fn = listFirst(nodes); + assert(fn != NULL); + clusterManagerNode *n = fn->value; + clusterManagerLogInfo(">>> Covering slot %s with %s:%d\n", + slot, n->ip, n->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER ADDSLOTS %s", slot); + if (!clusterManagerCheckRedisReply(n, r, NULL)) fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + fixed++; + } + } + } + + /* Handle case "3": keys in multiple nodes. */ + if (listLength(multi) > 0) { + printf("The folowing uncovered slots have keys in multiple nodes:\n"); + clusterManagerPrintSlotsList(multi); + if (confirmWithYes("Fix these slots by moving keys " + "into a single node?")) { + listIter li; + listNode *ln; + listRewind(multi, &li); + while ((ln = listNext(&li)) != NULL) { + sds slot = ln->value; + dictEntry *entry = dictFind(clusterManagerUncoveredSlots, slot); + assert(entry != NULL); + list *nodes = (list *) dictGetVal(entry); + int s = atoi(slot); + clusterManagerNode *target = + clusterManagerGetNodeWithMostKeysInSlot(nodes, s, NULL); + if (target == NULL) { + fixed = -1; + goto cleanup; + } + clusterManagerLogInfo(">>> Covering slot %s moving keys " + "to %s:%d\n", slot, + target->ip, target->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(target, + "CLUSTER ADDSLOTS %s", slot); + if (!clusterManagerCheckRedisReply(target, r, NULL)) fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + r = CLUSTER_MANAGER_COMMAND(target, + "CLUSTER SETSLOT %s %s", slot, "STABLE"); + if 
(!clusterManagerCheckRedisReply(target, r, NULL)) fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + listIter nli; + listNode *nln; + listRewind(nodes, &nli); + while ((nln = listNext(&nli)) != NULL) { + clusterManagerNode *src = nln->value; + if (src == target) continue; + /* Set the source node in 'importing' state + * (even if we will actually migrate keys away) + * in order to avoid receiving redirections + * for MIGRATE. */ + redisReply *r = CLUSTER_MANAGER_COMMAND(src, + "CLUSTER SETSLOT %s %s %s", slot, + "IMPORTING", target->name); + if (!clusterManagerCheckRedisReply(target, r, NULL)) + fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + int opts = CLUSTER_MANAGER_OPT_VERBOSE | + CLUSTER_MANAGER_OPT_COLD; + if (!clusterManagerMoveSlot(src, target, s, opts, NULL)) { + fixed = -1; + goto cleanup; + } + } + fixed++; + } + } + } +cleanup: + sdsfree(log); + if (none) listRelease(none); + if (single) listRelease(single); + if (multi) listRelease(multi); + return fixed; +} + +/* Slot 'slot' was found to be in importing or migrating state in one or + * more nodes. This function fixes this condition by migrating keys where + * it seems more sensible. */ +static int clusterManagerFixOpenSlot(int slot) { + clusterManagerLogInfo(">>> Fixing open slot %d\n", slot); + /* Try to obtain the current slot owner, according to the current + * nodes configuration. 
*/ + int success = 1; + list *owners = listCreate(); + list *migrating = listCreate(); + list *importing = listCreate(); + sds migrating_str = sdsempty(); + sds importing_str = sdsempty(); + clusterManagerNode *owner = NULL; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->slots[slot]) { + if (owner == NULL) owner = n; + listAddNodeTail(owners, n); + } + } + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->migrating) { + for (int i = 0; i < n->migrating_count; i += 2) { + sds migrating_slot = n->migrating[i]; + if (atoi(migrating_slot) == slot) { + char *sep = (listLength(migrating) == 0 ? "" : ","); + migrating_str = sdscatfmt(migrating_str, "%s%S:%u", + sep, n->ip, n->port); + listAddNodeTail(migrating, n); + break; + } + } + } + if (n->importing) { + for (int i = 0; i < n->importing_count; i += 2) { + sds importing_slot = n->importing[i]; + if (atoi(importing_slot) == slot) { + char *sep = (listLength(importing) == 0 ? "" : ","); + importing_str = sdscatfmt(importing_str, "%s%S:%u", + sep, n->ip, n->port); + listAddNodeTail(importing, n); + break; + } + } + } + } + printf("Set as migrating in: %s\n", migrating_str); + printf("Set as importing in: %s\n", importing_str); + /* If there is no slot owner, set as owner the slot with the biggest + * number of keys, among the set of migrating / importing nodes. */ + if (owner == NULL) { + clusterManagerLogInfo(">>> Nobody claims ownership, " + "selecting an owner...\n"); + owner = clusterManagerGetNodeWithMostKeysInSlot(cluster_manager.nodes, + slot, NULL); + // If we still don't have an owner, we can't fix it. + if (owner == NULL) { + clusterManagerLogErr("[ERR] Can't select a slot owner. 
" + "Impossible to fix.\n"); + success = 0; + goto cleanup; + } + + // Use ADDSLOTS to assign the slot. + printf("*** Configuring %s:%d as the slot owner\n", owner->ip, + owner->port); + redisReply *reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER " + "SETSLOT %d %s", + slot, "STABLE"); + success = clusterManagerCheckRedisReply(owner, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER ADDSLOTS %d", slot); + success = clusterManagerCheckRedisReply(owner, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + /* Make sure this information will propagate. Not strictly needed + * since there is no past owner, so all the other nodes will accept + * whatever epoch this node will claim the slot with. */ + reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER BUMPEPOCH"); + success = clusterManagerCheckRedisReply(owner, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + /* Remove the owner from the list of migrating/importing + * nodes. */ + clusterManagerRemoveNodeFromList(migrating, owner); + clusterManagerRemoveNodeFromList(importing, owner); + } + /* If there are multiple owners of the slot, we need to fix it + * so that a single node is the owner and all the other nodes + * are in importing state. Later the fix can be handled by one + * of the base cases above. + * + * Note that this case also covers multiple nodes having the slot + * in migrating state, since migrating is a valid state only for + * slot owners. 
*/ + if (listLength(owners) > 1) { + owner = clusterManagerGetNodeWithMostKeysInSlot(owners, slot, NULL); + listRewind(owners, &li); + redisReply *reply = NULL; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n == owner) continue; + reply = CLUSTER_MANAGER_COMMAND(n, "CLUSTER DELSLOTS %d", slot); + success = clusterManagerCheckRedisReply(n, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + success = clusterManagerSetSlot(n, owner, slot, "importing", NULL); + if (!success) goto cleanup; + clusterManagerRemoveNodeFromList(importing, n); //Avoid duplicates + listAddNodeTail(importing, n); + } + reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER BUMPEPOCH"); + success = clusterManagerCheckRedisReply(owner, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + } + int move_opts = CLUSTER_MANAGER_OPT_VERBOSE; + /* Case 1: The slot is in migrating state in one node, and in + * importing state in one node. That's trivial to address. */ + if (listLength(migrating) == 1 && listLength(importing) == 1) { + clusterManagerNode *src = listFirst(migrating)->value; + clusterManagerNode *dst = listFirst(importing)->value; + success = clusterManagerMoveSlot(src, dst, slot, move_opts, NULL); + } + /* Case 2: There are multiple nodes that claim the slot as importing, + * they probably got keys about the slot after a restart so opened + * the slot. In this case we just move all the keys to the owner + * according to the configuration. 
*/ + else if (listLength(migrating) == 0 && listLength(importing) > 0) { + clusterManagerLogInfo(">>> Moving all the %d slot keys to its " + "owner %s:%d\n", slot, owner->ip, owner->port); + move_opts |= CLUSTER_MANAGER_OPT_COLD; + listRewind(importing, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n == owner) continue; + success = clusterManagerMoveSlot(n, owner, slot, move_opts, NULL); + if (!success) goto cleanup; + clusterManagerLogInfo(">>> Setting %d as STABLE in " + "%s:%d\n", slot, n->ip, n->port); + + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER SETSLOT %d %s", + slot, "STABLE"); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) goto cleanup; + } + } else { + int try_to_close_slot = (listLength(importing) == 0 && + listLength(migrating) == 1); + if (try_to_close_slot) { + clusterManagerNode *n = listFirst(migrating)->value; + redisReply *r = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER GETKEYSINSLOT %d %d", slot, 10); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) { + if (success) try_to_close_slot = (r->elements == 0); + freeReplyObject(r); + } + if (!success) goto cleanup; + } + /* Case 3: There are no slots claiming to be in importing state, but + * there is a migrating node that actually don't have any key. We + * can just close the slot, probably a reshard interrupted in the middle. */ + if (try_to_close_slot) { + clusterManagerNode *n = listFirst(migrating)->value; + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER SETSLOT %d %s", + slot, "STABLE"); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) goto cleanup; + } else { + success = 0; + clusterManagerLogErr("[ERR] Sorry, redis-cli can't fix this slot " + "yet (work in progress). 
Slot is set as " + "migrating in %s, as importing in %s, " + "owner is %s:%d\n", migrating_str, + importing_str, owner->ip, owner->port); + } + } +cleanup: + listRelease(owners); + listRelease(migrating); + listRelease(importing); + sdsfree(migrating_str); + sdsfree(importing_str); + return success; +} + +static int clusterManagerCheckCluster(int quiet) { listNode *ln = listFirst(cluster_manager.nodes); - if (!ln) return; + if (!ln) return 0; + int result = 1; + int do_fix = config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_FIX; clusterManagerNode *node = ln->value; clusterManagerLogInfo(">>> Performing Cluster Check (using node %s:%d)\n", node->ip, node->port); @@ -3124,6 +3672,7 @@ static void clusterManagerCheckCluster(int quiet) { if (!clusterManagerIsConfigConsistent()) { sds err = sdsnew("[ERR] Nodes don't agree about configuration!"); clusterManagerOnError(err); + result = 0; } else { clusterManagerLogOk("[OK] All nodes agree about slots " "configuration.\n"); @@ -3174,6 +3723,7 @@ static void clusterManagerCheckCluster(int quiet) { } } if (open_slots != NULL) { + result = 0; dictIterator *iter = dictGetIterator(open_slots); dictEntry *entry; sds errstr = sdsnew("[WARNING] The following slots are open: "); @@ -3185,6 +3735,17 @@ static void clusterManagerCheckCluster(int quiet) { } clusterManagerLogErr("%s.\n", (char *) errstr); sdsfree(errstr); + if (do_fix) { + // Fix open slots. 
+ dictReleaseIterator(iter); + iter = dictGetIterator(open_slots); + while ((entry = dictNext(iter)) != NULL) { + sds slot = (sds) dictGetKey(entry); + result = clusterManagerFixOpenSlot(atoi(slot)); + if (!result) break; + } + } + dictReleaseIterator(iter); dictRelease(open_slots); } clusterManagerLogInfo(">>> Check slots coverage...\n"); @@ -3200,7 +3761,16 @@ static void clusterManagerCheckCluster(int quiet) { "covered by nodes.\n", CLUSTER_MANAGER_SLOTS); clusterManagerOnError(err); + result = 0; + if (do_fix/* && result*/) { + dictType dtype = clusterManagerDictType; + dtype.valDestructor = dictListDestructor; + clusterManagerUncoveredSlots = dictCreate(&dtype, NULL); + int fixed = clusterManagerFixSlotsCoverage(slots); + if (fixed > 0) result = 1; + } } + return result; } static clusterManagerNode *clusterNodeForResharding(char *id, @@ -3546,12 +4116,7 @@ assign_replicas: } clusterManagerOptimizeAntiAffinity(ip_nodes, ip_count); clusterManagerShowNodes(); - printf("Can I set the above configuration? 
%s", "(type 'yes' to accept): "); - fflush(stdout); - char buf[4]; - int nread = read(fileno(stdin),buf,4); - buf[3] = '\0'; - if (nread != 0 && !strcmp("yes", buf)) { + if (confirmWithYes("Can I set the above configuration?")) { listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; @@ -3674,13 +4239,17 @@ static int clusterManagerCommandCheck(int argc, char **argv) { clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; clusterManagerShowInfo(); - clusterManagerCheckCluster(0); - return 1; + return clusterManagerCheckCluster(0); invalid_args: fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); return 0; } +static int clusterManagerCommandFix(int argc, char **argv) { + config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_FIX; + return clusterManagerCommandCheck(argc, argv); +} + static int clusterManagerCommandReshard(int argc, char **argv) { int port = 0; char *ip = NULL; From 78669035551e76f7c9306f6546e0b9d05fcd17e8 Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 10 Apr 2018 16:25:25 +0200 Subject: [PATCH 22/66] Cluster Manager: import command --- src/Makefile | 2 +- src/redis-cli.c | 216 +++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 195 insertions(+), 23 deletions(-) diff --git a/src/Makefile b/src/Makefile index a5e0e231a..a64454dad 100644 --- a/src/Makefile +++ b/src/Makefile @@ -146,7 +146,7 @@ REDIS_SERVER_NAME=redis-server REDIS_SENTINEL_NAME=redis-sentinel REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o 
hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.c REDIS_CLI_NAME=redis-cli -REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o +REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o crc16.o REDIS_BENCHMARK_NAME=redis-benchmark REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o zmalloc.o redis-benchmark.o REDIS_CHECK_RDB_NAME=redis-check-rdb diff --git a/src/redis-cli.c b/src/redis-cli.c index 8af1130c3..96bde3568 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -74,7 +74,7 @@ #define CLUSTER_MANAGER_REBALANCE_THRESHOLD 2 #define CLUSTER_MANAGER_INVALID_HOST_ARG \ - "Invalid arguments: you need to pass either a valid " \ + "[ERR] Invalid arguments: you need to pass either a valid " \ "address (ie. 120.0.0.1:7000) or space separated IP " \ "and port (ie. 
120.0.0.1 7000)\n" #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) @@ -115,7 +115,9 @@ #define CLUSTER_MANAGER_CMD_FLAG_AUTOWEIGHTS 1 << 3 #define CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER 1 << 4 #define CLUSTER_MANAGER_CMD_FLAG_SIMULATE 1 << 5 -#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 +#define CLUSTER_MANAGER_CMD_FLAG_REPLACE 1 << 6 +#define CLUSTER_MANAGER_CMD_FLAG_COPY 1 << 7 +#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 8 #define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 #define CLUSTER_MANAGER_OPT_COLD 1 << 1 @@ -237,6 +239,8 @@ static long getLongInfoField(char *info, char *field); * Utility functions *--------------------------------------------------------------------------- */ +uint16_t crc16(const char *buf, int len); + static long long ustime(void) { struct timeval tv; long long ust; @@ -1325,6 +1329,12 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"--cluster-simulate")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_SIMULATE; + } else if (!strcmp(argv[i],"--cluster-replace")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_REPLACE; + } else if (!strcmp(argv[i],"--cluster-copy")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_COPY; } else if (!strcmp(argv[i],"--cluster-use-empty-masters")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; @@ -1870,6 +1880,7 @@ static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandFix(int argc, char **argv); static int clusterManagerCommandReshard(int argc, char **argv); static int clusterManagerCommandRebalance(int argc, char **argv); +static int clusterManagerCommandImport(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); @@ -1892,6 +1903,8 @@ clusterManagerCommandDef clusterManagerCommands[] = { {"rebalance", 
clusterManagerCommandRebalance, -1, "host:port", "weight ,use-empty-masters," "timeout ,simulate,pipeline ,threshold "}, + {"import", clusterManagerCommandImport, 1, "host:port", + "from ,copy,replace"}, {"call", clusterManagerCommandCall, -2, "host:port command arg arg .. arg", NULL}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} @@ -2383,6 +2396,37 @@ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { return slots; } +/* ----------------------------------------------------------------------------- + * Key space handling + * -------------------------------------------------------------------------- */ + +/* We have 16384 hash slots. The hash slot of a given key is obtained + * as the least significant 14 bits of the crc16 of the key. + * + * However if the key contains the {...} pattern, only the part between + * { and } is hashed. This may be useful in the future to force certain + * keys to be in the same node (assuming no resharding is in progress). */ +static unsigned int keyHashSlot(char *key, int keylen) { + int s, e; /* start-end indexes of { and } */ + + for (s = 0; s < keylen; s++) + if (key[s] == '{') break; + + /* No '{' ? Hash the whole key. This is the base case. */ + if (s == keylen) return crc16(key,keylen) & 0x3FFF; + + /* '{' found? Check if we have the corresponding '}'. */ + for (e = s+1; e < keylen; e++) + if (key[e] == '}') break; + + /* No '}' or nothing between {} ? Hash the whole key. */ + if (e == keylen || e == s+1) return crc16(key,keylen) & 0x3FFF; + + /* If we are here there is both a { and a } on its right. Hash + * what is in the middle between { and }. */ + return crc16(key+s+1,e-s-1) & 0x3FFF; +} + static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent) { sds info = sdsempty(); sds spaces = sdsempty(); @@ -3533,8 +3577,8 @@ static int clusterManagerFixOpenSlot(int slot) { } // Use ADDSLOTS to assign the slot. 
- printf("*** Configuring %s:%d as the slot owner\n", owner->ip, - owner->port); + clusterManagerLogWarn("*** Configuring %s:%d as the slot owner\n", + owner->ip, owner->port); redisReply *reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER " "SETSLOT %d %s", slot, "STABLE"); @@ -4527,7 +4571,7 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { if (over_threshold) threshold_reached = 1; } if (!threshold_reached) { - clusterManagerLogErr("*** No rebalancing needed! " + clusterManagerLogWarn("*** No rebalancing needed! " "All nodes are within the %.2f%% threshold.\n", config.cluster_manager_command.threshold); result = 0; @@ -4586,7 +4630,7 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { listRelease(lsrc); int table_len = (int) listLength(table); if (!table || table_len != numslots) { - clusterManagerLogErr("*** Assertio failed: Reshard table " + clusterManagerLogErr("*** Assertion failed: Reshard table " "!= number of slots"); result = 0; goto end_move; @@ -4629,23 +4673,148 @@ invalid_args: return 0; } -static int clusterManagerCommandCall(int argc, char **argv) { - int port = 0; - char *ip = NULL; - char *addr = argv[0]; - char *c = strrchr(addr, '@'); - int i; - if (c != NULL) *c = '\0'; - c = strrchr(addr, ':'); - if (c != NULL) { - *c = '\0'; - ip = addr; - port = atoi(++c); - } else { - fprintf(stderr, - "Invalid arguments: first agrumnt must be host:port.\n"); - return 0; +static int clusterManagerCommandImport(int argc, char **argv) { + int success = 1; + int port = 0, src_port = 0; + char *ip = NULL, *src_ip = NULL; + char *invalid_args_msg = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) { + invalid_args_msg = CLUSTER_MANAGER_INVALID_HOST_ARG; + goto invalid_args; } + if (config.cluster_manager_command.from == NULL) { + invalid_args_msg = "[ERR] Option '--cluster-from' is required for " + "subcommand 'import'.\n"; + goto invalid_args; + } + char *src_host[] = {config.cluster_manager_command.from}; + if 
(!getClusterHostFromCmdArgs(1, src_host, &src_ip, &src_port)) { + invalid_args_msg = "[ERR] Invalid --cluster-from host. You need to " + "pass a valid address (ie. 120.0.0.1:7000).\n"; + goto invalid_args; + } + clusterManagerLogInfo(">>> Importing data from %s:%d to cluster %s:%d\n", + src_ip, src_port, ip, port); + + clusterManagerNode *refnode = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + char *reply_err = NULL; + redisReply *src_reply = NULL; + // Connect to the source node. + redisContext *src_ctx = redisConnect(src_ip, src_port); + if (src_ctx->err) { + success = 0; + fprintf(stderr,"Could not connect to Redis at %s:%d: %s.\n", src_ip, + src_port, src_ctx->errstr); + goto cleanup; + } + src_reply = reconnectingRedisCommand(src_ctx, "INFO"); + if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) { + if (src_reply && src_reply->str) reply_err = src_reply->str; + success = 0; + goto cleanup; + } + if (getLongInfoField(src_reply->str, "cluster_enabled")) { + clusterManagerLogErr("[ERR] The source node should not be a " + "cluster node.\n"); + success = 0; + goto cleanup; + } + freeReplyObject(src_reply); + src_reply = reconnectingRedisCommand(src_ctx, "DBSIZE"); + if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) { + if (src_reply && src_reply->str) reply_err = src_reply->str; + success = 0; + goto cleanup; + } + int size = src_reply->integer, i; + clusterManagerLogWarn("*** Importing %d keys from DB 0\n", size); + + // Build a slot -> node map (zero the whole array, not just one byte per slot) + clusterManagerNode *slots_map[CLUSTER_MANAGER_SLOTS]; + memset(slots_map, 0, sizeof(slots_map)); + listIter li; + listNode *ln; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->slots_count == 0) continue; + if (n->slots[i]) { + slots_map[i] = n; + 
break; + } + } + } + + char cmdfmt[50] = "MIGRATE %s %d %s %d %d"; /* optional keywords are appended literally */ + if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COPY) + strcat(cmdfmt, " COPY"); + if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_REPLACE) + strcat(cmdfmt, " REPLACE"); + + /* Use SCAN to iterate over the keys, migrating to the + * right node as needed. */ + int cursor = -999, timeout = config.cluster_manager_command.timeout; + while (cursor != 0) { + if (cursor < 0) cursor = 0; + freeReplyObject(src_reply); + src_reply = reconnectingRedisCommand(src_ctx, "SCAN %d COUNT %d", + cursor, 1000); + if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) { + if (src_reply && src_reply->str) reply_err = src_reply->str; + success = 0; + goto cleanup; + } + assert(src_reply->type == REDIS_REPLY_ARRAY); + assert(src_reply->elements >= 2); + assert(src_reply->element[1]->type == REDIS_REPLY_ARRAY); + if (src_reply->element[0]->type == REDIS_REPLY_STRING) + cursor = atoi(src_reply->element[0]->str); + else if (src_reply->element[0]->type == REDIS_REPLY_INTEGER) + cursor = src_reply->element[0]->integer; + int keycount = src_reply->element[1]->elements; + for (i = 0; i < keycount; i++) { + redisReply *kr = src_reply->element[1]->element[i]; + assert(kr->type == REDIS_REPLY_STRING); + char *key = kr->str; + uint16_t slot = keyHashSlot(key, kr->len); + clusterManagerNode *target = slots_map[slot]; + printf("Migrating %s to %s:%d: ", key, target->ip, target->port); + redisReply *r = reconnectingRedisCommand(src_ctx, cmdfmt, + target->ip, + target->port, + key, 0, timeout); + if (!r || r->type == REDIS_REPLY_ERROR) { + if (r && r->str) { + clusterManagerLogErr("Source %s:%d replied with " + "error:\n%s\n", src_ip, src_port, + r->str); + } + success = 0; + } + freeReplyObject(r); + if (!success) goto cleanup; + clusterManagerLogOk("OK\n"); + } + } +cleanup: + if (reply_err) + clusterManagerLogErr("Source %s:%d replied with error:\n%s\n", + src_ip, src_port, 
reply_err); + if (src_ctx) redisFree(src_ctx); + if (src_reply) freeReplyObject(src_reply); + return success; +invalid_args: + fprintf(stderr, "%s", invalid_args_msg); + return 0; +} + +static int clusterManagerCommandCall(int argc, char **argv) { + int port = 0, i; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *refnode = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; argc--; @@ -4677,6 +4846,9 @@ static int clusterManagerCommandCall(int argc, char **argv) { } zfree(argvlen); return 1; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; } static int clusterManagerCommandHelp(int argc, char **argv) { From f3c9a00c1c3ce8a5afd01fbaed2da53739a80e46 Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 10 Apr 2018 16:53:24 +0200 Subject: [PATCH 23/66] Cluster Manager: added clusterManagerCheckCluster to import command --- src/redis-cli.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/redis-cli.c b/src/redis-cli.c index 96bde3568..34072b74d 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -4698,6 +4698,7 @@ static int clusterManagerCommandImport(int argc, char **argv) { clusterManagerNode *refnode = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + if (!clusterManagerCheckCluster(0)) return 0; char *reply_err = NULL; redisReply *src_reply = NULL; // Connect to the source node. From 5b9b7bb7ce853e97d4c026b86c0c7e19a008e9b0 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 11 Apr 2018 17:08:53 +0200 Subject: [PATCH 24/66] Cluster Manager: add-node command. 
--- src/redis-cli.c | 168 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 154 insertions(+), 14 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 34072b74d..c0d80801d 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -165,6 +165,7 @@ typedef struct clusterManagerCommand { char *from; char *to; char **weight; + char *master_id; int weight_argc; int slots; int timeout; @@ -1299,6 +1300,8 @@ static int parseOptions(int argc, char **argv) { usage(); } else if (!strcmp(argv[i],"--cluster-replicas") && !lastarg) { config.cluster_manager_command.replicas = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-master-id") && !lastarg) { + config.cluster_manager_command.master_id = argv[++i]; } else if (!strcmp(argv[i],"--cluster-from") && !lastarg) { config.cluster_manager_command.from = argv[++i]; } else if (!strcmp(argv[i],"--cluster-to") && !lastarg) { @@ -1335,6 +1338,9 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"--cluster-copy")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_COPY; + } else if (!strcmp(argv[i],"--cluster-slave")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_SLAVE; } else if (!strcmp(argv[i],"--cluster-use-empty-masters")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; @@ -1847,6 +1853,8 @@ static clusterManagerNode *clusterManagerNodeByName(const char *name); static clusterManagerNode *clusterManagerNodeByAbbreviatedName(const char *n); static void clusterManagerNodeResetSlots(clusterManagerNode *node); static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); +static void clusterManagerPrintNotClusterNodeError(clusterManagerNode *node, + char *err); static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err); static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts); @@ -1875,6 +1883,7 @@ static void 
clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, /* Cluster Manager commands. */ static int clusterManagerCommandCreate(int argc, char **argv); +static int clusterManagerCommandAddNode(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandFix(int argc, char **argv); @@ -1895,6 +1904,8 @@ typedef struct clusterManagerCommandDef { clusterManagerCommandDef clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", "replicas "}, + {"add-node", clusterManagerCommandAddNode, 2, + "new_host:new_port existing_host:existing_port", "slave,master-id "}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, @@ -3030,8 +3041,7 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { opts |= CLUSTER_MANAGER_OPT_GETFRIENDS; char *e = NULL; if (!clusterManagerNodeIsCluster(node, &e)) { - char *msg = (e ? e : "is not configured as a cluster node."); - clusterManagerLogErr("[ERR] Node %s:%d %s\n",node->ip,node->port,msg); + clusterManagerPrintNotClusterNodeError(node, e); if (e) zfree(e); freeClusterManagerNode(node); return 0; @@ -3313,6 +3323,27 @@ static clusterManagerNode * clusterManagerGetNodeWithMostKeysInSlot(list *nodes, return node; } +/* This function returns the master that has the least number of replicas + * in the cluster. If there are multiple masters with the same smaller + * number of replicas, one at random is returned. 
*/ + +static clusterManagerNode *clusterManagerNodeWithLeastReplicas() { + clusterManagerNode *node = NULL; + int lowest_count = 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; /* only masters are candidates */ + if (node == NULL || n->replicas_count < lowest_count) { + node = n; + lowest_count = n->replicas_count; + } + } + return node; +} + static int clusterManagerFixSlotsCoverage(char *all_slots) { int i, fixed = 0; list *none = NULL, *single = NULL, *multi = NULL; @@ -3966,6 +3997,26 @@ static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, array->nodes[array->count++] = node; } +static void clusterManagerPrintNotEmptyNodeError(clusterManagerNode *node, + char *err) +{ + char *msg; + if (err) msg = err; + else { + msg = "is not empty. Either the node already knows other " + "nodes (check with CLUSTER NODES) or contains some " + "key in database 0."; + } + clusterManagerLogErr("[ERR] Node %s:%d %s\n", node->ip, node->port, msg); +} + +static void clusterManagerPrintNotClusterNodeError(clusterManagerNode *node, + char *err) +{ + char *msg = (err ? err : "is not configured as a cluster node."); + clusterManagerLogErr("[ERR] Node %s:%d %s\n", node->ip, node->port, msg); +} + /* Execute redis-cli in Cluster Manager mode */ static void clusterManagerMode(clusterManagerCommandProc *proc) { int argc = config.cluster_manager_command.argc; @@ -4008,8 +4059,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { } char *err = NULL; if (!clusterManagerNodeIsCluster(node, &err)) { - char *msg = (err ? 
err : "is not configured as a cluster node."); - clusterManagerLogErr("[ERR] Node %s:%d %s\n", ip, port, msg); + clusterManagerPrintNotClusterNodeError(node, err); if (err) zfree(err); freeClusterManagerNode(node); return 0; @@ -4025,14 +4075,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { } err = NULL; if (!clusterManagerNodeIsEmpty(node, &err)) { - char *msg; - if (err) msg = err; - else { - msg = "is not empty. Either the node already knows other " - "nodes (check with CLUSTER NODES) or contains some " - "key in database 0."; - } - clusterManagerLogErr("[ERR] Node %s:%d %s\n", ip, port, msg); + clusterManagerPrintNotEmptyNodeError(node, err); if (err) zfree(err); freeClusterManagerNode(node); return 0; @@ -4263,6 +4306,104 @@ cleanup: return success; } +static int clusterManagerCommandAddNode(int argc, char **argv) { + int success = 1; + redisReply *reply = NULL; + char *ref_ip = NULL, *ip = NULL; + int ref_port = 0, port = 0; + if (!getClusterHostFromCmdArgs(argc - 1, argv + 1, &ref_ip, &ref_port)) + goto invalid_args; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) + goto invalid_args; + clusterManagerLogInfo(">>> Adding node %s:%d to cluster %s:%d\n", ip, port, + ref_ip, ref_port); + // Check the existing cluster + clusterManagerNode *refnode = clusterManagerNewNode(ref_ip, ref_port); + if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + if (!clusterManagerCheckCluster(0)) return 0; + + /* If --cluster-master-id was specified, try to resolve it now so that we + * abort before starting with the node configuration. 
*/ + clusterManagerNode *master_node = NULL; + if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_SLAVE) { + char *master_id = config.cluster_manager_command.master_id; + if (master_id != NULL) { + master_node = clusterManagerNodeByName(master_id); + if (master_node == NULL) { + clusterManagerLogErr("[ERR] No such master ID %s\n", master_id); + return 0; + } + } else { + master_node = clusterManagerNodeWithLeastReplicas(); + assert(master_node != NULL); + printf("Automatically selected master %s:%d\n", master_node->ip, + master_node->port); + } + } + + // Add the new node + clusterManagerNode *new_node = clusterManagerNewNode(ip, port); + int added = 0; + CLUSTER_MANAGER_NODE_CONNECT(new_node); + if (new_node->context->err) { + clusterManagerLogErr("[ERR] Sorry, can't connect to node %s:%d\n", + ip, port); + success = 0; + goto cleanup; + } + char *err = NULL; + if (!(success = clusterManagerNodeIsCluster(new_node, &err))) { + clusterManagerPrintNotClusterNodeError(new_node, err); + if (err) zfree(err); + goto cleanup; + } + if (!clusterManagerNodeLoadInfo(new_node, 0, &err)) { + if (err) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(new_node, err); + zfree(err); + } + success = 0; + goto cleanup; + } + if (!(success = clusterManagerNodeIsEmpty(new_node, &err))) { + clusterManagerPrintNotEmptyNodeError(new_node, err); + if (err) zfree(err); + goto cleanup; + } + clusterManagerNode *first = listFirst(cluster_manager.nodes)->value; + listAddNodeTail(cluster_manager.nodes, new_node); + added = 1; + + // Send CLUSTER MEET command to the new node + clusterManagerLogInfo(">>> Send CLUSTER MEET to node %s:%d to make it " + "join the cluster.\n", ip, port); + reply = CLUSTER_MANAGER_COMMAND(new_node, "CLUSTER MEET %s %d", + first->ip, first->port); + if (!(success = clusterManagerCheckRedisReply(new_node, reply, NULL))) + goto cleanup; + + /* Additional configuration is needed if the node is added as a slave. 
*/ + if (master_node) { + sleep(1); + clusterManagerWaitForClusterJoin(); + clusterManagerLogInfo(">>> Configure node as replica of %s:%d.\n", + master_node->ip, master_node->port); + freeReplyObject(reply); + reply = CLUSTER_MANAGER_COMMAND(new_node, "CLUSTER REPLICATE %s", + master_node->name); + if (!(success = clusterManagerCheckRedisReply(new_node, reply, NULL))) + goto cleanup; + } + clusterManagerLogOk("[OK] New node added correctly.\n"); +cleanup: + if (!added && new_node) freeClusterManagerNode(new_node); + if (reply) freeReplyObject(reply); + return success; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + static int clusterManagerCommandInfo(int argc, char **argv) { int port = 0; char *ip = NULL; @@ -4531,8 +4672,7 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { nodes_involved++; listAddNodeTail(involved, n); } - weightedNodes = zmalloc(nodes_involved * - sizeof(clusterManagerNode *)); + weightedNodes = zmalloc(nodes_involved * sizeof(clusterManagerNode *)); if (weightedNodes == NULL) goto cleanup; /* Check cluster, only proceed if it looks sane. */ clusterManagerCheckCluster(1); From 64cf7a314cb81101952fcfa0f309af90ada9fd5a Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 11 Apr 2018 18:22:44 +0200 Subject: [PATCH 25/66] - Cluster Manager: del-node command. 
- Cluster Manager: fixed bug in clusterManagerNodeWithLeastReplicas --- src/redis-cli.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index c0d80801d..daad385dd 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1884,6 +1884,7 @@ static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandAddNode(int argc, char **argv); +static int clusterManagerCommandDeleteNode(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandFix(int argc, char **argv); @@ -1906,6 +1907,7 @@ clusterManagerCommandDef clusterManagerCommands[] = { "replicas "}, {"add-node", clusterManagerCommandAddNode, 2, "new_host:new_port existing_host:existing_port", "slave,master-id "}, + {"del-node", clusterManagerCommandDeleteNode, 2, "host:port node_id",NULL}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, @@ -3335,7 +3337,7 @@ static clusterManagerNode *clusterManagerNodeWithLeastReplicas() { listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; - if (node->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; if (node == NULL || n->replicas_count < lowest_count) { node = n; lowest_count = n->replicas_count; @@ -4404,6 +4406,73 @@ invalid_args: return 0; } +static int clusterManagerCommandDeleteNode(int argc, char **argv) { + UNUSED(argc); + int success = 1; + int port = 0; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args; + char *node_id = argv[1]; + clusterManagerLogInfo(">>> Removing node 
%s from cluster %s:%d\n", + node_id, ip, port); + clusterManagerNode *ref_node = clusterManagerNewNode(ip, port); + clusterManagerNode *node = NULL; + + // Load cluster information + if (!clusterManagerLoadInfoFromNode(ref_node, 0)) return 0; + + // Check if the node exists and is not empty + node = clusterManagerNodeByName(node_id); + if (node == NULL) { + clusterManagerLogErr("[ERR] No such node ID %s\n", node_id); + return 0; + } + if (node->slots_count != 0) { + clusterManagerLogErr("[ERR] Node %s:%d is not empty! Reshard data " + "away and try again.\n", node->ip, node->port); + return 0; + } + + // Send CLUSTER FORGET to all the nodes but the node to remove + clusterManagerLogInfo(">>> Sending CLUSTER FORGET messages to the " + "cluster...\n"); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n == node) continue; + if (n->replicate && !strcasecmp(n->replicate, node_id)) { + // Reconfigure the slave to replicate with some other node + clusterManagerNode *master = clusterManagerNodeWithLeastReplicas(); + //TODO: check whether master could be the same as node + assert(master != NULL); + clusterManagerLogInfo(">>> %s:%d as replica of %s:%d\n", + n->ip, n->port, master->ip, master->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER REPLICATE %s", + master->name); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) return 0; + } + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER FORGET %s", + node_id); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) return 0; + } + + // Finally shutdown the node + clusterManagerLogInfo(">>> SHUTDOWN the node.\n"); + redisReply *r = redisCommand(node->context, "SHUTDOWN"); + success = clusterManagerCheckRedisReply(node, r, NULL); + if (r) freeReplyObject(r); + return success; +invalid_args: + fprintf(stderr, 
CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + static int clusterManagerCommandInfo(int argc, char **argv) { int port = 0; char *ip = NULL; @@ -5026,6 +5095,9 @@ static int clusterManagerCommandHelp(int argc, char **argv) { } } } + fprintf(stderr, "\nFor check, fix, reshard, del-node, set-timeout you " + "can specify the host and port of any working node in " + "the cluster.\n\n"); return 0; } From 7ace7af0b7b0076ea3b63741d781fabeec307f7e Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 13 Apr 2018 16:09:22 +0200 Subject: [PATCH 26/66] Cluster Manager: set-timeout command --- src/redis-cli.c | 70 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 6 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index daad385dd..e7600b91c 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1890,6 +1890,7 @@ static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandFix(int argc, char **argv); static int clusterManagerCommandReshard(int argc, char **argv); static int clusterManagerCommandRebalance(int argc, char **argv); +static int clusterManagerCommandSetTimeout(int argc, char **argv); static int clusterManagerCommandImport(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); @@ -1905,21 +1906,23 @@ typedef struct clusterManagerCommandDef { clusterManagerCommandDef clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... 
hostN:portN", "replicas "}, - {"add-node", clusterManagerCommandAddNode, 2, - "new_host:new_port existing_host:existing_port", "slave,master-id "}, - {"del-node", clusterManagerCommandDeleteNode, 2, "host:port node_id",NULL}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, - {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, + {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"reshard", clusterManagerCommandReshard, -1, "host:port", "from ,to ,slots ,yes,timeout ,pipeline "}, {"rebalance", clusterManagerCommandRebalance, -1, "host:port", "weight ,use-empty-masters," "timeout ,simulate,pipeline ,threshold "}, - {"import", clusterManagerCommandImport, 1, "host:port", - "from ,copy,replace"}, + {"add-node", clusterManagerCommandAddNode, 2, + "new_host:new_port existing_host:existing_port", "slave,master-id "}, + {"del-node", clusterManagerCommandDeleteNode, 2, "host:port node_id",NULL}, {"call", clusterManagerCommandCall, -2, "host:port command arg arg .. 
arg", NULL}, + {"set-timeout", clusterManagerCommandSetTimeout, 2, + "host:port milliseconds", NULL}, + {"import", clusterManagerCommandImport, 1, "host:port", + "from ,copy,replace"}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} }; @@ -4882,6 +4885,61 @@ invalid_args: return 0; } +static int clusterManagerCommandSetTimeout(int argc, char **argv) { + UNUSED(argc); + int port = 0; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args; + int timeout = atoi(argv[1]); + if (timeout < 100) { + fprintf(stderr, "Setting a node timeout of less than 100 " + "milliseconds is a bad idea.\n"); + return 0; + } + // Load cluster information + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + int ok_count = 0, err_count = 0; + + clusterManagerLogInfo(">>> Reconfiguring node timeout in every " + "cluster node...\n"); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + char *err = NULL; + redisReply *reply = CLUSTER_MANAGER_COMMAND(n, "CONFIG %s %s %d", + "SET", + "cluster-node-timeout", + timeout); + if (reply == NULL) goto reply_err; + int ok = clusterManagerCheckRedisReply(n, reply, &err); + freeReplyObject(reply); + if (!ok) goto reply_err; + reply = CLUSTER_MANAGER_COMMAND(n, "CONFIG %s", "REWRITE"); + if (reply == NULL) goto reply_err; + ok = clusterManagerCheckRedisReply(n, reply, &err); + freeReplyObject(reply); + if (!ok) goto reply_err; + clusterManagerLogWarn("*** New timeout set for %s:%d\n", n->ip, + n->port); + ok_count++; + continue; +reply_err: + if (err == NULL) err = ""; + clusterManagerLogErr("ERR setting node-timeot for %s:%d: %s\n", n->ip, + n->port, err); + err_count++; + } + clusterManagerLogInfo(">>> New node timeout set. 
%d OK, %d ERR.\n", + ok_count, err_count); + return 1; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + static int clusterManagerCommandImport(int argc, char **argv) { int success = 1; int port = 0, src_port = 0; From a3d5864e4283131dac6e9fe89e2a0d86189af7e9 Mon Sep 17 00:00:00 2001 From: artix Date: Thu, 19 Apr 2018 18:52:01 +0200 Subject: [PATCH 27/66] Cluster Manager: code improvements and more comments added. --- src/redis-cli.c | 66 +++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 35 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index e7600b91c..c0283b28c 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -68,7 +68,7 @@ #define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE" #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" -#define CLUSTER_MANAGER_SLOTS 16384 +#define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MIGRATE_TIMEOUT 60000 #define CLUSTER_MANAGER_MIGRATE_PIPELINE 10 #define CLUSTER_MANAGER_REBALANCE_THRESHOLD 2 @@ -172,6 +172,7 @@ typedef struct clusterManagerCommand { int pipeline; float threshold; } clusterManagerCommand; + static void createClusterManagerCommand(char *cmdname, int argc, char **argv); @@ -1788,7 +1789,7 @@ static int evalMode(int argc, char **argv) { /* The Cluster Manager global structure */ static struct clusterManager { - list *nodes; /* List of nodes int he configuration. */ + list *nodes; /* List of nodes in the configuration. */ list *errors; } cluster_manager; @@ -1821,7 +1822,7 @@ typedef struct clusterManagerNode { int balance; /* Used by rebalance */ } clusterManagerNode; -/* Data structure used to represent a sequence of nodes. */ +/* Data structure used to represent a sequence of cluster nodes. 
*/ typedef struct clusterManagerNodeArray { clusterManagerNode **nodes; /* Actual nodes array */ clusterManagerNode **alloc; /* Pointer to the allocated memory */ @@ -1829,7 +1830,7 @@ typedef struct clusterManagerNodeArray { int count; /* Non-NULL nodes count */ } clusterManagerNodeArray; -/* Used for reshard table. */ +/* Used for the reshard table. */ typedef struct clusterManagerReshardTableItem { clusterManagerNode *source; int slot; @@ -1865,7 +1866,7 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, int ip_count); static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent); static void clusterManagerShowNodes(void); -static void clusterManagerShowInfo(void); +static void clusterManagerShowClusterInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); static void clusterManagerWaitForClusterJoin(void); static int clusterManagerCheckCluster(int quiet); @@ -2067,8 +2068,9 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { clusterManagerNodeResetSlots(node); return node; } + /* Check whether reply is NULL or its type is REDIS_REPLY_ERROR. In the - * latest case, if 'err' arg is not NULL, it gets allocated with a copy + * latest case, if the 'err' arg is not NULL, it gets allocated with a copy * of reply error (it's up to the caller function to free it), elsewhere * the error is directly printed. */ static int clusterManagerCheckRedisReply(clusterManagerNode *n, @@ -2100,7 +2102,7 @@ static void clusterManagerRemoveNodeFromList(list *nodelist, } } -/* Return the node with the specified ID or NULL. */ +/* Return the node with the specified name (ID) or NULL. 
*/ static clusterManagerNode *clusterManagerNodeByName(const char *name) { if (cluster_manager.nodes == NULL) return NULL; clusterManagerNode *found = NULL; @@ -2121,7 +2123,7 @@ static clusterManagerNode *clusterManagerNodeByName(const char *name) { return found; } -/* Like get_node_by_name but the specified name can be just the first +/* Like clusterManagerNodeByName but the specified name can be just the first * part of the node ID as long as the prefix in unique across the * cluster. */ @@ -2152,6 +2154,7 @@ static void clusterManagerNodeResetSlots(clusterManagerNode *node) { node->slots_count = 0; } +/* Call "INFO" redis command on the specified node and return the reply. */ static redisReply *clusterManagerGetNodeRedisInfo(clusterManagerNode *node, char **err) { @@ -2181,7 +2184,7 @@ static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { * some key or if it already knows other nodes */ static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) { redisReply *info = clusterManagerGetNodeRedisInfo(node, err); - int is_err = 0, is_empty = 1; + int is_empty = 1; if (info == NULL) return 0; if (strstr(info->str, "db0:") != NULL) { is_empty = 0; @@ -2190,11 +2193,7 @@ static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) { freeReplyObject(info); info = CLUSTER_MANAGER_COMMAND(node, "CLUSTER INFO"); if (err != NULL) *err = NULL; - if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { - *err = zmalloc((info->len + 1) * sizeof(char)); - strcpy(*err, info->str); - } + if (!clusterManagerCheckRedisReply(node, info, err)) { is_empty = 0; goto result; } @@ -2422,7 +2421,7 @@ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { * However if the key contains the {...} pattern, only the part between * { and } is hashed. This may be useful in the future to force certain * keys to be in the same node (assuming no resharding is in progress). 
*/ -static unsigned int keyHashSlot(char *key, int keylen) { +static unsigned int clusterManagerKeyHashSlot(char *key, int keylen) { int s, e; /* start-end indexes of { and } */ for (s = 0; s < keylen; s++) @@ -2443,6 +2442,7 @@ static unsigned int keyHashSlot(char *key, int keylen) { return crc16(key+s+1,e-s-1) & 0x3FFF; } +/* Return a string representation of the cluster node. */ static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent) { sds info = sdsempty(); sds spaces = sdsempty(); @@ -2484,7 +2484,7 @@ static void clusterManagerShowNodes(void) { } } -static void clusterManagerShowInfo(void) { +static void clusterManagerShowClusterInfo(void) { int masters = 0; int keys = 0; listIter li; @@ -2533,11 +2533,12 @@ static void clusterManagerShowInfo(void) { printf("%.2f keys per slot on average.\n", keys_per_slot); } +/* Flush dirty slots configuration of the node by calling CLUSTER ADDSLOTS */ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) { redisReply *reply = NULL; void *_reply = NULL; - int is_err = 0, success = 1; + int success = 1; /* First two args are used for the command itself. */ int argc = node->slots_count + 2; sds *argv = zmalloc(argc * sizeof(*argv)); @@ -2566,14 +2567,7 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) goto cleanup; } reply = (redisReply*) _reply; - if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { - *err = zmalloc((reply->len + 1) * sizeof(char)); - strcpy(*err, reply->str); - } - success = 0; - goto cleanup; - } + success = clusterManagerCheckRedisReply(node, reply, err); cleanup: zfree(argvlen); if (argv != NULL) { @@ -2821,7 +2815,7 @@ static int clusterManagerMoveSlot(clusterManagerNode *source, } /* Flush the dirty node configuration by calling replicate for slaves or - * adding the slots for masters. */ + * adding the slots defined in the masters. 
*/ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { if (!node->dirty) return 0; redisReply *reply = NULL; @@ -2852,6 +2846,7 @@ cleanup: return success; } +/* Wait until the cluster configuration is consistent. */ static void clusterManagerWaitForClusterJoin(void) { printf("Waiting for the cluster to join\n"); while(!clusterManagerIsConfigConsistent()) { @@ -2871,13 +2866,9 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err) { redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); - int is_err = 0, success = 1; + int success = 1; *err = NULL; - if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { - *err = zmalloc((reply->len + 1) * sizeof(char)); - strcpy(*err, reply->str); - } + if (!clusterManagerCheckRedisReply(node, reply, err)) { success = 0; goto cleanup; } @@ -3114,6 +3105,7 @@ invalid_friend: return 1; } +/* Compare functions used by various sorting operations. */ int clusterManagerSlotCompare(const void *slot1, const void *slot2) { const char **i1 = (const char **)slot1; const char **i2 = (const char **)slot2; @@ -3252,6 +3244,7 @@ static int clusterManagerIsConfigConsistent(void) { return consistent; } +/* Add the error string to cluster_manager.errors and print it. */ static void clusterManagerOnError(sds err) { if (cluster_manager.errors == NULL) cluster_manager.errors = listCreate(); @@ -3259,6 +3252,9 @@ static void clusterManagerOnError(sds err) { clusterManagerLogErr("%s\n", (char *) err); } +/* Check the slots coverage of the cluster. The 'all_slots' argument must be + * and array of 16384 bytes. Every covered slot will be set to 1 in the + * 'all_slots' array. 
The function returns the total number if covered slots.*/ static int clusterManagerGetCoveredSlots(char *all_slots) { if (cluster_manager.nodes == NULL) return 0; listIter li; @@ -4482,7 +4478,7 @@ static int clusterManagerCommandInfo(int argc, char **argv) { if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; - clusterManagerShowInfo(); + clusterManagerShowClusterInfo(); return 1; invalid_args: fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); @@ -4495,7 +4491,7 @@ static int clusterManagerCommandCheck(int argc, char **argv) { if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; - clusterManagerShowInfo(); + clusterManagerShowClusterInfo(); return clusterManagerCheckCluster(0); invalid_args: fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); @@ -5047,7 +5043,7 @@ static int clusterManagerCommandImport(int argc, char **argv) { redisReply *kr = src_reply->element[1]->element[i]; assert(kr->type == REDIS_REPLY_STRING); char *key = kr->str; - uint16_t slot = keyHashSlot(key, kr->len); + uint16_t slot = clusterManagerKeyHashSlot(key, kr->len); clusterManagerNode *target = slots_map[slot]; printf("Migrating %s to %s:%d: ", key, target->ip, target->port); redisReply *r = reconnectingRedisCommand(src_ctx, cmdfmt, From 4026c92b1bc337f0f38783163a78207f3ebb5e16 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 20 Apr 2018 18:08:30 +0200 Subject: [PATCH 28/66] Cluster Manager: fixed bug when parsing CLUSTER NODES reply (clusterManagerNodeLoadInfo) --- src/redis-cli.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index c0283b28c..b55cf93e8 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2922,6 +2922,7 @@ static int 
clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, line = p + 1; remaining--; } else line = p; + char *dash = NULL; if (slotsdef[0] == '[') { slotsdef++; if ((p = strstr(slotsdef, "->-"))) { // Migrating @@ -2953,7 +2954,8 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, node->importing[node->importing_count - 1] = src; } - } else if ((p = strchr(slotsdef, '-')) != NULL) { + } else if ((dash = strchr(slotsdef, '-')) != NULL) { + p = dash; int start, stop; *p = '\0'; start = atoi(slotsdef); @@ -5078,7 +5080,7 @@ invalid_args: static int clusterManagerCommandCall(int argc, char **argv) { int port = 0, i; char *ip = NULL; - if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args; clusterManagerNode *refnode = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; argc--; From 9e373c89b8c0276e7883a52b88a4c95389f9dbe5 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 20 Apr 2018 19:25:08 +0200 Subject: [PATCH 29/66] Cluster Manager: fixed expected slots calculation (rebalance) Cluster Manager: fixed argument parsing after --cluster-weight --- src/redis-cli.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index b55cf93e8..36531f884 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1318,6 +1318,7 @@ static int parseOptions(int argc, char **argv) { if (wargc > 0) { config.cluster_manager_command.weight = weight; config.cluster_manager_command.weight_argc = wargc; + i += wargc; } } else if (!strcmp(argv[i],"--cluster-slots") && !lastarg) { config.cluster_manager_command.slots = atoi(argv[++i]); @@ -4724,7 +4725,6 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { int nodes_involved = 0; int use_empty = config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; - involved = listCreate(); listIter li; listNode 
*ln; @@ -4762,15 +4762,15 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; weightedNodes[i++] = n; - int expected = (((float)CLUSTER_MANAGER_SLOTS / total_weight) * - (int) n->weight); + int expected = (int) (((float)CLUSTER_MANAGER_SLOTS / total_weight) * + n->weight); n->balance = n->slots_count - expected; total_balance += n->balance; /* Compute the percentage of difference between the * expected number of slots and the real one, to see * if it's over the threshold specified by the user. */ int over_threshold = 0; - if (config.cluster_manager_command.threshold > 0) { + if (threshold > 0) { if (n->slots_count > 0) { float err_perc = fabs((100-(100.0*expected/n->slots_count))); if (err_perc > threshold) over_threshold = 1; @@ -4784,7 +4784,6 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { clusterManagerLogWarn("*** No rebalancing needed! " "All nodes are within the %.2f%% threshold.\n", config.cluster_manager_command.threshold); - result = 0; goto cleanup; } /* Because of rounding, it is possible that the balance of all nodes From 363197ab7b5b1a5074ae191db52bf0a56104f2ca Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 20 Apr 2018 19:29:42 +0200 Subject: [PATCH 30/66] Cluster tests now using redis-cli instead of redis-trib --- tests/cluster/tests/04-resharding.tcl | 10 +++++----- tests/cluster/tests/12-replica-migration-2.tcl | 14 +++++++------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/cluster/tests/04-resharding.tcl b/tests/cluster/tests/04-resharding.tcl index 0ccbf717d..68fba135e 100644 --- a/tests/cluster/tests/04-resharding.tcl +++ b/tests/cluster/tests/04-resharding.tcl @@ -73,12 +73,12 @@ test "Cluster consistency during live resharding" { flush stdout set target [dict get [get_myself [randomInt 5]] id] set tribpid [lindex [exec \ - ../../../src/redis-trib.rb reshard \ - --from all \ - --to $target \ - --slots 100 
\ - --yes \ + ../../../src/redis-cli --cluster reshard \ 127.0.0.1:[get_instance_attrib redis 0 port] \ + --cluster-from all \ + --cluster-to $target \ + --cluster-slots 100 \ + --cluster-yes \ | [info nameofexecutable] \ ../tests/helpers/onlydots.tcl \ &] 0] diff --git a/tests/cluster/tests/12-replica-migration-2.tcl b/tests/cluster/tests/12-replica-migration-2.tcl index 48ecd1d50..3d8b7b04b 100644 --- a/tests/cluster/tests/12-replica-migration-2.tcl +++ b/tests/cluster/tests/12-replica-migration-2.tcl @@ -31,9 +31,9 @@ test "Each master should have at least two replicas attached" { set master0_id [dict get [get_myself 0] id] test "Resharding all the master #0 slots away from it" { set output [exec \ - ../../../src/redis-trib.rb rebalance \ - --weight ${master0_id}=0 \ - 127.0.0.1:[get_instance_attrib redis 0 port] >@ stdout] + ../../../src/redis-cli --cluster rebalance \ + 127.0.0.1:[get_instance_attrib redis 0 port] \ + --cluster-weight ${master0_id}=0 >@ stdout ] } test "Master #0 should lose its replicas" { @@ -49,10 +49,10 @@ test "Resharding back some slot to master #0" { # new resharding. 
after 10000 set output [exec \ - ../../../src/redis-trib.rb rebalance \ - --weight ${master0_id}=.01 \ - --use-empty-masters \ - 127.0.0.1:[get_instance_attrib redis 0 port] >@ stdout] + ../../../src/redis-cli --cluster rebalance \ + 127.0.0.1:[get_instance_attrib redis 0 port] \ + --cluster-weight ${master0_id}=.01 \ + --cluster-use-empty-masters >@ stdout] } test "Master #0 should re-acquire one or more replicas" { From cf8d65a35d50da2cfbad9e8a25c855a2cc3af77b Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 12 Jan 2018 11:06:24 +0100 Subject: [PATCH 31/66] Cluster Manager mode --- src/redis-cli.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 108 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index d80973e75..92467a6bf 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -65,6 +65,7 @@ #define REDIS_CLI_HISTFILE_DEFAULT ".rediscli_history" #define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE" #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" +#define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) /* --latency-dist palettes. */ int spectrum_palette_color_size = 19; @@ -77,6 +78,16 @@ int spectrum_palette_mono[] = {0,233,234,235,237,239,241,243,245,247,249,251,253 int *spectrum_palette; int spectrum_palette_size; +/* Cluster Manager command info */ +struct clusterManagerCommand { + char *name; + int argc; + char **argv; + int flags; + int replicas; +}; + + static redisContext *context; static struct config { char *hostip; @@ -119,8 +130,29 @@ static struct config { int eval_ldb_end; /* Lua debugging session ended. */ int enable_ldb_on_eval; /* Handle manual SCRIPT DEBUG + EVAL commands. */ int last_cmd_type; + struct clusterManagerCommand cluster_manager_command; } config; +/* Cluster Manager commands. 
*/ +typedef int clusterManagerCommandProc(int argc, char **argv); +static struct clusterManagerCommandDef { + char *name; + clusterManagerCommandProc *proc; + int arity; +}; + +static int clusterManagerCommandCreate(int argc, char **argv) { + printf("CLUSTER: create\n"); + printf("Arguments: %d\n", argc); + printf("Replicas: %d\n", config.cluster_manager_command.replicas); + fprintf(stderr, "Not implemented yet!\n"); + return 0; +} + +struct clusterManagerCommandDef clusterManagerCommands[] = { + {"create", clusterManagerCommandCreate, -2} +}; + /* User preferences. */ static struct pref { int hints; @@ -1061,6 +1093,13 @@ static redisReply *reconnectingRedisCommand(redisContext *c, const char *fmt, .. * User interface *--------------------------------------------------------------------------- */ +static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { + struct clusterManagerCommand *cmd = &config.cluster_manager_command; + cmd->name = cmdname; + cmd->argc = argc; + cmd->argv = argc ? 
argv : NULL; +} + static int parseOptions(int argc, char **argv) { int i; @@ -1146,6 +1185,18 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"-d") && !lastarg) { sdsfree(config.mb_delim); config.mb_delim = sdsnew(argv[++i]); + } else if (!strcmp(argv[i],"--cluster") && !lastarg) { + if (CLUSTER_MANAGER_MODE()) usage(); + char *cmd = argv[++i]; + int j = i; + for (; j < argc; j++) if (argv[j][0] == '-') break; + j--; + createClusterManagerCommand(cmd, j - i, argv + i); + i = j; + } else if (!strcmp(argv[i],"--cluster") && lastarg) { + usage(); + } else if (!strcmp(argv[i],"--cluster-replicas") && !lastarg) { + config.cluster_manager_command.replicas = atoi(argv[++i]); } else if (!strcmp(argv[i],"-v") || !strcmp(argv[i], "--version")) { sds version = cliVersion(); printf("redis-cli %s\n", version); @@ -1243,9 +1294,13 @@ static void usage(void) { " --ldb-sync-mode Like --ldb but uses the synchronous Lua debugger, in\n" " this mode the server is blocked and script changes are\n" " are not rolled back from the server memory.\n" +" --cluster [args...]\n" +" Cluster Manager command and arguments (see below).\n" " --help Output this help and exit.\n" " --version Output version and exit.\n" "\n" +"Cluster Manager Commands:\n" +"\n" "Examples:\n" " cat /etc/passwd | redis-cli -x set mypasswd\n" " redis-cli get mypasswd\n" @@ -1569,6 +1624,43 @@ static int evalMode(int argc, char **argv) { return retval; } +/*------------------------------------------------------------------------------ + * Cluster Manager mode + *--------------------------------------------------------------------------- */ + +static clusterManagerCommandProc *validateClusterManagerCommand(void) { + int i, commands_count = sizeof(clusterManagerCommands) / + sizeof(struct clusterManagerCommandDef); + clusterManagerCommandProc *proc = NULL; + char *cmdname = config.cluster_manager_command.name; + int argc = config.cluster_manager_command.argc; + for (i = 0; i < commands_count; 
i++) { + struct clusterManagerCommandDef cmddef = clusterManagerCommands[i]; + if (!strcmp(cmddef.name, cmdname)) { + if ((cmddef.arity > 0 && argc != cmddef.arity) || + (cmddef.arity < 0 && argc < (cmddef.arity * -1))) { + fprintf(stderr, "[ERR] Wrong number of arguments for " + "specified --cluster sub command\n"); + return NULL; + } + proc = cmddef.proc; + } + } + if (!proc) fprintf(stderr, "Unknown --cluster subcommand\n"); + return proc; +} + +static void clusterManagerMode(clusterManagerCommandProc *proc) { + int argc = config.cluster_manager_command.argc; + char **argv = config.cluster_manager_command.argv; + if (!proc(argc, argv)) { + sdsfree(config.hostip); + sdsfree(config.mb_delim); + exit(1); + } + exit(0); +} + /*------------------------------------------------------------------------------ * Latency and latency history modes *--------------------------------------------------------------------------- */ @@ -2862,7 +2954,11 @@ int main(int argc, char **argv) { config.eval_ldb_sync = 0; config.enable_ldb_on_eval = 0; config.last_cmd_type = -1; - + config.cluster_manager_command.name = NULL; + config.cluster_manager_command.argc = 0; + config.cluster_manager_command.argv = NULL; + config.cluster_manager_command.flags = 0; + config.cluster_manager_command.replicas = 0; pref.hints = 1; spectrum_palette = spectrum_palette_color; @@ -2878,6 +2974,17 @@ int main(int argc, char **argv) { argc -= firstarg; argv += firstarg; + /* Cluster Manager mode */ + if (CLUSTER_MANAGER_MODE()) { + clusterManagerCommandProc *proc = validateClusterManagerCommand(); + if (!proc) { + sdsfree(config.hostip); + sdsfree(config.mb_delim); + exit(1); + } + clusterManagerMode(proc); + } + /* Latency mode */ if (config.latency_mode) { if (cliConnect(0) == REDIS_ERR) exit(1); From be7458e4ce593381aa4c8d867574a7cf7bd85b5b Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 31 Jan 2018 16:26:21 +0100 Subject: [PATCH 32/66] Cluster Manager: 'create', 'info' and 'check' commands --- 
src/Makefile | 2 +- src/redis-cli.c | 1297 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 1272 insertions(+), 27 deletions(-) diff --git a/src/Makefile b/src/Makefile index 3f6ac4541..14112aa1f 100644 --- a/src/Makefile +++ b/src/Makefile @@ -146,7 +146,7 @@ REDIS_SERVER_NAME=redis-server REDIS_SENTINEL_NAME=redis-sentinel REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o REDIS_CLI_NAME=redis-cli -REDIS_CLI_OBJ=anet.o adlist.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o +REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o REDIS_BENCHMARK_NAME=redis-benchmark REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o zmalloc.o redis-benchmark.o REDIS_CHECK_RDB_NAME=redis-check-rdb diff --git a/src/redis-cli.c b/src/redis-cli.c index 92467a6bf..9943d5753 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -41,13 +41,15 @@ #include #include #include -#include +#include #include #include #include #include #include /* use sds.h from hiredis, so that only one set of sds functions will be present in the binary */ +#include "dict.h" +#include "adlist.h" #include "zmalloc.h" #include "linenoise.h" #include "help.h" @@ -65,7 +67,64 @@ #define REDIS_CLI_HISTFILE_DEFAULT ".rediscli_history" #define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE" #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" +#define 
CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) +#define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) (nodes/(replicas + 1)) +#define CLUSTER_MANAGER_COMMAND(n,...) \ + (reconnectingRedisCommand(n->context, __VA_ARGS__)) +#define CLUSTER_MANAGER_NODE_INFO(n) (CLUSTER_MANAGER_COMMAND(n, "INFO")) + +#define CLUSTER_MANAGER_RESET_SLOTS(n) do { \ + memset(n->slots, 0, sizeof(n->slots)); \ + n->slots_count = 0; \ +} while(0) + +#define CLUSTER_MANAGER_NODEARRAY_INIT(array, alloc_len) do { \ + array->nodes = zcalloc(alloc_len * sizeof(clusterManagerNode*));\ + array->alloc = array->nodes; \ + array->len = alloc_len; \ + array->count = 0; \ +} while(0) + +#define CLUSTER_MANAGER_NODEARRAY_RESET(array) do { \ + if (array->nodes > array->alloc) { \ + array->len = array->nodes - array->alloc; \ + array->nodes = array->alloc; \ + array->count = 0; \ + int i = 0; \ + for(; i < array->len; i++) { \ + if (array->nodes[i] != NULL) array->count++;\ + } \ + } \ +} while(0) + +#define CLUSTER_MANAGER_NODEARRAY_FREE(array) zfree(array->alloc) + +#define CLUSTER_MANAGER_NODEARRAY_SHIFT(array, nodeptr) do {\ + assert(array->nodes < (array->nodes + array->len)); \ + if (*array->nodes != NULL) array->count--; \ + nodeptr = *array->nodes; \ + array->nodes++; \ + array->len--; \ +} while(0) + +#define CLUSTER_MANAGER_NODEARRAY_ADD(array, nodeptr) do { \ + assert(array->nodes < (array->nodes + array->len)); \ + assert(nodeptr != NULL); \ + array->nodes[array->count++] = nodeptr; \ +} while(0) + +#define CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err) \ + fprintf(stderr,"Node %s:%d replied with error:\n%s\n", n->ip, n->port, err); + +#define CLUSTER_MANAGER_FLAG_MYSELF 1 << 0 +#define CLUSTER_MANAGER_FLAG_SLAVE 1 << 1 +#define CLUSTER_MANAGER_FLAG_FRIEND 1 << 2 +#define CLUSTER_MANAGER_FLAG_NOADDR 1 << 3 +#define CLUSTER_MANAGER_FLAG_DISCONNECT 1 << 4 +#define CLUSTER_MANAGER_FLAG_FAIL 1 << 5 + +#define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 
0 /* --latency-dist palettes. */ int spectrum_palette_color_size = 19; @@ -79,13 +138,13 @@ int *spectrum_palette; int spectrum_palette_size; /* Cluster Manager command info */ -struct clusterManagerCommand { +typedef struct clusterManagerCommand { char *name; int argc; char **argv; int flags; int replicas; -}; +} clusterManagerCommand; static redisContext *context; @@ -130,28 +189,70 @@ static struct config { int eval_ldb_end; /* Lua debugging session ended. */ int enable_ldb_on_eval; /* Handle manual SCRIPT DEBUG + EVAL commands. */ int last_cmd_type; - struct clusterManagerCommand cluster_manager_command; + clusterManagerCommand cluster_manager_command; } config; -/* Cluster Manager commands. */ +/* Cluster Manager */ + +static struct clusterManager { + list *nodes; +} cluster_manager; + +typedef struct clusterManagerNode { + redisContext *context; + sds name; + char *ip; + int port; + uint64_t current_epoch; + time_t ping_sent; + time_t ping_recv; + int flags; + sds replicate; + int dirty; + uint8_t slots[CLUSTER_MANAGER_SLOTS]; + int slots_count; + list *friends; +} clusterManagerNode; + +typedef struct clusterManagerNodeArray { + clusterManagerNode **nodes; + clusterManagerNode **alloc; + int len; + int count; +} clusterManagerNodeArray; + +static clusterManagerNode *clusterManagerNewNode(char *ip, int port); +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); +static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, + char **err); +static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts); +static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err); +static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, + int ip_len, clusterManagerNode ***offending, int *offending_len); +static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, + int ip_len); +static sds clusterManagerNodeInfo(clusterManagerNode *node); +static void 
clusterManagerShowNodes(void); +static void clusterManagerShowInfo(void); +static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); +static void clusterManagerWaitForClusterJoin(void); +static void clusterManagerCheckCluster(int quiet); typedef int clusterManagerCommandProc(int argc, char **argv); -static struct clusterManagerCommandDef { +typedef struct clusterManagerCommandDef { char *name; clusterManagerCommandProc *proc; int arity; -}; + char *args; + char *options; +} clusterManagerCommandDef; +static int clusterManagerIsConfigConsistent(void); -static int clusterManagerCommandCreate(int argc, char **argv) { - printf("CLUSTER: create\n"); - printf("Arguments: %d\n", argc); - printf("Replicas: %d\n", config.cluster_manager_command.replicas); - fprintf(stderr, "Not implemented yet!\n"); - return 0; -} +/* Cluster Manager commands. */ -struct clusterManagerCommandDef clusterManagerCommands[] = { - {"create", clusterManagerCommandCreate, -2} -}; +static int clusterManagerCommandCreate(int argc, char **argv); +static int clusterManagerCommandInfo(int argc, char **argv); +static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandHelp(int argc, char **argv); /* User preferences. 
*/ static struct pref { @@ -165,6 +266,9 @@ char *redisGitSHA1(void); char *redisGitDirty(void); static int cliConnect(int force); +static char *getInfoField(char *info, char *field); +static long getLongInfoField(char *info, char *field); + /*------------------------------------------------------------------------------ * Utility functions *--------------------------------------------------------------------------- */ @@ -317,6 +421,36 @@ static void parseRedisUri(const char *uri) { config.dbnum = atoi(curr); } +static uint64_t dictSdsHash(const void *key) { + return dictGenHashFunction((unsigned char*)key, sdslen((char*)key)); +} + +static int dictSdsKeyCompare(void *privdata, const void *key1, + const void *key2) +{ + int l1,l2; + DICT_NOTUSED(privdata); + + l1 = sdslen((sds)key1); + l2 = sdslen((sds)key2); + if (l1 != l2) return 0; + return memcmp(key1, key2, l1) == 0; +} + +static void dictSdsDestructor(void *privdata, void *val) +{ + DICT_NOTUSED(privdata); + + sdsfree(val); +} + +/* _serverAssert is needed by dict */ +void _serverAssert(const char *estr, const char *file, int line) { + fprintf(stderr, "=== ASSERTION FAILED ==="); + fprintf(stderr, "==> %s:%d '%s' is not true",file,line,estr); + *((char*)-1) = 'x'; +} + /*------------------------------------------------------------------------------ * Help functions *--------------------------------------------------------------------------- */ @@ -1094,7 +1228,7 @@ static redisReply *reconnectingRedisCommand(redisContext *c, const char *fmt, .. *--------------------------------------------------------------------------- */ static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { - struct clusterManagerCommand *cmd = &config.cluster_manager_command; + clusterManagerCommand *cmd = &config.cluster_manager_command; cmd->name = cmdname; cmd->argc = argc; cmd->argv = argc ? 
argv : NULL; @@ -1191,7 +1325,7 @@ static int parseOptions(int argc, char **argv) { int j = i; for (; j < argc; j++) if (argv[j][0] == '-') break; j--; - createClusterManagerCommand(cmd, j - i, argv + i); + createClusterManagerCommand(cmd, j - i, argv + i + 1); i = j; } else if (!strcmp(argv[i],"--cluster") && lastarg) { usage(); @@ -1300,6 +1434,7 @@ static void usage(void) { " --version Output version and exit.\n" "\n" "Cluster Manager Commands:\n" +" Use --cluster help to list all available cluster manager commands.\n" "\n" "Examples:\n" " cat /etc/passwd | redis-cli -x set mypasswd\n" @@ -1628,14 +1763,22 @@ static int evalMode(int argc, char **argv) { * Cluster Manager mode *--------------------------------------------------------------------------- */ +clusterManagerCommandDef clusterManagerCommands[] = { + {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", + "cluster-replicas"}, + {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, + {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, + {"help", clusterManagerCommandHelp, 0, NULL, NULL} +}; + static clusterManagerCommandProc *validateClusterManagerCommand(void) { int i, commands_count = sizeof(clusterManagerCommands) / - sizeof(struct clusterManagerCommandDef); + sizeof(clusterManagerCommandDef); clusterManagerCommandProc *proc = NULL; char *cmdname = config.cluster_manager_command.name; int argc = config.cluster_manager_command.argc; for (i = 0; i < commands_count; i++) { - struct clusterManagerCommandDef cmddef = clusterManagerCommands[i]; + clusterManagerCommandDef cmddef = clusterManagerCommands[i]; if (!strcmp(cmddef.name, cmdname)) { if ((cmddef.arity > 0 && argc != cmddef.arity) || (cmddef.arity < 0 && argc < (cmddef.arity * -1))) { @@ -1650,15 +1793,1117 @@ static clusterManagerCommandProc *validateClusterManagerCommand(void) { return proc; } +static void freeClusterManagerNode(clusterManagerNode *node) { + if (node->context != NULL) redisFree(node->context); 
+ if (node->friends != NULL) { + listIter li; + listNode *ln; + listRewind(node->friends,&li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *fn = ln->value; + freeClusterManagerNode(fn); + } + listRelease(node->friends); + node->friends = NULL; + } + if (node->name != NULL) sdsfree(node->name); + if (node->replicate != NULL) sdsfree(node->replicate); + if ((node->flags & CLUSTER_MANAGER_FLAG_FRIEND) && node->ip) + sdsfree(node->ip); + zfree(node); +} + +static void freeClusterManager(void) { + if (cluster_manager.nodes != NULL) { + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes,&li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + freeClusterManagerNode(n); + } + listRelease(cluster_manager.nodes); + cluster_manager.nodes = NULL; + } +} + +static clusterManagerNode *clusterManagerNewNode(char * ip, int port) { + clusterManagerNode *node = zmalloc(sizeof(*node)); + node->context = NULL; + node->name = NULL; + node->ip = ip; + node->port = port; + node->current_epoch = 0; + node->ping_sent = 0; + node->ping_recv = 0; + node->flags = 0; + node->replicate = NULL; + node->dirty = 0; + node->friends = NULL; + CLUSTER_MANAGER_RESET_SLOTS(node); + return node; +} + +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { + redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); + int is_err = 0; + *err = NULL; + if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((info->len + 1) * sizeof(char)); + strcpy(*err, info->str); + } + freeReplyObject(info); + return 0; + } + int is_cluster = (int) getLongInfoField(info->str, "cluster_enabled"); + freeReplyObject(info); + return is_cluster; +} + +static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) { + redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); + int is_err = 0, is_empty = 1; + *err = NULL; + if (info == NULL || (is_err = (info->type == 
REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((info->len + 1) * sizeof(char)); + strcpy(*err, info->str); + } + is_empty = 0; + goto result; + } + if (strstr(info->str, "db0:") != NULL) { + is_empty = 0; + goto result; + } + freeReplyObject(info); + info = CLUSTER_MANAGER_COMMAND(node, "CLUSTER INFO"); + if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((info->len + 1) * sizeof(char)); + strcpy(*err, info->str); + } + is_empty = 0; + goto result; + } + long known_nodes = getLongInfoField(info->str, "cluster_known_nodes"); + is_empty = (known_nodes == 1); +result: + freeReplyObject(info); + return is_empty; +} + +static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, + int ip_len, clusterManagerNode ***offending, int *offending_len) +{ + assert(offending != NULL); + int score = 0, i, j; + int node_len = cluster_manager.nodes->len; + *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); + clusterManagerNode **offending_p = *offending; + dictType dtype = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + NULL, /* key destructor */ + dictSdsDestructor /* val destructor */ + }; + for (i = 0; i < ip_len; i++) { + clusterManagerNodeArray *node_array = &(ipnodes[i]); + dict *related = dictCreate(&dtype, NULL); + char *ip = NULL; + for (j = 0; j < node_array->len; j++) { + clusterManagerNode *node = node_array->nodes[j]; + if (node == NULL) continue; + if (!ip) ip = node->ip; + sds types; + if (!node->replicate) { + assert(node->name != NULL); + dictEntry *entry = dictFind(related, node->name); + /* 'm' goes first; use a fresh sds so dictReplace() frees only the old value. */ + types = sdscatprintf(sdsempty(), "m%s", + entry ? (sds) dictGetVal(entry) : ""); + dictReplace(related, node->name, types); + } else { + /* Never grow the stored sds in place: sdscat() may move it and + * leave the dict holding a stale pointer. Build a new string and + * let dictReplace() release the previous value instead. */ + dictEntry *entry = dictFind(related, node->replicate); + types =
sdscatprintf(sdsempty(), "%ss", + entry ? (sds) dictGetVal(entry) : ""); + dictReplace(related, node->replicate, types); + } + } + dictIterator *iter = dictGetIterator(related); + dictEntry *entry; + while ((entry = dictNext(iter)) != NULL) { + sds types = (sds) dictGetVal(entry); + sds name = (sds) dictGetKey(entry); + int typeslen = sdslen(types); + if (typeslen < 2) continue; + if (types[0] == 'm') score += (10000 * (typeslen - 1)); + else score += (1 * typeslen); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->replicate == NULL) continue; + if (!strcmp(n->replicate, name) && !strcmp(n->ip, ip)) { + *(offending_p++) = n; + break; + } + } + } + if (offending_len != NULL) *offending_len = offending_p - *offending; + dictReleaseIterator(iter); + dictRelease(related); + } + return score; +} + +static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, + int ip_len) +{ + clusterManagerNode **offenders = NULL, **aux; + int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); + if (score == 0) goto cleanup; + printf(">>> Trying to optimize slaves allocation for anti-affinity\n"); + int node_len = cluster_manager.nodes->len; + int maxiter = 500 * node_len; + srand(time(NULL)); + while (maxiter > 0) { + int offending_len = 0; + if (offenders != NULL) { + zfree(offenders); + offenders = NULL; + } + score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &offenders, + &offending_len); + if (score == 0) break; + int rand_idx = rand() % offending_len; + clusterManagerNode *first = offenders[rand_idx], *second; + clusterManagerNode **other_replicas = zcalloc((node_len - 1) * + sizeof(*other_replicas)); + int other_replicas_count = 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n != first && n->replicate != NULL) +
other_replicas[other_replicas_count++] = n; + } + if (other_replicas_count == 0) { + zfree(other_replicas); + break; + } + rand_idx = rand() % other_replicas_count; + second = other_replicas[rand_idx]; + char *first_master = first->replicate, + *second_master = second->replicate; + first->replicate = second_master, first->dirty = 1; + second->replicate = first_master, second->dirty = 1; + zfree(aux), aux = NULL; + int new_score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, + &aux, NULL); + if (new_score > score) { + first->replicate = first_master; + second->replicate = second_master; + } + zfree(other_replicas); + maxiter--; + } + zfree(aux), aux = NULL; + score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); + char *msg; + if (score == 0) msg = "[OK] Perfect anti-affinity obtained!"; + else if (score >= 10000) + msg = ("[WARNING] Some slaves are in the same host as their master"); + else + msg=("[WARNING] Some slaves of the same master are in the same host"); + printf("%s\n", msg); +cleanup: + zfree(offenders); + zfree(aux); +} + +static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { + sds slots = sdsempty(); + int first_range_idx = -1, last_slot_idx = -1, i; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + int has_slot = node->slots[i]; + if (has_slot) { + if (first_range_idx == -1) { + if (sdslen(slots)) slots = sdscat(slots, ","); + first_range_idx = i; + slots = sdscatfmt(slots, "[%u", i); + } + last_slot_idx = i; + } else { + if (last_slot_idx >= 0) { + if (first_range_idx == last_slot_idx) + slots = sdscat(slots, "]"); + else slots = sdscatfmt(slots, "-%u]", last_slot_idx); + } + last_slot_idx = -1; + first_range_idx = -1; + } + } + if (last_slot_idx >= 0) { + if (first_range_idx == last_slot_idx) slots = sdscat(slots, "]"); + else slots = sdscatfmt(slots, "-%u]", last_slot_idx); + } + return slots; +} + +static sds clusterManagerNodeInfo(clusterManagerNode *node) { + sds info = sdsempty(); + int is_master = 
!(node->flags & CLUSTER_MANAGER_FLAG_SLAVE); + char *role = (is_master ? "M" : "S"); + sds slots = NULL; + if (node->dirty && node->replicate != NULL) + info = sdscatfmt(info, "S: %S %s:%u", node->name, node->ip, node->port); + else { + slots = clusterManagerNodeSlotsString(node); + info = sdscatfmt(info, "%s: %S %s:%u\n" + " slots:%S (%u slots) " + "", //TODO: flags string + role, node->name, node->ip, node->port, + slots, node->slots_count); + sdsfree(slots); + } + if (node->replicate != NULL) + info = sdscatfmt(info, "\n replicates %S", node->replicate); + //else if () {} //TODO: add replicas info + return info; +} + +static void clusterManagerShowNodes(void) { + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + sds info = clusterManagerNodeInfo(node); + printf("%s\n", info); + sdsfree(info); + } +} + +static void clusterManagerShowInfo(void) { + int masters = 0; + int keys = 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + if (!(node->flags & CLUSTER_MANAGER_FLAG_SLAVE)) { + if (!node->name) continue; + int replicas = 0; + int dbsize = -1; + char name[9]; + memcpy(name, node->name, 8); + name[8] = '\0'; + listIter ri; + listNode *rn; + listRewind(cluster_manager.nodes, &ri); + while ((rn = listNext(&ri)) != NULL) { + clusterManagerNode *n = rn->value; + if (n == node || !(n->flags & CLUSTER_MANAGER_FLAG_SLAVE)) + continue; + if (n->replicate && !strcmp(n->replicate, node->name)) + replicas++; + } + redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "DBSIZE"); + if (reply != NULL && reply->type == REDIS_REPLY_INTEGER) /* the NULL check must gate the type read */ + dbsize = reply->integer; + if (dbsize < 0) { + char *err = ""; + if (reply != NULL && reply->type == REDIS_REPLY_ERROR) + err = reply->str; + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + if (reply != NULL) freeReplyObject(reply); +
return; + }; + if (reply != NULL) freeReplyObject(reply); + printf("%s:%d (%s...) -> %d keys | %d slots | %d slaves.\n", + node->ip, node->port, name, dbsize, + node->slots_count, replicas); + masters++; + keys += dbsize; + } + } + printf("[OK] %d keys in %d masters.\n", keys, masters); + float keys_per_slot = keys / (float) CLUSTER_MANAGER_SLOTS; + printf("%.2f keys per slot on average.\n", keys_per_slot); +} + +static int clusterManagerAddSlots(clusterManagerNode *node, char**err) +{ + redisReply *reply = NULL; + void *_reply = NULL; + int is_err = 0; + int argc; + sds *argv = NULL; + size_t *argvlen = NULL; + *err = NULL; + sds cmd = sdsnew("CLUSTER ADDSLOTS "); + int i, added = 0; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + int last_slot = (i == (CLUSTER_MANAGER_SLOTS - 1)); + if (node->slots[i]) { + char *fmt = (!last_slot ? "%u " : "%u"); + cmd = sdscatfmt(cmd, fmt, i); + added++; + } + } + if (!added) goto node_cmd_err; + argv = cliSplitArgs(cmd, &argc); + if (argc == 0 || argv == NULL) goto node_cmd_err; + argvlen = zmalloc(argc*sizeof(size_t)); + for (i = 0; i < argc; i++) + argvlen[i] = sdslen(argv[i]); + redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen); + if (redisGetReply(node->context, &_reply) != REDIS_OK) goto node_cmd_err; + reply = (redisReply*) _reply; + if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + strcpy(*err, reply->str); + } + goto node_cmd_err; + } + sdsfree(cmd); + zfree(argvlen); + sdsfreesplitres(argv,argc); + freeReplyObject(reply); + return 1; +node_cmd_err: + sdsfree(cmd); + zfree(argvlen); + if (argv != NULL) sdsfreesplitres(argv,argc); + if (reply != NULL) freeReplyObject(reply); + return 0; +} + +static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { + if (!node->dirty) return 0; + redisReply *reply = NULL; + int is_err = 0; + *err = NULL; + if (node->replicate != NULL) { + 
reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER REPLICATE %s", + node->replicate); + if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + strcpy(*err, reply->str); + } + goto node_cmd_err; + } + } else { + int added = clusterManagerAddSlots(node, err); + if (!added || *err != NULL) goto node_cmd_err; + } + node->dirty = 0; + freeReplyObject(reply); + return 1; +node_cmd_err: + freeReplyObject(reply); + return 0; +} + +static void clusterManagerWaitForClusterJoin(void) { + printf("Waiting for the cluster to join\n"); + while(!clusterManagerIsConfigConsistent()) { + printf("."); + fflush(stdout); + sleep(1); + } + printf("\n"); +} + +static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, + char **err) +{ + redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); + int is_err = 0; + *err = NULL; + if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + strcpy(*err, reply->str); + } + goto node_cmd_err; + } + int getfriends = (opts & CLUSTER_MANAGER_OPT_GETFRIENDS); + char *lines = reply->str, *p, *line; + while ((p = strstr(lines, "\n")) != NULL) { + *p = '\0'; + line = lines; + lines = p + 1; + char *name = NULL, *addr = NULL, *flags = NULL, *master_id = NULL, + *ping_sent = NULL, *ping_recv = NULL, *config_epoch = NULL, + *link_status = NULL; + int i = 0; + while ((p = strchr(line, ' ')) != NULL) { + *p = '\0'; + char *token = line; + line = p + 1; + switch(i++){ + case 0: name = token; break; + case 1: addr = token; break; + case 2: flags = token; break; + case 3: master_id = token; break; + case 4: ping_sent = token; break; + case 5: ping_recv = token; break; + case 6: config_epoch = token; break; + case 7: link_status = token; break; + } + if (i == 8) break; // Slots + } + if (!flags) goto node_cmd_err; + int myself = (strstr(flags,
"myself") != NULL); + clusterManagerNode *currentNode = NULL; + if (myself) { + node->flags |= CLUSTER_MANAGER_FLAG_MYSELF; + currentNode = node; + CLUSTER_MANAGER_RESET_SLOTS(node); + if (i == 8) { + int remaining = strlen(line); + //TODO: just while(remaining) && assign p inside the block + while ((p = strchr(line, ' ')) != NULL || remaining) { + if (p == NULL) p = line + remaining; + remaining -= (p - line); + + char *slotsdef = line; + *p = '\0'; + if (remaining) { line = p + 1; remaining--; } /* skip and count the separator */ + else line = p; + if (slotsdef[0] == '[') { + //TODO: migrating/importing + } else if ((p = strchr(slotsdef, '-')) != NULL) { + int start, stop; + *p = '\0'; + start = atoi(slotsdef); + stop = atoi(p + 1); + node->slots_count += (stop - (start - 1)); + while (start <= stop) node->slots[start++] = 1; + } else if (slotsdef[0] != '\0') { /* p is NULL here: test the token itself */ + node->slots[atoi(slotsdef)] = 1; + node->slots_count++; + } + } + } + node->dirty = 0; + } else if (!getfriends) { + if (!(node->flags & CLUSTER_MANAGER_FLAG_MYSELF)) continue; + else break; + } else { + if (addr == NULL) { + // TODO: find a better err message + fprintf(stderr, "Error: invalid CLUSTER NODES reply\n"); + goto node_cmd_err; + } + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c == NULL) { + fprintf(stderr, "Error: invalid CLUSTER NODES reply\n"); + goto node_cmd_err; + } + *c = '\0'; + int port = atoi(++c); + currentNode = clusterManagerNewNode(sdsnew(addr), port); + currentNode->flags |= CLUSTER_MANAGER_FLAG_FRIEND; + if (node->friends == NULL) node->friends = listCreate(); + listAddNodeTail(node->friends, currentNode); + } + if (strstr(flags, "noaddr") != NULL) /* flags describe the node on this line */ + currentNode->flags |= CLUSTER_MANAGER_FLAG_NOADDR; + if (strstr(flags, "disconnected") != NULL) + currentNode->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT; + if (strstr(flags, "fail") != NULL) + currentNode->flags |= CLUSTER_MANAGER_FLAG_FAIL; + if (name != NULL) currentNode->name = sdsnew(name); + if (strstr(flags, "slave") != NULL) { + currentNode->flags |=
CLUSTER_MANAGER_FLAG_SLAVE; + if (master_id != NULL) currentNode->replicate = sdsnew(master_id); + } + if (config_epoch != NULL) + currentNode->current_epoch = atoll(config_epoch); + if (ping_sent != NULL) currentNode->ping_sent = atoll(ping_sent); + if (ping_recv != NULL) currentNode->ping_recv = atoll(ping_recv); + if (!getfriends && myself) break; + } + freeReplyObject(reply); + return 1; +node_cmd_err: + freeReplyObject(reply); + return 0; +} + +static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { + if (node->context == NULL) + node->context = redisConnect(node->ip, node->port); + if (node->context->err) { + fprintf(stderr,"Could not connect to Redis at "); + fprintf(stderr,"%s:%d: %s\n", node->ip, node->port, + node->context->errstr); + freeClusterManagerNode(node); + return 0; + } + opts |= CLUSTER_MANAGER_OPT_GETFRIENDS; + char *e = NULL; + if (!clusterManagerNodeIsCluster(node, &e)) { + char *msg = (e ? e : "is not configured as a cluster node."); + fprintf(stderr, "[ERR] Node %s:%d %s\n", node->ip, node->port, msg); + if (e) zfree(e); + freeClusterManagerNode(node); + return 0; + } + e = NULL; + if (!clusterManagerNodeLoadInfo(node, opts, &e)) { + if (e) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, e); + zfree(e); + } + freeClusterManagerNode(node); + return 0; + } + cluster_manager.nodes = listCreate(); + listAddNodeTail(cluster_manager.nodes, node); + if (node->friends != NULL) { + listIter li; + listNode *ln; + listRewind(node->friends, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *friend = ln->value; + if (!friend->ip || !friend->port) continue; + if (!friend->context) + friend->context = redisConnect(friend->ip, friend->port); + if (friend->context->err) continue; + e = NULL; + if (clusterManagerNodeLoadInfo(friend, 0, &e)) { + if (friend->flags & (CLUSTER_MANAGER_FLAG_NOADDR | + CLUSTER_MANAGER_FLAG_DISCONNECT | + CLUSTER_MANAGER_FLAG_FAIL)) continue; + listAddNodeTail(cluster_manager.nodes, friend); + 
+ } else fprintf(stderr,"[ERR] Unable to load info for node %s:%d\n", + friend->ip, friend->port); + } + listRelease(node->friends); + node->friends = NULL; + } + return 1; +} + +int clusterManagerSlotCompare(const void *slot1, const void *slot2) { + const char **i1 = (const char **)slot1; + const char **i2 = (const char **)slot2; + return strcmp(*i1, *i2); +} + +static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { + sds signature = NULL; + int node_count = 0, i = 0, name_len = 0; + char **node_configs = NULL; /* set before any goto: cleanup frees it */ + redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); + if (reply == NULL || reply->type == REDIS_REPLY_ERROR) + goto cleanup; + char *lines = reply->str, *p, *line; + while ((p = strstr(lines, "\n")) != NULL) { + i = 0; + *p = '\0'; + line = lines; + lines = p + 1; + char *nodename = NULL; + int tot_size = 0; + while ((p = strchr(line, ' ')) != NULL) { + *p = '\0'; + char *token = line; + line = p + 1; + if (i == 0) { + nodename = token; + tot_size = p - token; + name_len = tot_size++; /* reserve one byte for the ':' separator */ + } + if (++i == 8) break; /* stop after the 8th field so the first slot token is kept */ + } + if (i != 8) continue; + if (nodename == NULL) continue; + int remaining = strlen(line); + if (remaining == 0) continue; + char **slots = NULL; + int c = 0; + //TODO: just while(remaining) && assign p inside the block + while ((p = strchr(line, ' ')) != NULL || remaining) { + if (p == NULL) p = line + remaining; + int size = (p - line); + remaining -= size; + tot_size += size; + char *slotsdef = line; + *p = '\0'; + if (remaining) { line = p + 1; remaining--; } /* skip and count the separator */ + else line = p; + if (slotsdef[0] != '[') { + c++; + slots = zrealloc(slots, (c * sizeof(char *))); + slots[c - 1] = slotsdef; + } + } + if (c > 0) { + if (c > 1) + qsort(slots, c, sizeof(char *), clusterManagerSlotCompare); + node_count++; + node_configs = + zrealloc(node_configs, (node_count * sizeof(char *))); + tot_size += (sizeof(char) * (c - 1)); + char *cfg = zmalloc((sizeof(char) * tot_size) + 1); + memcpy(cfg, nodename, name_len); + char *sp = cfg +
name_len; + *(sp++) = ':'; + for (i = 0; i < c; i++) { + if (i > 0) *(sp++) = '|'; + int slen = strlen(slots[i]); + memcpy(sp, slots[i], slen); + sp += slen; + } + *(sp++) = '\0'; + node_configs[node_count - 1] = cfg; + } + zfree(slots); + } + if (node_count > 0) { + if (node_count > 1) { + qsort(node_configs, node_count, sizeof(char *), + clusterManagerSlotCompare); + } + signature = sdsempty(); + for (i = 0; i < node_count; i++) { + if (i > 0) signature = sdscatprintf(signature, "%c", '|'); + signature = sdscatfmt(signature, "%s", node_configs[i]); + } + } +cleanup: + if (reply != NULL) freeReplyObject(reply); + for (i = 0; i < node_count; i++) zfree(node_configs[i]); + zfree(node_configs); + return signature; +} + +static int clusterManagerIsConfigConsistent(void) { + if (cluster_manager.nodes == NULL) return 0; + int consistent = 0; + sds first_cfg = NULL; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + sds cfg = clusterManagerGetConfigSignature(node); + if (cfg == NULL) { + consistent = 0; + break; + } + if (first_cfg == NULL) first_cfg = cfg; + else { + consistent = !sdscmp(first_cfg, cfg); + sdsfree(cfg); + if (!consistent) break; + } + } + if (first_cfg != NULL) sdsfree(first_cfg); + return consistent; +} + +static void clusterManagerCheckCluster(int quiet) { + listNode *ln = listFirst(cluster_manager.nodes); + if (!ln) return; + clusterManagerNode *node = ln->value; + printf(">>> Performing Cluster Check (using node %s:%d)\n", + node->ip, node->port); + if (!quiet) clusterManagerShowNodes(); + if (!clusterManagerIsConfigConsistent()) + printf("[ERR] Nodes don't agree about configuration!\n"); //TODO: in redis-trib this error is added to @errors array + else + printf("[OK] All nodes agree about slots configuration.\n"); + //TODO:check_open_slots + //TODO:check_slots_coverage +} + static void clusterManagerMode(clusterManagerCommandProc *proc) { int 
argc = config.cluster_manager_command.argc; char **argv = config.cluster_manager_command.argv; - if (!proc(argc, argv)) { - sdsfree(config.hostip); - sdsfree(config.mb_delim); - exit(1); - } + cluster_manager.nodes = NULL; + if (!proc(argc, argv)) goto cluster_manager_err; + freeClusterManager(); exit(0); +cluster_manager_err: + freeClusterManager(); + sdsfree(config.hostip); + sdsfree(config.mb_delim); + exit(1); +} + +/* Cluster Manager Commands */ + +static int clusterManagerCommandCreate(int argc, char **argv) { + printf("Cluster Manager: Creating Cluster\n"); + int i, j; + cluster_manager.nodes = listCreate(); + for (i = 0; i < argc; i++) { + char *addr = argv[i]; + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c == NULL) { + fprintf(stderr, "Invalid address format: %s\n", addr); + return 0; + } + *c = '\0'; + char *ip = addr; + int port = atoi(++c); + clusterManagerNode *node = clusterManagerNewNode(ip, port); + node->context = redisConnect(ip, port); + if (node->context->err) { + fprintf(stderr,"Could not connect to Redis at "); + fprintf(stderr,"%s:%d: %s\n", ip, port, node->context->errstr); + freeClusterManagerNode(node); + return 0; + } + char *err = NULL; + if (!clusterManagerNodeIsCluster(node, &err)) { + char *msg = (err ? err : "is not configured as a cluster node."); + fprintf(stderr, "[ERR] Node %s:%d %s\n", ip, port, msg); + if (err) zfree(err); + freeClusterManagerNode(node); + return 0; + } + err = NULL; + if (!clusterManagerNodeLoadInfo(node, 0, &err)) { + if (err) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + zfree(err); + } + freeClusterManagerNode(node); + return 0; + } + err = NULL; + if (!clusterManagerNodeIsEmpty(node, &err)) { + char *msg; + if (err) msg = err; + else { + msg = " is not empty. 
Either the node already knows other " + "nodes (check with CLUSTER NODES) or contains some " + "key in database 0."; + } + fprintf(stderr, "[ERR] Node %s:%d %s\n", ip, port, msg); + if (err) zfree(err); + freeClusterManagerNode(node); + return 0; + } + listAddNodeTail(cluster_manager.nodes, node); + } + int node_len = cluster_manager.nodes->len; + int replicas = config.cluster_manager_command.replicas; + int masters_count = CLUSTER_MANAGER_MASTERS_COUNT(node_len, replicas); + if (masters_count < 3) { + fprintf(stderr, + "*** ERROR: Invalid configuration for cluster creation.\n"); + fprintf(stderr, + "*** Redis Cluster requires at least 3 master nodes.\n"); + fprintf(stderr, + "*** This is not possible with %d nodes and %d replicas per node.", + node_len, replicas); + fprintf(stderr, "\n*** At least %d nodes are required.\n", + (3 * (replicas + 1))); + return 0; + } + printf(">>> Performing hash slots allocation on %d nodes...\n", node_len); + int interleaved_len = 0, ips_len = 0; + clusterManagerNode **interleaved = zcalloc(node_len*sizeof(**interleaved)); + char **ips = zcalloc(node_len * sizeof(char*)); + clusterManagerNodeArray *ip_nodes = zcalloc(node_len * sizeof(*ip_nodes)); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + int found = 0; + for (i = 0; i < ips_len; i++) { + char *ip = ips[i]; + if (!strcmp(ip, n->ip)) { + found = 1; + break; + } + } + if (!found) { + ips[ips_len++] = n->ip; + } + clusterManagerNodeArray *node_array = &(ip_nodes[i]); + if (node_array->nodes == NULL) + CLUSTER_MANAGER_NODEARRAY_INIT(node_array, node_len); + CLUSTER_MANAGER_NODEARRAY_ADD(node_array, n); + } + while (interleaved_len < node_len) { + for (i = 0; i < ips_len; i++) { + clusterManagerNodeArray *node_array = &(ip_nodes[i]); + if (node_array->count > 0) { + clusterManagerNode *n; + CLUSTER_MANAGER_NODEARRAY_SHIFT(node_array, n); + interleaved[interleaved_len++] = n; + 
} + } + } + clusterManagerNode **masters = interleaved; + interleaved += masters_count; + interleaved_len -= masters_count; + float slots_per_node = CLUSTER_MANAGER_SLOTS / (float) masters_count; + long first = 0; + float cursor = 0.0f; + for (i = 0; i < masters_count; i++) { + clusterManagerNode *master = masters[i]; + long last = lround(cursor + slots_per_node - 1); + if (last > CLUSTER_MANAGER_SLOTS || i == (masters_count - 1)) + last = CLUSTER_MANAGER_SLOTS - 1; + if (last < first) last = first; + printf("Master[%d] -> Slots %lu - %lu\n", i, first, last); + master->slots_count = 0; + for (j = first; j <= last; j++) { + master->slots[j] = 1; + master->slots_count++; + } + master->dirty = 1; + first = last + 1; + cursor += slots_per_node; + } + + int assign_unused = 0, available_count = interleaved_len; +assign_replicas: + for (i = 0; i < masters_count; i++) { + clusterManagerNode *master = masters[i]; + int assigned_replicas = 0; + while (assigned_replicas < replicas) { + if (available_count == 0) break; + clusterManagerNode *found = NULL, *slave = NULL; + int firstNodeIdx = -1; + for (j = 0; j < interleaved_len; j++) { + clusterManagerNode *n = interleaved[j]; + if (n == NULL) continue; + if (strcmp(n->ip, master->ip)) { + found = n; + interleaved[j] = NULL; + break; + } + if (firstNodeIdx < 0) firstNodeIdx = j; + } + if (found) slave = found; + else if (firstNodeIdx >= 0) { + slave = interleaved[firstNodeIdx]; + interleaved_len -= (interleaved - (interleaved + firstNodeIdx)); + interleaved += (firstNodeIdx + 1); + } + if (slave != NULL) { + assigned_replicas++; + available_count--; + slave->replicate = sdsnew(master->name); + slave->dirty = 1; + } else break; + printf("Adding replica %s:%d to %s:%d\n", slave->ip, slave->port, + master->ip, master->port); + if (assign_unused) break; + } + } + if (!assign_unused && available_count > 0) { + assign_unused = 1; + printf("Adding extra replicas...\n"); + goto assign_replicas; + } + for (i = 0; i < ips_len; i++) { + 
clusterManagerNodeArray *node_array = ip_nodes + i; + CLUSTER_MANAGER_NODEARRAY_RESET(node_array); + } + clusterManagerOptimizeAntiAffinity(ip_nodes, ips_len); + clusterManagerShowNodes(); + printf("Can I set the above configuration? %s", "(type 'yes' to accept): "); + fflush(stdout); + char buf[4]; + int nread = read(fileno(stdin),buf,4); + buf[3] = '\0'; + if (nread != 0 && !strcmp("yes", buf)) { + printf("\nFlushing configuration!\n"); + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + char *err = NULL; + int flushed = clusterManagerFlushNodeConfig(node, &err); + if (!flushed && node->dirty && !node->replicate) { + if (err != NULL) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + zfree(err); + } + goto cmd_err; + } + } + printf(">>> Nodes configuration updated\n"); + printf(">>> Assign a different config epoch to each node\n"); + int config_epoch = 1; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + redisReply *reply = NULL; + reply = CLUSTER_MANAGER_COMMAND(node, + "cluster set-config-epoch %d", + config_epoch++); + if (reply != NULL) freeReplyObject(reply); + } + printf(">>> Sending CLUSTER MEET messages to join the cluster\n"); + clusterManagerNode *first = NULL; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + if (first == NULL) { + first = node; + continue; + } + redisReply *reply = NULL; + reply = CLUSTER_MANAGER_COMMAND(node, "cluster meet %s %d", + first->ip, first->port); + if (reply != NULL) freeReplyObject(reply); + } + // Give one second for the join to start, in order to avoid that + // waiting for cluster join will find all the nodes agree about + // the config as they are still empty with unassigned slots. + sleep(1); + clusterManagerWaitForClusterJoin(); + // Useful for the replicas //TODO: create a function for this? 
+ listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + if (!node->dirty) continue; + char *err = NULL; + int flushed = clusterManagerFlushNodeConfig(node, &err); + if (!flushed && !node->replicate) { + if (err != NULL) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + zfree(err); + } + goto cmd_err; + } + } + // Reset Nodes + listRewind(cluster_manager.nodes, &li); + clusterManagerNode *first_node = NULL; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + if (!first_node) first_node = node; + else freeClusterManagerNode(node); + } + listEmpty(cluster_manager.nodes); + if (!clusterManagerLoadInfoFromNode(first_node, 0)) goto cmd_err; //TODO: msg? + clusterManagerCheckCluster(0); + } + /* Free everything */ + zfree(masters); + zfree(ips); + for (i = 0; i < node_len; i++) { + clusterManagerNodeArray *node_array = ip_nodes + i; + CLUSTER_MANAGER_NODEARRAY_FREE(node_array); + } + zfree(ip_nodes); + return 1; +cmd_err: + zfree(masters); + zfree(ips); + for (i = 0; i < node_len; i++) { + clusterManagerNodeArray *node_array = ip_nodes + i; + CLUSTER_MANAGER_NODEARRAY_FREE(node_array); + } + zfree(ip_nodes); + return 0; +} + +static int clusterManagerCommandInfo(int argc, char **argv) { + int port = 0; + char *ip = NULL; + if (argc == 1) { + char *addr = argv[0]; + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c != NULL) { + *c = '\0'; + ip = addr; + port = atoi(++c); + } else goto invalid_args; + } else { + ip = argv[0]; + port = atoi(argv[1]); + } + if (!ip || !port) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + clusterManagerShowInfo(); + return 1; +invalid_args: + fprintf(stderr, "Invalid arguments: you need to pass either a valid " + "address (ie. 120.0.0.1:7000) or space separated IP " + "and port (ie. 
120.0.0.1 7000)\n"); + return 0; +} + +static int clusterManagerCommandCheck(int argc, char **argv) { + int port = 0; + char *ip = NULL; + if (argc == 1) { + char *addr = argv[0]; + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c != NULL) { + *c = '\0'; + ip = addr; + port = atoi(++c); + } else goto invalid_args; + } else { + ip = argv[0]; + port = atoi(argv[1]); + } + if (!ip || !port) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + clusterManagerShowInfo(); + clusterManagerCheckCluster(0); + return 1; +invalid_args: + fprintf(stderr, "Invalid arguments: you need to pass either a valid " + "address (ie. 120.0.0.1:7000) or space separated IP " + "and port (ie. 120.0.0.1 7000)\n"); + return 0; +} + +static int clusterManagerCommandHelp(int argc, char **argv) { + UNUSED(argc); + UNUSED(argv); + int commands_count = sizeof(clusterManagerCommands) / + sizeof(clusterManagerCommandDef); + int i = 0, j; + fprintf(stderr, "Cluster Manager Commands:\n"); + for (; i < commands_count; i++) { + clusterManagerCommandDef *def = &(clusterManagerCommands[i]); + int namelen = strlen(def->name), padlen = 15 - namelen; + fprintf(stderr, " %s", def->name); + for (j = 0; j < padlen; j++) fprintf(stderr, " "); + fprintf(stderr, "%s\n", (def->args ? 
def->args : "")); + //TODO: if (def->options) + } + return 0; } /*------------------------------------------------------------------------------ From d74107c05d0bc45947dc4998b6eff5fdfa3219b5 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 31 Jan 2018 17:57:16 +0100 Subject: [PATCH 33/66] Added check for open slots (clusterManagerCheckCluster) --- src/redis-cli.c | 162 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 143 insertions(+), 19 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 9943d5753..b20cd31d1 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -74,6 +74,13 @@ (reconnectingRedisCommand(n->context, __VA_ARGS__)) #define CLUSTER_MANAGER_NODE_INFO(n) (CLUSTER_MANAGER_COMMAND(n, "INFO")) +#define CLUSTER_MANAGER_ERROR(err) do { \ + if (cluster_manager.errors == NULL) \ + cluster_manager.errors = listCreate(); \ + listAddNodeTail(cluster_manager.errors, err); \ + fprintf(stderr, "%s\n", (char *) err); \ +} while(0) + #define CLUSTER_MANAGER_RESET_SLOTS(n) do { \ memset(n->slots, 0, sizeof(n->slots)); \ n->slots_count = 0; \ @@ -137,7 +144,14 @@ int spectrum_palette_mono[] = {0,233,234,235,237,239,241,243,245,247,249,251,253 int *spectrum_palette; int spectrum_palette_size; -/* Cluster Manager command info */ +/* Dict Helpers */ + +static uint64_t dictSdsHash(const void *key); +static int dictSdsKeyCompare(void *privdata, const void *key1, + const void *key2); +static void dictSdsDestructor(void *privdata, void *val); + +/* Cluster Manager Command Info */ typedef struct clusterManagerCommand { char *name; int argc; @@ -196,6 +210,7 @@ static struct config { static struct clusterManager { list *nodes; + list *errors; } cluster_manager; typedef struct clusterManagerNode { @@ -212,6 +227,10 @@ typedef struct clusterManagerNode { uint8_t slots[CLUSTER_MANAGER_SLOTS]; int slots_count; list *friends; + sds *migrating; + sds *importing; + int migrating_count; + int importing_count; } clusterManagerNode; typedef struct 
clusterManagerNodeArray { @@ -221,6 +240,15 @@ typedef struct clusterManagerNodeArray { int count; } clusterManagerNodeArray; +static dictType clusterManagerDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + NULL, /* key destructor */ + dictSdsDestructor /* val destructor */ +}; + static clusterManagerNode *clusterManagerNewNode(char *ip, int port); static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, @@ -1810,13 +1838,22 @@ static void freeClusterManagerNode(clusterManagerNode *node) { if (node->replicate != NULL) sdsfree(node->replicate); if ((node->flags & CLUSTER_MANAGER_FLAG_FRIEND) && node->ip) sdsfree(node->ip); + int i; + if (node->migrating != NULL) { + for (i = 0; i < node->migrating_count; i++) sdsfree(node->migrating[i]); + zfree(node->migrating); + } + if (node->importing != NULL) { + for (i = 0; i < node->importing_count; i++) sdsfree(node->importing[i]); + zfree(node->importing); + } zfree(node); } static void freeClusterManager(void) { + listIter li; + listNode *ln; if (cluster_manager.nodes != NULL) { - listIter li; - listNode *ln; listRewind(cluster_manager.nodes,&li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; @@ -1825,9 +1862,18 @@ static void freeClusterManager(void) { listRelease(cluster_manager.nodes); cluster_manager.nodes = NULL; } + if (cluster_manager.errors != NULL) { + listRewind(cluster_manager.errors,&li); + while ((ln = listNext(&li)) != NULL) { + sds err = ln->value; + sdsfree(err); + } + listRelease(cluster_manager.errors); + cluster_manager.errors = NULL; + } } -static clusterManagerNode *clusterManagerNewNode(char * ip, int port) { +static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { clusterManagerNode *node = zmalloc(sizeof(*node)); node->context = NULL; node->name = NULL; @@ -1840,6 +1886,10 @@ static 
clusterManagerNode *clusterManagerNewNode(char * ip, int port) { node->replicate = NULL; node->dirty = 0; node->friends = NULL; + node->migrating = NULL; + node->importing = NULL; + node->migrating_count = 0; + node->importing_count = 0; CLUSTER_MANAGER_RESET_SLOTS(node); return node; } @@ -1902,17 +1952,9 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, int node_len = cluster_manager.nodes->len; *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); clusterManagerNode **offending_p = *offending; - dictType dtype = { - dictSdsHash, /* hash function */ - NULL, /* key dup */ - NULL, /* val dup */ - dictSdsKeyCompare, /* key compare */ - NULL, /* key destructor */ - dictSdsDestructor /* val destructor */ - }; for (i = 0; i < ip_len; i++) { clusterManagerNodeArray *node_array = &(ipnodes[i]); - dict *related = dictCreate(&dtype, NULL); + dict *related = dictCreate(&clusterManagerDictType, NULL); char *ip = NULL; for (j = 0; j < node_array->len; j++) { clusterManagerNode *node = node_array->nodes[j]; @@ -2291,7 +2333,32 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (remaining) line = p + 1; else line = p; if (slotsdef[0] == '[') { - //TODO: migrating/importing + slotsdef++; + if ((p = strstr(slotsdef, "->-"))) { // Migrating + *p = '\0'; + p += 3; + sds slot = sdsnew(slotsdef); + sds dst = sdsnew(p); + node->migrating_count += 2; + node->migrating = zrealloc(node->migrating, + (node->migrating_count * sizeof(sds))); + node->migrating[node->migrating_count - 2] = + slot; + node->migrating[node->migrating_count - 1] = + dst; + } else if ((p = strstr(slotsdef, "-<-"))) {//Importing + *p = '\0'; + p += 3; + sds slot = sdsnew(slotsdef); + sds src = sdsnew(p); + node->importing_count += 2; + node->importing = zrealloc(node->importing, + (node->importing_count * sizeof(sds))); + node->importing[node->importing_count - 2] = + slot; + node->importing[node->importing_count - 1] = + src; + } } else if ((p = 
strchr(slotsdef, '-')) != NULL) { int start, stop; *p = '\0'; @@ -2529,11 +2596,68 @@ static void clusterManagerCheckCluster(int quiet) { printf(">>> Performing Cluster Check (using node %s:%d)\n", node->ip, node->port); if (!quiet) clusterManagerShowNodes(); - if (!clusterManagerIsConfigConsistent()) - printf("[ERR] Nodes don't agree about configuration!\n"); //TODO: in redis-trib this error is added to @errors array - else - printf("[OK] All nodes agree about slots configuration.\n"); - //TODO:check_open_slots + if (!clusterManagerIsConfigConsistent()) { + sds err = sdsnew("[ERR] Nodes don't agree about configuration!"); + CLUSTER_MANAGER_ERROR(err); + } else printf("[OK] All nodes agree about slots configuration.\n"); + // Check open slots + listIter li; + listRewind(cluster_manager.nodes, &li); + int i; + dict *open_slots = NULL; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->migrating != NULL) { + if (open_slots == NULL) + open_slots = dictCreate(&clusterManagerDictType, NULL); + sds errstr = sdsempty(); + errstr = sdscatprintf(errstr, + "[WARNING] Node %s:%d has slots in " + "migrating state ", + n->ip, + n->port); + for (i = 0; i < n->migrating_count; i += 2) { + sds slot = n->migrating[i]; + dictAdd(open_slots, slot, n->migrating[i + 1]); + char *fmt = (i > 0 ? ",%S" : "%S"); + errstr = sdscatfmt(errstr, fmt, slot); + } + errstr = sdscat(errstr, "."); + CLUSTER_MANAGER_ERROR(errstr); + } + if (n->importing != NULL) { + if (open_slots == NULL) + open_slots = dictCreate(&clusterManagerDictType, NULL); + sds errstr = sdsempty(); + errstr = sdscatprintf(errstr, + "[WARNING] Node %s:%d has slots in " + "importing state ", + n->ip, + n->port); + for (i = 0; i < n->importing_count; i += 2) { + sds slot = n->importing[i]; + dictAdd(open_slots, slot, n->importing[i + 1]); + char *fmt = (i > 0 ? 
",%S" : "%S"); + errstr = sdscatfmt(errstr, fmt, slot); + } + errstr = sdscat(errstr, "."); + CLUSTER_MANAGER_ERROR(errstr); + } + } + if (open_slots != NULL) { + dictIterator *iter = dictGetIterator(open_slots); + dictEntry *entry; + sds errstr = sdsnew("[WARNING] The following slots are open: "); + i = 0; + while ((entry = dictNext(iter)) != NULL) { + sds slot = (sds) dictGetKey(entry); + char *fmt = (i++ > 0 ? ",%S" : "%S"); + errstr = sdscatfmt(errstr, fmt, slot); + } + fprintf(stderr, "%s.\n", (char *) errstr); + sdsfree(errstr); + dictRelease(open_slots); + } //TODO:check_slots_coverage } From 4ccca8e7a8b1edfb43da428538a7791c1c3c290e Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 31 Jan 2018 19:25:02 +0100 Subject: [PATCH 34/66] - Cluster Manager: fixed various memory leaks - Cluster Manager: fixed flags assignment in clusterManagerNodeLoadInfo --- src/redis-cli.c | 54 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index b20cd31d1..a596afca2 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2310,12 +2310,6 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, } if (!flags) goto node_cmd_err; int myself = (strstr(flags, "myself") != NULL); - if (strstr(flags, "noaddr") != NULL) - node->flags |= CLUSTER_MANAGER_FLAG_NOADDR; - if (strstr(flags, "disconnected") != NULL) - node->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT; - if (strstr(flags, "fail") != NULL) - node->flags |= CLUSTER_MANAGER_FLAG_FAIL; clusterManagerNode *currentNode = NULL; if (myself) { node->flags |= CLUSTER_MANAGER_FLAG_MYSELF; @@ -2396,10 +2390,22 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (node->friends == NULL) node->friends = listCreate(); listAddNodeTail(node->friends, currentNode); } - if (name != NULL) currentNode->name = sdsnew(name); + if (name != NULL) { + if (currentNode->name) sdsfree(currentNode->name); + 
currentNode->name = sdsnew(name); + } + if (strstr(flags, "noaddr") != NULL) + currentNode->flags |= CLUSTER_MANAGER_FLAG_NOADDR; + if (strstr(flags, "disconnected") != NULL) + currentNode->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT; + if (strstr(flags, "fail") != NULL) + currentNode->flags |= CLUSTER_MANAGER_FLAG_FAIL; if (strstr(flags, "slave") != NULL) { currentNode->flags |= CLUSTER_MANAGER_FLAG_SLAVE; - if (master_id != NULL) currentNode->replicate = sdsnew(master_id); + if (master_id != NULL) { + if (currentNode->replicate) sdsfree(currentNode->replicate); + currentNode->replicate = sdsnew(master_id); + } } if (config_epoch != NULL) currentNode->current_epoch = atoll(config_epoch); @@ -2442,27 +2448,39 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { freeClusterManagerNode(node); return 0; } + listIter li; + listNode *ln; + if (cluster_manager.nodes != NULL) { + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) + freeClusterManagerNode((clusterManagerNode *) ln->value); + listRelease(cluster_manager.nodes); + } cluster_manager.nodes = listCreate(); listAddNodeTail(cluster_manager.nodes, node); if (node->friends != NULL) { - listIter li; - listNode *ln; listRewind(node->friends, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *friend = ln->value; - if (!friend->ip || !friend->port) continue; + if (!friend->ip || !friend->port) goto invalid_friend; if (!friend->context) friend->context = redisConnect(friend->ip, friend->port); - if (friend->context->err) continue; + if (friend->context->err) goto invalid_friend; e = NULL; if (clusterManagerNodeLoadInfo(friend, 0, &e)) { if (friend->flags & (CLUSTER_MANAGER_FLAG_NOADDR | CLUSTER_MANAGER_FLAG_DISCONNECT | - CLUSTER_MANAGER_FLAG_FAIL)) continue; + CLUSTER_MANAGER_FLAG_FAIL)) + goto invalid_friend; listAddNodeTail(cluster_manager.nodes, friend); - - } else fprintf(stderr,"[ERR] Unable to load info for node %s:%d\n", - friend->ip, 
friend->port); + } else { + fprintf(stderr,"[ERR] Unable to load info for node %s:%d\n", + friend->ip, friend->port); + goto invalid_friend; + } + continue; +invalid_friend: + freeClusterManagerNode(friend); } listRelease(node->friends); node->friends = NULL; @@ -2601,6 +2619,7 @@ static void clusterManagerCheckCluster(int quiet) { CLUSTER_MANAGER_ERROR(err); } else printf("[OK] All nodes agree about slots configuration.\n"); // Check open slots + printf(">>> Check for open slots...\n"); listIter li; listRewind(cluster_manager.nodes, &li); int i; @@ -2836,6 +2855,7 @@ assign_replicas: if (slave != NULL) { assigned_replicas++; available_count--; + if (slave->replicate) sdsfree(slave->replicate); slave->replicate = sdsnew(master->name); slave->dirty = 1; } else break; @@ -2873,7 +2893,7 @@ assign_replicas: zfree(err); } goto cmd_err; - } + } else if (err != NULL) zfree(err); } printf(">>> Nodes configuration updated\n"); printf(">>> Assign a different config epoch to each node\n"); From fda6c55176cc9c37dfa557ad9abf93c984bc78a5 Mon Sep 17 00:00:00 2001 From: artix Date: Thu, 1 Feb 2018 17:43:36 +0100 Subject: [PATCH 35/66] Cluster Manager: slots coverage check. 
--- src/redis-cli.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index a596afca2..0dede2d9c 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2607,6 +2607,24 @@ static int clusterManagerIsConfigConsistent(void) { return consistent; } +static int clusterManagerGetCoveredSlots(char *all_slots) { + if (cluster_manager.nodes == NULL) return 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + int totslots = 0, i; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + if (node->slots[i] && !all_slots[i]) { + all_slots[i] = 1; + totslots++; + } + } + } + return totslots; +} + static void clusterManagerCheckCluster(int quiet) { listNode *ln = listFirst(cluster_manager.nodes); if (!ln) return; @@ -2677,7 +2695,19 @@ static void clusterManagerCheckCluster(int quiet) { sdsfree(errstr); dictRelease(open_slots); } - //TODO:check_slots_coverage + printf(">>> Check slots coverage...\n"); + char slots[CLUSTER_MANAGER_SLOTS]; + memset(slots, 0, CLUSTER_MANAGER_SLOTS); + int coverage = clusterManagerGetCoveredSlots(slots); + if (coverage == CLUSTER_MANAGER_SLOTS) + printf("[OK] All %d slots covered.\n", CLUSTER_MANAGER_SLOTS); + else { + sds err = sdsempty(); + err = sdscatprintf(err, "[ERR] Not all %d slots are " + "covered by nodes.\n", + CLUSTER_MANAGER_SLOTS); + CLUSTER_MANAGER_ERROR(err); + } } static void clusterManagerMode(clusterManagerCommandProc *proc) { From 377717d6e18bf97202a8525f3fda87f89555c876 Mon Sep 17 00:00:00 2001 From: artix Date: Thu, 1 Feb 2018 20:09:30 +0100 Subject: [PATCH 36/66] Cluster Manager: reply error catch for MEET command --- src/redis-cli.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 0dede2d9c..83638616a 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2949,7 +2949,16 @@ 
assign_replicas: redisReply *reply = NULL; reply = CLUSTER_MANAGER_COMMAND(node, "cluster meet %s %d", first->ip, first->port); - if (reply != NULL) freeReplyObject(reply); + int is_err = 0; + if (reply != NULL) { + if ((is_err = reply->type == REDIS_REPLY_ERROR)) + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, reply->str); + freeReplyObject(reply); + } else { + is_err = 1; + fprintf(stderr, "Failed to send CLUSTER MEET command.\n"); + } + if (is_err) goto cmd_err; } // Give one second for the join to start, in order to avoid that // waiting for cluster join will find all the nodes agree about From 653799a4d822efa140a50765687732254c032c93 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 7 Feb 2018 11:29:25 +0100 Subject: [PATCH 37/66] Cluster Manager: cluster is considered consistent if only one node has been found --- src/redis-cli.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 83638616a..19c8fcddb 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2584,7 +2584,10 @@ cleanup: static int clusterManagerIsConfigConsistent(void) { if (cluster_manager.nodes == NULL) return 0; - int consistent = 0; + int consistent = (listLength(cluster_manager.nodes) <= 1); + // If the Cluster has only one node, it's always consistent + // Does it make sense? 
+ if (consistent) return 1; sds first_cfg = NULL; listIter li; listNode *ln; From a376881fc2a4cb2057cf90caf00929b4cbd84972 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 7 Feb 2018 12:02:56 +0100 Subject: [PATCH 38/66] ClusterManager: added replicas count to clusterManagerNode --- src/redis-cli.c | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 19c8fcddb..791b0dd87 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -223,9 +223,11 @@ typedef struct clusterManagerNode { time_t ping_recv; int flags; sds replicate; + list replicas; int dirty; uint8_t slots[CLUSTER_MANAGER_SLOTS]; int slots_count; + int replicas_count; list *friends; sds *migrating; sds *importing; @@ -250,6 +252,7 @@ static dictType clusterManagerDictType = { }; static clusterManagerNode *clusterManagerNewNode(char *ip, int port); +static clusterManagerNode *clusterManagerNodeByName(const char *name); static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err); @@ -265,6 +268,7 @@ static void clusterManagerShowInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); static void clusterManagerWaitForClusterJoin(void); static void clusterManagerCheckCluster(int quiet); + typedef int clusterManagerCommandProc(int argc, char **argv); typedef struct clusterManagerCommandDef { char *name; @@ -1890,10 +1894,31 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { node->importing = NULL; node->migrating_count = 0; node->importing_count = 0; + node->replicas_count = 0; CLUSTER_MANAGER_RESET_SLOTS(node); return node; } +static clusterManagerNode *clusterManagerNodeByName(const char *name) { + if (cluster_manager.nodes == NULL) return NULL; + clusterManagerNode *found = NULL; + sds lcname = sdsempty(); + lcname = sdscpy(lcname, name); + sdstolower(lcname); 
+ listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->name && !sdscmp(n->name, lcname)) { + found = n; + break; + } + } + sdsfree(lcname); + return found; +} + static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); int is_err = 0; @@ -2119,7 +2144,9 @@ static sds clusterManagerNodeInfo(clusterManagerNode *node) { } if (node->replicate != NULL) info = sdscatfmt(info, "\n replicates %S", node->replicate); - //else if () {} //TODO: add replicas info + else if (node->replicas_count) + info = sdscatfmt(info, "\n %U additional replica(s)", + node->replicas_count); return info; } @@ -2485,6 +2512,18 @@ invalid_friend: listRelease(node->friends); node->friends = NULL; } + // Count replicas for each node + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->replicate != NULL) { + clusterManagerNode *master = clusterManagerNodeByName(n->replicate); + if (master == NULL) { + printf("*** WARNING: %s:%d claims to be slave of unknown " + "node ID %s.\n", n->ip, n->port, n->replicate); + } else master->replicas_count++; + } + } return 1; } From a0e1884b543918b304d60e9fa480a9364c22d6d2 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 9 Feb 2018 13:02:37 +0100 Subject: [PATCH 39/66] Cluster Manager: CLUSTER_MANAGER_NODE_CONNECT macro --- src/redis-cli.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 791b0dd87..4ce3a12dc 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -70,6 +70,8 @@ #define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) #define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) (nodes/(replicas + 1)) +#define CLUSTER_MANAGER_NODE_CONNECT(n) \ + (n->context = redisConnect(n->ip, n->port)); 
#define CLUSTER_MANAGER_COMMAND(n,...) \ (reconnectingRedisCommand(n->context, __VA_ARGS__)) #define CLUSTER_MANAGER_NODE_INFO(n) (CLUSTER_MANAGER_COMMAND(n, "INFO")) @@ -2449,7 +2451,7 @@ node_cmd_err: static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { if (node->context == NULL) - node->context = redisConnect(node->ip, node->port); + CLUSTER_MANAGER_NODE_CONNECT(node); if (node->context->err) { fprintf(stderr,"Could not connect to Redis at "); fprintf(stderr,"%s:%d: %s\n", node->ip, node->port, @@ -2491,7 +2493,7 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { clusterManagerNode *friend = ln->value; if (!friend->ip || !friend->port) goto invalid_friend; if (!friend->context) - friend->context = redisConnect(friend->ip, friend->port); + CLUSTER_MANAGER_NODE_CONNECT(friend); if (friend->context->err) goto invalid_friend; e = NULL; if (clusterManagerNodeLoadInfo(friend, 0, &e)) { @@ -2785,7 +2787,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { char *ip = addr; int port = atoi(++c); clusterManagerNode *node = clusterManagerNewNode(ip, port); - node->context = redisConnect(ip, port); + CLUSTER_MANAGER_NODE_CONNECT(node); if (node->context->err) { fprintf(stderr,"Could not connect to Redis at "); fprintf(stderr,"%s:%d: %s\n", ip, port, node->context->errstr); From fa986e8cad72c3b9561b8cb9788d09dedc93cd01 Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 13 Feb 2018 12:00:06 +0100 Subject: [PATCH 40/66] Cluster Manager: 'call' command. 
--- src/redis-cli.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/src/redis-cli.c b/src/redis-cli.c index 4ce3a12dc..00b5e90a0 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -286,6 +286,7 @@ static int clusterManagerIsConfigConsistent(void); static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); /* User preferences. */ @@ -1802,6 +1803,8 @@ clusterManagerCommandDef clusterManagerCommands[] = { "cluster-replicas"}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, + {"call", clusterManagerCommandCall, -2, + "host:port command arg arg .. arg", NULL}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} }; @@ -2449,6 +2452,11 @@ node_cmd_err: return 0; } +/* Retrieves info about the cluster using argument 'node' as the starting + * point. All nodes will be loaded inside the cluster_manager.nodes list. + * Warning: if something goes wrong, it will free the starting node before + * returning 0. 
*/ + static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { if (node->context == NULL) CLUSTER_MANAGER_NODE_CONNECT(node); @@ -3115,6 +3123,56 @@ invalid_args: return 0; } +static int clusterManagerCommandCall(int argc, char **argv) { + int port = 0; + char *ip = NULL; + char *addr = argv[0]; + char *c = strrchr(addr, '@'); + int i; + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c != NULL) { + *c = '\0'; + ip = addr; + port = atoi(++c); + } else { + fprintf(stderr, + "Invalid arguments: first agrumnt must be host:port.\n"); + return 0; + } + clusterManagerNode *refnode = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + argc--; + argv++; + size_t *argvlen = zmalloc(argc*sizeof(size_t)); + printf(">>> Calling"); + for (i = 0; i < argc; i++) { + argvlen[i] = strlen(argv[i]); + printf(" %s", argv[i]); + } + printf("\n"); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (!n->context) CLUSTER_MANAGER_NODE_CONNECT(n); + redisReply *reply = NULL; + redisAppendCommandArgv(n->context, argc, (const char **) argv, argvlen); + int status = redisGetReply(n->context, (void **)(&reply)); + if (status != REDIS_OK || reply == NULL ) + printf("%s:%d: Failed!\n", n->ip, n->port); //TODO: better message? 
+ else { + sds formatted_reply = cliFormatReplyTTY(reply, ""); + printf("%s:%d: %s\n", n->ip, n->port, (char *) formatted_reply); + sdsfree(formatted_reply); + } + if (reply != NULL) freeReplyObject(reply); + } + zfree(argvlen); + return 1; +} + static int clusterManagerCommandHelp(int argc, char **argv) { UNUSED(argc); UNUSED(argv); From 3161a5bed0d966ab82a8147c0909d5639d307e36 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 14 Feb 2018 17:54:46 +0100 Subject: [PATCH 41/66] Cluster Manager: improved cleanup/error handling in various functions --- src/redis-cli.c | 101 +++++++++++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 45 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 00b5e90a0..63a4f69bd 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2220,7 +2220,7 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) { redisReply *reply = NULL; void *_reply = NULL; - int is_err = 0; + int is_err = 0, success = 1; int argc; sds *argv = NULL; size_t *argvlen = NULL; @@ -2235,39 +2235,44 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) added++; } } - if (!added) goto node_cmd_err; + if (!added) { + success = 0; + goto cleanup; + } argv = cliSplitArgs(cmd, &argc); - if (argc == 0 || argv == NULL) goto node_cmd_err; + if (argc == 0 || argv == NULL) { + success = 0; + goto cleanup; + } argvlen = zmalloc(argc*sizeof(size_t)); for (i = 0; i < argc; i++) argvlen[i] = sdslen(argv[i]); redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen); - if (redisGetReply(node->context, &_reply) != REDIS_OK) goto node_cmd_err; + if (redisGetReply(node->context, &_reply) != REDIS_OK) { + success = 1; + goto cleanup; + } reply = (redisReply*) _reply; if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { if (is_err && err != NULL) { *err = zmalloc((reply->len + 1) * sizeof(char)); strcpy(*err, reply->str); } - goto node_cmd_err; + success = 0; + goto cleanup; } - 
sdsfree(cmd); - zfree(argvlen); - sdsfreesplitres(argv,argc); - freeReplyObject(reply); - return 1; -node_cmd_err: +cleanup: sdsfree(cmd); zfree(argvlen); if (argv != NULL) sdsfreesplitres(argv,argc); if (reply != NULL) freeReplyObject(reply); - return 0; + return success; } static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { if (!node->dirty) return 0; redisReply *reply = NULL; - int is_err = 0; + int is_err = 0, success = 1; *err = NULL; if (node->replicate != NULL) { reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER REPLICATE %s", @@ -2277,18 +2282,20 @@ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { *err = zmalloc((reply->len + 1) * sizeof(char)); strcpy(*err, reply->str); } - goto node_cmd_err; + success = 0; + goto cleanup; } } else { int added = clusterManagerAddSlots(node, err); - if (!added || *err != NULL) goto node_cmd_err; + if (!added || *err != NULL) { + success = 0; + goto cleanup; + } } node->dirty = 0; - freeReplyObject(reply); - return 1; -node_cmd_err: - freeReplyObject(reply); - return 0; +cleanup: + if (reply != NULL) freeReplyObject(reply); + return success; } static void clusterManagerWaitForClusterJoin(void) { @@ -2305,14 +2312,15 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err) { redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); - int is_err = 0; + int is_err = 0, success = 1; *err = NULL; if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { if (is_err && err != NULL) { *err = zmalloc((reply->len + 1) * sizeof(char)); strcpy(*err, reply->str); } - goto node_cmd_err; + success = 0; + goto cleanup; } int getfriends = (opts & CLUSTER_MANAGER_OPT_GETFRIENDS); char *lines = reply->str, *p, *line; @@ -2340,7 +2348,10 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, } if (i == 8) break; // Slots } - if (!flags) goto node_cmd_err; + if (!flags) { + success = 0; + goto cleanup; + } int 
myself = (strstr(flags, "myself") != NULL); clusterManagerNode *currentNode = NULL; if (myself) { @@ -2406,14 +2417,16 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (addr == NULL) { // TODO: find a better err message fprintf(stderr, "Error: invalid CLUSTER NODES reply\n"); - goto node_cmd_err; + success = 0; + goto cleanup; } char *c = strrchr(addr, '@'); if (c != NULL) *c = '\0'; c = strrchr(addr, ':'); if (c == NULL) { fprintf(stderr, "Error: invalid CLUSTER NODES reply\n"); - goto node_cmd_err; + success = 0; + goto cleanup; } *c = '\0'; int port = atoi(++c); @@ -2445,11 +2458,9 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (ping_recv != NULL) currentNode->ping_recv = atoll(ping_recv); if (!getfriends && myself) break; } - freeReplyObject(reply); - return 1; -node_cmd_err: - freeReplyObject(reply); - return 0; +cleanup: + if (reply) freeReplyObject(reply); + return success; } /* Retrieves info about the cluster using argument 'node' as the starting @@ -2780,7 +2791,7 @@ cluster_manager_err: static int clusterManagerCommandCreate(int argc, char **argv) { printf("Cluster Manager: Creating Cluster\n"); - int i, j; + int i, j, success = 1; cluster_manager.nodes = listCreate(); for (i = 0; i < argc; i++) { char *addr = argv[i]; @@ -2974,7 +2985,8 @@ assign_replicas: CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); zfree(err); } - goto cmd_err; + success = 0; + goto cleanup; } else if (err != NULL) zfree(err); } printf(">>> Nodes configuration updated\n"); @@ -3010,7 +3022,10 @@ assign_replicas: is_err = 1; fprintf(stderr, "Failed to send CLUSTER MEET command.\n"); } - if (is_err) goto cmd_err; + if (is_err) { + success = 0; + goto cleanup; + } } // Give one second for the join to start, in order to avoid that // waiting for cluster join will find all the nodes agree about @@ -3029,7 +3044,8 @@ assign_replicas: CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); zfree(err); } - goto cmd_err; + success = 0; + 
goto cleanup; } } // Reset Nodes @@ -3041,9 +3057,13 @@ assign_replicas: else freeClusterManagerNode(node); } listEmpty(cluster_manager.nodes); - if (!clusterManagerLoadInfoFromNode(first_node, 0)) goto cmd_err; //TODO: msg? + if (!clusterManagerLoadInfoFromNode(first_node, 0)) { + success = 0; + goto cleanup; //TODO: msg? + } clusterManagerCheckCluster(0); } +cleanup: /* Free everything */ zfree(masters); zfree(ips); @@ -3052,16 +3072,7 @@ assign_replicas: CLUSTER_MANAGER_NODEARRAY_FREE(node_array); } zfree(ip_nodes); - return 1; -cmd_err: - zfree(masters); - zfree(ips); - for (i = 0; i < node_len; i++) { - clusterManagerNodeArray *node_array = ip_nodes + i; - CLUSTER_MANAGER_NODEARRAY_FREE(node_array); - } - zfree(ip_nodes); - return 0; + return success; } static int clusterManagerCommandInfo(int argc, char **argv) { From b068d0fb13e3a4d488c23e72e9cd3f66d7aeb885 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 14 Feb 2018 19:29:28 +0100 Subject: [PATCH 42/66] Cluster Manager: colorized output --- src/redis-cli.c | 130 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 95 insertions(+), 35 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 63a4f69bd..09ad54979 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -67,6 +67,7 @@ #define REDIS_CLI_HISTFILE_DEFAULT ".rediscli_history" #define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE" #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" + #define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) #define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) (nodes/(replicas + 1)) @@ -80,7 +81,7 @@ if (cluster_manager.errors == NULL) \ cluster_manager.errors = listCreate(); \ listAddNodeTail(cluster_manager.errors, err); \ - fprintf(stderr, "%s\n", (char *) err); \ + clusterManagerLogErr("%s\n", (char *) err); \ } while(0) #define CLUSTER_MANAGER_RESET_SLOTS(n) do { \ @@ -124,7 +125,20 @@ } while(0) #define CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err) \ - 
fprintf(stderr,"Node %s:%d replied with error:\n%s\n", n->ip, n->port, err); + clusterManagerLogErr("Node %s:%d replied with error:\n%s\n", \ + n->ip, n->port, err); + +#define clusterManagerLogInfo(...) \ + clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_INFO,__VA_ARGS__) + +#define clusterManagerLogErr(...) \ + clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_ERR,__VA_ARGS__) + +#define clusterManagerLogWarn(...) \ + clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_WARN,__VA_ARGS__) + +#define clusterManagerLogOk(...) \ + clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_SUCCESS,__VA_ARGS__) #define CLUSTER_MANAGER_FLAG_MYSELF 1 << 0 #define CLUSTER_MANAGER_FLAG_SLAVE 1 << 1 @@ -133,7 +147,22 @@ #define CLUSTER_MANAGER_FLAG_DISCONNECT 1 << 4 #define CLUSTER_MANAGER_FLAG_FAIL 1 << 5 -#define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 +#define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0 +#define CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1 +#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 + +#define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 + +#define CLUSTER_MANAGER_LOG_LVL_INFO 1 +#define CLUSTER_MANAGER_LOG_LVL_WARN 2 +#define CLUSTER_MANAGER_LOG_LVL_ERR 3 +#define CLUSTER_MANAGER_LOG_LVL_SUCCESS 4 + +#define LOG_COLOR_BOLD "29;1m" +#define LOG_COLOR_RED "31;1m" +#define LOG_COLOR_GREEN "32;1m" +#define LOG_COLOR_YELLOW "33;1m" +#define LOG_COLOR_RESET "0m" /* --latency-dist palettes. */ int spectrum_palette_color_size = 19; @@ -270,6 +299,7 @@ static void clusterManagerShowInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); static void clusterManagerWaitForClusterJoin(void); static void clusterManagerCheckCluster(int quiet); +static void clusterManagerLog(int level, const char* fmt, ...); typedef int clusterManagerCommandProc(int argc, char **argv); typedef struct clusterManagerCommandDef { @@ -1267,6 +1297,7 @@ static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { cmd->name = cmdname; cmd->argc = argc; cmd->argv = argc ? 
argv : NULL; + if (isColorTerm()) cmd->flags |= CLUSTER_MANAGER_CMD_FLAG_COLOR; } static int parseOptions(int argc, char **argv) { @@ -2042,7 +2073,8 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, clusterManagerNode **offenders = NULL, **aux; int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); if (score == 0) goto cleanup; - printf(">>> Trying to optimize slaves allocation for anti-affinity\n"); + clusterManagerLogInfo(">>> Trying to optimize slaves allocation " + "for anti-affinity\n"); int node_len = cluster_manager.nodes->len; int maxiter = 500 * node_len; srand(time(NULL)); @@ -2091,12 +2123,15 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, zfree(aux), aux = NULL; score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); char *msg; - if (score == 0) msg = "[OK] Perfect anti-affinity obtained!"; + int perfect = (score == 0); + int log_level = (perfect ? CLUSTER_MANAGER_LOG_LVL_SUCCESS : + CLUSTER_MANAGER_LOG_LVL_WARN); + if (perfect) msg = "[OK] Perfect anti-affinity obtained!"; else if (score >= 10000) msg = ("[WARNING] Some slaves are in the same host as their master"); else msg=("[WARNING] Some slaves of the same master are in the same host"); - printf("%s\n", msg); + clusterManagerLog(log_level, "%s\n", msg); cleanup: zfree(offenders); zfree(aux); @@ -2211,7 +2246,7 @@ static void clusterManagerShowInfo(void) { keys += dbsize; } } - printf("[OK] %d keys in %d masters.\n", keys, masters); + clusterManagerLogOk("[OK] %d keys in %d masters.\n", keys, masters); float keys_per_slot = keys / (float) CLUSTER_MANAGER_SLOTS; printf("%.2f keys per slot on average.\n", keys_per_slot); } @@ -2482,7 +2517,7 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { char *e = NULL; if (!clusterManagerNodeIsCluster(node, &e)) { char *msg = (e ? 
e : "is not configured as a cluster node."); - fprintf(stderr, "[ERR] Node %s:%d %s\n", node->ip, node->port, msg); + clusterManagerLogErr("[ERR] Node %s:%d %s\n",node->ip,node->port,msg); if (e) zfree(e); freeClusterManagerNode(node); return 0; @@ -2522,8 +2557,9 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { goto invalid_friend; listAddNodeTail(cluster_manager.nodes, friend); } else { - fprintf(stderr,"[ERR] Unable to load info for node %s:%d\n", - friend->ip, friend->port); + clusterManagerLogErr("[ERR] Unable to load info for " + "node %s:%d\n", + friend->ip, friend->port); goto invalid_friend; } continue; @@ -2692,15 +2728,18 @@ static void clusterManagerCheckCluster(int quiet) { listNode *ln = listFirst(cluster_manager.nodes); if (!ln) return; clusterManagerNode *node = ln->value; - printf(">>> Performing Cluster Check (using node %s:%d)\n", - node->ip, node->port); + clusterManagerLogInfo(">>> Performing Cluster Check (using node %s:%d)\n", + node->ip, node->port); if (!quiet) clusterManagerShowNodes(); if (!clusterManagerIsConfigConsistent()) { sds err = sdsnew("[ERR] Nodes don't agree about configuration!"); CLUSTER_MANAGER_ERROR(err); - } else printf("[OK] All nodes agree about slots configuration.\n"); + } else { + clusterManagerLogOk("[OK] All nodes agree about slots " + "configuration.\n"); + } // Check open slots - printf(">>> Check for open slots...\n"); + clusterManagerLogInfo(">>> Check for open slots...\n"); listIter li; listRewind(cluster_manager.nodes, &li); int i; @@ -2754,17 +2793,18 @@ static void clusterManagerCheckCluster(int quiet) { char *fmt = (i++ > 0 ? 
",%S" : "%S"); errstr = sdscatfmt(errstr, fmt, slot); } - fprintf(stderr, "%s.\n", (char *) errstr); + clusterManagerLogErr("%s.\n", (char *) errstr); sdsfree(errstr); dictRelease(open_slots); } - printf(">>> Check slots coverage...\n"); + clusterManagerLogInfo(">>> Check slots coverage...\n"); char slots[CLUSTER_MANAGER_SLOTS]; memset(slots, 0, CLUSTER_MANAGER_SLOTS); int coverage = clusterManagerGetCoveredSlots(slots); - if (coverage == CLUSTER_MANAGER_SLOTS) - printf("[OK] All %d slots covered.\n", CLUSTER_MANAGER_SLOTS); - else { + if (coverage == CLUSTER_MANAGER_SLOTS) { + clusterManagerLogOk("[OK] All %d slots covered.\n", + CLUSTER_MANAGER_SLOTS); + } else { sds err = sdsempty(); err = sdscatprintf(err, "[ERR] Not all %d slots are " "covered by nodes.\n", @@ -2773,6 +2813,26 @@ static void clusterManagerCheckCluster(int quiet) { } } +static void clusterManagerLog(int level, const char* fmt, ...) { + int use_colors = + (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COLOR); + if (use_colors) { + printf("\033["); + switch (level) { + case CLUSTER_MANAGER_LOG_LVL_INFO: printf(LOG_COLOR_BOLD); break; + case CLUSTER_MANAGER_LOG_LVL_WARN: printf(LOG_COLOR_YELLOW); break; + case CLUSTER_MANAGER_LOG_LVL_ERR: printf(LOG_COLOR_RED); break; + case CLUSTER_MANAGER_LOG_LVL_SUCCESS: printf(LOG_COLOR_GREEN); break; + default: printf(LOG_COLOR_RESET); break; + } + } + va_list ap; + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); + if (use_colors) printf("\033[" LOG_COLOR_RESET); +} + static void clusterManagerMode(clusterManagerCommandProc *proc) { int argc = config.cluster_manager_command.argc; char **argv = config.cluster_manager_command.argv; @@ -2790,7 +2850,6 @@ cluster_manager_err: /* Cluster Manager Commands */ static int clusterManagerCommandCreate(int argc, char **argv) { - printf("Cluster Manager: Creating Cluster\n"); int i, j, success = 1; cluster_manager.nodes = listCreate(); for (i = 0; i < argc; i++) { @@ -2816,7 +2875,7 @@ static int 
clusterManagerCommandCreate(int argc, char **argv) { char *err = NULL; if (!clusterManagerNodeIsCluster(node, &err)) { char *msg = (err ? err : "is not configured as a cluster node."); - fprintf(stderr, "[ERR] Node %s:%d %s\n", ip, port, msg); + clusterManagerLogErr("[ERR] Node %s:%d %s\n", ip, port, msg); if (err) zfree(err); freeClusterManagerNode(node); return 0; @@ -2835,11 +2894,11 @@ static int clusterManagerCommandCreate(int argc, char **argv) { char *msg; if (err) msg = err; else { - msg = " is not empty. Either the node already knows other " + msg = "is not empty. Either the node already knows other " "nodes (check with CLUSTER NODES) or contains some " "key in database 0."; } - fprintf(stderr, "[ERR] Node %s:%d %s\n", ip, port, msg); + clusterManagerLogErr("[ERR] Node %s:%d %s\n", ip, port, msg); if (err) zfree(err); freeClusterManagerNode(node); return 0; @@ -2850,18 +2909,17 @@ static int clusterManagerCommandCreate(int argc, char **argv) { int replicas = config.cluster_manager_command.replicas; int masters_count = CLUSTER_MANAGER_MASTERS_COUNT(node_len, replicas); if (masters_count < 3) { - fprintf(stderr, - "*** ERROR: Invalid configuration for cluster creation.\n"); - fprintf(stderr, - "*** Redis Cluster requires at least 3 master nodes.\n"); - fprintf(stderr, + clusterManagerLogErr( + "*** ERROR: Invalid configuration for cluster creation.\n" + "*** Redis Cluster requires at least 3 master nodes.\n" "*** This is not possible with %d nodes and %d replicas per node.", node_len, replicas); - fprintf(stderr, "\n*** At least %d nodes are required.\n", - (3 * (replicas + 1))); + clusterManagerLogErr("\n*** At least %d nodes are required.\n", + 3 * (replicas + 1)); return 0; } - printf(">>> Performing hash slots allocation on %d nodes...\n", node_len); + clusterManagerLogInfo(">>> Performing hash slots allocation " + "on %d nodes...\n", node_len); int interleaved_len = 0, ips_len = 0; clusterManagerNode **interleaved = 
zcalloc(node_len*sizeof(**interleaved)); char **ips = zcalloc(node_len * sizeof(char*)); @@ -2989,8 +3047,9 @@ assign_replicas: goto cleanup; } else if (err != NULL) zfree(err); } - printf(">>> Nodes configuration updated\n"); - printf(">>> Assign a different config epoch to each node\n"); + clusterManagerLogInfo(">>> Nodes configuration updated\n"); + clusterManagerLogInfo(">>> Assign a different config epoch to " + "each node\n"); int config_epoch = 1; listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { @@ -3001,7 +3060,8 @@ assign_replicas: config_epoch++); if (reply != NULL) freeReplyObject(reply); } - printf(">>> Sending CLUSTER MEET messages to join the cluster\n"); + clusterManagerLogInfo(">>> Sending CLUSTER MEET messages to join " + "the cluster\n"); clusterManagerNode *first = NULL; listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { @@ -3156,7 +3216,7 @@ static int clusterManagerCommandCall(int argc, char **argv) { argc--; argv++; size_t *argvlen = zmalloc(argc*sizeof(size_t)); - printf(">>> Calling"); + clusterManagerLogInfo(">>> Calling"); for (i = 0; i < argc; i++) { argvlen[i] = strlen(argv[i]); printf(" %s", argv[i]); From a0b8992ea351d1a9d9f5ccf945f6d80a7818e036 Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 20 Feb 2018 12:01:13 +0100 Subject: [PATCH 43/66] - Fixed bug in clusterManagerGetAntiAffinityScore - Code improvements --- src/redis-cli.c | 57 ++++++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 09ad54979..6a5279d2e 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -505,7 +505,6 @@ static int dictSdsKeyCompare(void *privdata, const void *key1, static void dictSdsDestructor(void *privdata, void *val) { DICT_NOTUSED(privdata); - sdsfree(val); } @@ -2008,11 +2007,13 @@ result: static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, int ip_len, clusterManagerNode 
***offending, int *offending_len) { - assert(offending != NULL); int score = 0, i, j; int node_len = cluster_manager.nodes->len; - *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); - clusterManagerNode **offending_p = *offending; + clusterManagerNode **offending_p = NULL; + if (offending != NULL) { + *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); + offending_p = *offending; + } for (i = 0; i < ip_len; i++) { clusterManagerNodeArray *node_array = &(ipnodes[i]); dict *related = dictCreate(&clusterManagerDictType, NULL); @@ -2021,23 +2022,21 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, clusterManagerNode *node = node_array->nodes[j]; if (node == NULL) continue; if (!ip) ip = node->ip; - sds types; - if (!node->replicate) { - assert(node->name != NULL); - dictEntry *entry = dictFind(related, node->name); - if (entry) types = (sds) dictGetVal(entry); - else types = sdsempty(); - types = sdscatprintf(types, "m%s", types); - dictReplace(related, node->name, types); - } else { - dictEntry *entry = dictFind(related, node->replicate); - if (entry) types = (sds) dictGetVal(entry); - else { - types = sdsempty(); - dictAdd(related, node->replicate, types); - } - sdscat(types, "s"); + sds types, otypes; + // We always use the Master ID as key + sds key = (!node->replicate ? node->name : node->replicate); + assert(key != NULL); + dictEntry *entry = dictFind(related, key); + if (entry) otypes = (sds) dictGetVal(entry); + else { + otypes = sdsempty(); + dictAdd(related, key, otypes); } + // Master type 'm' is always set as the first character of the + // types string. 
+ if (!node->replicate) types = sdscatprintf(otypes, "m%s", otypes); + else types = sdscat(otypes, "s"); + if (types != otypes) dictReplace(related, key, types); } dictIterator *iter = dictGetIterator(related); dictEntry *entry; @@ -2048,6 +2047,7 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, if (typeslen < 2) continue; if (types[0] == 'm') score += (10000 * (typeslen - 1)); else score += (1 * typeslen); + if (offending == NULL) continue; listIter li; listNode *ln; listRewind(cluster_manager.nodes, &li); @@ -2056,11 +2056,12 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, if (n->replicate == NULL) continue; if (!strcmp(n->replicate, name) && !strcmp(n->ip, ip)) { *(offending_p++) = n; + if (offending_len != NULL) (*offending_len)++; break; } } } - if (offending_len != NULL) *offending_len = offending_p - *offending; + //if (offending_len != NULL) *offending_len = offending_p - *offending; dictReleaseIterator(iter); dictRelease(related); } @@ -2070,8 +2071,8 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, int ip_len) { - clusterManagerNode **offenders = NULL, **aux; - int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); + clusterManagerNode **offenders = NULL; + int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, NULL, NULL); if (score == 0) goto cleanup; clusterManagerLogInfo(">>> Trying to optimize slaves allocation " "for anti-affinity\n"); @@ -2088,7 +2089,8 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, &offending_len); if (score == 0) break; int rand_idx = rand() % offending_len; - clusterManagerNode *first = offenders[rand_idx], *second; + clusterManagerNode *first = offenders[rand_idx], + *second = NULL; clusterManagerNode **other_replicas = zcalloc((node_len - 1) * sizeof(*other_replicas)); int 
other_replicas_count = 0; @@ -2110,9 +2112,8 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, *second_master = second->replicate; first->replicate = second_master, first->dirty = 1; second->replicate = first_master, second->dirty = 1; - zfree(aux), aux = NULL; int new_score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, - &aux, NULL); + NULL, NULL); if (new_score > score) { first->replicate = first_master; second->replicate = second_master; @@ -2120,8 +2121,7 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, zfree(other_replicas); maxiter--; } - zfree(aux), aux = NULL; - score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); + score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, NULL, NULL); char *msg; int perfect = (score == 0); int log_level = (perfect ? CLUSTER_MANAGER_LOG_LVL_SUCCESS : @@ -2134,7 +2134,6 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, clusterManagerLog(log_level, "%s\n", msg); cleanup: zfree(offenders); - zfree(aux); } static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { From efe06fcecd32961a4e9095b1876e9048402d9840 Mon Sep 17 00:00:00 2001 From: artix Date: Thu, 22 Feb 2018 18:32:39 +0100 Subject: [PATCH 44/66] Cluster Manager: - Almost all Cluster Manager related code moved to the same section. - Many macros converted to functions - Added various comments - Little code restyling --- src/redis-cli.c | 460 ++++++++++++++++++++++++++++-------------------- 1 file changed, 271 insertions(+), 189 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 6a5279d2e..66fc4d183 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -75,54 +75,8 @@ (n->context = redisConnect(n->ip, n->port)); #define CLUSTER_MANAGER_COMMAND(n,...) 
\ (reconnectingRedisCommand(n->context, __VA_ARGS__)) -#define CLUSTER_MANAGER_NODE_INFO(n) (CLUSTER_MANAGER_COMMAND(n, "INFO")) -#define CLUSTER_MANAGER_ERROR(err) do { \ - if (cluster_manager.errors == NULL) \ - cluster_manager.errors = listCreate(); \ - listAddNodeTail(cluster_manager.errors, err); \ - clusterManagerLogErr("%s\n", (char *) err); \ -} while(0) - -#define CLUSTER_MANAGER_RESET_SLOTS(n) do { \ - memset(n->slots, 0, sizeof(n->slots)); \ - n->slots_count = 0; \ -} while(0) - -#define CLUSTER_MANAGER_NODEARRAY_INIT(array, alloc_len) do { \ - array->nodes = zcalloc(alloc_len * sizeof(clusterManagerNode*));\ - array->alloc = array->nodes; \ - array->len = alloc_len; \ - array->count = 0; \ -} while(0) - -#define CLUSTER_MANAGER_NODEARRAY_RESET(array) do { \ - if (array->nodes > array->alloc) { \ - array->len = array->nodes - array->alloc; \ - array->nodes = array->alloc; \ - array->count = 0; \ - int i = 0; \ - for(; i < array->len; i++) { \ - if (array->nodes[i] != NULL) array->count++;\ - } \ - } \ -} while(0) - -#define CLUSTER_MANAGER_NODEARRAY_FREE(array) zfree(array->alloc) - -#define CLUSTER_MANAGER_NODEARRAY_SHIFT(array, nodeptr) do {\ - assert(array->nodes < (array->nodes + array->len)); \ - if (*array->nodes != NULL) array->count--; \ - nodeptr = *array->nodes; \ - array->nodes++; \ - array->len--; \ -} while(0) - -#define CLUSTER_MANAGER_NODEARRAY_ADD(array, nodeptr) do { \ - assert(array->nodes < (array->nodes + array->len)); \ - assert(nodeptr != NULL); \ - array->nodes[array->count++] = nodeptr; \ -} while(0) +#define CLUSTER_MANAGER_NODE_ARRAY_FREE(array) zfree(array->alloc) #define CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err) \ clusterManagerLogErr("Node %s:%d replied with error:\n%s\n", \ @@ -190,6 +144,7 @@ typedef struct clusterManagerCommand { int flags; int replicas; } clusterManagerCommand; +static void createClusterManagerCommand(char *cmdname, int argc, char **argv); static redisContext *context; @@ -237,88 +192,6 @@ static struct 
config { clusterManagerCommand cluster_manager_command; } config; -/* Cluster Manager */ - -static struct clusterManager { - list *nodes; - list *errors; -} cluster_manager; - -typedef struct clusterManagerNode { - redisContext *context; - sds name; - char *ip; - int port; - uint64_t current_epoch; - time_t ping_sent; - time_t ping_recv; - int flags; - sds replicate; - list replicas; - int dirty; - uint8_t slots[CLUSTER_MANAGER_SLOTS]; - int slots_count; - int replicas_count; - list *friends; - sds *migrating; - sds *importing; - int migrating_count; - int importing_count; -} clusterManagerNode; - -typedef struct clusterManagerNodeArray { - clusterManagerNode **nodes; - clusterManagerNode **alloc; - int len; - int count; -} clusterManagerNodeArray; - -static dictType clusterManagerDictType = { - dictSdsHash, /* hash function */ - NULL, /* key dup */ - NULL, /* val dup */ - dictSdsKeyCompare, /* key compare */ - NULL, /* key destructor */ - dictSdsDestructor /* val destructor */ -}; - -static clusterManagerNode *clusterManagerNewNode(char *ip, int port); -static clusterManagerNode *clusterManagerNodeByName(const char *name); -static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); -static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, - char **err); -static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts); -static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err); -static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, - int ip_len, clusterManagerNode ***offending, int *offending_len); -static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, - int ip_len); -static sds clusterManagerNodeInfo(clusterManagerNode *node); -static void clusterManagerShowNodes(void); -static void clusterManagerShowInfo(void); -static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); -static void clusterManagerWaitForClusterJoin(void); 
-static void clusterManagerCheckCluster(int quiet); -static void clusterManagerLog(int level, const char* fmt, ...); - -typedef int clusterManagerCommandProc(int argc, char **argv); -typedef struct clusterManagerCommandDef { - char *name; - clusterManagerCommandProc *proc; - int arity; - char *args; - char *options; -} clusterManagerCommandDef; -static int clusterManagerIsConfigConsistent(void); - -/* Cluster Manager commands. */ - -static int clusterManagerCommandCreate(int argc, char **argv); -static int clusterManagerCommandInfo(int argc, char **argv); -static int clusterManagerCommandCheck(int argc, char **argv); -static int clusterManagerCommandCall(int argc, char **argv); -static int clusterManagerCommandHelp(int argc, char **argv); - /* User preferences. */ static struct pref { int hints; @@ -1291,14 +1164,6 @@ static redisReply *reconnectingRedisCommand(redisContext *c, const char *fmt, .. * User interface *--------------------------------------------------------------------------- */ -static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { - clusterManagerCommand *cmd = &config.cluster_manager_command; - cmd->name = cmdname; - cmd->argc = argc; - cmd->argv = argc ? argv : NULL; - if (isColorTerm()) cmd->flags |= CLUSTER_MANAGER_CMD_FLAG_COLOR; -} - static int parseOptions(int argc, char **argv) { int i; @@ -1828,6 +1693,100 @@ static int evalMode(int argc, char **argv) { * Cluster Manager mode *--------------------------------------------------------------------------- */ +/* The Cluster Manager global structure */ +static struct clusterManager { + list *nodes; /* List of nodes in the configuration.
*/ + list *errors; +} cluster_manager; + +typedef struct clusterManagerNode { + redisContext *context; + sds name; + char *ip; + int port; + uint64_t current_epoch; + time_t ping_sent; + time_t ping_recv; + int flags; + sds replicate; /* Master ID if node is a slave */ + list replicas; + int dirty; /* Node has changes that can be flushed */ + uint8_t slots[CLUSTER_MANAGER_SLOTS]; + int slots_count; + int replicas_count; + list *friends; + sds *migrating; + sds *importing; + int migrating_count; + int importing_count; +} clusterManagerNode; + +/* Data structure used to represent a sequence of nodes. */ +typedef struct clusterManagerNodeArray { + clusterManagerNode **nodes; /* Actual nodes array */ + clusterManagerNode **alloc; /* Pointer to the allocated memory */ + int len; /* Actual length of the array */ + int count; /* Non-NULL nodes count */ +} clusterManagerNodeArray; + +static dictType clusterManagerDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + NULL, /* key destructor */ + dictSdsDestructor /* val destructor */ +}; + +typedef int clusterManagerCommandProc(int argc, char **argv); + +/* Cluster Manager helper functions */ + +static clusterManagerNode *clusterManagerNewNode(char *ip, int port); +static clusterManagerNode *clusterManagerNodeByName(const char *name); +static void clusterManagerNodeResetSlots(clusterManagerNode *node); +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); +static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, + char **err); +static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts); +static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err); +static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, + int ip_count, clusterManagerNode ***offending, int *offending_len); +static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray 
*ipnodes, + int ip_count); +static sds clusterManagerNodeInfo(clusterManagerNode *node); +static void clusterManagerShowNodes(void); +static void clusterManagerShowInfo(void); +static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); +static void clusterManagerWaitForClusterJoin(void); +static void clusterManagerCheckCluster(int quiet); +static void clusterManagerLog(int level, const char* fmt, ...); +static int clusterManagerIsConfigConsistent(void); +static void clusterManagerOnError(sds err); +static void clusterManagerNodeArrayInit(clusterManagerNodeArray *array, + int len); +static void clusterManagerNodeArrayReset(clusterManagerNodeArray *array); +static void clusterManagerNodeArrayShift(clusterManagerNodeArray *array, + clusterManagerNode **nodeptr); +static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, + clusterManagerNode *node); + +/* Cluster Manager commands. */ + +static int clusterManagerCommandCreate(int argc, char **argv); +static int clusterManagerCommandInfo(int argc, char **argv); +static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandCall(int argc, char **argv); +static int clusterManagerCommandHelp(int argc, char **argv); + +typedef struct clusterManagerCommandDef { + char *name; + clusterManagerCommandProc *proc; + int arity; + char *args; + char *options; +} clusterManagerCommandDef; + clusterManagerCommandDef clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", "cluster-replicas"}, @@ -1838,6 +1797,16 @@ clusterManagerCommandDef clusterManagerCommands[] = { {"help", clusterManagerCommandHelp, 0, NULL, NULL} }; + +static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { + clusterManagerCommand *cmd = &config.cluster_manager_command; + cmd->name = cmdname; + cmd->argc = argc; + cmd->argv = argc ? 
argv : NULL; + if (isColorTerm()) cmd->flags |= CLUSTER_MANAGER_CMD_FLAG_COLOR; +} + + static clusterManagerCommandProc *validateClusterManagerCommand(void) { int i, commands_count = sizeof(clusterManagerCommands) / sizeof(clusterManagerCommandDef); @@ -1930,7 +1899,7 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { node->migrating_count = 0; node->importing_count = 0; node->replicas_count = 0; - CLUSTER_MANAGER_RESET_SLOTS(node); + clusterManagerNodeResetSlots(node); return node; } @@ -1954,41 +1923,49 @@ static clusterManagerNode *clusterManagerNodeByName(const char *name) { return found; } -static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { - redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); - int is_err = 0; - *err = NULL; - if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { +static void clusterManagerNodeResetSlots(clusterManagerNode *node) { + memset(node->slots, 0, sizeof(node->slots)); + node->slots_count = 0; +} + +static redisReply *clusterManagerGetNodeRedisInfo(clusterManagerNode *node, + char **err) +{ + redisReply *info = CLUSTER_MANAGER_COMMAND(node, "INFO"); + if (err != NULL) *err = NULL; + if (info == NULL) return NULL; + if (info->type == REDIS_REPLY_ERROR) { + if (err != NULL) { *err = zmalloc((info->len + 1) * sizeof(char)); strcpy(*err, info->str); } freeReplyObject(info); - return 0; + return NULL; } + return info; +} + +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { + redisReply *info = clusterManagerGetNodeRedisInfo(node, err); + if (info == NULL) return 0; int is_cluster = (int) getLongInfoField(info->str, "cluster_enabled"); freeReplyObject(info); return is_cluster; } +/* Checks whether the node is empty. 
Node is considered not-empty if it has + * some key or if it already knows other nodes */ static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) { - redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); + redisReply *info = clusterManagerGetNodeRedisInfo(node, err); int is_err = 0, is_empty = 1; - *err = NULL; - if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { - *err = zmalloc((info->len + 1) * sizeof(char)); - strcpy(*err, info->str); - } - is_empty = 0; - goto result; - } + if (info == NULL) return 0; if (strstr(info->str, "db0:") != NULL) { is_empty = 0; goto result; } freeReplyObject(info); info = CLUSTER_MANAGER_COMMAND(node, "CLUSTER INFO"); + if (err != NULL) *err = NULL; if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { if (is_err && err != NULL) { *err = zmalloc((info->len + 1) * sizeof(char)); @@ -2004,8 +1981,37 @@ result: return is_empty; } +/* Return the anti-affinity score, which is a measure of the amount of + * violations of anti-affinity in the current cluster layout, that is, how + * badly the masters and slaves are distributed in the different IP + * addresses so that slaves of the same master are not in the master + * host and are also in different hosts. + * + * The score is calculated as follows: + * + * SAME_AS_MASTER = 10000 * each slave in the same IP of its master. + * SAME_AS_SLAVE = 1 * each slave having the same IP as another slave + of the same master. + * FINAL_SCORE = SAME_AS_MASTER + SAME_AS_SLAVE + * + * So a greater score means a worse anti-affinity level, while zero + * means perfect anti-affinity. + * + * The anti affinity optimizer will try to get a score as low as + * possible. Since we do not want to sacrifice the fact that slaves should + * not be in the same host as the master, we assign 10000 times the score + * to this violation, so that we'll optimize for the second factor only + * if it does not impact the first one.
+ * + * The ipnodes argument is an array of clusterManagerNodeArray, one for + * each IP, while ip_count is the total number of IPs in the configuration. + * + * The function returns the above score, and the list of + * offending slaves can be stored into the 'offending' argument, + * so that the optimizer can try changing the configuration of the + * slaves violating the anti-affinity goals. */ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, - int ip_len, clusterManagerNode ***offending, int *offending_len) + int ip_count, clusterManagerNode ***offending, int *offending_len) { int score = 0, i, j; int node_len = cluster_manager.nodes->len; @@ -2014,7 +2020,10 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); offending_p = *offending; } - for (i = 0; i < ip_len; i++) { + /* For each set of nodes in the same host, split by + * related nodes (masters and slaves which are involved in + * replication of each other) */ + for (i = 0; i < ip_count; i++) { clusterManagerNodeArray *node_array = &(ipnodes[i]); dict *related = dictCreate(&clusterManagerDictType, NULL); char *ip = NULL; @@ -2038,6 +2047,8 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, else types = sdscat(otypes, "s"); if (types != otypes) dictReplace(related, key, types); } + /* Now it's trivial to check, for each related group having the + * same host, what is their local score. */ dictIterator *iter = dictGetIterator(related); dictEntry *entry; while ((entry = dictNext(iter)) != NULL) { @@ -2048,6 +2059,7 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, if (types[0] == 'm') score += (10000 * (typeslen - 1)); else score += (1 * typeslen); if (offending == NULL) continue; + /* Populate the list of offending nodes. 
*/ listIter li; listNode *ln; listRewind(cluster_manager.nodes, &li); @@ -2069,15 +2081,16 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, } static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, - int ip_len) + int ip_count) { clusterManagerNode **offenders = NULL; - int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, NULL, NULL); + int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_count, + NULL, NULL); if (score == 0) goto cleanup; clusterManagerLogInfo(">>> Trying to optimize slaves allocation " "for anti-affinity\n"); int node_len = cluster_manager.nodes->len; - int maxiter = 500 * node_len; + int maxiter = 500 * node_len; // Effort is proportional to cluster size... srand(time(NULL)); while (maxiter > 0) { int offending_len = 0; @@ -2085,9 +2098,14 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, zfree(offenders); offenders = NULL; } - score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &offenders, + score = clusterManagerGetAntiAffinityScore(ipnodes, + ip_count, + &offenders, &offending_len); - if (score == 0) break; + if (score == 0) break; // Optimal anti affinity reached + /* We'll try to randomly swap a slave's assigned master causing + * an affinity problem with another random slave, to see if we + * can improve the affinity. */ int rand_idx = rand() % offending_len; clusterManagerNode *first = offenders[rand_idx], *second = NULL; @@ -2112,8 +2130,12 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, *second_master = second->replicate; first->replicate = second_master, first->dirty = 1; second->replicate = first_master, second->dirty = 1; - int new_score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, + int new_score = clusterManagerGetAntiAffinityScore(ipnodes, + ip_count, NULL, NULL); + /* If the change actually makes things worse, revert.
Otherwise + * leave as it is because the best solution may need a few + * combined swaps. */ if (new_score > score) { first->replicate = first_master; second->replicate = second_master; @@ -2121,7 +2143,7 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, zfree(other_replicas); maxiter--; } - score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, NULL, NULL); + score = clusterManagerGetAntiAffinityScore(ipnodes, ip_count, NULL, NULL); char *msg; int perfect = (score == 0); int log_level = (perfect ? CLUSTER_MANAGER_LOG_LVL_SUCCESS : @@ -2136,6 +2158,7 @@ cleanup: zfree(offenders); } +/* Return a representable string of the node's slots */ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { sds slots = sdsempty(); int first_range_idx = -1, last_slot_idx = -1, i; @@ -2303,11 +2326,13 @@ cleanup: return success; } +/* Flush the dirty node configuration by calling replicate for slaves or + * adding the slots for masters. */ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { if (!node->dirty) return 0; redisReply *reply = NULL; int is_err = 0, success = 1; - *err = NULL; + if (err != NULL) *err = NULL; if (node->replicate != NULL) { reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER REPLICATE %s", node->replicate); @@ -2317,14 +2342,15 @@ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { strcpy(*err, reply->str); } success = 0; + /* If the cluster has not already joined it is possible that + * the slave does not know the master node yet. So on errors + * we return ASAP leaving the dirty flag set, to flush the + * config later.
*/ goto cleanup; } } else { int added = clusterManagerAddSlots(node, err); - if (!added || *err != NULL) { - success = 0; - goto cleanup; - } + if (!added || *err != NULL) success = 0; } node->dirty = 0; cleanup: @@ -2342,6 +2368,11 @@ static void clusterManagerWaitForClusterJoin(void) { printf("\n"); } +/* Load node's cluster configuration by calling "CLUSTER NODES" command. + * Node's configuration (name, replicate, slots, ...) is then updated. + * If CLUSTER_MANAGER_OPT_GETFRIENDS flag is set into 'opts' argument, + * and node already knows other nodes, the node's friends list is populated + * with the other nodes info. */ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err) { @@ -2391,7 +2422,7 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (myself) { node->flags |= CLUSTER_MANAGER_FLAG_MYSELF; currentNode = node; - CLUSTER_MANAGER_RESET_SLOTS(node); + clusterManagerNodeResetSlots(node); if (i == 8) { int remaining = strlen(line); //TODO: just while(remaining) && assign p inside the block @@ -2501,7 +2532,6 @@ cleanup: * point. All nodes will be loaded inside the cluster_manager.nodes list. * Warning: if something goes wrong, it will free the starting node before * returning 0. */ - static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { if (node->context == NULL) CLUSTER_MANAGER_NODE_CONNECT(node); @@ -2681,7 +2711,6 @@ static int clusterManagerIsConfigConsistent(void) { if (cluster_manager.nodes == NULL) return 0; int consistent = (listLength(cluster_manager.nodes) <= 1); // If the Cluster has only one node, it's always consistent - // Does it make sense? 
if (consistent) return 1; sds first_cfg = NULL; listIter li; @@ -2705,6 +2734,13 @@ static int clusterManagerIsConfigConsistent(void) { return consistent; } +static void clusterManagerOnError(sds err) { + if (cluster_manager.errors == NULL) + cluster_manager.errors = listCreate(); + listAddNodeTail(cluster_manager.errors, err); + clusterManagerLogErr("%s\n", (char *) err); +} + static int clusterManagerGetCoveredSlots(char *all_slots) { if (cluster_manager.nodes == NULL) return 0; listIter li; @@ -2732,7 +2768,7 @@ static void clusterManagerCheckCluster(int quiet) { if (!quiet) clusterManagerShowNodes(); if (!clusterManagerIsConfigConsistent()) { sds err = sdsnew("[ERR] Nodes don't agree about configuration!"); - CLUSTER_MANAGER_ERROR(err); + clusterManagerOnError(err); } else { clusterManagerLogOk("[OK] All nodes agree about slots " "configuration.\n"); @@ -2761,7 +2797,7 @@ static void clusterManagerCheckCluster(int quiet) { errstr = sdscatfmt(errstr, fmt, slot); } errstr = sdscat(errstr, "."); - CLUSTER_MANAGER_ERROR(errstr); + clusterManagerOnError(errstr); } if (n->importing != NULL) { if (open_slots == NULL) @@ -2779,7 +2815,7 @@ static void clusterManagerCheckCluster(int quiet) { errstr = sdscatfmt(errstr, fmt, slot); } errstr = sdscat(errstr, "."); - CLUSTER_MANAGER_ERROR(errstr); + clusterManagerOnError(errstr); } } if (open_slots != NULL) { @@ -2808,7 +2844,7 @@ static void clusterManagerCheckCluster(int quiet) { err = sdscatprintf(err, "[ERR] Not all %d slots are " "covered by nodes.\n", CLUSTER_MANAGER_SLOTS); - CLUSTER_MANAGER_ERROR(err); + clusterManagerOnError(err); } } @@ -2832,6 +2868,53 @@ static void clusterManagerLog(int level, const char* fmt, ...) 
{ if (use_colors) printf("\033[" LOG_COLOR_RESET); } +static void clusterManagerNodeArrayInit(clusterManagerNodeArray *array, + int alloc_len) +{ + array->nodes = zcalloc(alloc_len * sizeof(clusterManagerNode*)); + array->alloc = array->nodes; + array->len = alloc_len; + array->count = 0; +} + +/* Reset array->nodes to the original array allocation and re-count non-NULL + * nodes. */ +static void clusterManagerNodeArrayReset(clusterManagerNodeArray *array) { + if (array->nodes > array->alloc) { + array->len = array->nodes - array->alloc; + array->nodes = array->alloc; + array->count = 0; + int i = 0; + for(; i < array->len; i++) { + if (array->nodes[i] != NULL) array->count++; + } + } +} + +/* Shift array->nodes and store the shifted node into 'nodeptr'. */ +static void clusterManagerNodeArrayShift(clusterManagerNodeArray *array, + clusterManagerNode **nodeptr) +{ + assert(array->nodes < (array->nodes + array->len)); + /* If the first node to be shifted is not NULL, decrement count. */ + if (*array->nodes != NULL) array->count--; + /* Store the first node to be shifted into 'nodeptr'. */ + *nodeptr = *array->nodes; + /* Shift the nodes array and decrement length. 
*/ + array->nodes++; + array->len--; +} + +static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, + clusterManagerNode *node) +{ + assert(array->nodes < (array->nodes + array->len)); + assert(node != NULL); + assert(array->count < array->len); + array->nodes[array->count++] = node; +} + +/* Execute redis-cli in Cluster Manager mode */ static void clusterManagerMode(clusterManagerCommandProc *proc) { int argc = config.cluster_manager_command.argc; char **argv = config.cluster_manager_command.argv; @@ -2919,7 +3002,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { } clusterManagerLogInfo(">>> Performing hash slots allocation " "on %d nodes...\n", node_len); - int interleaved_len = 0, ips_len = 0; + int interleaved_len = 0, ip_count = 0; clusterManagerNode **interleaved = zcalloc(node_len*sizeof(**interleaved)); char **ips = zcalloc(node_len * sizeof(char*)); clusterManagerNodeArray *ip_nodes = zcalloc(node_len * sizeof(*ip_nodes)); @@ -2929,7 +3012,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; int found = 0; - for (i = 0; i < ips_len; i++) { + for (i = 0; i < ip_count; i++) { char *ip = ips[i]; if (!strcmp(ip, n->ip)) { found = 1; @@ -2937,19 +3020,19 @@ static int clusterManagerCommandCreate(int argc, char **argv) { } } if (!found) { - ips[ips_len++] = n->ip; + ips[ip_count++] = n->ip; } clusterManagerNodeArray *node_array = &(ip_nodes[i]); if (node_array->nodes == NULL) - CLUSTER_MANAGER_NODEARRAY_INIT(node_array, node_len); - CLUSTER_MANAGER_NODEARRAY_ADD(node_array, n); + clusterManagerNodeArrayInit(node_array, node_len); + clusterManagerNodeArrayAdd(node_array, n); } while (interleaved_len < node_len) { - for (i = 0; i < ips_len; i++) { + for (i = 0; i < ip_count; i++) { clusterManagerNodeArray *node_array = &(ip_nodes[i]); if (node_array->count > 0) { - clusterManagerNode *n; - CLUSTER_MANAGER_NODEARRAY_SHIFT(node_array, n); + 
clusterManagerNode *n = NULL; + clusterManagerNodeArrayShift(node_array, &n); interleaved[interleaved_len++] = n; } } @@ -3019,11 +3102,11 @@ assign_replicas: printf("Adding extra replicas...\n"); goto assign_replicas; } - for (i = 0; i < ips_len; i++) { + for (i = 0; i < ip_count; i++) { clusterManagerNodeArray *node_array = ip_nodes + i; - CLUSTER_MANAGER_NODEARRAY_RESET(node_array); + clusterManagerNodeArrayReset(node_array); } - clusterManagerOptimizeAntiAffinity(ip_nodes, ips_len); + clusterManagerOptimizeAntiAffinity(ip_nodes, ip_count); clusterManagerShowNodes(); printf("Can I set the above configuration? %s", "(type 'yes' to accept): "); fflush(stdout); @@ -3031,7 +3114,6 @@ assign_replicas: int nread = read(fileno(stdin),buf,4); buf[3] = '\0'; if (nread != 0 && !strcmp("yes", buf)) { - printf("\nFlushing configuration!\n"); listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; @@ -3128,7 +3210,7 @@ cleanup: zfree(ips); for (i = 0; i < node_len; i++) { clusterManagerNodeArray *node_array = ip_nodes + i; - CLUSTER_MANAGER_NODEARRAY_FREE(node_array); + CLUSTER_MANAGER_NODE_ARRAY_FREE(node_array); } zfree(ip_nodes); return success; From 50d95cc5c8b7e0da8b5ded47215b65e693f58fd1 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 28 Feb 2018 10:44:11 +0100 Subject: [PATCH 45/66] Cluster Manager: reshard command, fixed slots parsing bug and other minor bugs. 
--- src/redis-cli.c | 655 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 593 insertions(+), 62 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 66fc4d183..fcf48a473 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -69,6 +69,13 @@ #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" #define CLUSTER_MANAGER_SLOTS 16384 +#define CLUSTER_MANAGER_MIGRATE_TIMEOUT 60000 +#define CLUSTER_MANAGER_MIGRATE_PIPELINE 10 + +#define CLUSTER_MANAGER_INVALID_HOST_ARG \ + "Invalid arguments: you need to pass either a valid " \ + "address (ie. 120.0.0.1:7000) or space separated IP " \ + "and port (ie. 120.0.0.1 7000)\n" #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) #define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) (nodes/(replicas + 1)) #define CLUSTER_MANAGER_NODE_CONNECT(n) \ @@ -103,9 +110,14 @@ #define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0 #define CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1 +#define CLUSTER_MANAGER_CMD_FLAG_YES 1 << 2 #define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 #define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 +#define CLUSTER_MANAGER_OPT_COLD 1 << 1 +#define CLUSTER_MANAGER_OPT_UPDATE 1 << 2 +#define CLUSTER_MANAGER_OPT_QUIET 1 << 6 +#define CLUSTER_MANAGER_OPT_VERBOSE 1 << 7 #define CLUSTER_MANAGER_LOG_LVL_INFO 1 #define CLUSTER_MANAGER_LOG_LVL_WARN 2 @@ -143,6 +155,11 @@ typedef struct clusterManagerCommand { char **argv; int flags; int replicas; + char *from; + char *to; + int slots; + int timeout; + int pipeline; } clusterManagerCommand; static void createClusterManagerCommand(char *cmdname, int argc, char **argv); @@ -1261,6 +1278,19 @@ static int parseOptions(int argc, char **argv) { usage(); } else if (!strcmp(argv[i],"--cluster-replicas") && !lastarg) { config.cluster_manager_command.replicas = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-from") && !lastarg) { + config.cluster_manager_command.from = argv[++i]; + } else if (!strcmp(argv[i],"--cluster-to") && !lastarg) { + 
config.cluster_manager_command.to = argv[++i]; + } else if (!strcmp(argv[i],"--cluster-slots") && !lastarg) { + config.cluster_manager_command.slots = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-timeout") && !lastarg) { + config.cluster_manager_command.timeout = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-pipeline") && !lastarg) { + config.cluster_manager_command.pipeline = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-yes")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_YES; } else if (!strcmp(argv[i],"-v") || !strcmp(argv[i], "--version")) { sds version = cliVersion(); printf("redis-cli %s\n", version); @@ -1358,7 +1388,7 @@ static void usage(void) { " --ldb-sync-mode Like --ldb but uses the synchronous Lua debugger, in\n" " this mode the server is blocked and script changes are\n" " are not rolled back from the server memory.\n" -" --cluster [args...]\n" +" --cluster [args...] [opts...]\n" " Cluster Manager command and arguments (see below).\n" " --help Output this help and exit.\n" " --version Output version and exit.\n" @@ -1729,6 +1759,12 @@ typedef struct clusterManagerNodeArray { int count; /* Non-NULL nodes count */ } clusterManagerNodeArray; +/* Used for reshard table. 
*/ +typedef struct clusterManagerReshardTableItem { + clusterManagerNode *source; + int slot; +} clusterManagerReshardTableItem; + static dictType clusterManagerDictType = { dictSdsHash, /* hash function */ NULL, /* key dup */ @@ -1754,7 +1790,7 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, int ip_count, clusterManagerNode ***offending, int *offending_len); static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, int ip_count); -static sds clusterManagerNodeInfo(clusterManagerNode *node); +static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent); static void clusterManagerShowNodes(void); static void clusterManagerShowInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); @@ -1776,6 +1812,7 @@ static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandReshard(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); @@ -1789,9 +1826,11 @@ typedef struct clusterManagerCommandDef { clusterManagerCommandDef clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", - "cluster-replicas"}, - {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, + "replicas "}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, + {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, + {"reshard", clusterManagerCommandReshard, -1, "host:port", + "from ,to ,slots ,yes,timeout ,pipeline "}, {"call", clusterManagerCommandCall, -2, "host:port command arg arg .. 
arg", NULL}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} @@ -1829,6 +1868,38 @@ static clusterManagerCommandProc *validateClusterManagerCommand(void) { return proc; } +/* Get host ip and port from command arguments. If only one argument has + * been provided it must be in the form of 'ip:port', otherwise + * the first argument must be the ip and the second one the port. + * If host and port can be detected, it returns 1 and it stores host and + * port into variables referenced by 'ip_ptr' and 'port_ptr' pointers, + * otherwise it returns 0. */ +static int getClusterHostFromCmdArgs(int argc, char **argv, + char **ip_ptr, int *port_ptr) { + int port = 0; + char *ip = NULL; + if (argc == 1) { + char *addr = argv[0]; + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c != NULL) { + *c = '\0'; + ip = addr; + port = atoi(++c); + } else return 0; + } else { + ip = argv[0]; + port = atoi(argv[1]); + } + if (!ip || !port) return 0; + else { + *ip_ptr = ip; + *port_ptr = port; + } + return 1; +} + static void freeClusterManagerNode(clusterManagerNode *node) { if (node->context != NULL) redisFree(node->context); if (node->friends != NULL) { @@ -2188,8 +2259,12 @@ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { return slots; } -static sds clusterManagerNodeInfo(clusterManagerNode *node) { +static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent) { sds info = sdsempty(); + sds spaces = sdsempty(); + int i; + for (i = 0; i < indent; i++) spaces = sdscat(spaces, " "); + if (indent) info = sdscat(info, spaces); int is_master = !(node->flags & CLUSTER_MANAGER_FLAG_SLAVE); char *role = (is_master ?
"M" : "S"); sds slots = NULL; @@ -2198,17 +2273,18 @@ static sds clusterManagerNodeInfo(clusterManagerNode *node) { else { slots = clusterManagerNodeSlotsString(node); info = sdscatfmt(info, "%s: %S %s:%u\n" - " slots:%S (%u slots) " + "%s slots:%S (%u slots) " "", //TODO: flags string - role, node->name, node->ip, node->port, + role, node->name, node->ip, node->port, spaces, slots, node->slots_count); sdsfree(slots); } if (node->replicate != NULL) - info = sdscatfmt(info, "\n replicates %S", node->replicate); + info = sdscatfmt(info, "\n%s replicates %S", spaces, node->replicate); else if (node->replicas_count) - info = sdscatfmt(info, "\n %U additional replica(s)", - node->replicas_count); + info = sdscatfmt(info, "\n%s %U additional replica(s)", + spaces, node->replicas_count); + sdsfree(spaces); return info; } @@ -2218,7 +2294,7 @@ static void clusterManagerShowNodes(void) { listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; - sds info = clusterManagerNodeInfo(node); + sds info = clusterManagerNodeInfo(node, 0); printf("%s\n", info); sdsfree(info); } @@ -2306,7 +2382,7 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) argvlen[i] = sdslen(argv[i]); redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen); if (redisGetReply(node->context, &_reply) != REDIS_OK) { - success = 1; + success = 0; goto cleanup; } reply = (redisReply*) _reply; @@ -2326,6 +2402,193 @@ cleanup: return success; } +/* Set slot status to "importing" or "migrating" */ +static int clusterManagerSetSlot(clusterManagerNode *node1, + clusterManagerNode *node2, + int slot, const char *mode, char **err) { + redisReply *reply = CLUSTER_MANAGER_COMMAND(node1, "CLUSTER " + "SETSLOT %d %s %s", + slot, mode, + (char *) node2->name); + if (err != NULL) *err = NULL; + if (!reply) return 0; + if (reply->type == REDIS_REPLY_ERROR) { + if (err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + 
strcpy(*err, reply->str); + } + return 0; + } + return 1; +} + +static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, + clusterManagerNode *target, + int slot, int timeout, + int pipeline, int verbose, + char **err) +{ + int success = 1; + while (1) { + redisReply *reply = NULL, *migrate_reply = NULL; + char **argv = NULL; + size_t *argv_len = NULL; + reply = CLUSTER_MANAGER_COMMAND(source, "CLUSTER " + "GETKEYSINSLOT %d %d", slot, + pipeline); + success = (reply != NULL); + if (!success) return 0; + if (reply->type == REDIS_REPLY_ERROR) { + success = 0; + if (err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + strcpy(*err, reply->str); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + } + goto next; + } + assert(reply->type == REDIS_REPLY_ARRAY); + size_t count = reply->elements; + if (count == 0) { + freeReplyObject(reply); + break; + } + char *dots = (verbose ? zmalloc((count+1) * sizeof(char)) : NULL); + /* Calling MIGRATE command. */ + size_t argc = count + 8; + argv = zcalloc(argc * sizeof(char *)); + argv_len = zcalloc(argc * sizeof(size_t)); + char portstr[255]; + char timeoutstr[255]; + snprintf(portstr, 10, "%d", target->port); + snprintf(timeoutstr, 10, "%d", timeout); + argv[0] = "MIGRATE"; + argv_len[0] = 7; + argv[1] = target->ip; + argv_len[1] = strlen(target->ip); + argv[2] = portstr; + argv_len[2] = strlen(portstr); + argv[3] = ""; + argv_len[3] = 0; + argv[4] = "0"; + argv_len[4] = 1; + argv[5] = timeoutstr; + argv_len[5] = strlen(timeoutstr); + argv[6] = "REPLACE"; + argv_len[6] = 7; + argv[7] = "KEYS"; + argv_len[7] = 4; + for (size_t i = 0; i < count; i++) { + redisReply *entry = reply->element[i]; + size_t idx = i + 8; + assert(entry->type == REDIS_REPLY_STRING); + argv[idx] = (char *) sdsnew(entry->str); + argv_len[idx] = entry->len; + if (verbose) dots[i] = '.'; + } + if (verbose) dots[count] = '\0'; + void *_reply = NULL; + redisAppendCommandArgv(source->context,argc, + (const char**)argv,argv_len); + 
success = (redisGetReply(source->context, &_reply) == REDIS_OK); + for (size_t i = 0; i < count; i++) sdsfree(argv[i + 8]); + if (!success) goto next; + migrate_reply = (redisReply *) _reply; + if (migrate_reply->type == REDIS_REPLY_ERROR) { + // TODO: Implement fix. + success = 0; + if (err != NULL) { + *err = zmalloc((migrate_reply->len + 1) * sizeof(char)); + strcpy(*err, migrate_reply->str); + printf("\n"); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + } + goto next; + } + if (verbose) { + printf("%s", dots); + fflush(stdout); + } +next: + if (reply != NULL) freeReplyObject(reply); + if (migrate_reply != NULL) freeReplyObject(migrate_reply); + zfree(argv); + zfree(argv_len); + if (!success) break; + } + return success; +} + +/* Move slots between source and target nodes using MIGRATE. + * + * Options: + * CLUSTER_MANAGER_OPT_VERBOSE -- Print a dot for every moved key. + * CLUSTER_MANAGER_OPT_COLD -- Move keys without opening slots / + * reconfiguring the nodes. + * CLUSTER_MANAGER_OPT_UPDATE -- Update node->slots for source/target nodes. + * CLUSTER_MANAGER_OPT_QUIET -- Don't print info messages. 
+*/ +static int clusterManagerMoveSlot(clusterManagerNode *source, + clusterManagerNode *target, + int slot, int opts, char**err) +{ + if (!(opts & CLUSTER_MANAGER_OPT_QUIET)) { + printf("Moving slot %d from %s:%d to %s:%d: ", slot, source->ip, + source->port, target->ip, target->port); + fflush(stdout); + } + if (err != NULL) *err = NULL; + int pipeline = config.cluster_manager_command.pipeline, + timeout = config.cluster_manager_command.timeout, + print_dots = (opts & CLUSTER_MANAGER_OPT_VERBOSE), + option_cold = (opts & CLUSTER_MANAGER_OPT_COLD), + success = 1; + if (!option_cold) { + success = clusterManagerSetSlot(target, source, slot, + "importing", err); + if (!success) return 0; + success = clusterManagerSetSlot(source, target, slot, + "migrating", err); + if (!success) return 0; + } + success = clusterManagerMigrateKeysInSlot(source, target, slot, timeout, + pipeline, print_dots, err); + if (!(opts & CLUSTER_MANAGER_OPT_QUIET)) printf("\n"); + if (!success) return 0; + /* Set the new node as the owner of the slot in all the known nodes. */ + if (!option_cold) { + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER " + "SETSLOT %d %s %s", + slot, "node", + target->name); + success = (r != NULL); + if (!success) return 0; + if (r->type == REDIS_REPLY_ERROR) { + success = 0; + if (err != NULL) { + *err = zmalloc((r->len + 1) * sizeof(char)); + strcpy(*err, r->str); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err); + } + } + freeReplyObject(r); + if (!success) return 0; + } + } + /* Update the node logical config */ + if (opts & CLUSTER_MANAGER_OPT_UPDATE) { + source->slots[slot] = 0; + target->slots[slot] = 1; + } + return 1; +} + /* Flush the dirty node configuration by calling replicate for slaves or * adding the slots for masters. 
*/ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { @@ -2425,20 +2688,24 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, clusterManagerNodeResetSlots(node); if (i == 8) { int remaining = strlen(line); - //TODO: just while(remaining) && assign p inside the block - while ((p = strchr(line, ' ')) != NULL || remaining) { + while (remaining > 0) { + p = strchr(line, ' '); if (p == NULL) p = line + remaining; remaining -= (p - line); char *slotsdef = line; *p = '\0'; - if (remaining) line = p + 1; - else line = p; + if (remaining) { + line = p + 1; + remaining--; + } else line = p; if (slotsdef[0] == '[') { slotsdef++; if ((p = strstr(slotsdef, "->-"))) { // Migrating *p = '\0'; p += 3; + char *closing_bracket = strchr(p, ']'); + if (closing_bracket) *closing_bracket = '\0'; sds slot = sdsnew(slotsdef); sds dst = sdsnew(p); node->migrating_count += 2; @@ -2451,6 +2718,8 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, } else if ((p = strstr(slotsdef, "-<-"))) {//Importing *p = '\0'; p += 3; + char *closing_bracket = strchr(p, ']'); + if (closing_bracket) *closing_bracket = '\0'; sds slot = sdsnew(slotsdef); sds src = sdsnew(p); node->importing_count += 2; @@ -2605,8 +2874,9 @@ invalid_friend: if (n->replicate != NULL) { clusterManagerNode *master = clusterManagerNodeByName(n->replicate); if (master == NULL) { - printf("*** WARNING: %s:%d claims to be slave of unknown " - "node ID %s.\n", n->ip, n->port, n->replicate); + clusterManagerLogWarn("*** WARNING: %s:%d claims to be " + "slave of unknown node ID %s.\n", + n->ip, n->port, n->replicate); } else master->replicas_count++; } } @@ -2619,6 +2889,12 @@ int clusterManagerSlotCompare(const void *slot1, const void *slot2) { return strcmp(*i1, *i2); } +int clusterManagerSlotCountCompareDesc(const void *n1, const void *n2) { + clusterManagerNode *node1 = *((clusterManagerNode **) n1); + clusterManagerNode *node2 = *((clusterManagerNode 
**) n2); + return node2->slots_count - node1->slots_count; +} + static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { sds signature = NULL; int node_count = 0, i = 0, name_len = 0; @@ -2651,16 +2927,18 @@ static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { if (remaining == 0) continue; char **slots = NULL; int c = 0; - //TODO: just while(remaining) && assign p inside the block - while ((p = strchr(line, ' ')) != NULL || remaining) { + while (remaining > 0) { + p = strchr(line, ' '); if (p == NULL) p = line + remaining; int size = (p - line); remaining -= size; tot_size += size; char *slotsdef = line; *p = '\0'; - if (remaining) line = p + 1; - else line = p; + if (remaining) { + line = p + 1; + remaining--; + } else line = p; if (slotsdef[0] != '[') { c++; slots = zrealloc(slots, (c * sizeof(char *))); @@ -2792,7 +3070,7 @@ static void clusterManagerCheckCluster(int quiet) { n->port); for (i = 0; i < n->migrating_count; i += 2) { sds slot = n->migrating[i]; - dictAdd(open_slots, slot, n->migrating[i + 1]); + dictAdd(open_slots, slot, sdsdup(n->migrating[i + 1])); char *fmt = (i > 0 ? ",%S" : "%S"); errstr = sdscatfmt(errstr, fmt, slot); } @@ -2810,7 +3088,7 @@ static void clusterManagerCheckCluster(int quiet) { n->port); for (i = 0; i < n->importing_count; i += 2) { sds slot = n->importing[i]; - dictAdd(open_slots, slot, n->importing[i + 1]); + dictAdd(open_slots, slot, sdsdup(n->importing[i + 1])); char *fmt = (i > 0 ? 
",%S" : "%S"); errstr = sdscatfmt(errstr, fmt, slot); } @@ -2848,6 +3126,76 @@ static void clusterManagerCheckCluster(int quiet) { } } +static clusterManagerNode *clusterNodeForResharding(char *id, + clusterManagerNode *target, + int *raise_err) +{ + clusterManagerNode *node = NULL; + const char *invalid_node_msg = "*** The specified node is not known or " + "not a master, please retry.\n"; + node = clusterManagerNodeByName(id); + *raise_err = 0; + if (!node || node->flags & CLUSTER_MANAGER_FLAG_SLAVE) { + clusterManagerLogErr(invalid_node_msg); + *raise_err = 1; + return NULL; + } else if (node != NULL && target != NULL) { + if (!strcmp(node->name, target->name)) { + clusterManagerLogErr( "*** It is not possible to use " + "the target node as " + "source node.\n"); + return NULL; + } + } + return node; +} + +static list *clusterManagerComputeReshardTable(list *sources, int numslots) { + list *moved = listCreate(); + int src_count = listLength(sources), i = 0, tot_slots = 0, j; + clusterManagerNode **sorted = zmalloc(src_count * sizeof(**sorted)); + listIter li; + listNode *ln; + listRewind(sources, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + tot_slots += node->slots_count; + sorted[i++] = node; + } + qsort(sorted, src_count, sizeof(clusterManagerNode *), + clusterManagerSlotCountCompareDesc); + for (i = 0; i < src_count; i++) { + clusterManagerNode *node = sorted[i]; + float n = ((float) numslots / tot_slots * node->slots_count); + if (i == 0) n = ceil(n); + else n = floor(n); + int max = (int) n, count = 0; + for (j = 0; j < CLUSTER_MANAGER_SLOTS; j++) { + int slot = node->slots[j]; + if (!slot) continue; + if (count >= max || (int)listLength(moved) >= numslots) break; + clusterManagerReshardTableItem *item = zmalloc(sizeof(item)); + item->source = node; + item->slot = j; + listAddNodeTail(moved, item); + count++; + } + } + zfree(sorted); + return moved; +} + +static void clusterManagerShowReshardTable(list *table) { + 
listIter li; + listNode *ln; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + clusterManagerNode *n = item->source; + printf(" Moving slot %d from %s\n", item->slot, (char *) n->name); + } +} + static void clusterManagerLog(int level, const char* fmt, ...) { int use_colors = (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COLOR); @@ -3219,59 +3567,218 @@ cleanup: static int clusterManagerCommandInfo(int argc, char **argv) { int port = 0; char *ip = NULL; - if (argc == 1) { - char *addr = argv[0]; - char *c = strrchr(addr, '@'); - if (c != NULL) *c = '\0'; - c = strrchr(addr, ':'); - if (c != NULL) { - *c = '\0'; - ip = addr; - port = atoi(++c); - } else goto invalid_args; - } else { - ip = argv[0]; - port = atoi(argv[1]); - } - if (!ip || !port) goto invalid_args; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; clusterManagerShowInfo(); return 1; invalid_args: - fprintf(stderr, "Invalid arguments: you need to pass either a valid " - "address (ie. 120.0.0.1:7000) or space separated IP " - "and port (ie. 
120.0.0.1 7000)\n"); + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); return 0; } static int clusterManagerCommandCheck(int argc, char **argv) { int port = 0; char *ip = NULL; - if (argc == 1) { - char *addr = argv[0]; - char *c = strrchr(addr, '@'); - if (c != NULL) *c = '\0'; - c = strrchr(addr, ':'); - if (c != NULL) { - *c = '\0'; - ip = addr; - port = atoi(++c); - } else goto invalid_args; - } else { - ip = argv[0]; - port = atoi(argv[1]); - } - if (!ip || !port) goto invalid_args; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; clusterManagerShowInfo(); clusterManagerCheckCluster(0); return 1; invalid_args: - fprintf(stderr, "Invalid arguments: you need to pass either a valid " - "address (ie. 120.0.0.1:7000) or space separated IP " - "and port (ie. 120.0.0.1 7000)\n"); + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + +static int clusterManagerCommandReshard(int argc, char **argv) { + int port = 0; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + clusterManagerCheckCluster(0); + if (cluster_manager.errors && listLength(cluster_manager.errors) > 0) { + fflush(stdout); + fprintf(stderr, + "*** Please fix your cluster problems before resharding\n"); + return 0; + } + int slots = config.cluster_manager_command.slots; + if (!slots) { + while (slots <= 0 || slots > CLUSTER_MANAGER_SLOTS) { + printf("How many slots do you want to move (from 1 to %d)? 
", + CLUSTER_MANAGER_SLOTS); + fflush(stdout); + char buf[6]; + int nread = read(fileno(stdin),buf,6); + if (!nread) continue; //TODO: nread < 0 + int last_idx = nread - 1; + if (buf[last_idx] != '\n') { + int ch; + while ((ch = getchar()) != '\n' && ch != EOF) {} + } + buf[last_idx] = '\0'; + slots = atoi(buf); + } + } + char buf[255]; + char *to = config.cluster_manager_command.to, + *from = config.cluster_manager_command.from; + while (to == NULL) { + printf("What is the receiving node ID? "); + fflush(stdout); + int nread = read(fileno(stdin),buf,255); + if (!nread) continue; //TODO: nread < 0 + int last_idx = nread - 1; + if (buf[last_idx] != '\n') { + int ch; + while ((ch = getchar()) != '\n' && ch != EOF) {} + } + buf[last_idx] = '\0'; + if (strlen(buf) > 0) to = buf; + } + int raise_err = 0; + clusterManagerNode *target = clusterNodeForResharding(to, NULL, &raise_err); + if (target == NULL) return 0; + list *sources = listCreate(); + list *table = NULL; + int all = 0, result = 1; + if (from == NULL) { + printf("Please enter all the source node IDs.\n"); + printf(" Type 'all' to use all the nodes as source nodes for " + "the hash slots.\n"); + printf(" Type 'done' once you entered all the source nodes IDs.\n"); + while (1) { + printf("Source node #%lu: ", listLength(sources) + 1); + fflush(stdout); + int nread = read(fileno(stdin),buf,255); + if (!nread) continue; //TODO: nread < 0 + int last_idx = nread - 1; + if (buf[last_idx] != '\n') { + int ch; + while ((ch = getchar()) != '\n' && ch != EOF) {} + } + buf[last_idx] = '\0'; + if (!strcmp(buf, "done")) break; + else if (!strcmp(buf, "all")) { + all = 1; + break; + } else { + clusterManagerNode *src = + clusterNodeForResharding(buf, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } + } + } + } else { + char *p; + while((p = strchr(from, ',')) != NULL) { + *p = '\0'; + if (!strcmp(from, "all")) { + all = 1; + break; + } else { + 
clusterManagerNode *src = + clusterNodeForResharding(from, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } + } + from = p + 1; + } + /* Check if there's still another source to process. */ + if (!all && strlen(from) > 0) { + clusterManagerNode *src = + clusterNodeForResharding(from, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } + } + } + listIter li; + listNode *ln; + if (all) { + listEmpty(sources); + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + if (!sdscmp(n->name, target->name)) continue; + listAddNodeTail(sources, n); + } + } + if (listLength(sources) == 0) { + fprintf(stderr, "*** No source nodes given, operation aborted.\n"); + result = 0; + goto cleanup; + } + printf("\nReady to move %d slots.\n", slots); + printf(" Source nodes:\n"); + listRewind(sources, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *src = ln->value; + sds info = clusterManagerNodeInfo(src, 4); + printf("%s\n", info); + sdsfree(info); + } + printf(" Destination node:\n"); + sds info = clusterManagerNodeInfo(target, 4); + printf("%s\n", info); + sdsfree(info); + table = clusterManagerComputeReshardTable(sources, slots); + printf(" Resharding plan:\n"); + clusterManagerShowReshardTable(table); + if (!(config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_YES)) + { + printf("Do you want to proceed with the proposed " + "reshard plan (yes/no)? 
"); + fflush(stdout); + char buf[4]; + int nread = read(fileno(stdin),buf,4); + buf[3] = '\0'; + if (nread <= 0 || strcmp("yes", buf) != 0) { + result = 0; + goto cleanup; + } + } + int opts = CLUSTER_MANAGER_OPT_VERBOSE; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + char *err = NULL; + result = clusterManagerMoveSlot(item->source, target, item->slot, + opts, &err); + if (!result) { + if (err != NULL) { + clusterManagerLogErr("\n%s\n", err); + zfree(err); + } + goto cleanup; + } + } +cleanup: + listRelease(sources); + if (table) { + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + zfree(item); + } + listRelease(table); + } + return result; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); return 0; } @@ -3332,13 +3839,32 @@ static int clusterManagerCommandHelp(int argc, char **argv) { sizeof(clusterManagerCommandDef); int i = 0, j; fprintf(stderr, "Cluster Manager Commands:\n"); + int padding = 15; for (; i < commands_count; i++) { clusterManagerCommandDef *def = &(clusterManagerCommands[i]); - int namelen = strlen(def->name), padlen = 15 - namelen; + int namelen = strlen(def->name), padlen = padding - namelen; fprintf(stderr, " %s", def->name); for (j = 0; j < padlen; j++) fprintf(stderr, " "); fprintf(stderr, "%s\n", (def->args ? 
def->args : "")); - //TODO: if (def->options) + if (def->options != NULL) { + int optslen = strlen(def->options); + char *p = def->options, *eos = p + optslen; + char *comma = NULL; + while ((comma = strchr(p, ',')) != NULL) { + int deflen = (int)(comma - p); + char buf[255]; + memcpy(buf, p, deflen); + buf[deflen] = '\0'; + for (j = 0; j < padding; j++) fprintf(stderr, " "); + fprintf(stderr, " --cluster-%s\n", buf); + p = comma + 1; + if (p >= eos) break; + } + if (p < eos) { + for (j = 0; j < padding; j++) fprintf(stderr, " "); + fprintf(stderr, " --cluster-%s\n", p); + } + } } return 0; } @@ -4641,6 +5167,11 @@ int main(int argc, char **argv) { config.cluster_manager_command.argv = NULL; config.cluster_manager_command.flags = 0; config.cluster_manager_command.replicas = 0; + config.cluster_manager_command.from = NULL; + config.cluster_manager_command.to = NULL; + config.cluster_manager_command.slots = 0; + config.cluster_manager_command.timeout = CLUSTER_MANAGER_MIGRATE_TIMEOUT; + config.cluster_manager_command.pipeline = CLUSTER_MANAGER_MIGRATE_PIPELINE; pref.hints = 1; spectrum_palette = spectrum_palette_color; From 6e734eeedd6e9ddb725fa14f6253899b49b66885 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 28 Feb 2018 11:49:10 +0100 Subject: [PATCH 46/66] Fixed memory write error in clusterManagerGetConfigSignature --- src/redis-cli.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index fcf48a473..baaa615c5 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2295,7 +2295,7 @@ static void clusterManagerShowNodes(void) { while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; sds info = clusterManagerNodeInfo(node, 0); - printf("%s\n", info); + printf("%s\n", (char *) info); sdsfree(info); } } @@ -2916,8 +2916,8 @@ static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { line = p + 1; if (i == 0) { nodename = token; - tot_size = p - token; - name_len = tot_size; + 
tot_size = (p - token); + name_len = tot_size++; // Make room for ':' in tot_size } else if (i == 8) break; i++; } @@ -2951,6 +2951,7 @@ static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { node_count++; node_configs = zrealloc(node_configs, (node_count * sizeof(char *))); + /* Make room for '|' separators. */ tot_size += (sizeof(char) * (c - 1)); char *cfg = zmalloc((sizeof(char) * tot_size) + 1); memcpy(cfg, nodename, name_len); @@ -3760,7 +3761,7 @@ static int clusterManagerCommandReshard(int argc, char **argv) { opts, &err); if (!result) { if (err != NULL) { - clusterManagerLogErr("\n%s\n", err); + //clusterManagerLogErr("\n%s\n", err); zfree(err); } goto cleanup; From c5eca339177ff206f67fb83bd21bfd2a33b0f6f7 Mon Sep 17 00:00:00 2001 From: Artix Date: Wed, 28 Feb 2018 15:21:08 +0100 Subject: [PATCH 47/66] Cluster Manager: fixed some memory error --- src/redis-cli.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index baaa615c5..317b1125e 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2412,14 +2412,19 @@ static int clusterManagerSetSlot(clusterManagerNode *node1, (char *) node2->name); if (err != NULL) *err = NULL; if (!reply) return 0; + int success = 1; if (reply->type == REDIS_REPLY_ERROR) { + success = 0; if (err != NULL) { *err = zmalloc((reply->len + 1) * sizeof(char)); strcpy(*err, reply->str); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node1, err); } - return 0; + goto cleanup; } - return 1; +cleanup: + freeReplyObject(reply); + return success; } static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, @@ -3175,7 +3180,7 @@ static list *clusterManagerComputeReshardTable(list *sources, int numslots) { int slot = node->slots[j]; if (!slot) continue; if (count >= max || (int)listLength(moved) >= numslots) break; - clusterManagerReshardTableItem *item = zmalloc(sizeof(item)); + clusterManagerReshardTableItem *item = zmalloc(sizeof(*item)); item->source = node; 
item->slot = j; listAddNodeTail(moved, item); From e730b6e3eef5c9d9057d9a2430e8cd8c0c90fdfa Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 2 Mar 2018 17:06:50 +0100 Subject: [PATCH 48/66] ClusterManager: fixed --cluster-from 'all' parsing --- src/redis-cli.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 317b1125e..8fa2d7254 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -3137,12 +3137,12 @@ static clusterManagerNode *clusterNodeForResharding(char *id, int *raise_err) { clusterManagerNode *node = NULL; - const char *invalid_node_msg = "*** The specified node is not known or " - "not a master, please retry.\n"; + const char *invalid_node_msg = "*** The specified node (%s) is not known " + "or not a master, please retry.\n"; node = clusterManagerNodeByName(id); *raise_err = 0; if (!node || node->flags & CLUSTER_MANAGER_FLAG_SLAVE) { - clusterManagerLogErr(invalid_node_msg); + clusterManagerLogErr(invalid_node_msg, id); *raise_err = 1; return NULL; } else if (node != NULL && target != NULL) { @@ -3700,12 +3700,15 @@ static int clusterManagerCommandReshard(int argc, char **argv) { } /* Check if there's still another source to process. 
*/ if (!all && strlen(from) > 0) { - clusterManagerNode *src = - clusterNodeForResharding(from, target, &raise_err); - if (src != NULL) listAddNodeTail(sources, src); - else if (raise_err) { - result = 0; - goto cleanup; + if (!strcmp(from, "all")) all = 1; + if (!all) { + clusterManagerNode *src = + clusterNodeForResharding(from, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } } } } From 98f67ebcfb98ff9258038317bce3a516da439287 Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 6 Mar 2018 13:06:04 +0200 Subject: [PATCH 49/66] clusterManagerAddSlots: changed the way ADDSLOTS command is built --- src/redis-cli.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 8fa2d7254..4f87f9067 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2354,32 +2354,28 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) redisReply *reply = NULL; void *_reply = NULL; int is_err = 0, success = 1; - int argc; - sds *argv = NULL; - size_t *argvlen = NULL; + /* First two args are used for the command itself. */ + int argc = node->slots_count + 2; + sds *argv = zmalloc(argc * sizeof(*argv)); + size_t *argvlen = zmalloc(argc * sizeof(*argvlen)); + argv[0] = "CLUSTER"; + argv[1] = "ADDSLOTS"; + argvlen[0] = 7; + argvlen[1] = 8; *err = NULL; - sds cmd = sdsnew("CLUSTER ADDSLOTS "); - int i, added = 0; + int i, argv_idx = 2; for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { - int last_slot = (i == (CLUSTER_MANAGER_SLOTS - 1)); + if (argv_idx >= argc) break; if (node->slots[i]) { - char *fmt = (!last_slot ? 
"%u " : "%u"); - cmd = sdscatfmt(cmd, fmt, i); - added++; + argv[argv_idx] = sdsfromlonglong((long long) i); + argvlen[argv_idx] = sdslen(argv[argv_idx]); + argv_idx++; } } - if (!added) { + if (!argv_idx) { success = 0; goto cleanup; } - argv = cliSplitArgs(cmd, &argc); - if (argc == 0 || argv == NULL) { - success = 0; - goto cleanup; - } - argvlen = zmalloc(argc*sizeof(size_t)); - for (i = 0; i < argc; i++) - argvlen[i] = sdslen(argv[i]); redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen); if (redisGetReply(node->context, &_reply) != REDIS_OK) { success = 0; @@ -2395,9 +2391,11 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) goto cleanup; } cleanup: - sdsfree(cmd); zfree(argvlen); - if (argv != NULL) sdsfreesplitres(argv,argc); + if (argv != NULL) { + for (i = 2; i < argc; i++) sdsfree(argv[i]); + zfree(argv); + } if (reply != NULL) freeReplyObject(reply); return success; } From 4907218145e854ed950dc2434f34b741022dcfc6 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 23 Mar 2018 16:46:43 +0100 Subject: [PATCH 50/66] Cluster Manager: rebalance command --- src/redis-cli.c | 297 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 286 insertions(+), 11 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 4f87f9067..49ba41257 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -71,6 +71,7 @@ #define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MIGRATE_TIMEOUT 60000 #define CLUSTER_MANAGER_MIGRATE_PIPELINE 10 +#define CLUSTER_MANAGER_REBALANCE_THRESHOLD 2 #define CLUSTER_MANAGER_INVALID_HOST_ARG \ "Invalid arguments: you need to pass either a valid " \ @@ -108,10 +109,13 @@ #define CLUSTER_MANAGER_FLAG_DISCONNECT 1 << 4 #define CLUSTER_MANAGER_FLAG_FAIL 1 << 5 -#define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0 -#define CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1 -#define CLUSTER_MANAGER_CMD_FLAG_YES 1 << 2 -#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 +#define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0 
+#define CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1 +#define CLUSTER_MANAGER_CMD_FLAG_YES 1 << 2 +#define CLUSTER_MANAGER_CMD_FLAG_AUTOWEIGHTS 1 << 3 +#define CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER 1 << 4 +#define CLUSTER_MANAGER_CMD_FLAG_SIMULATE 1 << 5 +#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 #define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 #define CLUSTER_MANAGER_OPT_COLD 1 << 1 @@ -157,9 +161,12 @@ typedef struct clusterManagerCommand { int replicas; char *from; char *to; + char **weight; + int weight_argc; int slots; int timeout; int pipeline; + float threshold; } clusterManagerCommand; static void createClusterManagerCommand(char *cmdname, int argc, char **argv); @@ -206,6 +213,7 @@ static struct config { int eval_ldb_end; /* Lua debugging session ended. */ int enable_ldb_on_eval; /* Handle manual SCRIPT DEBUG + EVAL commands. */ int last_cmd_type; + int verbose; clusterManagerCommand cluster_manager_command; } config; @@ -1266,6 +1274,8 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"-d") && !lastarg) { sdsfree(config.mb_delim); config.mb_delim = sdsnew(argv[++i]); + } else if (!strcmp(argv[i],"--verbose")) { + config.verbose = 1; } else if (!strcmp(argv[i],"--cluster") && !lastarg) { if (CLUSTER_MANAGER_MODE()) usage(); char *cmd = argv[++i]; @@ -1282,15 +1292,35 @@ static int parseOptions(int argc, char **argv) { config.cluster_manager_command.from = argv[++i]; } else if (!strcmp(argv[i],"--cluster-to") && !lastarg) { config.cluster_manager_command.to = argv[++i]; + } else if (!strcmp(argv[i],"--cluster-weight") && !lastarg) { + int widx = i + 1; + char **weight = argv + widx; + int wargc = 0; + for (; widx < argc; widx++) { + if (strstr(argv[widx], "--") == argv[widx]) break; + wargc++; + } + if (wargc > 0) { + config.cluster_manager_command.weight = weight; + config.cluster_manager_command.weight_argc = wargc; + } } else if (!strcmp(argv[i],"--cluster-slots") && !lastarg) { config.cluster_manager_command.slots = 
atoi(argv[++i]); } else if (!strcmp(argv[i],"--cluster-timeout") && !lastarg) { config.cluster_manager_command.timeout = atoi(argv[++i]); } else if (!strcmp(argv[i],"--cluster-pipeline") && !lastarg) { config.cluster_manager_command.pipeline = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-threshold") && !lastarg) { + config.cluster_manager_command.threshold = atof(argv[++i]); } else if (!strcmp(argv[i],"--cluster-yes")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_YES; + } else if (!strcmp(argv[i],"--cluster-simulate")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_SIMULATE; + } else if (!strcmp(argv[i],"--cluster-use-empty-masters")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; } else if (!strcmp(argv[i],"-v") || !strcmp(argv[i], "--version")) { sds version = cliVersion(); printf("redis-cli %s\n", version); @@ -1390,6 +1420,7 @@ static void usage(void) { " are not rolled back from the server memory.\n" " --cluster [args...] [opts...]\n" " Cluster Manager command and arguments (see below).\n" +" --verbose Verbose mode.\n" " --help Output this help and exit.\n" " --version Output version and exit.\n" "\n" @@ -1749,6 +1780,8 @@ typedef struct clusterManagerNode { sds *importing; int migrating_count; int importing_count; + float weight; /* Weight used by rebalance */ + int balance; /* Used by rebalance */ } clusterManagerNode; /* Data structure used to represent a sequence of nodes. 
*/ @@ -1780,6 +1813,7 @@ typedef int clusterManagerCommandProc(int argc, char **argv); static clusterManagerNode *clusterManagerNewNode(char *ip, int port); static clusterManagerNode *clusterManagerNodeByName(const char *name); +static clusterManagerNode *clusterManagerNodeByAbbreviatedName(const char *n); static void clusterManagerNodeResetSlots(clusterManagerNode *node); static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, @@ -1813,6 +1847,7 @@ static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandReshard(int argc, char **argv); +static int clusterManagerCommandRebalance(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); @@ -1831,6 +1866,9 @@ clusterManagerCommandDef clusterManagerCommands[] = { {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, {"reshard", clusterManagerCommandReshard, -1, "host:port", "from ,to ,slots ,yes,timeout ,pipeline "}, + {"rebalance", clusterManagerCommandRebalance, -1, "host:port", + "weight ,use-empty-masters," + "timeout ,simulate,pipeline ,threshold "}, {"call", clusterManagerCommandCall, -2, "host:port command arg arg .. arg", NULL}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} @@ -1970,10 +2008,13 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { node->migrating_count = 0; node->importing_count = 0; node->replicas_count = 0; + node->weight = 1.0f; + node->balance = 0; clusterManagerNodeResetSlots(node); return node; } +/* Return the node with the specified ID or NULL. 
*/ static clusterManagerNode *clusterManagerNodeByName(const char *name) { if (cluster_manager.nodes == NULL) return NULL; clusterManagerNode *found = NULL; @@ -1994,6 +2035,32 @@ static clusterManagerNode *clusterManagerNodeByName(const char *name) { return found; } +/* Like get_node_by_name but the specified name can be just the first + * part of the node ID as long as the prefix in unique across the + * cluster. + */ +static clusterManagerNode *clusterManagerNodeByAbbreviatedName(const char*name) +{ + if (cluster_manager.nodes == NULL) return NULL; + clusterManagerNode *found = NULL; + sds lcname = sdsempty(); + lcname = sdscpy(lcname, name); + sdstolower(lcname); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->name && + strstr(n->name, lcname) == n->name) { + found = n; + break; + } + } + sdsfree(lcname); + return found; +} + static void clusterManagerNodeResetSlots(clusterManagerNode *node) { memset(node->slots, 0, sizeof(node->slots)); node->slots_count = 0; @@ -2898,6 +2965,12 @@ int clusterManagerSlotCountCompareDesc(const void *n1, const void *n2) { return node2->slots_count - node1->slots_count; } +int clusterManagerCompareNodeBalance(const void *n1, const void *n2) { + clusterManagerNode *node1 = *((clusterManagerNode **) n1); + clusterManagerNode *node2 = *((clusterManagerNode **) n2); + return node1->balance - node2->balance; +} + static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { sds signature = NULL; int node_count = 0, i = 0, name_len = 0; @@ -3200,6 +3273,19 @@ static void clusterManagerShowReshardTable(list *table) { } } +static void clusterManagerReleaseReshardTable(list *table) { + if (table != NULL) { + listIter li; + listNode *ln; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + zfree(item); + } + listRelease(table); + } +} + static void 
clusterManagerLog(int level, const char* fmt, ...) { int use_colors = (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COLOR); @@ -3775,14 +3861,199 @@ static int clusterManagerCommandReshard(int argc, char **argv) { } cleanup: listRelease(sources); - if (table) { - listRewind(table, &li); - while ((ln = listNext(&li)) != NULL) { - clusterManagerReshardTableItem *item = ln->value; - zfree(item); - } - listRelease(table); + clusterManagerReleaseReshardTable(table); + return result; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + +static int clusterManagerCommandRebalance(int argc, char **argv) { + int port = 0; + char *ip = NULL; + clusterManagerNode **weightedNodes = NULL; + list *involved = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + int result = 1, i; + if (config.cluster_manager_command.weight != NULL) { + for (i = 0; i < config.cluster_manager_command.weight_argc; i++) { + char *name = config.cluster_manager_command.weight[i]; + char *p = strchr(name, '='); + if (p == NULL) { + result = 0; + goto cleanup; + } + *p = '\0'; + float w = atof(++p); + clusterManagerNode *n = clusterManagerNodeByAbbreviatedName(name); + if (n == NULL) { + clusterManagerLogErr("*** No such master node %s\n", name); + result = 0; + goto cleanup; + } + n->weight = w; + } } + float total_weight = 0; + int nodes_involved = 0; + int use_empty = config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; + + involved = listCreate(); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + /* Compute the total cluster weight. 
*/ + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + if (!use_empty && n->slots_count == 0) { + n->weight = 0; + continue; + } + total_weight += n->weight; + nodes_involved++; + listAddNodeTail(involved, n); + } + weightedNodes = zmalloc(nodes_involved * + sizeof(clusterManagerNode *)); + if (weightedNodes == NULL) goto cleanup; + /* Check cluster, only proceed if it looks sane. */ + clusterManagerCheckCluster(1); + if (cluster_manager.errors && listLength(cluster_manager.errors) > 0) { + clusterManagerLogErr("*** Please fix your cluster problems " + "before rebalancing" ); + result = 0; + goto cleanup; + } + /* Calculate the slots balance for each node. It's the number of + * slots the node should lose (if positive) or gain (if negative) + * in order to be balanced. */ + int threshold_reached = 0, total_balance = 0; + float threshold = config.cluster_manager_command.threshold; + i = 0; + listRewind(involved, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + weightedNodes[i++] = n; + int expected = (((float)CLUSTER_MANAGER_SLOTS / total_weight) * + (int) n->weight); + n->balance = n->slots_count - expected; + total_balance += n->balance; + /* Compute the percentage of difference between the + * expected number of slots and the real one, to see + * if it's over the threshold specified by the user. */ + int over_threshold = 0; + if (config.cluster_manager_command.threshold > 0) { + if (n->slots_count > 0) { + float err_perc = fabs((100-(100.0*expected/n->slots_count))); + if (err_perc > threshold) over_threshold = 1; + } else if (expected > 1) { + over_threshold = 1; + } + } + if (over_threshold) threshold_reached = 1; + } + if (!threshold_reached) { + clusterManagerLogErr("*** No rebalancing needed! 
" + "All nodes are within the %.2f%% threshold.\n", + config.cluster_manager_command.threshold); + result = 0; + goto cleanup; + } + /* Because of rounding, it is possible that the balance of all nodes + * summed does not give 0. Make sure that nodes that have to provide + * slots are always matched by nodes receiving slots. */ + while (total_balance > 0) { + listRewind(involved, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->balance < 0 && total_balance > 0) { + n->balance--; + total_balance--; + } + } + } + /* Sort nodes by their slots balance. */ + qsort(weightedNodes, nodes_involved, sizeof(clusterManagerNode *), + clusterManagerCompareNodeBalance); + clusterManagerLogInfo(">>> Rebalancing across %d nodes. " + "Total weight = %.2f\n", + nodes_involved, total_weight); + if (config.verbose) { + for (i = 0; i < nodes_involved; i++) { + clusterManagerNode *n = weightedNodes[i]; + printf("%s:%d balance is %d slots\n", n->ip, n->port, n->balance); + } + } + /* Now we have at the start of the 'sn' array nodes that should get + * slots, at the end nodes that must give slots. + * We take two indexes, one at the start, and one at the end, + * incrementing or decrementing the indexes accordingly til we + * find nodes that need to get/provide slots. */ + int dst_idx = 0; + int src_idx = nodes_involved - 1; + int simulate = config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_SIMULATE; + while (dst_idx < src_idx) { + clusterManagerNode *dst = weightedNodes[dst_idx]; + clusterManagerNode *src = weightedNodes[src_idx]; + int db = abs(dst->balance); + int sb = abs(src->balance); + int numslots = (db < sb ? db : sb); + if (numslots > 0) { + printf("Moving %d slots from %s:%d to %s:%d\n", numslots, + src->ip, + src->port, + dst->ip, + dst->port); + /* Actaully move the slots. 
*/ + list *lsrc = listCreate(), *table = NULL; + listAddNodeTail(lsrc, src); + table = clusterManagerComputeReshardTable(lsrc, numslots); + listRelease(lsrc); + int table_len = (int) listLength(table); + if (!table || table_len != numslots) { + clusterManagerLogErr("*** Assertio failed: Reshard table " + "!= number of slots"); + result = 0; + goto end_move; + } + if (simulate) { + for (i = 0; i < table_len; i++) printf("#"); + } else { + int opts = CLUSTER_MANAGER_OPT_QUIET | + CLUSTER_MANAGER_OPT_UPDATE; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + result = clusterManagerMoveSlot(item->source, + dst, + item->slot, + opts, NULL); + if (!result) goto end_move; + printf("#"); + fflush(stdout); + } + + } + printf("\n"); +end_move: + clusterManagerReleaseReshardTable(table); + if (!result) goto cleanup; + } + /* Update nodes balance. */ + dst->balance += numslots; + src->balance -= numslots; + if (dst->balance == 0) dst_idx++; + if (src->balance == 0) src_idx --; + } +cleanup: + if (involved != NULL) listRelease(involved); + if (weightedNodes != NULL) zfree(weightedNodes); return result; invalid_args: fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); @@ -5169,6 +5440,7 @@ int main(int argc, char **argv) { config.eval_ldb_sync = 0; config.enable_ldb_on_eval = 0; config.last_cmd_type = -1; + config.verbose = 0; config.cluster_manager_command.name = NULL; config.cluster_manager_command.argc = 0; config.cluster_manager_command.argv = NULL; @@ -5176,9 +5448,12 @@ int main(int argc, char **argv) { config.cluster_manager_command.replicas = 0; config.cluster_manager_command.from = NULL; config.cluster_manager_command.to = NULL; + config.cluster_manager_command.weight = NULL; config.cluster_manager_command.slots = 0; config.cluster_manager_command.timeout = CLUSTER_MANAGER_MIGRATE_TIMEOUT; config.cluster_manager_command.pipeline = CLUSTER_MANAGER_MIGRATE_PIPELINE; + 
config.cluster_manager_command.threshold = + CLUSTER_MANAGER_REBALANCE_THRESHOLD; pref.hints = 1; spectrum_palette = spectrum_palette_color; From 80f4553a37000cb7141efc1c22d93b3e892b9a87 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 6 Apr 2018 18:02:40 +0200 Subject: [PATCH 51/66] Cluster Manager: fix command. --- src/redis-cli.c | 715 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 642 insertions(+), 73 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 49ba41257..8d5732c20 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -151,6 +151,7 @@ static uint64_t dictSdsHash(const void *key); static int dictSdsKeyCompare(void *privdata, const void *key1, const void *key2); static void dictSdsDestructor(void *privdata, void *val); +static void dictListDestructor(void *privdata, void *val); /* Cluster Manager Command Info */ typedef struct clusterManagerCommand { @@ -406,6 +407,12 @@ static void dictSdsDestructor(void *privdata, void *val) sdsfree(val); } +void dictListDestructor(void *privdata, void *val) +{ + DICT_NOTUSED(privdata); + listRelease((list*)val); +} + /* _serverAssert is needed by dict */ void _serverAssert(const char *estr, const char *file, int line) { fprintf(stderr, "=== ASSERTION FAILED ==="); @@ -1446,6 +1453,15 @@ static void usage(void) { exit(1); } +static int confirmWithYes(char *msg) { + printf("%s (type 'yes' to accept): ", msg); + fflush(stdout); + char buf[4]; + int nread = read(fileno(stdin),buf,4); + buf[3] = '\0'; + return (nread != 0 && !strcmp("yes", buf)); +} + /* Turn the plain C strings into Sds strings */ static char **convertToSds(int count, char** args) { int j; @@ -1751,7 +1767,7 @@ static int evalMode(int argc, char **argv) { } /*------------------------------------------------------------------------------ - * Cluster Manager mode + * Cluster Manager *--------------------------------------------------------------------------- */ /* The Cluster Manager global structure */ @@ -1760,6 +1776,9 @@ 
static struct clusterManager { list *errors; } cluster_manager; +/* Used by clusterManagerFixSlotsCoverage */ +dict *clusterManagerUncoveredSlots = NULL; + typedef struct clusterManagerNode { redisContext *context; sds name; @@ -1776,10 +1795,12 @@ typedef struct clusterManagerNode { int slots_count; int replicas_count; list *friends; - sds *migrating; - sds *importing; - int migrating_count; - int importing_count; + sds *migrating; /* An array of sds where even strings are slots and odd + * strings are the destination node IDs. */ + sds *importing; /* An array of sds where even strings are slots and odd + * strings are the source node IDs. */ + int migrating_count; /* Length of the migrating array (migrating slots*2) */ + int importing_count; /* Length of the importing array (importing slots*2) */ float weight; /* Weight used by rebalance */ int balance; /* Used by rebalance */ } clusterManagerNode; @@ -1829,7 +1850,7 @@ static void clusterManagerShowNodes(void); static void clusterManagerShowInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); static void clusterManagerWaitForClusterJoin(void); -static void clusterManagerCheckCluster(int quiet); +static int clusterManagerCheckCluster(int quiet); static void clusterManagerLog(int level, const char* fmt, ...); static int clusterManagerIsConfigConsistent(void); static void clusterManagerOnError(sds err); @@ -1846,6 +1867,7 @@ static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandFix(int argc, char **argv); static int clusterManagerCommandReshard(int argc, char **argv); static int clusterManagerCommandRebalance(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); @@ -1863,6 +1885,7 @@ clusterManagerCommandDef 
clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", "replicas "}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, + {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, {"reshard", clusterManagerCommandReshard, -1, "host:port", "from ,to ,slots ,yes,timeout ,pipeline "}, @@ -1988,6 +2011,8 @@ static void freeClusterManager(void) { listRelease(cluster_manager.errors); cluster_manager.errors = NULL; } + if (clusterManagerUncoveredSlots != NULL) + dictRelease(clusterManagerUncoveredSlots); } static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { @@ -2013,6 +2038,38 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { clusterManagerNodeResetSlots(node); return node; } +/* Check whether reply is NULL or its type is REDIS_REPLY_ERROR. In the + * latest case, if 'err' arg is not NULL, it gets allocated with a copy + * of reply error (it's up to the caller function to free it), elsewhere + * the error is directly printed. */ +static int clusterManagerCheckRedisReply(clusterManagerNode *n, + redisReply *r, char **err) +{ + int is_err = 0; + if (!r || (is_err = (r->type == REDIS_REPLY_ERROR))) { + if (is_err) { + if (err != NULL) { + *err = zmalloc((r->len + 1) * sizeof(char)); + strcpy(*err, r->str); + } else CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, r->str); + } + return 0; + } + return 1; +} + +static void clusterManagerRemoveNodeFromList(list *nodelist, + clusterManagerNode *node) { + listIter li; + listNode *ln; + listRewind(nodelist, &li); + while ((ln = listNext(&li)) != NULL) { + if (node == ln->value) { + listDelNode(nodelist, ln); + break; + } + } +} /* Return the node with the specified ID or NULL. 
*/ static clusterManagerNode *clusterManagerNodeByName(const char *name) { @@ -2470,10 +2527,10 @@ cleanup: /* Set slot status to "importing" or "migrating" */ static int clusterManagerSetSlot(clusterManagerNode *node1, clusterManagerNode *node2, - int slot, const char *mode, char **err) { + int slot, const char *status, char **err) { redisReply *reply = CLUSTER_MANAGER_COMMAND(node1, "CLUSTER " "SETSLOT %d %s %s", - slot, mode, + slot, status, (char *) node2->name); if (err != NULL) *err = NULL; if (!reply) return 0; @@ -2492,6 +2549,70 @@ cleanup: return success; } +/* Migrate keys taken from reply->elements. It returns the reply from the + * MIGRATE command, or NULL if something goes wrong. If the argument 'dots' + * is not NULL, a dot will be printed for every migrated key. */ +static redisReply *clusterManagerMigrateKeysInReply(clusterManagerNode *source, + clusterManagerNode *target, + redisReply *reply, + int replace, int timeout, + char *dots) +{ + redisReply *migrate_reply = NULL; + char **argv = NULL; + size_t *argv_len = NULL; + int c = (replace ? 
8 : 7); + size_t argc = c + reply->elements; + size_t i, offset = 6; // Keys Offset + argv = zcalloc(argc * sizeof(char *)); + argv_len = zcalloc(argc * sizeof(size_t)); + char portstr[255]; + char timeoutstr[255]; + snprintf(portstr, 10, "%d", target->port); + snprintf(timeoutstr, 10, "%d", timeout); + argv[0] = "MIGRATE"; + argv_len[0] = 7; + argv[1] = target->ip; + argv_len[1] = strlen(target->ip); + argv[2] = portstr; + argv_len[2] = strlen(portstr); + argv[3] = ""; + argv_len[3] = 0; + argv[4] = "0"; + argv_len[4] = 1; + argv[5] = timeoutstr; + argv_len[5] = strlen(timeoutstr); + if (replace) { + argv[offset] = "REPLACE"; + argv_len[offset] = 7; + offset++; + } + argv[offset] = "KEYS"; + argv_len[offset] = 4; + offset++; + for (i = 0; i < reply->elements; i++) { + redisReply *entry = reply->element[i]; + size_t idx = i + offset; + assert(entry->type == REDIS_REPLY_STRING); + argv[idx] = (char *) sdsnew(entry->str); + argv_len[idx] = entry->len; + if (dots) dots[i] = '.'; + } + if (dots) dots[reply->elements] = '\0'; + void *_reply = NULL; + redisAppendCommandArgv(source->context,argc, + (const char**)argv,argv_len); + int success = (redisGetReply(source->context, &_reply) == REDIS_OK); + for (i = 0; i < reply->elements; i++) sdsfree(argv[i + offset]); + if (!success) goto cleanup; + migrate_reply = (redisReply *) _reply; +cleanup: + zfree(argv); + zfree(argv_len); + return migrate_reply; +} + +/* Migrate all keys in the given slot from source to target.*/ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, clusterManagerNode *target, int slot, int timeout, @@ -2499,10 +2620,11 @@ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, char **err) { int success = 1; + int do_fix = (config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_FIX); while (1) { + char *dots = NULL; redisReply *reply = NULL, *migrate_reply = NULL; - char **argv = NULL; - size_t *argv_len = NULL; reply = CLUSTER_MANAGER_COMMAND(source, 
"CLUSTER " "GETKEYSINSLOT %d %d", slot, pipeline); @@ -2523,57 +2645,37 @@ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, freeReplyObject(reply); break; } - char *dots = (verbose ? zmalloc((count+1) * sizeof(char)) : NULL); + if (verbose) dots = zmalloc((count+1) * sizeof(char)); /* Calling MIGRATE command. */ - size_t argc = count + 8; - argv = zcalloc(argc * sizeof(char *)); - argv_len = zcalloc(argc * sizeof(size_t)); - char portstr[255]; - char timeoutstr[255]; - snprintf(portstr, 10, "%d", target->port); - snprintf(timeoutstr, 10, "%d", timeout); - argv[0] = "MIGRATE"; - argv_len[0] = 7; - argv[1] = target->ip; - argv_len[1] = strlen(target->ip); - argv[2] = portstr; - argv_len[2] = strlen(portstr); - argv[3] = ""; - argv_len[3] = 0; - argv[4] = "0"; - argv_len[4] = 1; - argv[5] = timeoutstr; - argv_len[5] = strlen(timeoutstr); - argv[6] = "REPLACE"; - argv_len[6] = 7; - argv[7] = "KEYS"; - argv_len[7] = 4; - for (size_t i = 0; i < count; i++) { - redisReply *entry = reply->element[i]; - size_t idx = i + 8; - assert(entry->type == REDIS_REPLY_STRING); - argv[idx] = (char *) sdsnew(entry->str); - argv_len[idx] = entry->len; - if (verbose) dots[i] = '.'; - } - if (verbose) dots[count] = '\0'; - void *_reply = NULL; - redisAppendCommandArgv(source->context,argc, - (const char**)argv,argv_len); - success = (redisGetReply(source->context, &_reply) == REDIS_OK); - for (size_t i = 0; i < count; i++) sdsfree(argv[i + 8]); - if (!success) goto next; - migrate_reply = (redisReply *) _reply; + migrate_reply = clusterManagerMigrateKeysInReply(source, target, + reply, 0, timeout, + dots); + if (migrate_reply == NULL) goto next; if (migrate_reply->type == REDIS_REPLY_ERROR) { - // TODO: Implement fix. 
- success = 0; - if (err != NULL) { - *err = zmalloc((migrate_reply->len + 1) * sizeof(char)); - strcpy(*err, migrate_reply->str); - printf("\n"); - CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + if (do_fix && strstr(migrate_reply->str, "BUSYKEY")) { + clusterManagerLogWarn("*** Target key exists. " + "Replacing it for FIX.\n"); + freeReplyObject(migrate_reply); + /* Try to migrate keys adding REPLACE option. */ + migrate_reply = clusterManagerMigrateKeysInReply(source, + target, + reply, + 1, timeout, + NULL); + success = (migrate_reply != NULL && + migrate_reply->type != REDIS_REPLY_ERROR); + } else success = 0; + if (!success) { + if (migrate_reply != NULL) { + if (err) { + *err = zmalloc((migrate_reply->len + 1) * sizeof(char)); + strcpy(*err, migrate_reply->str); + } + printf("\n"); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + } + goto next; } - goto next; } if (verbose) { printf("%s", dots); @@ -2582,8 +2684,7 @@ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, next: if (reply != NULL) freeReplyObject(reply); if (migrate_reply != NULL) freeReplyObject(migrate_reply); - zfree(argv); - zfree(argv_len); + if (dots) zfree(dots); if (!success) break; } return success; @@ -2729,6 +2830,7 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char *name = NULL, *addr = NULL, *flags = NULL, *master_id = NULL, *ping_sent = NULL, *ping_recv = NULL, *config_epoch = NULL, *link_status = NULL; + UNUSED(link_status); int i = 0; while ((p = strchr(line, ' ')) != NULL) { *p = '\0'; @@ -2974,11 +3076,11 @@ int clusterManagerCompareNodeBalance(const void *n1, const void *n2) { static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { sds signature = NULL; int node_count = 0, i = 0, name_len = 0; + char **node_configs = NULL; redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); if (reply == NULL || reply->type == REDIS_REPLY_ERROR) goto cleanup; char *lines = reply->str, *p, *line; - char 
**node_configs = NULL; while ((p = strstr(lines, "\n")) != NULL) { i = 0; *p = '\0'; @@ -3057,8 +3159,10 @@ static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { } cleanup: if (reply != NULL) freeReplyObject(reply); - for (i = 0; i < node_count; i++) zfree(node_configs[i]); - zfree(node_configs); + if (node_configs != NULL) { + for (i = 0; i < node_count; i++) zfree(node_configs[i]); + zfree(node_configs); + } return signature; } @@ -3114,9 +3218,453 @@ static int clusterManagerGetCoveredSlots(char *all_slots) { return totslots; } -static void clusterManagerCheckCluster(int quiet) { +static void clusterManagerPrintSlotsList(list *slots) { + listIter li; + listNode *ln; + listRewind(slots, &li); + sds first = NULL; + while ((ln = listNext(&li)) != NULL) { + sds slot = ln->value; + if (!first) first = slot; + else printf(", "); + printf("%s", slot); + } + printf("\n"); +} + +/* Return the node, among 'nodes' with the greatest number of keys + * in the specified slot. */ +static clusterManagerNode * clusterManagerGetNodeWithMostKeysInSlot(list *nodes, + int slot, + char **err) +{ + clusterManagerNode *node = NULL; + int numkeys = 0; + listIter li; + listNode *ln; + listRewind(nodes, &li); + if (err) *err = NULL; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + redisReply *r = + CLUSTER_MANAGER_COMMAND(n, "CLUSTER COUNTKEYSINSLOTi %d", slot); + int success = clusterManagerCheckRedisReply(n, r, err); + if (success) { + if (r->integer > numkeys || node == NULL) { + numkeys = r->integer; + node = n; + } + } + if (r != NULL) freeReplyObject(r); + /* If the reply contains errors */ + if (!success) { + if (err != NULL && *err != NULL) + CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err); + node = NULL; + break; + } + } + return node; +} + +static int clusterManagerFixSlotsCoverage(char *all_slots) { + int i, fixed = 0; + list *none = NULL, *single = NULL, *multi = 
NULL; + clusterManagerLogInfo(">>> Fixing slots coverage...\n"); + printf("List of not covered slots: \n"); + int uncovered_count = 0; + sds log = sdsempty(); + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + int covered = all_slots[i]; + if (!covered) { + sds key = sdsfromlonglong((long long) i); + if (uncovered_count++ > 0) printf(","); + printf("%s", (char *) key); + list *slot_nodes = listCreate(); + sds slot_nodes_str = sdsempty(); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + redisReply *reply = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER GETKEYSINSLOT %d %d", i, 1); + if (!clusterManagerCheckRedisReply(n, reply, NULL)) { + fixed = -1; + if (reply) freeReplyObject(reply); + goto cleanup; + } + assert(reply->type == REDIS_REPLY_ARRAY); + if (reply->elements > 0) { + listAddNodeTail(slot_nodes, n); + if (listLength(slot_nodes) > 1) + slot_nodes_str = sdscat(slot_nodes_str, ", "); + slot_nodes_str = sdscatfmt(slot_nodes_str, + "%s:%u", n->ip, n->port); + } + freeReplyObject(reply); + } + log = sdscatfmt(log, "\nSlot %S has keys in %u nodes: %S", + key, listLength(slot_nodes), slot_nodes_str); + sdsfree(slot_nodes_str); + dictAdd(clusterManagerUncoveredSlots, key, slot_nodes); + } + } + printf("\n%s\n", log); + /* For every slot, take action depending on the actual condition: + * 1) No node has keys for this slot. + * 2) A single node has keys for this slot. + * 3) Multiple nodes have keys for this slot. 
*/ + none = listCreate(); + single = listCreate(); + multi = listCreate(); + dictIterator *iter = dictGetIterator(clusterManagerUncoveredSlots); + dictEntry *entry; + while ((entry = dictNext(iter)) != NULL) { + sds slot = (sds) dictGetKey(entry); + list *nodes = (list *) dictGetVal(entry); + switch (listLength(nodes)){ + case 0: listAddNodeTail(none, slot); break; + case 1: listAddNodeTail(single, slot); break; + default: listAddNodeTail(multi, slot); break; + } + } + dictReleaseIterator(iter); + + /* Handle case "1": keys in no node. */ + if (listLength(none) > 0) { + printf("The following uncovered slots have no keys " + "across the cluster:\n"); + clusterManagerPrintSlotsList(none); + if (confirmWithYes("Fix these slots by covering with a random node?")){ + srand(time(NULL)); + listIter li; + listNode *ln; + listRewind(none, &li); + while ((ln = listNext(&li)) != NULL) { + sds slot = ln->value; + long idx = (long) (rand() % listLength(cluster_manager.nodes)); + listNode *node_n = listIndex(cluster_manager.nodes, idx); + assert(node_n != NULL); + clusterManagerNode *n = node_n->value; + clusterManagerLogInfo(">>> Covering slot %s with %s:%d\n", + slot, n->ip, n->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER ADDSLOTS %s", slot); + if (!clusterManagerCheckRedisReply(n, r, NULL)) fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + fixed++; + } + } + } + + /* Handle case "2": keys only in one node. 
*/ + if (listLength(single) > 0) { + printf("The following uncovered slots have keys in just one node:\n"); + clusterManagerPrintSlotsList(single); + if (confirmWithYes("Fix these slots by covering with those nodes?")){ + listIter li; + listNode *ln; + listRewind(single, &li); + while ((ln = listNext(&li)) != NULL) { + sds slot = ln->value; + dictEntry *entry = dictFind(clusterManagerUncoveredSlots, slot); + assert(entry != NULL); + list *nodes = (list *) dictGetVal(entry); + listNode *fn = listFirst(nodes); + assert(fn != NULL); + clusterManagerNode *n = fn->value; + clusterManagerLogInfo(">>> Covering slot %s with %s:%d\n", + slot, n->ip, n->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER ADDSLOTS %s", slot); + if (!clusterManagerCheckRedisReply(n, r, NULL)) fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + fixed++; + } + } + } + + /* Handle case "3": keys in multiple nodes. */ + if (listLength(multi) > 0) { + printf("The folowing uncovered slots have keys in multiple nodes:\n"); + clusterManagerPrintSlotsList(multi); + if (confirmWithYes("Fix these slots by moving keys " + "into a single node?")) { + listIter li; + listNode *ln; + listRewind(multi, &li); + while ((ln = listNext(&li)) != NULL) { + sds slot = ln->value; + dictEntry *entry = dictFind(clusterManagerUncoveredSlots, slot); + assert(entry != NULL); + list *nodes = (list *) dictGetVal(entry); + int s = atoi(slot); + clusterManagerNode *target = + clusterManagerGetNodeWithMostKeysInSlot(nodes, s, NULL); + if (target == NULL) { + fixed = -1; + goto cleanup; + } + clusterManagerLogInfo(">>> Covering slot %s moving keys " + "to %s:%d\n", slot, + target->ip, target->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(target, + "CLUSTER ADDSLOTS %s", slot); + if (!clusterManagerCheckRedisReply(target, r, NULL)) fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + r = CLUSTER_MANAGER_COMMAND(target, + "CLUSTER SETSLOT %s %s", slot, "STABLE"); + if 
(!clusterManagerCheckRedisReply(target, r, NULL)) fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + listIter nli; + listNode *nln; + listRewind(nodes, &nli); + while ((nln = listNext(&nli)) != NULL) { + clusterManagerNode *src = nln->value; + if (src == target) continue; + /* Set the source node in 'importing' state + * (even if we will actually migrate keys away) + * in order to avoid receiving redirections + * for MIGRATE. */ + redisReply *r = CLUSTER_MANAGER_COMMAND(src, + "CLUSTER SETSLOT %s %s %s", slot, + "IMPORTING", target->name); + if (!clusterManagerCheckRedisReply(target, r, NULL)) + fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + int opts = CLUSTER_MANAGER_OPT_VERBOSE | + CLUSTER_MANAGER_OPT_COLD; + if (!clusterManagerMoveSlot(src, target, s, opts, NULL)) { + fixed = -1; + goto cleanup; + } + } + fixed++; + } + } + } +cleanup: + sdsfree(log); + if (none) listRelease(none); + if (single) listRelease(single); + if (multi) listRelease(multi); + return fixed; +} + +/* Slot 'slot' was found to be in importing or migrating state in one or + * more nodes. This function fixes this condition by migrating keys where + * it seems more sensible. */ +static int clusterManagerFixOpenSlot(int slot) { + clusterManagerLogInfo(">>> Fixing open slot %d\n", slot); + /* Try to obtain the current slot owner, according to the current + * nodes configuration. 
*/ + int success = 1; + list *owners = listCreate(); + list *migrating = listCreate(); + list *importing = listCreate(); + sds migrating_str = sdsempty(); + sds importing_str = sdsempty(); + clusterManagerNode *owner = NULL; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->slots[slot]) { + if (owner == NULL) owner = n; + listAddNodeTail(owners, n); + } + } + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->migrating) { + for (int i = 0; i < n->migrating_count; i += 2) { + sds migrating_slot = n->migrating[i]; + if (atoi(migrating_slot) == slot) { + char *sep = (listLength(migrating) == 0 ? "" : ","); + migrating_str = sdscatfmt(migrating_str, "%s%S:%u", + sep, n->ip, n->port); + listAddNodeTail(migrating, n); + break; + } + } + } + if (n->importing) { + for (int i = 0; i < n->importing_count; i += 2) { + sds importing_slot = n->importing[i]; + if (atoi(importing_slot) == slot) { + char *sep = (listLength(importing) == 0 ? "" : ","); + importing_str = sdscatfmt(importing_str, "%s%S:%u", + sep, n->ip, n->port); + listAddNodeTail(importing, n); + break; + } + } + } + } + printf("Set as migrating in: %s\n", migrating_str); + printf("Set as importing in: %s\n", importing_str); + /* If there is no slot owner, set as owner the slot with the biggest + * number of keys, among the set of migrating / importing nodes. */ + if (owner == NULL) { + clusterManagerLogInfo(">>> Nobody claims ownership, " + "selecting an owner...\n"); + owner = clusterManagerGetNodeWithMostKeysInSlot(cluster_manager.nodes, + slot, NULL); + // If we still don't have an owner, we can't fix it. + if (owner == NULL) { + clusterManagerLogErr("[ERR] Can't select a slot owner. 
" + "Impossible to fix.\n"); + success = 0; + goto cleanup; + } + + // Use ADDSLOTS to assign the slot. + printf("*** Configuring %s:%d as the slot owner\n", owner->ip, + owner->port); + redisReply *reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER " + "SETSLOT %d %s", + slot, "STABLE"); + success = clusterManagerCheckRedisReply(owner, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER ADDSLOTS %d", slot); + success = clusterManagerCheckRedisReply(owner, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + /* Make sure this information will propagate. Not strictly needed + * since there is no past owner, so all the other nodes will accept + * whatever epoch this node will claim the slot with. */ + reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER BUMPEPOCH"); + success = clusterManagerCheckRedisReply(owner, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + /* Remove the owner from the list of migrating/importing + * nodes. */ + clusterManagerRemoveNodeFromList(migrating, owner); + clusterManagerRemoveNodeFromList(importing, owner); + } + /* If there are multiple owners of the slot, we need to fix it + * so that a single node is the owner and all the other nodes + * are in importing state. Later the fix can be handled by one + * of the base cases above. + * + * Note that this case also covers multiple nodes having the slot + * in migrating state, since migrating is a valid state only for + * slot owners. 
*/ + if (listLength(owners) > 1) { + owner = clusterManagerGetNodeWithMostKeysInSlot(owners, slot, NULL); + listRewind(owners, &li); + redisReply *reply = NULL; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n == owner) continue; + reply = CLUSTER_MANAGER_COMMAND(n, "CLUSTER DELSLOT %d", slot); + success = clusterManagerCheckRedisReply(n, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + success = clusterManagerSetSlot(n, owner, slot, "importing", NULL); + if (!success) goto cleanup; + clusterManagerRemoveNodeFromList(importing, n); //Avoid duplicates + listAddNodeTail(importing, n); + } + reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER BUMPEPOCH"); + success = clusterManagerCheckRedisReply(owner, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + } + int move_opts = CLUSTER_MANAGER_OPT_VERBOSE; + /* Case 1: The slot is in migrating state in one slot, and in + * importing state in 1 slot. That's trivial to address. */ + if (listLength(migrating) == 1 && listLength(importing) == 1) { + clusterManagerNode *src = listFirst(migrating)->value; + clusterManagerNode *dst = listFirst(importing)->value; + success = clusterManagerMoveSlot(src, dst, slot, move_opts, NULL); + } + /* Case 2: There are multiple nodes that claim the slot as importing, + * they probably got keys about the slot after a restart so opened + * the slot. In this case we just move all the keys to the owner + * according to the configuration. 
*/ + else if (listLength(migrating) == 0 && listLength(importing) > 0) { + clusterManagerLogInfo(">>> Moving all the %d slot keys to its " + "owner %s:%d\n", slot, owner->ip, owner->port); + move_opts |= CLUSTER_MANAGER_OPT_COLD; + listRewind(importing, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n == owner) continue; + success = clusterManagerMoveSlot(n, owner, slot, move_opts, NULL); + if (!success) goto cleanup; + clusterManagerLogInfo(">>> Setting %d as STABLE in " + "%s:%d\n", slot, n->ip, n->port); + + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER SETSLOT %d %s", + slot, "STABLE"); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) goto cleanup; + } + } else { + int try_to_close_slot = (listLength(importing) == 0 && + listLength(migrating) == 1); + if (try_to_close_slot) { + clusterManagerNode *n = listFirst(migrating)->value; + redisReply *r = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER GETKEYSINSLOT %d %d", slot, 10); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) { + if (success) try_to_close_slot = (r->elements == 0); + freeReplyObject(r); + } + if (!success) goto cleanup; + } + /* Case 3: There are no slots claiming to be in importing state, but + * there is a migrating node that actually don't have any key. We + * can just close the slot, probably a reshard interrupted in the middle. */ + if (try_to_close_slot) { + clusterManagerNode *n = listFirst(migrating)->value; + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER SETSLOT %d %s", + slot, "STABLE"); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) goto cleanup; + } else { + success = 0; + clusterManagerLogErr("[ERR] Sorry, redis-cli can't fix this slot " + "yet (work in progress). 
Slot is set as " + "migrating in %s, as importing in %s, " + "owner is %s:%d\n", migrating_str, + importing_str, owner->ip, owner->port); + } + } +cleanup: + listRelease(owners); + listRelease(migrating); + listRelease(importing); + sdsfree(migrating_str); + sdsfree(importing_str); + return success; +} + +static int clusterManagerCheckCluster(int quiet) { listNode *ln = listFirst(cluster_manager.nodes); - if (!ln) return; + if (!ln) return 0; + int result = 1; + int do_fix = config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_FIX; clusterManagerNode *node = ln->value; clusterManagerLogInfo(">>> Performing Cluster Check (using node %s:%d)\n", node->ip, node->port); @@ -3124,6 +3672,7 @@ static void clusterManagerCheckCluster(int quiet) { if (!clusterManagerIsConfigConsistent()) { sds err = sdsnew("[ERR] Nodes don't agree about configuration!"); clusterManagerOnError(err); + result = 0; } else { clusterManagerLogOk("[OK] All nodes agree about slots " "configuration.\n"); @@ -3174,6 +3723,7 @@ static void clusterManagerCheckCluster(int quiet) { } } if (open_slots != NULL) { + result = 0; dictIterator *iter = dictGetIterator(open_slots); dictEntry *entry; sds errstr = sdsnew("[WARNING] The following slots are open: "); @@ -3185,6 +3735,17 @@ static void clusterManagerCheckCluster(int quiet) { } clusterManagerLogErr("%s.\n", (char *) errstr); sdsfree(errstr); + if (do_fix) { + // Fix open slots. 
+ dictReleaseIterator(iter); + iter = dictGetIterator(open_slots); + while ((entry = dictNext(iter)) != NULL) { + sds slot = (sds) dictGetKey(entry); + result = clusterManagerFixOpenSlot(atoi(slot)); + if (!result) break; + } + } + dictReleaseIterator(iter); dictRelease(open_slots); } clusterManagerLogInfo(">>> Check slots coverage...\n"); @@ -3200,7 +3761,16 @@ static void clusterManagerCheckCluster(int quiet) { "covered by nodes.\n", CLUSTER_MANAGER_SLOTS); clusterManagerOnError(err); + result = 0; + if (do_fix/* && result*/) { + dictType dtype = clusterManagerDictType; + dtype.valDestructor = dictListDestructor; + clusterManagerUncoveredSlots = dictCreate(&dtype, NULL); + int fixed = clusterManagerFixSlotsCoverage(slots); + if (fixed > 0) result = 1; + } } + return result; } static clusterManagerNode *clusterNodeForResharding(char *id, @@ -3546,12 +4116,7 @@ assign_replicas: } clusterManagerOptimizeAntiAffinity(ip_nodes, ip_count); clusterManagerShowNodes(); - printf("Can I set the above configuration? 
%s", "(type 'yes' to accept): "); - fflush(stdout); - char buf[4]; - int nread = read(fileno(stdin),buf,4); - buf[3] = '\0'; - if (nread != 0 && !strcmp("yes", buf)) { + if (confirmWithYes("Can I set the above configuration?")) { listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; @@ -3674,13 +4239,17 @@ static int clusterManagerCommandCheck(int argc, char **argv) { clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; clusterManagerShowInfo(); - clusterManagerCheckCluster(0); - return 1; + return clusterManagerCheckCluster(0); invalid_args: fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); return 0; } +static int clusterManagerCommandFix(int argc, char **argv) { + config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_FIX; + return clusterManagerCommandCheck(argc, argv); +} + static int clusterManagerCommandReshard(int argc, char **argv) { int port = 0; char *ip = NULL; From 68f53b8be1df5357387589601f7347173f71b79d Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 10 Apr 2018 16:25:25 +0200 Subject: [PATCH 52/66] Cluster Manager: import command --- src/Makefile | 2 +- src/redis-cli.c | 216 +++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 195 insertions(+), 23 deletions(-) diff --git a/src/Makefile b/src/Makefile index 14112aa1f..269a70933 100644 --- a/src/Makefile +++ b/src/Makefile @@ -146,7 +146,7 @@ REDIS_SERVER_NAME=redis-server REDIS_SENTINEL_NAME=redis-sentinel REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o 
hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o REDIS_CLI_NAME=redis-cli -REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o +REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o crc16.o REDIS_BENCHMARK_NAME=redis-benchmark REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o zmalloc.o redis-benchmark.o REDIS_CHECK_RDB_NAME=redis-check-rdb diff --git a/src/redis-cli.c b/src/redis-cli.c index 8d5732c20..08a356eb1 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -74,7 +74,7 @@ #define CLUSTER_MANAGER_REBALANCE_THRESHOLD 2 #define CLUSTER_MANAGER_INVALID_HOST_ARG \ - "Invalid arguments: you need to pass either a valid " \ + "[ERR] Invalid arguments: you need to pass either a valid " \ "address (ie. 120.0.0.1:7000) or space separated IP " \ "and port (ie. 
120.0.0.1 7000)\n" #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) @@ -115,7 +115,9 @@ #define CLUSTER_MANAGER_CMD_FLAG_AUTOWEIGHTS 1 << 3 #define CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER 1 << 4 #define CLUSTER_MANAGER_CMD_FLAG_SIMULATE 1 << 5 -#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 +#define CLUSTER_MANAGER_CMD_FLAG_REPLACE 1 << 6 +#define CLUSTER_MANAGER_CMD_FLAG_COPY 1 << 7 +#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 8 #define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 #define CLUSTER_MANAGER_OPT_COLD 1 << 1 @@ -237,6 +239,8 @@ static long getLongInfoField(char *info, char *field); * Utility functions *--------------------------------------------------------------------------- */ +uint16_t crc16(const char *buf, int len); + static long long ustime(void) { struct timeval tv; long long ust; @@ -1325,6 +1329,12 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"--cluster-simulate")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_SIMULATE; + } else if (!strcmp(argv[i],"--cluster-replace")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_REPLACE; + } else if (!strcmp(argv[i],"--cluster-copy")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_COPY; } else if (!strcmp(argv[i],"--cluster-use-empty-masters")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; @@ -1870,6 +1880,7 @@ static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandFix(int argc, char **argv); static int clusterManagerCommandReshard(int argc, char **argv); static int clusterManagerCommandRebalance(int argc, char **argv); +static int clusterManagerCommandImport(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); @@ -1892,6 +1903,8 @@ clusterManagerCommandDef clusterManagerCommands[] = { {"rebalance", 
/* -----------------------------------------------------------------------------
 * Key space handling
 * -------------------------------------------------------------------------- */

/* Map a key to one of the 16384 hash slots: the low 14 bits of crc16().
 *
 * When the key contains a "{...}" section with at least one byte between the
 * first '{' and the first '}' that follows it, only those bytes are hashed.
 * In every other case (no '{', no closing '}', or an empty "{}") the whole
 * key is hashed. */
static unsigned int keyHashSlot(char *key, int keylen) {
    int open = 0;           /* index of the first '{', keylen if absent */
    int close;              /* index of the first '}' after it */
    char *hashed = key;     /* region handed to crc16() */
    int hashed_len = keylen;

    while (open < keylen && key[open] != '{') open++;

    if (open < keylen) {
        close = open + 1;
        while (close < keylen && key[close] != '}') close++;
        /* Narrow the region only for a closed, non-empty tag. */
        if (close < keylen && close != open + 1) {
            hashed = key + open + 1;
            hashed_len = close - open - 1;
        }
    }
    return crc16(hashed, hashed_len) & 0x3FFF;
}
- printf("*** Configuring %s:%d as the slot owner\n", owner->ip, - owner->port); + clusterManagerLogWarn("*** Configuring %s:%d as the slot owner\n", + owner->ip, owner->port); redisReply *reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER " "SETSLOT %d %s", slot, "STABLE"); @@ -4527,7 +4571,7 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { if (over_threshold) threshold_reached = 1; } if (!threshold_reached) { - clusterManagerLogErr("*** No rebalancing needed! " + clusterManagerLogWarn("*** No rebalancing needed! " "All nodes are within the %.2f%% threshold.\n", config.cluster_manager_command.threshold); result = 0; @@ -4586,7 +4630,7 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { listRelease(lsrc); int table_len = (int) listLength(table); if (!table || table_len != numslots) { - clusterManagerLogErr("*** Assertio failed: Reshard table " + clusterManagerLogErr("*** Assertion failed: Reshard table " "!= number of slots"); result = 0; goto end_move; @@ -4629,23 +4673,148 @@ invalid_args: return 0; } -static int clusterManagerCommandCall(int argc, char **argv) { - int port = 0; - char *ip = NULL; - char *addr = argv[0]; - char *c = strrchr(addr, '@'); - int i; - if (c != NULL) *c = '\0'; - c = strrchr(addr, ':'); - if (c != NULL) { - *c = '\0'; - ip = addr; - port = atoi(++c); - } else { - fprintf(stderr, - "Invalid arguments: first agrumnt must be host:port.\n"); - return 0; +static int clusterManagerCommandImport(int argc, char **argv) { + int success = 1; + int port = 0, src_port = 0; + char *ip = NULL, *src_ip = NULL; + char *invalid_args_msg = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) { + invalid_args_msg = CLUSTER_MANAGER_INVALID_HOST_ARG; + goto invalid_args; } + if (config.cluster_manager_command.from == NULL) { + invalid_args_msg = "[ERR] Option '--cluster-from' is required for " + "subcommand 'import'.\n"; + goto invalid_args; + } + char *src_host[] = {config.cluster_manager_command.from}; + if 
(!getClusterHostFromCmdArgs(1, src_host, &src_ip, &src_port)) { + invalid_args_msg = "[ERR] Invalid --cluster-from host. You need to " + "pass a valid address (ie. 120.0.0.1:7000).\n"; + goto invalid_args; + } + clusterManagerLogInfo(">>> Importing data from %s:%d to cluster %s:%d\n", + src_ip, src_port, ip, port); + + clusterManagerNode *refnode = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + char *reply_err = NULL; + redisReply *src_reply = NULL; + // Connect to the source node. + redisContext *src_ctx = redisConnect(src_ip, src_port); + if (src_ctx->err) { + success = 0; + fprintf(stderr,"Could not connect to Redis at %s:%d: %s.\n", src_ip, + src_port, src_ctx->errstr); + goto cleanup; + } + src_reply = reconnectingRedisCommand(src_ctx, "INFO"); + if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) { + if (src_reply && src_reply->str) reply_err = src_reply->str; + success = 0; + goto cleanup; + } + if (getLongInfoField(src_reply->str, "cluster_enabled")) { + clusterManagerLogErr("[ERR] The source node should not be a " + "cluster node.\n"); + success = 0; + goto cleanup; + } + freeReplyObject(src_reply); + src_reply = reconnectingRedisCommand(src_ctx, "DBSIZE"); + if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) { + if (src_reply && src_reply->str) reply_err = src_reply->str; + success = 0; + goto cleanup; + } + int size = src_reply->integer, i; + clusterManagerLogWarn("*** Importing %d keys from DB 0\n", size); + + // Build a slot -> node map + clusterManagerNode *slots_map[CLUSTER_MANAGER_SLOTS]; + memset(slots_map, 0, sizeof(slots_map) / sizeof(clusterManagerNode *)); + listIter li; + listNode *ln; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->slots_count == 0) continue; + if (n->slots[i]) { + slots_map[i] = n; + 
break; + } + } + } + + char cmdfmt[50] = "MIGRATE %s %d %s %d %d"; + if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COPY) + strcat(cmdfmt, " %s"); + if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_REPLACE) + strcat(cmdfmt, " %s"); + + /* Use SCAN to iterate over the keys, migrating to the + * right node as needed. */ + int cursor = -999, timeout = config.cluster_manager_command.timeout; + while (cursor != 0) { + if (cursor < 0) cursor = 0; + freeReplyObject(src_reply); + src_reply = reconnectingRedisCommand(src_ctx, "SCAN %d COUNT %d", + cursor, 1000); + if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) { + if (src_reply && src_reply->str) reply_err = src_reply->str; + success = 0; + goto cleanup; + } + assert(src_reply->type == REDIS_REPLY_ARRAY); + assert(src_reply->elements >= 2); + assert(src_reply->element[1]->type == REDIS_REPLY_ARRAY); + if (src_reply->element[0]->type == REDIS_REPLY_STRING) + cursor = atoi(src_reply->element[0]->str); + else if (src_reply->element[0]->type == REDIS_REPLY_INTEGER) + cursor = src_reply->element[0]->integer; + int keycount = src_reply->element[1]->elements; + for (i = 0; i < keycount; i++) { + redisReply *kr = src_reply->element[1]->element[i]; + assert(kr->type == REDIS_REPLY_STRING); + char *key = kr->str; + uint16_t slot = keyHashSlot(key, kr->len); + clusterManagerNode *target = slots_map[slot]; + printf("Migrating %s to %s:%d: ", key, target->ip, target->port); + redisReply *r = reconnectingRedisCommand(src_ctx, cmdfmt, + target->ip, target->port, + key, 0, timeout, + "COPY", "REPLACE"); + if (!r || r->type == REDIS_REPLY_ERROR) { + if (r && r->str) { + clusterManagerLogErr("Source %s:%d replied with " + "error:\n%s\n", src_ip, src_port, + r->str); + } + success = 0; + } + freeReplyObject(r); + if (!success) goto cleanup; + clusterManagerLogOk("OK\n"); + } + } +cleanup: + if (reply_err) + clusterManagerLogErr("Source %s:%d replied with error:\n%s\n", + src_ip, src_port, 
reply_err); + if (src_ctx) redisFree(src_ctx); + if (src_reply) freeReplyObject(src_reply); + return success; +invalid_args: + fprintf(stderr, "%s", invalid_args_msg); + return 0; +} + +static int clusterManagerCommandCall(int argc, char **argv) { + int port = 0, i; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *refnode = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; argc--; @@ -4677,6 +4846,9 @@ static int clusterManagerCommandCall(int argc, char **argv) { } zfree(argvlen); return 1; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; } static int clusterManagerCommandHelp(int argc, char **argv) { From d2d9fee4bc885a4504862b4fa8e986e938da6a7f Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 10 Apr 2018 16:53:24 +0200 Subject: [PATCH 53/66] Cluster Manager: added clusterManagerCheckCluster to import command --- src/redis-cli.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/redis-cli.c b/src/redis-cli.c index 08a356eb1..9d93f29bc 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -4698,6 +4698,7 @@ static int clusterManagerCommandImport(int argc, char **argv) { clusterManagerNode *refnode = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + if (!clusterManagerCheckCluster(0)) return 0; char *reply_err = NULL; redisReply *src_reply = NULL; // Connect to the source node. From 3813fe12c4a3be1fb6de8c7d14b0b4c7339837dd Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 11 Apr 2018 17:08:53 +0200 Subject: [PATCH 54/66] Cluster Manager: add-node command. 
--- src/redis-cli.c | 168 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 154 insertions(+), 14 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 9d93f29bc..da2421c72 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -165,6 +165,7 @@ typedef struct clusterManagerCommand { char *from; char *to; char **weight; + char *master_id; int weight_argc; int slots; int timeout; @@ -1299,6 +1300,8 @@ static int parseOptions(int argc, char **argv) { usage(); } else if (!strcmp(argv[i],"--cluster-replicas") && !lastarg) { config.cluster_manager_command.replicas = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-master-id") && !lastarg) { + config.cluster_manager_command.master_id = argv[++i]; } else if (!strcmp(argv[i],"--cluster-from") && !lastarg) { config.cluster_manager_command.from = argv[++i]; } else if (!strcmp(argv[i],"--cluster-to") && !lastarg) { @@ -1335,6 +1338,9 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"--cluster-copy")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_COPY; + } else if (!strcmp(argv[i],"--cluster-slave")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_SLAVE; } else if (!strcmp(argv[i],"--cluster-use-empty-masters")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; @@ -1847,6 +1853,8 @@ static clusterManagerNode *clusterManagerNodeByName(const char *name); static clusterManagerNode *clusterManagerNodeByAbbreviatedName(const char *n); static void clusterManagerNodeResetSlots(clusterManagerNode *node); static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); +static void clusterManagerPrintNotClusterNodeError(clusterManagerNode *node, + char *err); static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err); static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts); @@ -1875,6 +1883,7 @@ static void 
clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, /* Cluster Manager commands. */ static int clusterManagerCommandCreate(int argc, char **argv); +static int clusterManagerCommandAddNode(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandFix(int argc, char **argv); @@ -1895,6 +1904,8 @@ typedef struct clusterManagerCommandDef { clusterManagerCommandDef clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", "replicas "}, + {"add-node", clusterManagerCommandAddNode, 2, + "new_host:new_port existing_host:existing_port", "slave,master-id "}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, @@ -3030,8 +3041,7 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { opts |= CLUSTER_MANAGER_OPT_GETFRIENDS; char *e = NULL; if (!clusterManagerNodeIsCluster(node, &e)) { - char *msg = (e ? e : "is not configured as a cluster node."); - clusterManagerLogErr("[ERR] Node %s:%d %s\n",node->ip,node->port,msg); + clusterManagerPrintNotClusterNodeError(node, e); if (e) zfree(e); freeClusterManagerNode(node); return 0; @@ -3313,6 +3323,27 @@ static clusterManagerNode * clusterManagerGetNodeWithMostKeysInSlot(list *nodes, return node; } +/* This function returns the master that has the least number of replicas + * in the cluster. If there are multiple masters with the same smaller + * number of replicas, one at random is returned. 
*/ + +static clusterManagerNode *clusterManagerNodeWithLeastReplicas() { + clusterManagerNode *node = NULL; + int lowest_count = 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (node->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (node == NULL || n->replicas_count < lowest_count) { + node = n; + lowest_count = n->replicas_count; + } + } + return node; +} + static int clusterManagerFixSlotsCoverage(char *all_slots) { int i, fixed = 0; list *none = NULL, *single = NULL, *multi = NULL; @@ -3966,6 +3997,26 @@ static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, array->nodes[array->count++] = node; } +static void clusterManagerPrintNotEmptyNodeError(clusterManagerNode *node, + char *err) +{ + char *msg; + if (err) msg = err; + else { + msg = "is not empty. Either the node already knows other " + "nodes (check with CLUSTER NODES) or contains some " + "key in database 0."; + } + clusterManagerLogErr("[ERR] Node %s:%d %s\n", node->ip, node->port, msg); +} + +static void clusterManagerPrintNotClusterNodeError(clusterManagerNode *node, + char *err) +{ + char *msg = (err ? err : "is not configured as a cluster node."); + clusterManagerLogErr("[ERR] Node %s:%d %s\n", node->ip, node->port, msg); +} + /* Execute redis-cli in Cluster Manager mode */ static void clusterManagerMode(clusterManagerCommandProc *proc) { int argc = config.cluster_manager_command.argc; @@ -4008,8 +4059,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { } char *err = NULL; if (!clusterManagerNodeIsCluster(node, &err)) { - char *msg = (err ? 
err : "is not configured as a cluster node."); - clusterManagerLogErr("[ERR] Node %s:%d %s\n", ip, port, msg); + clusterManagerPrintNotClusterNodeError(node, err); if (err) zfree(err); freeClusterManagerNode(node); return 0; @@ -4025,14 +4075,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { } err = NULL; if (!clusterManagerNodeIsEmpty(node, &err)) { - char *msg; - if (err) msg = err; - else { - msg = "is not empty. Either the node already knows other " - "nodes (check with CLUSTER NODES) or contains some " - "key in database 0."; - } - clusterManagerLogErr("[ERR] Node %s:%d %s\n", ip, port, msg); + clusterManagerPrintNotEmptyNodeError(node, err); if (err) zfree(err); freeClusterManagerNode(node); return 0; @@ -4263,6 +4306,104 @@ cleanup: return success; } +static int clusterManagerCommandAddNode(int argc, char **argv) { + int success = 1; + redisReply *reply = NULL; + char *ref_ip = NULL, *ip = NULL; + int ref_port = 0, port = 0; + if (!getClusterHostFromCmdArgs(argc - 1, argv + 1, &ref_ip, &ref_port)) + goto invalid_args; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) + goto invalid_args; + clusterManagerLogInfo(">>> Adding node %s:%d to cluster %s:%d\n", ip, port, + ref_ip, ref_port); + // Check the existing cluster + clusterManagerNode *refnode = clusterManagerNewNode(ref_ip, ref_port); + if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + if (!clusterManagerCheckCluster(0)) return 0; + + /* If --cluster-master-id was specified, try to resolve it now so that we + * abort before starting with the node configuration. 
*/ + clusterManagerNode *master_node = NULL; + if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_SLAVE) { + char *master_id = config.cluster_manager_command.master_id; + if (master_id != NULL) { + master_node = clusterManagerNodeByName(master_id); + if (master_node == NULL) { + clusterManagerLogErr("[ERR] No such master ID %s\n", master_id); + return 0; + } + } else { + master_node = clusterManagerNodeWithLeastReplicas(); + assert(master_node != NULL); + printf("Automatically selected master %s:%d\n", master_node->ip, + master_node->port); + } + } + + // Add the new node + clusterManagerNode *new_node = clusterManagerNewNode(ip, port); + int added = 0; + CLUSTER_MANAGER_NODE_CONNECT(new_node); + if (new_node->context->err) { + clusterManagerLogErr("[ERR] Sorry, can't connect to node %s:%d\n", + ip, port); + success = 0; + goto cleanup; + } + char *err = NULL; + if (!(success = clusterManagerNodeIsCluster(new_node, &err))) { + clusterManagerPrintNotClusterNodeError(new_node, err); + if (err) zfree(err); + goto cleanup; + } + if (!clusterManagerNodeLoadInfo(new_node, 0, &err)) { + if (err) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(new_node, err); + zfree(err); + } + success = 0; + goto cleanup; + } + if (!(success = clusterManagerNodeIsEmpty(new_node, &err))) { + clusterManagerPrintNotEmptyNodeError(new_node, err); + if (err) zfree(err); + goto cleanup; + } + clusterManagerNode *first = listFirst(cluster_manager.nodes)->value; + listAddNodeTail(cluster_manager.nodes, new_node); + added = 1; + + // Send CLUSTER MEET command to the new node + clusterManagerLogInfo(">>> Send CLUSTER MEET to node %s:%d to make it " + "join the cluster.\n", ip, port); + reply = CLUSTER_MANAGER_COMMAND(new_node, "CLUSTER MEET %s %d", + first->ip, first->port); + if (!(success = clusterManagerCheckRedisReply(new_node, reply, NULL))) + goto cleanup; + + /* Additional configuration is needed if the node is added as a slave. 
*/ + if (master_node) { + sleep(1); + clusterManagerWaitForClusterJoin(); + clusterManagerLogInfo(">>> Configure node as replica of %s:%d.\n", + master_node->ip, master_node->port); + freeReplyObject(reply); + reply = CLUSTER_MANAGER_COMMAND(new_node, "CLUSTER REPLICATE %s", + master_node->name); + if (!(success = clusterManagerCheckRedisReply(new_node, reply, NULL))) + goto cleanup; + } + clusterManagerLogOk("[OK] New node added correctly.\n"); +cleanup: + if (!added && new_node) freeClusterManagerNode(new_node); + if (reply) freeReplyObject(reply); + return success; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + static int clusterManagerCommandInfo(int argc, char **argv) { int port = 0; char *ip = NULL; @@ -4531,8 +4672,7 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { nodes_involved++; listAddNodeTail(involved, n); } - weightedNodes = zmalloc(nodes_involved * - sizeof(clusterManagerNode *)); + weightedNodes = zmalloc(nodes_involved * sizeof(clusterManagerNode *)); if (weightedNodes == NULL) goto cleanup; /* Check cluster, only proceed if it looks sane. */ clusterManagerCheckCluster(1); From 6ed5e32a5d6cedc38e62082b4ff37606d6b59898 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 11 Apr 2018 18:22:44 +0200 Subject: [PATCH 55/66] - Cluster Manager: del-node command. 
- Cluster Manager: fixed bug in clusterManagerNodeWithLeastReplicas --- src/redis-cli.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index da2421c72..9a1ab0fdb 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1884,6 +1884,7 @@ static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandAddNode(int argc, char **argv); +static int clusterManagerCommandDeleteNode(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandFix(int argc, char **argv); @@ -1906,6 +1907,7 @@ clusterManagerCommandDef clusterManagerCommands[] = { "replicas "}, {"add-node", clusterManagerCommandAddNode, 2, "new_host:new_port existing_host:existing_port", "slave,master-id "}, + {"del-node", clusterManagerCommandDeleteNode, 2, "host:port node_id",NULL}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, @@ -3335,7 +3337,7 @@ static clusterManagerNode *clusterManagerNodeWithLeastReplicas() { listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; - if (node->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; if (node == NULL || n->replicas_count < lowest_count) { node = n; lowest_count = n->replicas_count; @@ -4404,6 +4406,73 @@ invalid_args: return 0; } +static int clusterManagerCommandDeleteNode(int argc, char **argv) { + UNUSED(argc); + int success = 1; + int port = 0; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args; + char *node_id = argv[1]; + clusterManagerLogInfo(">>> Removing node 
%s from cluster %s:%d\n", + node_id, ip, port); + clusterManagerNode *ref_node = clusterManagerNewNode(ip, port); + clusterManagerNode *node = NULL; + + // Load cluster information + if (!clusterManagerLoadInfoFromNode(ref_node, 0)) return 0; + + // Check if the node exists and is not empty + node = clusterManagerNodeByName(node_id); + if (node == NULL) { + clusterManagerLogErr("[ERR] No such node ID %s\n", node_id); + return 0; + } + if (node->slots_count != 0) { + clusterManagerLogErr("[ERR] Node %s:%d is not empty! Reshard data " + "away and try again.\n", node->ip, node->port); + return 0; + } + + // Send CLUSTER FORGET to all the nodes but the node to remove + clusterManagerLogInfo(">>> Sending CLUSTER FORGET messages to the " + "cluster...\n"); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n == node) continue; + if (n->replicate && !strcasecmp(n->replicate, node_id)) { + // Reconfigure the slave to replicate with some other node + clusterManagerNode *master = clusterManagerNodeWithLeastReplicas(); + //TODO: check whether master could be the same as node + assert(master != NULL); + clusterManagerLogInfo(">>> %s:%d as replica of %s:%d\n", + n->ip, n->port, master->ip, master->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER REPLICATE %s", + master->name); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) return 0; + } + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER FORGET %s", + node_id); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) return 0; + } + + // Finally shutdown the node + clusterManagerLogInfo(">>> SHUTDOWN the node.\n"); + redisReply *r = redisCommand(node->context, "SHUTDOWN"); + success = clusterManagerCheckRedisReply(node, r, NULL); + if (r) freeReplyObject(r); + return success; +invalid_args: + fprintf(stderr, 
CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + static int clusterManagerCommandInfo(int argc, char **argv) { int port = 0; char *ip = NULL; @@ -5026,6 +5095,9 @@ static int clusterManagerCommandHelp(int argc, char **argv) { } } } + fprintf(stderr, "\nFor check, fix, reshard, del-node, set-timeout you " + "can specify the host and port of any working node in " + "the cluster.\n\n"); return 0; } From 08e41b49cf2506f2a8bb628e4c33a9d42866ee49 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 13 Apr 2018 16:09:22 +0200 Subject: [PATCH 56/66] Cluster Manager: set-timeout command --- src/redis-cli.c | 70 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 6 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 9a1ab0fdb..dba8781f1 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1890,6 +1890,7 @@ static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandFix(int argc, char **argv); static int clusterManagerCommandReshard(int argc, char **argv); static int clusterManagerCommandRebalance(int argc, char **argv); +static int clusterManagerCommandSetTimeout(int argc, char **argv); static int clusterManagerCommandImport(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); @@ -1905,21 +1906,23 @@ typedef struct clusterManagerCommandDef { clusterManagerCommandDef clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... 
hostN:portN", "replicas "}, - {"add-node", clusterManagerCommandAddNode, 2, - "new_host:new_port existing_host:existing_port", "slave,master-id "}, - {"del-node", clusterManagerCommandDeleteNode, 2, "host:port node_id",NULL}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, - {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, + {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"reshard", clusterManagerCommandReshard, -1, "host:port", "from ,to ,slots ,yes,timeout ,pipeline "}, {"rebalance", clusterManagerCommandRebalance, -1, "host:port", "weight ,use-empty-masters," "timeout ,simulate,pipeline ,threshold "}, - {"import", clusterManagerCommandImport, 1, "host:port", - "from ,copy,replace"}, + {"add-node", clusterManagerCommandAddNode, 2, + "new_host:new_port existing_host:existing_port", "slave,master-id "}, + {"del-node", clusterManagerCommandDeleteNode, 2, "host:port node_id",NULL}, {"call", clusterManagerCommandCall, -2, "host:port command arg arg .. 
arg", NULL}, + {"set-timeout", clusterManagerCommandSetTimeout, 2, + "host:port milliseconds", NULL}, + {"import", clusterManagerCommandImport, 1, "host:port", + "from ,copy,replace"}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} }; @@ -4882,6 +4885,61 @@ invalid_args: return 0; } +static int clusterManagerCommandSetTimeout(int argc, char **argv) { + UNUSED(argc); + int port = 0; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args; + int timeout = atoi(argv[1]); + if (timeout < 100) { + fprintf(stderr, "Setting a node timeout of less than 100 " + "milliseconds is a bad idea.\n"); + return 0; + } + // Load cluster information + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + int ok_count = 0, err_count = 0; + + clusterManagerLogInfo(">>> Reconfiguring node timeout in every " + "cluster node...\n"); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + char *err = NULL; + redisReply *reply = CLUSTER_MANAGER_COMMAND(n, "CONFIG %s %s %d", + "SET", + "cluster-node-timeout", + timeout); + if (reply == NULL) goto reply_err; + int ok = clusterManagerCheckRedisReply(n, reply, &err); + freeReplyObject(reply); + if (!ok) goto reply_err; + reply = CLUSTER_MANAGER_COMMAND(n, "CONFIG %s", "REWRITE"); + if (reply == NULL) goto reply_err; + ok = clusterManagerCheckRedisReply(n, reply, &err); + freeReplyObject(reply); + if (!ok) goto reply_err; + clusterManagerLogWarn("*** New timeout set for %s:%d\n", n->ip, + n->port); + ok_count++; + continue; +reply_err: + if (err == NULL) err = ""; + clusterManagerLogErr("ERR setting node-timeot for %s:%d: %s\n", n->ip, + n->port, err); + err_count++; + } + clusterManagerLogInfo(">>> New node timeout set. 
%d OK, %d ERR.\n", + ok_count, err_count); + return 1; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + static int clusterManagerCommandImport(int argc, char **argv) { int success = 1; int port = 0, src_port = 0; From be2f5c3ee34f62008fd53d4ddff63809a86cea40 Mon Sep 17 00:00:00 2001 From: artix Date: Thu, 19 Apr 2018 18:52:01 +0200 Subject: [PATCH 57/66] Cluster Manager: code improvements and more comments added. --- src/redis-cli.c | 66 +++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 35 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index dba8781f1..07732367a 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -68,7 +68,7 @@ #define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE" #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" -#define CLUSTER_MANAGER_SLOTS 16384 +#define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MIGRATE_TIMEOUT 60000 #define CLUSTER_MANAGER_MIGRATE_PIPELINE 10 #define CLUSTER_MANAGER_REBALANCE_THRESHOLD 2 @@ -172,6 +172,7 @@ typedef struct clusterManagerCommand { int pipeline; float threshold; } clusterManagerCommand; + static void createClusterManagerCommand(char *cmdname, int argc, char **argv); @@ -1788,7 +1789,7 @@ static int evalMode(int argc, char **argv) { /* The Cluster Manager global structure */ static struct clusterManager { - list *nodes; /* List of nodes int he configuration. */ + list *nodes; /* List of nodes in the configuration. */ list *errors; } cluster_manager; @@ -1821,7 +1822,7 @@ typedef struct clusterManagerNode { int balance; /* Used by rebalance */ } clusterManagerNode; -/* Data structure used to represent a sequence of nodes. */ +/* Data structure used to represent a sequence of cluster nodes. 
*/ typedef struct clusterManagerNodeArray { clusterManagerNode **nodes; /* Actual nodes array */ clusterManagerNode **alloc; /* Pointer to the allocated memory */ @@ -1829,7 +1830,7 @@ typedef struct clusterManagerNodeArray { int count; /* Non-NULL nodes count */ } clusterManagerNodeArray; -/* Used for reshard table. */ +/* Used for the reshard table. */ typedef struct clusterManagerReshardTableItem { clusterManagerNode *source; int slot; @@ -1865,7 +1866,7 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, int ip_count); static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent); static void clusterManagerShowNodes(void); -static void clusterManagerShowInfo(void); +static void clusterManagerShowClusterInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); static void clusterManagerWaitForClusterJoin(void); static int clusterManagerCheckCluster(int quiet); @@ -2067,8 +2068,9 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { clusterManagerNodeResetSlots(node); return node; } + /* Check whether reply is NULL or its type is REDIS_REPLY_ERROR. In the - * latest case, if 'err' arg is not NULL, it gets allocated with a copy + * latest case, if the 'err' arg is not NULL, it gets allocated with a copy * of reply error (it's up to the caller function to free it), elsewhere * the error is directly printed. */ static int clusterManagerCheckRedisReply(clusterManagerNode *n, @@ -2100,7 +2102,7 @@ static void clusterManagerRemoveNodeFromList(list *nodelist, } } -/* Return the node with the specified ID or NULL. */ +/* Return the node with the specified name (ID) or NULL. 
*/ static clusterManagerNode *clusterManagerNodeByName(const char *name) { if (cluster_manager.nodes == NULL) return NULL; clusterManagerNode *found = NULL; @@ -2121,7 +2123,7 @@ static clusterManagerNode *clusterManagerNodeByName(const char *name) { return found; } -/* Like get_node_by_name but the specified name can be just the first +/* Like clusterManagerNodeByName but the specified name can be just the first * part of the node ID as long as the prefix in unique across the * cluster. */ @@ -2152,6 +2154,7 @@ static void clusterManagerNodeResetSlots(clusterManagerNode *node) { node->slots_count = 0; } +/* Call "INFO" redis command on the specified node and return the reply. */ static redisReply *clusterManagerGetNodeRedisInfo(clusterManagerNode *node, char **err) { @@ -2181,7 +2184,7 @@ static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { * some key or if it already knows other nodes */ static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) { redisReply *info = clusterManagerGetNodeRedisInfo(node, err); - int is_err = 0, is_empty = 1; + int is_empty = 1; if (info == NULL) return 0; if (strstr(info->str, "db0:") != NULL) { is_empty = 0; @@ -2190,11 +2193,7 @@ static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) { freeReplyObject(info); info = CLUSTER_MANAGER_COMMAND(node, "CLUSTER INFO"); if (err != NULL) *err = NULL; - if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { - *err = zmalloc((info->len + 1) * sizeof(char)); - strcpy(*err, info->str); - } + if (!clusterManagerCheckRedisReply(node, info, err)) { is_empty = 0; goto result; } @@ -2422,7 +2421,7 @@ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { * However if the key contains the {...} pattern, only the part between * { and } is hashed. This may be useful in the future to force certain * keys to be in the same node (assuming no resharding is in progress). 
*/ -static unsigned int keyHashSlot(char *key, int keylen) { +static unsigned int clusterManagerKeyHashSlot(char *key, int keylen) { int s, e; /* start-end indexes of { and } */ for (s = 0; s < keylen; s++) @@ -2443,6 +2442,7 @@ static unsigned int keyHashSlot(char *key, int keylen) { return crc16(key+s+1,e-s-1) & 0x3FFF; } +/* Return a string representation of the cluster node. */ static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent) { sds info = sdsempty(); sds spaces = sdsempty(); @@ -2484,7 +2484,7 @@ static void clusterManagerShowNodes(void) { } } -static void clusterManagerShowInfo(void) { +static void clusterManagerShowClusterInfo(void) { int masters = 0; int keys = 0; listIter li; @@ -2533,11 +2533,12 @@ static void clusterManagerShowInfo(void) { printf("%.2f keys per slot on average.\n", keys_per_slot); } +/* Flush dirty slots configuration of the node by calling CLUSTER ADDSLOTS */ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) { redisReply *reply = NULL; void *_reply = NULL; - int is_err = 0, success = 1; + int success = 1; /* First two args are used for the command itself. */ int argc = node->slots_count + 2; sds *argv = zmalloc(argc * sizeof(*argv)); @@ -2566,14 +2567,7 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) goto cleanup; } reply = (redisReply*) _reply; - if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { - *err = zmalloc((reply->len + 1) * sizeof(char)); - strcpy(*err, reply->str); - } - success = 0; - goto cleanup; - } + success = clusterManagerCheckRedisReply(node, reply, err); cleanup: zfree(argvlen); if (argv != NULL) { @@ -2821,7 +2815,7 @@ static int clusterManagerMoveSlot(clusterManagerNode *source, } /* Flush the dirty node configuration by calling replicate for slaves or - * adding the slots for masters. */ + * adding the slots defined in the masters. 
*/ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { if (!node->dirty) return 0; redisReply *reply = NULL; @@ -2852,6 +2846,7 @@ cleanup: return success; } +/* Wait until the cluster configuration is consistent. */ static void clusterManagerWaitForClusterJoin(void) { printf("Waiting for the cluster to join\n"); while(!clusterManagerIsConfigConsistent()) { @@ -2871,13 +2866,9 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err) { redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); - int is_err = 0, success = 1; + int success = 1; *err = NULL; - if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { - *err = zmalloc((reply->len + 1) * sizeof(char)); - strcpy(*err, reply->str); - } + if (!clusterManagerCheckRedisReply(node, reply, err)) { success = 0; goto cleanup; } @@ -3114,6 +3105,7 @@ invalid_friend: return 1; } +/* Compare functions used by various sorting operations. */ int clusterManagerSlotCompare(const void *slot1, const void *slot2) { const char **i1 = (const char **)slot1; const char **i2 = (const char **)slot2; @@ -3252,6 +3244,7 @@ static int clusterManagerIsConfigConsistent(void) { return consistent; } +/* Add the error string to cluster_manager.errors and print it. */ static void clusterManagerOnError(sds err) { if (cluster_manager.errors == NULL) cluster_manager.errors = listCreate(); @@ -3259,6 +3252,9 @@ static void clusterManagerOnError(sds err) { clusterManagerLogErr("%s\n", (char *) err); } +/* Check the slots coverage of the cluster. The 'all_slots' argument must be + * and array of 16384 bytes. Every covered slot will be set to 1 in the + * 'all_slots' array. 
The function returns the total number if covered slots.*/ static int clusterManagerGetCoveredSlots(char *all_slots) { if (cluster_manager.nodes == NULL) return 0; listIter li; @@ -4482,7 +4478,7 @@ static int clusterManagerCommandInfo(int argc, char **argv) { if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; - clusterManagerShowInfo(); + clusterManagerShowClusterInfo(); return 1; invalid_args: fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); @@ -4495,7 +4491,7 @@ static int clusterManagerCommandCheck(int argc, char **argv) { if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; - clusterManagerShowInfo(); + clusterManagerShowClusterInfo(); return clusterManagerCheckCluster(0); invalid_args: fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); @@ -5047,7 +5043,7 @@ static int clusterManagerCommandImport(int argc, char **argv) { redisReply *kr = src_reply->element[1]->element[i]; assert(kr->type == REDIS_REPLY_STRING); char *key = kr->str; - uint16_t slot = keyHashSlot(key, kr->len); + uint16_t slot = clusterManagerKeyHashSlot(key, kr->len); clusterManagerNode *target = slots_map[slot]; printf("Migrating %s to %s:%d: ", key, target->ip, target->port); redisReply *r = reconnectingRedisCommand(src_ctx, cmdfmt, From d7ae96327b7bcee4f45e95334089832c560daa22 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 20 Apr 2018 18:08:30 +0200 Subject: [PATCH 58/66] Cluster Manager: fixed bug when parsing CLUSTER NODES reply (clusterManagerNodeLoadInfo) --- src/redis-cli.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 07732367a..adb2095e1 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2922,6 +2922,7 @@ static int 
clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, line = p + 1; remaining--; } else line = p; + char *dash = NULL; if (slotsdef[0] == '[') { slotsdef++; if ((p = strstr(slotsdef, "->-"))) { // Migrating @@ -2953,7 +2954,8 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, node->importing[node->importing_count - 1] = src; } - } else if ((p = strchr(slotsdef, '-')) != NULL) { + } else if ((dash = strchr(slotsdef, '-')) != NULL) { + p = dash; int start, stop; *p = '\0'; start = atoi(slotsdef); @@ -5078,7 +5080,7 @@ invalid_args: static int clusterManagerCommandCall(int argc, char **argv) { int port = 0, i; char *ip = NULL; - if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args; clusterManagerNode *refnode = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; argc--; From 0886db7ba6a35a13909a2fcbe520cae3ef5ff133 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 20 Apr 2018 19:25:08 +0200 Subject: [PATCH 59/66] Cluster Manager: fixed expected slots calculation (rebalance) Cluster Manager: fixed argument parsing after --cluster-weight --- src/redis-cli.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index adb2095e1..bdc4b7b45 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1318,6 +1318,7 @@ static int parseOptions(int argc, char **argv) { if (wargc > 0) { config.cluster_manager_command.weight = weight; config.cluster_manager_command.weight_argc = wargc; + i += wargc; } } else if (!strcmp(argv[i],"--cluster-slots") && !lastarg) { config.cluster_manager_command.slots = atoi(argv[++i]); @@ -4724,7 +4725,6 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { int nodes_involved = 0; int use_empty = config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; - involved = listCreate(); listIter li; listNode 
*ln; @@ -4762,15 +4762,15 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; weightedNodes[i++] = n; - int expected = (((float)CLUSTER_MANAGER_SLOTS / total_weight) * - (int) n->weight); + int expected = (int) (((float)CLUSTER_MANAGER_SLOTS / total_weight) * + n->weight); n->balance = n->slots_count - expected; total_balance += n->balance; /* Compute the percentage of difference between the * expected number of slots and the real one, to see * if it's over the threshold specified by the user. */ int over_threshold = 0; - if (config.cluster_manager_command.threshold > 0) { + if (threshold > 0) { if (n->slots_count > 0) { float err_perc = fabs((100-(100.0*expected/n->slots_count))); if (err_perc > threshold) over_threshold = 1; @@ -4784,7 +4784,6 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { clusterManagerLogWarn("*** No rebalancing needed! " "All nodes are within the %.2f%% threshold.\n", config.cluster_manager_command.threshold); - result = 0; goto cleanup; } /* Because of rounding, it is possible that the balance of all nodes From 2ba7fbfb94b5443a1f53aa16247f305ccf5f546c Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 20 Apr 2018 19:29:42 +0200 Subject: [PATCH 60/66] Cluster tests now using redis-cli instead of redis-trib --- tests/cluster/tests/04-resharding.tcl | 10 +++++----- tests/cluster/tests/12-replica-migration-2.tcl | 14 +++++++------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/cluster/tests/04-resharding.tcl b/tests/cluster/tests/04-resharding.tcl index 0ccbf717d..68fba135e 100644 --- a/tests/cluster/tests/04-resharding.tcl +++ b/tests/cluster/tests/04-resharding.tcl @@ -73,12 +73,12 @@ test "Cluster consistency during live resharding" { flush stdout set target [dict get [get_myself [randomInt 5]] id] set tribpid [lindex [exec \ - ../../../src/redis-trib.rb reshard \ - --from all \ - --to $target \ - --slots 100 
\ - --yes \ + ../../../src/redis-cli --cluster reshard \ 127.0.0.1:[get_instance_attrib redis 0 port] \ + --cluster-from all \ + --cluster-to $target \ + --cluster-slots 100 \ + --cluster-yes \ | [info nameofexecutable] \ ../tests/helpers/onlydots.tcl \ &] 0] diff --git a/tests/cluster/tests/12-replica-migration-2.tcl b/tests/cluster/tests/12-replica-migration-2.tcl index 48ecd1d50..3d8b7b04b 100644 --- a/tests/cluster/tests/12-replica-migration-2.tcl +++ b/tests/cluster/tests/12-replica-migration-2.tcl @@ -31,9 +31,9 @@ test "Each master should have at least two replicas attached" { set master0_id [dict get [get_myself 0] id] test "Resharding all the master #0 slots away from it" { set output [exec \ - ../../../src/redis-trib.rb rebalance \ - --weight ${master0_id}=0 \ - 127.0.0.1:[get_instance_attrib redis 0 port] >@ stdout] + ../../../src/redis-cli --cluster rebalance \ + 127.0.0.1:[get_instance_attrib redis 0 port] \ + --cluster-weight ${master0_id}=0 >@ stdout ] } test "Master #0 should lose its replicas" { @@ -49,10 +49,10 @@ test "Resharding back some slot to master #0" { # new resharding. 
after 10000 set output [exec \ - ../../../src/redis-trib.rb rebalance \ - --weight ${master0_id}=.01 \ - --use-empty-masters \ - 127.0.0.1:[get_instance_attrib redis 0 port] >@ stdout] + ../../../src/redis-cli --cluster rebalance \ + 127.0.0.1:[get_instance_attrib redis 0 port] \ + --cluster-weight ${master0_id}=.01 \ + --cluster-use-empty-masters >@ stdout] } test "Master #0 should re-acquire one or more replicas" { From ed6f173da4f65c146b25d62d6432ea5cca5cf041 Mon Sep 17 00:00:00 2001 From: artix Date: Mon, 7 May 2018 15:56:12 +0200 Subject: [PATCH 61/66] - Updated create-cluster with redis-cli - Updated README --- utils/create-cluster/README | 2 +- utils/create-cluster/create-cluster | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/create-cluster/README b/utils/create-cluster/README index f2a89839b..e682f6dc9 100644 --- a/utils/create-cluster/README +++ b/utils/create-cluster/README @@ -15,7 +15,7 @@ To create a cluster, follow these steps: 1. Edit create-cluster and change the start / end port, depending on the number of instances you want to create. 2. Use "./create-cluster start" in order to run the instances. -3. Use "./create-cluster create" in order to execute redis-trib create, so that +3. Use "./create-cluster create" in order to execute redis-cli --cluster create, so that an actual Redis cluster will be created. 4. Now you are ready to play with the cluster. AOF files and logs for each instances are created in the current directory. 
diff --git a/utils/create-cluster/create-cluster b/utils/create-cluster/create-cluster index d821683f6..468f924a4 100755 --- a/utils/create-cluster/create-cluster +++ b/utils/create-cluster/create-cluster @@ -34,7 +34,7 @@ then PORT=$((PORT+1)) HOSTS="$HOSTS 127.0.0.1:$PORT" done - ../../src/redis-trib.rb create --replicas $REPLICAS $HOSTS + ../../src/redis-cli --cluster create $HOSTS --cluster-replicas $REPLICAS exit 0 fi @@ -94,7 +94,7 @@ fi echo "Usage: $0 [start|create|stop|watch|tail|clean]" echo "start -- Launch Redis Cluster instances." -echo "create -- Create a cluster using redis-trib create." +echo "create -- Create a cluster using redis-cli --cluster create." echo "stop -- Stop Redis Cluster instances." echo "watch -- Show CLUSTER NODES output (first 30 lines) of first node." echo "tail -- Run tail -f of instance at base port + ID." From 7627338fb97596ca704c51d967311f9ed807aa7a Mon Sep 17 00:00:00 2001 From: artix Date: Mon, 7 May 2018 17:31:34 +0200 Subject: [PATCH 62/66] Cluster Manager: --cluster options can now be placed everywhere --- src/redis-cli.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index bdc4b7b45..85588fe42 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1293,8 +1293,8 @@ static int parseOptions(int argc, char **argv) { if (CLUSTER_MANAGER_MODE()) usage(); char *cmd = argv[++i]; int j = i; - for (; j < argc; j++) if (argv[j][0] == '-') break; - j--; + while (j < argc && argv[j][0] != '-') j++; + if (j > i) j--; createClusterManagerCommand(cmd, j - i, argv + i + 1); i = j; } else if (!strcmp(argv[i],"--cluster") && lastarg) { @@ -1351,6 +1351,15 @@ static int parseOptions(int argc, char **argv) { printf("redis-cli %s\n", version); sdsfree(version); exit(0); + } else if (CLUSTER_MANAGER_MODE() && argv[i][0] != '-') { + if (config.cluster_manager_command.argc == 0) { + int j = i + 1; + while (j < argc && argv[j][0] != '-') j++; + int cmd_argc = j - i; + 
config.cluster_manager_command.argc = cmd_argc; + config.cluster_manager_command.argv = argv + i; + if (cmd_argc > 1) i = j - 1; + } } else { if (argv[i][0] == '-') { fprintf(stderr, From c84512a89e99e48801986cc1ef9fdc2782204e28 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 11 May 2018 18:28:10 +0200 Subject: [PATCH 63/66] - Fixed mistyped redis command (clusterManagerGetNodeWithMostKeysInSlot) - Cluster node structure is now updated after ADDSLOTS --- src/redis-cli.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 85588fe42..d591bcd01 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -3316,7 +3316,7 @@ static clusterManagerNode * clusterManagerGetNodeWithMostKeysInSlot(list *nodes, if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) continue; redisReply *r = - CLUSTER_MANAGER_COMMAND(n, "CLUSTER COUNTKEYSINSLOTi %d", slot); + CLUSTER_MANAGER_COMMAND(n, "CLUSTER COUNTKEYSINSLOT %d", slot); int success = clusterManagerCheckRedisReply(n, r, err); if (success) { if (r->integer > numkeys || node == NULL) { @@ -3446,6 +3446,9 @@ static int clusterManagerFixSlotsCoverage(char *all_slots) { if (!clusterManagerCheckRedisReply(n, r, NULL)) fixed = -1; if (r) freeReplyObject(r); if (fixed < 0) goto cleanup; + /* Since CLUSTER ADDSLOTS succeded, we also update the slot + * info into the node struct, in order to keep it synced */ + n->slots[atoi(slot)] = 1; fixed++; } } @@ -3474,6 +3477,9 @@ static int clusterManagerFixSlotsCoverage(char *all_slots) { if (!clusterManagerCheckRedisReply(n, r, NULL)) fixed = -1; if (r) freeReplyObject(r); if (fixed < 0) goto cleanup; + /* Since CLUSTER ADDSLOTS succeded, we also update the slot + * info into the node struct, in order to keep it synced */ + n->slots[atoi(slot)] = 1; fixed++; } } @@ -3513,6 +3519,9 @@ static int clusterManagerFixSlotsCoverage(char *all_slots) { if (!clusterManagerCheckRedisReply(target, r, NULL)) fixed = -1; if (r) 
freeReplyObject(r); if (fixed < 0) goto cleanup; + /* Since CLUSTER ADDSLOTS succeded, we also update the slot + * info into the node struct, in order to keep it synced */ + target->slots[atoi(slot)] = 1; listIter nli; listNode *nln; listRewind(nodes, &nli); @@ -3633,6 +3642,9 @@ static int clusterManagerFixOpenSlot(int slot) { success = clusterManagerCheckRedisReply(owner, reply, NULL); if (reply) freeReplyObject(reply); if (!success) goto cleanup; + /* Since CLUSTER ADDSLOTS succeded, we also update the slot + * info into the node struct, in order to keep it synced */ + owner->slots[slot] = 1; /* Make sure this information will propagate. Not strictly needed * since there is no past owner, so all the other nodes will accept * whatever epoch this node will claim the slot with. */ From d7fb3f12611290d3524c41541fd375fdebb07c12 Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 15 May 2018 18:41:46 +0200 Subject: [PATCH 64/66] Cluster Manager: print flags as strings. --- src/redis-cli.c | 90 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 72 insertions(+), 18 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index d591bcd01..c108e6735 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1815,6 +1815,7 @@ typedef struct clusterManagerNode { time_t ping_sent; time_t ping_recv; int flags; + list *flags_str; /* Flags string representations */ sds replicate; /* Master ID if node is a slave */ list replicas; int dirty; /* Node has changes that can be flushed */ @@ -2001,6 +2002,17 @@ static int getClusterHostFromCmdArgs(int argc, char **argv, return 1; } +static void freeClusterManagerNodeFlags(list *flags) { + listIter li; + listNode *ln; + listRewind(flags, &li); + while ((ln = listNext(&li)) != NULL) { + sds flag = ln->value; + sdsfree(flag); + } + listRelease(flags); +} + static void freeClusterManagerNode(clusterManagerNode *node) { if (node->context != NULL) redisFree(node->context); if (node->friends != NULL) { @@ -2027,6 +2039,10 @@ 
static void freeClusterManagerNode(clusterManagerNode *node) { for (i = 0; i < node->importing_count; i++) sdsfree(node->importing[i]); zfree(node->importing); } + if (node->flags_str != NULL) { + freeClusterManagerNodeFlags(node->flags_str); + node->flags_str = NULL; + } zfree(node); } @@ -2065,6 +2081,7 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { node->ping_sent = 0; node->ping_recv = 0; node->flags = 0; + node->flags_str = NULL; node->replicate = NULL; node->dirty = 0; node->friends = NULL; @@ -2391,6 +2408,24 @@ cleanup: zfree(offenders); } +/* Return a representable string of the node's flags */ +static sds clusterManagerNodeFlagString(clusterManagerNode *node) { + sds flags = sdsempty(); + if (!node->flags_str) return flags; + int empty = 1; + listIter li; + listNode *ln; + listRewind(node->flags_str, &li); + while ((ln = listNext(&li)) != NULL) { + sds flag = ln->value; + if (strcmp(flag, "myself") == 0) continue; + if (!empty) flags = sdscat(flags, ","); + flags = sdscatfmt(flags, "%S", flag); + empty = 0; + } + return flags; +} + /* Return a representable string of the node's slots */ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { sds slots = sdsempty(); @@ -2466,12 +2501,14 @@ static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent) { info = sdscatfmt(info, "S: %S %s:%u", node->name, node->ip, node->port); else { slots = clusterManagerNodeSlotsString(node); + sds flags = clusterManagerNodeFlagString(node); info = sdscatfmt(info, "%s: %S %s:%u\n" "%s slots:%S (%u slots) " - "", //TODO: flags string + "%S", role, node->name, node->ip, node->port, spaces, - slots, node->slots_count); + slots, node->slots_count, flags); sdsfree(slots); + sdsfree(flags); } if (node->replicate != NULL) info = sdscatfmt(info, "\n%s replicates %S", spaces, node->replicate); @@ -3008,18 +3045,35 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (currentNode->name) 
sdsfree(currentNode->name); currentNode->name = sdsnew(name); } - if (strstr(flags, "noaddr") != NULL) - currentNode->flags |= CLUSTER_MANAGER_FLAG_NOADDR; - if (strstr(flags, "disconnected") != NULL) - currentNode->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT; - if (strstr(flags, "fail") != NULL) - currentNode->flags |= CLUSTER_MANAGER_FLAG_FAIL; - if (strstr(flags, "slave") != NULL) { - currentNode->flags |= CLUSTER_MANAGER_FLAG_SLAVE; - if (master_id != NULL) { - if (currentNode->replicate) sdsfree(currentNode->replicate); - currentNode->replicate = sdsnew(master_id); + if (currentNode->flags_str != NULL) + freeClusterManagerNodeFlags(currentNode->flags_str); + currentNode->flags_str = listCreate(); + int flag_len; + while ((flag_len = strlen(flags)) > 0) { + sds flag = NULL; + char *fp = strchr(flags, ','); + if (fp) { + *fp = '\0'; + flag = sdsnew(flags); + flags = fp + 1; + } else { + flag = sdsnew(flags); + flags += flag_len; } + if (strcmp(flag, "noaddr") == 0) + currentNode->flags |= CLUSTER_MANAGER_FLAG_NOADDR; + else if (strcmp(flag, "disconnected") == 0) + currentNode->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT; + else if (strcmp(flag, "fail") == 0) + currentNode->flags |= CLUSTER_MANAGER_FLAG_FAIL; + else if (strcmp(flag, "slave") == 0) { + currentNode->flags |= CLUSTER_MANAGER_FLAG_SLAVE; + if (master_id != NULL) { + if (currentNode->replicate) sdsfree(currentNode->replicate); + currentNode->replicate = sdsnew(master_id); + } + } + listAddNodeTail(currentNode->flags_str, flag); } if (config_epoch != NULL) currentNode->current_epoch = atoll(config_epoch); @@ -4283,12 +4337,12 @@ assign_replicas: goto cleanup; } } - // Give one second for the join to start, in order to avoid that - // waiting for cluster join will find all the nodes agree about - // the config as they are still empty with unassigned slots.
+ /* Give one second for the join to start, in order to avoid that + * waiting for cluster join will find all the nodes agree about + * the config as they are still empty with unassigned slots. */ sleep(1); clusterManagerWaitForClusterJoin(); - // Useful for the replicas //TODO: create a function for this? + /* Useful for the replicas */ listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; @@ -4315,7 +4369,7 @@ assign_replicas: listEmpty(cluster_manager.nodes); if (!clusterManagerLoadInfoFromNode(first_node, 0)) { success = 0; - goto cleanup; //TODO: msg? + goto cleanup; } clusterManagerCheckCluster(0); } From 6b1be4105568a33ca5448b2797dad16513a10913 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 16 May 2018 17:49:18 +0200 Subject: [PATCH 65/66] Cluster Manager: fixed unprinted reply error --- src/redis-cli.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index c108e6735..9ea47ab07 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2773,7 +2773,8 @@ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, strcpy(*err, migrate_reply->str); } printf("\n"); - CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, + migrate_reply->str); } goto next; } @@ -3021,7 +3022,6 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, else break; } else { if (addr == NULL) { - // TODO: find a better err message fprintf(stderr, "Error: invalid CLUSTER NODES reply\n"); success = 0; goto cleanup; @@ -4602,7 +4602,7 @@ static int clusterManagerCommandReshard(int argc, char **argv) { fflush(stdout); char buf[6]; int nread = read(fileno(stdin),buf,6); - if (!nread) continue; //TODO: nread < 0 + if (nread <= 0) continue; int last_idx = nread - 1; if (buf[last_idx] != '\n') { int ch; @@ -4619,7 +4619,7 @@ static int clusterManagerCommandReshard(int argc, char **argv) { printf("What 
is the receiving node ID? "); fflush(stdout); int nread = read(fileno(stdin),buf,255); - if (!nread) continue; //TODO: nread < 0 + if (nread <= 0) continue; int last_idx = nread - 1; if (buf[last_idx] != '\n') { int ch; @@ -4643,7 +4643,7 @@ static int clusterManagerCommandReshard(int argc, char **argv) { printf("Source node #%lu: ", listLength(sources) + 1); fflush(stdout); int nread = read(fileno(stdin),buf,255); - if (!nread) continue; //TODO: nread < 0 + if (nread <= 0) continue; int last_idx = nread - 1; if (buf[last_idx] != '\n') { int ch; @@ -5176,7 +5176,7 @@ static int clusterManagerCommandCall(int argc, char **argv) { redisAppendCommandArgv(n->context, argc, (const char **) argv, argvlen); int status = redisGetReply(n->context, (void **)(&reply)); if (status != REDIS_OK || reply == NULL ) - printf("%s:%d: Failed!\n", n->ip, n->port); //TODO: better message? + printf("%s:%d: Failed!\n", n->ip, n->port); else { sds formatted_reply = cliFormatReplyTTY(reply, ""); printf("%s:%d: %s\n", n->ip, n->port, (char *) formatted_reply); From a41999e513dc65111591c447cf69c722ac31d46d Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 16 May 2018 18:04:13 +0200 Subject: [PATCH 66/66] Removed TODO in redis-cli --- src/redis-cli.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 9ea47ab07..850b10241 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -4522,7 +4522,6 @@ static int clusterManagerCommandDeleteNode(int argc, char **argv) { if (n->replicate && !strcasecmp(n->replicate, node_id)) { // Reconfigure the slave to replicate with some other node clusterManagerNode *master = clusterManagerNodeWithLeastReplicas(); - //TODO: check whether master could be the same as node assert(master != NULL); clusterManagerLogInfo(">>> %s:%d as replica of %s:%d\n", n->ip, n->port, master->ip, master->port);