From 6a04295fd444b4140563af4bfb65f4aef9058329 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 12 Jan 2018 11:06:24 +0100 Subject: [PATCH 01/66] Cluster Manager mode --- src/redis-cli.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 108 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 372d02d97..59abd571e 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -65,6 +65,7 @@ #define REDIS_CLI_HISTFILE_DEFAULT ".rediscli_history" #define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE" #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" +#define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) /* --latency-dist palettes. */ int spectrum_palette_color_size = 19; @@ -77,6 +78,16 @@ int spectrum_palette_mono[] = {0,233,234,235,237,239,241,243,245,247,249,251,253 int *spectrum_palette; int spectrum_palette_size; +/* Cluster Manager command info */ +struct clusterManagerCommand { + char *name; + int argc; + char **argv; + int flags; + int replicas; +}; + + static redisContext *context; static struct config { char *hostip; @@ -119,8 +130,29 @@ static struct config { int eval_ldb_end; /* Lua debugging session ended. */ int enable_ldb_on_eval; /* Handle manual SCRIPT DEBUG + EVAL commands. */ int last_cmd_type; + struct clusterManagerCommand cluster_manager_command; } config; +/* Cluster Manager commands. */ +typedef int clusterManagerCommandProc(int argc, char **argv); +static struct clusterManagerCommandDef { + char *name; + clusterManagerCommandProc *proc; + int arity; +}; + +static int clusterManagerCommandCreate(int argc, char **argv) { + printf("CLUSTER: create\n"); + printf("Arguments: %d\n", argc); + printf("Replicas: %d\n", config.cluster_manager_command.replicas); + fprintf(stderr, "Not implemented yet!\n"); + return 0; +} + +struct clusterManagerCommandDef clusterManagerCommands[] = { + {"create", clusterManagerCommandCreate, -2} +}; + /* User preferences. 
*/ static struct pref { int hints; @@ -1061,6 +1093,13 @@ static redisReply *reconnectingRedisCommand(redisContext *c, const char *fmt, .. * User interface *--------------------------------------------------------------------------- */ +static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { + struct clusterManagerCommand *cmd = &config.cluster_manager_command; + cmd->name = cmdname; + cmd->argc = argc; + cmd->argv = argc ? argv : NULL; +} + static int parseOptions(int argc, char **argv) { int i; @@ -1146,6 +1185,18 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"-d") && !lastarg) { sdsfree(config.mb_delim); config.mb_delim = sdsnew(argv[++i]); + } else if (!strcmp(argv[i],"--cluster") && !lastarg) { + if (CLUSTER_MANAGER_MODE()) usage(); + char *cmd = argv[++i]; + int j = i; + for (; j < argc; j++) if (argv[j][0] == '-') break; + j--; + createClusterManagerCommand(cmd, j - i, argv + i); + i = j; + } else if (!strcmp(argv[i],"--cluster") && lastarg) { + usage(); + } else if (!strcmp(argv[i],"--cluster-replicas") && !lastarg) { + config.cluster_manager_command.replicas = atoi(argv[++i]); } else if (!strcmp(argv[i],"-v") || !strcmp(argv[i], "--version")) { sds version = cliVersion(); printf("redis-cli %s\n", version); @@ -1243,9 +1294,13 @@ static void usage(void) { " --ldb-sync-mode Like --ldb but uses the synchronous Lua debugger, in\n" " this mode the server is blocked and script changes are\n" " are not rolled back from the server memory.\n" +" --cluster [args...]\n" +" Cluster Manager command and arguments (see below).\n" " --help Output this help and exit.\n" " --version Output version and exit.\n" "\n" +"Cluster Manager Commands:\n" +"\n" "Examples:\n" " cat /etc/passwd | redis-cli -x set mypasswd\n" " redis-cli get mypasswd\n" @@ -1569,6 +1624,43 @@ static int evalMode(int argc, char **argv) { return retval; } +/*------------------------------------------------------------------------------ + * Cluster 
Manager mode + *--------------------------------------------------------------------------- */ + +static clusterManagerCommandProc *validateClusterManagerCommand(void) { + int i, commands_count = sizeof(clusterManagerCommands) / + sizeof(struct clusterManagerCommandDef); + clusterManagerCommandProc *proc = NULL; + char *cmdname = config.cluster_manager_command.name; + int argc = config.cluster_manager_command.argc; + for (i = 0; i < commands_count; i++) { + struct clusterManagerCommandDef cmddef = clusterManagerCommands[i]; + if (!strcmp(cmddef.name, cmdname)) { + if ((cmddef.arity > 0 && argc != cmddef.arity) || + (cmddef.arity < 0 && argc < (cmddef.arity * -1))) { + fprintf(stderr, "[ERR] Wrong number of arguments for " + "specified --cluster sub command\n"); + return NULL; + } + proc = cmddef.proc; + } + } + if (!proc) fprintf(stderr, "Unknown --cluster subcommand\n"); + return proc; +} + +static void clusterManagerMode(clusterManagerCommandProc *proc) { + int argc = config.cluster_manager_command.argc; + char **argv = config.cluster_manager_command.argv; + if (!proc(argc, argv)) { + sdsfree(config.hostip); + sdsfree(config.mb_delim); + exit(1); + } + exit(0); +} + /*------------------------------------------------------------------------------ * Latency and latency history modes *--------------------------------------------------------------------------- */ @@ -2861,7 +2953,11 @@ int main(int argc, char **argv) { config.eval_ldb_sync = 0; config.enable_ldb_on_eval = 0; config.last_cmd_type = -1; - + config.cluster_manager_command.name = NULL; + config.cluster_manager_command.argc = 0; + config.cluster_manager_command.argv = NULL; + config.cluster_manager_command.flags = 0; + config.cluster_manager_command.replicas = 0; pref.hints = 1; spectrum_palette = spectrum_palette_color; @@ -2877,6 +2973,17 @@ int main(int argc, char **argv) { argc -= firstarg; argv += firstarg; + /* Cluster Manager mode */ + if (CLUSTER_MANAGER_MODE()) { + clusterManagerCommandProc *proc 
= validateClusterManagerCommand(); + if (!proc) { + sdsfree(config.hostip); + sdsfree(config.mb_delim); + exit(1); + } + clusterManagerMode(proc); + } + /* Latency mode */ if (config.latency_mode) { if (cliConnect(0) == REDIS_ERR) exit(1); From cb4cfa8eeea1d55ea096416579f962783a69fc92 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 31 Jan 2018 16:26:21 +0100 Subject: [PATCH 02/66] Cluster Manager: 'create', 'info' and 'check' commands --- src/Makefile | 2 +- src/redis-cli.c | 1297 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 1272 insertions(+), 27 deletions(-) diff --git a/src/Makefile b/src/Makefile index b896b1263..a5e0e231a 100644 --- a/src/Makefile +++ b/src/Makefile @@ -146,7 +146,7 @@ REDIS_SERVER_NAME=redis-server REDIS_SENTINEL_NAME=redis-sentinel REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.c REDIS_CLI_NAME=redis-cli -REDIS_CLI_OBJ=anet.o adlist.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o +REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o REDIS_BENCHMARK_NAME=redis-benchmark REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o zmalloc.o redis-benchmark.o REDIS_CHECK_RDB_NAME=redis-check-rdb diff --git a/src/redis-cli.c b/src/redis-cli.c index 59abd571e..ef917cca5 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -41,13 +41,15 @@ #include #include #include -#include +#include 
#include #include #include #include #include /* use sds.h from hiredis, so that only one set of sds functions will be present in the binary */ +#include "dict.h" +#include "adlist.h" #include "zmalloc.h" #include "linenoise.h" #include "help.h" @@ -65,7 +67,64 @@ #define REDIS_CLI_HISTFILE_DEFAULT ".rediscli_history" #define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE" #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" +#define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) +#define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) (nodes/(replicas + 1)) +#define CLUSTER_MANAGER_COMMAND(n,...) \ + (reconnectingRedisCommand(n->context, __VA_ARGS__)) +#define CLUSTER_MANAGER_NODE_INFO(n) (CLUSTER_MANAGER_COMMAND(n, "INFO")) + +#define CLUSTER_MANAGER_RESET_SLOTS(n) do { \ + memset(n->slots, 0, sizeof(n->slots)); \ + n->slots_count = 0; \ +} while(0) + +#define CLUSTER_MANAGER_NODEARRAY_INIT(array, alloc_len) do { \ + array->nodes = zcalloc(alloc_len * sizeof(clusterManagerNode*));\ + array->alloc = array->nodes; \ + array->len = alloc_len; \ + array->count = 0; \ +} while(0) + +#define CLUSTER_MANAGER_NODEARRAY_RESET(array) do { \ + if (array->nodes > array->alloc) { \ + array->len = array->nodes - array->alloc; \ + array->nodes = array->alloc; \ + array->count = 0; \ + int i = 0; \ + for(; i < array->len; i++) { \ + if (array->nodes[i] != NULL) array->count++;\ + } \ + } \ +} while(0) + +#define CLUSTER_MANAGER_NODEARRAY_FREE(array) zfree(array->alloc) + +#define CLUSTER_MANAGER_NODEARRAY_SHIFT(array, nodeptr) do {\ + assert(array->nodes < (array->nodes + array->len)); \ + if (*array->nodes != NULL) array->count--; \ + nodeptr = *array->nodes; \ + array->nodes++; \ + array->len--; \ +} while(0) + +#define CLUSTER_MANAGER_NODEARRAY_ADD(array, nodeptr) do { \ + assert(array->nodes < (array->nodes + array->len)); \ + assert(nodeptr != NULL); \ + array->nodes[array->count++] = nodeptr; \ +} while(0) + +#define 
CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err) \ + fprintf(stderr,"Node %s:%d replied with error:\n%s\n", n->ip, n->port, err); + +#define CLUSTER_MANAGER_FLAG_MYSELF 1 << 0 +#define CLUSTER_MANAGER_FLAG_SLAVE 1 << 1 +#define CLUSTER_MANAGER_FLAG_FRIEND 1 << 2 +#define CLUSTER_MANAGER_FLAG_NOADDR 1 << 3 +#define CLUSTER_MANAGER_FLAG_DISCONNECT 1 << 4 +#define CLUSTER_MANAGER_FLAG_FAIL 1 << 5 + +#define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 /* --latency-dist palettes. */ int spectrum_palette_color_size = 19; @@ -79,13 +138,13 @@ int *spectrum_palette; int spectrum_palette_size; /* Cluster Manager command info */ -struct clusterManagerCommand { +typedef struct clusterManagerCommand { char *name; int argc; char **argv; int flags; int replicas; -}; +} clusterManagerCommand; static redisContext *context; @@ -130,28 +189,70 @@ static struct config { int eval_ldb_end; /* Lua debugging session ended. */ int enable_ldb_on_eval; /* Handle manual SCRIPT DEBUG + EVAL commands. */ int last_cmd_type; - struct clusterManagerCommand cluster_manager_command; + clusterManagerCommand cluster_manager_command; } config; -/* Cluster Manager commands. 
*/ +/* Cluster Manager */ + +static struct clusterManager { + list *nodes; +} cluster_manager; + +typedef struct clusterManagerNode { + redisContext *context; + sds name; + char *ip; + int port; + uint64_t current_epoch; + time_t ping_sent; + time_t ping_recv; + int flags; + sds replicate; + int dirty; + uint8_t slots[CLUSTER_MANAGER_SLOTS]; + int slots_count; + list *friends; +} clusterManagerNode; + +typedef struct clusterManagerNodeArray { + clusterManagerNode **nodes; + clusterManagerNode **alloc; + int len; + int count; +} clusterManagerNodeArray; + +static clusterManagerNode *clusterManagerNewNode(char *ip, int port); +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); +static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, + char **err); +static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts); +static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err); +static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, + int ip_len, clusterManagerNode ***offending, int *offending_len); +static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, + int ip_len); +static sds clusterManagerNodeInfo(clusterManagerNode *node); +static void clusterManagerShowNodes(void); +static void clusterManagerShowInfo(void); +static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); +static void clusterManagerWaitForClusterJoin(void); +static void clusterManagerCheckCluster(int quiet); typedef int clusterManagerCommandProc(int argc, char **argv); -static struct clusterManagerCommandDef { +typedef struct clusterManagerCommandDef { char *name; clusterManagerCommandProc *proc; int arity; -}; + char *args; + char *options; +} clusterManagerCommandDef; +static int clusterManagerIsConfigConsistent(void); -static int clusterManagerCommandCreate(int argc, char **argv) { - printf("CLUSTER: create\n"); - printf("Arguments: %d\n", argc); - 
printf("Replicas: %d\n", config.cluster_manager_command.replicas); - fprintf(stderr, "Not implemented yet!\n"); - return 0; -} +/* Cluster Manager commands. */ -struct clusterManagerCommandDef clusterManagerCommands[] = { - {"create", clusterManagerCommandCreate, -2} -}; +static int clusterManagerCommandCreate(int argc, char **argv); +static int clusterManagerCommandInfo(int argc, char **argv); +static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandHelp(int argc, char **argv); /* User preferences. */ static struct pref { @@ -165,6 +266,9 @@ char *redisGitSHA1(void); char *redisGitDirty(void); static int cliConnect(int force); +static char *getInfoField(char *info, char *field); +static long getLongInfoField(char *info, char *field); + /*------------------------------------------------------------------------------ * Utility functions *--------------------------------------------------------------------------- */ @@ -317,6 +421,36 @@ static void parseRedisUri(const char *uri) { config.dbnum = atoi(curr); } +static uint64_t dictSdsHash(const void *key) { + return dictGenHashFunction((unsigned char*)key, sdslen((char*)key)); +} + +static int dictSdsKeyCompare(void *privdata, const void *key1, + const void *key2) +{ + int l1,l2; + DICT_NOTUSED(privdata); + + l1 = sdslen((sds)key1); + l2 = sdslen((sds)key2); + if (l1 != l2) return 0; + return memcmp(key1, key2, l1) == 0; +} + +static void dictSdsDestructor(void *privdata, void *val) +{ + DICT_NOTUSED(privdata); + + sdsfree(val); +} + +/* _serverAssert is needed by dict */ +void _serverAssert(const char *estr, const char *file, int line) { + fprintf(stderr, "=== ASSERTION FAILED ==="); + fprintf(stderr, "==> %s:%d '%s' is not true",file,line,estr); + *((char*)-1) = 'x'; +} + /*------------------------------------------------------------------------------ * Help functions *--------------------------------------------------------------------------- */ @@ -1094,7 +1228,7 @@ static 
redisReply *reconnectingRedisCommand(redisContext *c, const char *fmt, .. *--------------------------------------------------------------------------- */ static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { - struct clusterManagerCommand *cmd = &config.cluster_manager_command; + clusterManagerCommand *cmd = &config.cluster_manager_command; cmd->name = cmdname; cmd->argc = argc; cmd->argv = argc ? argv : NULL; @@ -1191,7 +1325,7 @@ static int parseOptions(int argc, char **argv) { int j = i; for (; j < argc; j++) if (argv[j][0] == '-') break; j--; - createClusterManagerCommand(cmd, j - i, argv + i); + createClusterManagerCommand(cmd, j - i, argv + i + 1); i = j; } else if (!strcmp(argv[i],"--cluster") && lastarg) { usage(); @@ -1300,6 +1434,7 @@ static void usage(void) { " --version Output version and exit.\n" "\n" "Cluster Manager Commands:\n" +" Use --cluster help to list all available cluster manager commands.\n" "\n" "Examples:\n" " cat /etc/passwd | redis-cli -x set mypasswd\n" @@ -1628,14 +1763,22 @@ static int evalMode(int argc, char **argv) { * Cluster Manager mode *--------------------------------------------------------------------------- */ +clusterManagerCommandDef clusterManagerCommands[] = { + {"create", clusterManagerCommandCreate, -2, "host1:port1 ... 
hostN:portN", + "cluster-replicas"}, + {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, + {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, + {"help", clusterManagerCommandHelp, 0, NULL, NULL} +}; + static clusterManagerCommandProc *validateClusterManagerCommand(void) { int i, commands_count = sizeof(clusterManagerCommands) / - sizeof(struct clusterManagerCommandDef); + sizeof(clusterManagerCommandDef); clusterManagerCommandProc *proc = NULL; char *cmdname = config.cluster_manager_command.name; int argc = config.cluster_manager_command.argc; for (i = 0; i < commands_count; i++) { - struct clusterManagerCommandDef cmddef = clusterManagerCommands[i]; + clusterManagerCommandDef cmddef = clusterManagerCommands[i]; if (!strcmp(cmddef.name, cmdname)) { if ((cmddef.arity > 0 && argc != cmddef.arity) || (cmddef.arity < 0 && argc < (cmddef.arity * -1))) { @@ -1650,15 +1793,1117 @@ static clusterManagerCommandProc *validateClusterManagerCommand(void) { return proc; } +static void freeClusterManagerNode(clusterManagerNode *node) { + if (node->context != NULL) redisFree(node->context); + if (node->friends != NULL) { + listIter li; + listNode *ln; + listRewind(node->friends,&li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *fn = ln->value; + freeClusterManagerNode(fn); + } + listRelease(node->friends); + node->friends = NULL; + } + if (node->name != NULL) sdsfree(node->name); + if (node->replicate != NULL) sdsfree(node->replicate); + if ((node->flags & CLUSTER_MANAGER_FLAG_FRIEND) && node->ip) + sdsfree(node->ip); + zfree(node); +} + +static void freeClusterManager(void) { + if (cluster_manager.nodes != NULL) { + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes,&li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + freeClusterManagerNode(n); + } + listRelease(cluster_manager.nodes); + cluster_manager.nodes = NULL; + } +} + +static clusterManagerNode *clusterManagerNewNode(char * ip, int port) 
{ + clusterManagerNode *node = zmalloc(sizeof(*node)); + node->context = NULL; + node->name = NULL; + node->ip = ip; + node->port = port; + node->current_epoch = 0; + node->ping_sent = 0; + node->ping_recv = 0; + node->flags = 0; + node->replicate = NULL; + node->dirty = 0; + node->friends = NULL; + CLUSTER_MANAGER_RESET_SLOTS(node); + return node; +} + +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { + redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); + int is_err = 0; + *err = NULL; + if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((info->len + 1) * sizeof(char)); + strcpy(*err, info->str); + } + freeReplyObject(info); + return 0; + } + int is_cluster = (int) getLongInfoField(info->str, "cluster_enabled"); + freeReplyObject(info); + return is_cluster; +} + +static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) { + redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); + int is_err = 0, is_empty = 1; + *err = NULL; + if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((info->len + 1) * sizeof(char)); + strcpy(*err, info->str); + } + is_empty = 0; + goto result; + } + if (strstr(info->str, "db0:") != NULL) { + is_empty = 0; + goto result; + } + freeReplyObject(info); + info = CLUSTER_MANAGER_COMMAND(node, "CLUSTER INFO"); + if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((info->len + 1) * sizeof(char)); + strcpy(*err, info->str); + } + is_empty = 0; + goto result; + } + long known_nodes = getLongInfoField(info->str, "cluster_known_nodes"); + is_empty = (known_nodes == 1); +result: + freeReplyObject(info); + return is_empty; +} + +static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, + int ip_len, clusterManagerNode ***offending, int *offending_len) +{ + assert(offending != NULL); + int score = 
0, i, j; + int node_len = cluster_manager.nodes->len; + *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); + clusterManagerNode **offending_p = *offending; + dictType dtype = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + NULL, /* key destructor */ + dictSdsDestructor /* val destructor */ + }; + for (i = 0; i < ip_len; i++) { + clusterManagerNodeArray *node_array = &(ipnodes[i]); + dict *related = dictCreate(&dtype, NULL); + char *ip = NULL; + for (j = 0; j < node_array->len; j++) { + clusterManagerNode *node = node_array->nodes[j]; + if (node == NULL) continue; + if (!ip) ip = node->ip; + sds types; + if (!node->replicate) { + assert(node->name != NULL); + dictEntry *entry = dictFind(related, node->name); + if (entry) types = (sds) dictGetVal(entry); + else types = sdsempty(); + types = sdscatprintf(types, "m%s", types); + dictReplace(related, node->name, types); + } else { + dictEntry *entry = dictFind(related, node->replicate); + if (entry) types = (sds) dictGetVal(entry); + else { + types = sdsempty(); + dictAdd(related, node->replicate, types); + } + sdscat(types, "s"); + } + } + dictIterator *iter = dictGetIterator(related); + dictEntry *entry; + while ((entry = dictNext(iter)) != NULL) { + sds types = (sds) dictGetVal(entry); + sds name = (sds) dictGetKey(entry); + int typeslen = sdslen(types); + if (typeslen < 2) continue; + if (types[0] == 'm') score += (10000 * (typeslen - 1)); + else score += (1 * typeslen); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->replicate == NULL) continue; + if (!strcmp(n->replicate, name) && !strcmp(n->ip, ip)) { + *(offending_p++) = n; + break; + } + } + } + if (offending_len != NULL) *offending_len = offending_p - *offending; + dictReleaseIterator(iter); + dictRelease(related); + } + return score; +} + +static void 
clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, + int ip_len) +{ + clusterManagerNode **offenders = NULL, **aux; + int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); + if (score == 0) goto cleanup; + printf(">>> Trying to optimize slaves allocation for anti-affinity\n"); + int node_len = cluster_manager.nodes->len; + int maxiter = 500 * node_len; + srand(time(NULL)); + while (maxiter > 0) { + int offending_len = 0; + if (offenders != NULL) { + zfree(offenders); + offenders = NULL; + } + score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &offenders, + &offending_len); + if (score == 0) break; + int rand_idx = rand() % offending_len; + clusterManagerNode *first = offenders[rand_idx], *second; + clusterManagerNode **other_replicas = zcalloc((node_len - 1) * + sizeof(*other_replicas)); + int other_replicas_count = 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n != first && n->replicate != NULL) + other_replicas[other_replicas_count++] = n; + } + if (other_replicas_count == 0) { + zfree(other_replicas); + break; + } + rand_idx = rand() % other_replicas_count; + second = other_replicas[rand_idx]; + char *first_master = first->replicate, + *second_master = second->replicate; + first->replicate = second_master, first->dirty = 1; + second->replicate = first_master, second->dirty = 1; + zfree(aux), aux = NULL; + int new_score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, + &aux, NULL); + if (new_score > score) { + first->replicate = first_master; + second->replicate = second_master; + } + zfree(other_replicas); + maxiter--; + } + zfree(aux), aux = NULL; + score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); + char *msg; + if (score == 0) msg = "[OK] Perfect anti-affinity obtained!"; + else if (score >= 10000) + msg = ("[WARNING] Some slaves are in the same host as their master"); 
+ else + msg=("[WARNING] Some slaves of the same master are in the same host"); + printf("%s\n", msg); +cleanup: + zfree(offenders); + zfree(aux); +} + +static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { + sds slots = sdsempty(); + int first_range_idx = -1, last_slot_idx = -1, i; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + int has_slot = node->slots[i]; + if (has_slot) { + if (first_range_idx == -1) { + if (sdslen(slots)) slots = sdscat(slots, ","); + first_range_idx = i; + slots = sdscatfmt(slots, "[%u", i); + } + last_slot_idx = i; + } else { + if (last_slot_idx >= 0) { + if (first_range_idx == last_slot_idx) + slots = sdscat(slots, "]"); + else slots = sdscatfmt(slots, "-%u]", last_slot_idx); + } + last_slot_idx = -1; + first_range_idx = -1; + } + } + if (last_slot_idx >= 0) { + if (first_range_idx == last_slot_idx) slots = sdscat(slots, "]"); + else slots = sdscatfmt(slots, "-%u]", last_slot_idx); + } + return slots; +} + +static sds clusterManagerNodeInfo(clusterManagerNode *node) { + sds info = sdsempty(); + int is_master = !(node->flags & CLUSTER_MANAGER_FLAG_SLAVE); + char *role = (is_master ? 
"M" : "S"); + sds slots = NULL; + if (node->dirty && node->replicate != NULL) + info = sdscatfmt(info, "S: %S %s:%u", node->name, node->ip, node->port); + else { + slots = clusterManagerNodeSlotsString(node); + info = sdscatfmt(info, "%s: %S %s:%u\n" + " slots:%S (%u slots) " + "", //TODO: flags string + role, node->name, node->ip, node->port, + slots, node->slots_count); + sdsfree(slots); + } + if (node->replicate != NULL) + info = sdscatfmt(info, "\n replicates %S", node->replicate); + //else if () {} //TODO: add replicas info + return info; +} + +static void clusterManagerShowNodes(void) { + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + sds info = clusterManagerNodeInfo(node); + printf("%s\n", info); + sdsfree(info); + } +} + +static void clusterManagerShowInfo(void) { + int masters = 0; + int keys = 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + if (!(node->flags & CLUSTER_MANAGER_FLAG_SLAVE)) { + if (!node->name) continue; + int replicas = 0; + int dbsize = -1; + char name[9]; + memcpy(name, node->name, 8); + name[8] = '\0'; + listIter ri; + listNode *rn; + listRewind(cluster_manager.nodes, &ri); + while ((rn = listNext(&ri)) != NULL) { + clusterManagerNode *n = rn->value; + if (n == node || !(n->flags & CLUSTER_MANAGER_FLAG_SLAVE)) + continue; + if (n->replicate && !strcmp(n->replicate, node->name)) + replicas++; + } + redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "DBSIZE"); + if (reply != NULL || reply->type == REDIS_REPLY_INTEGER) + dbsize = reply->integer; + if (dbsize < 0) { + char *err = ""; + if (reply != NULL && reply->type == REDIS_REPLY_ERROR) + err = reply->str; + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + if (reply != NULL) freeReplyObject(reply); + return; + }; + if (reply != NULL) freeReplyObject(reply); + printf("%s:%d 
(%s...) -> %d keys | %d slots | %d slaves.\n", + node->ip, node->port, name, dbsize, + node->slots_count, replicas); + masters++; + keys += dbsize; + } + } + printf("[OK] %d keys in %d masters.\n", keys, masters); + float keys_per_slot = keys / (float) CLUSTER_MANAGER_SLOTS; + printf("%.2f keys per slot on average.\n", keys_per_slot); +} + +static int clusterManagerAddSlots(clusterManagerNode *node, char**err) +{ + redisReply *reply = NULL; + void *_reply = NULL; + int is_err = 0; + int argc; + sds *argv = NULL; + size_t *argvlen = NULL; + *err = NULL; + sds cmd = sdsnew("CLUSTER ADDSLOTS "); + int i, added = 0; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + int last_slot = (i == (CLUSTER_MANAGER_SLOTS - 1)); + if (node->slots[i]) { + char *fmt = (!last_slot ? "%u " : "%u"); + cmd = sdscatfmt(cmd, fmt, i); + added++; + } + } + if (!added) goto node_cmd_err; + argv = cliSplitArgs(cmd, &argc); + if (argc == 0 || argv == NULL) goto node_cmd_err; + argvlen = zmalloc(argc*sizeof(size_t)); + for (i = 0; i < argc; i++) + argvlen[i] = sdslen(argv[i]); + redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen); + if (redisGetReply(node->context, &_reply) != REDIS_OK) goto node_cmd_err; + reply = (redisReply*) _reply; + if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + strcpy(*err, reply->str); + } + goto node_cmd_err; + } + sdsfree(cmd); + zfree(argvlen); + sdsfreesplitres(argv,argc); + freeReplyObject(reply); + return 1; +node_cmd_err: + sdsfree(cmd); + zfree(argvlen); + if (argv != NULL) sdsfreesplitres(argv,argc); + if (reply != NULL) freeReplyObject(reply); + return 0; +} + +static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { + if (!node->dirty) return 0; + redisReply *reply = NULL; + int is_err = 0; + *err = NULL; + if (node->replicate != NULL) { + reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER REPLICATE %s", + 
node->replicate); + if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + strcpy(*err, reply->str); + } + goto node_cmd_err; + } + } else { + int added = clusterManagerAddSlots(node, err); + if (!added || *err != NULL) goto node_cmd_err; + } + node->dirty = 0; + freeReplyObject(reply); + return 1; +node_cmd_err: + freeReplyObject(reply); + return 0; +} + +static void clusterManagerWaitForClusterJoin(void) { + printf("Waiting for the cluster to join\n"); + while(!clusterManagerIsConfigConsistent()) { + printf("."); + fflush(stdout); + sleep(1); + } + printf("\n"); +} + +static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, + char **err) +{ + redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); + int is_err = 0; + *err = NULL; + if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + strcpy(*err, reply->str); + } + goto node_cmd_err; + } + int getfriends = (opts & CLUSTER_MANAGER_OPT_GETFRIENDS); + char *lines = reply->str, *p, *line; + while ((p = strstr(lines, "\n")) != NULL) { + *p = '\0'; + line = lines; + lines = p + 1; + char *name = NULL, *addr = NULL, *flags = NULL, *master_id = NULL, + *ping_sent = NULL, *ping_recv = NULL, *config_epoch = NULL, + *link_status = NULL; + int i = 0; + while ((p = strchr(line, ' ')) != NULL) { + *p = '\0'; + char *token = line; + line = p + 1; + switch(i++){ + case 0: name = token; break; + case 1: addr = token; break; + case 2: flags = token; break; + case 3: master_id = token; break; + case 4: ping_sent = token; break; + case 5: ping_recv = token; break; + case 6: config_epoch = token; break; + case 7: link_status = token; break; + } + if (i == 8) break; // Slots + } + if (!flags) goto node_cmd_err; + int myself = (strstr(flags, "myself") != NULL); + if (strstr(flags, "noaddr") != NULL) + 
node->flags |= CLUSTER_MANAGER_FLAG_NOADDR; + if (strstr(flags, "disconnected") != NULL) + node->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT; + if (strstr(flags, "fail") != NULL) + node->flags |= CLUSTER_MANAGER_FLAG_FAIL; + clusterManagerNode *currentNode = NULL; + if (myself) { + node->flags |= CLUSTER_MANAGER_FLAG_MYSELF; + currentNode = node; + CLUSTER_MANAGER_RESET_SLOTS(node); + if (i == 8) { + int remaining = strlen(line); + //TODO: just while(remaining) && assign p inside the block + while ((p = strchr(line, ' ')) != NULL || remaining) { + if (p == NULL) p = line + remaining; + remaining -= (p - line); + + char *slotsdef = line; + *p = '\0'; + if (remaining) line = p + 1; + else line = p; + if (slotsdef[0] == '[') { + //TODO: migrating/importing + } else if ((p = strchr(slotsdef, '-')) != NULL) { + int start, stop; + *p = '\0'; + start = atoi(slotsdef); + stop = atoi(p + 1); + node->slots_count += (stop - (start - 1)); + while (start <= stop) node->slots[start++] = 1; + } else if (p > slotsdef) { + node->slots[atoi(slotsdef)] = 1; + node->slots_count++; + } + } + } + node->dirty = 0; + } else if (!getfriends) { + if (!(node->flags & CLUSTER_MANAGER_FLAG_MYSELF)) continue; + else break; + } else { + if (addr == NULL) { + // TODO: find a better err message + fprintf(stderr, "Error: invalid CLUSTER NODES reply\n"); + goto node_cmd_err; + } + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c == NULL) { + fprintf(stderr, "Error: invalid CLUSTER NODES reply\n"); + goto node_cmd_err; + } + *c = '\0'; + int port = atoi(++c); + currentNode = clusterManagerNewNode(sdsnew(addr), port); + currentNode->flags |= CLUSTER_MANAGER_FLAG_FRIEND; + if (node->friends == NULL) node->friends = listCreate(); + listAddNodeTail(node->friends, currentNode); + } + if (name != NULL) currentNode->name = sdsnew(name); + if (strstr(flags, "slave") != NULL) { + currentNode->flags |= CLUSTER_MANAGER_FLAG_SLAVE; + if (master_id != NULL) 
currentNode->replicate = sdsnew(master_id); + } + if (config_epoch != NULL) + currentNode->current_epoch = atoll(config_epoch); + if (ping_sent != NULL) currentNode->ping_sent = atoll(ping_sent); + if (ping_recv != NULL) currentNode->ping_recv = atoll(ping_recv); + if (!getfriends && myself) break; + } + freeReplyObject(reply); + return 1; +node_cmd_err: + freeReplyObject(reply); + return 0; +} + +static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { + if (node->context == NULL) + node->context = redisConnect(node->ip, node->port); + if (node->context->err) { + fprintf(stderr,"Could not connect to Redis at "); + fprintf(stderr,"%s:%d: %s\n", node->ip, node->port, + node->context->errstr); + freeClusterManagerNode(node); + return 0; + } + opts |= CLUSTER_MANAGER_OPT_GETFRIENDS; + char *e = NULL; + if (!clusterManagerNodeIsCluster(node, &e)) { + char *msg = (e ? e : "is not configured as a cluster node."); + fprintf(stderr, "[ERR] Node %s:%d %s\n", node->ip, node->port, msg); + if (e) zfree(e); + freeClusterManagerNode(node); + return 0; + } + e = NULL; + if (!clusterManagerNodeLoadInfo(node, opts, &e)) { + if (e) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, e); + zfree(e); + } + freeClusterManagerNode(node); + return 0; + } + cluster_manager.nodes = listCreate(); + listAddNodeTail(cluster_manager.nodes, node); + if (node->friends != NULL) { + listIter li; + listNode *ln; + listRewind(node->friends, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *friend = ln->value; + if (!friend->ip || !friend->port) continue; + if (!friend->context) + friend->context = redisConnect(friend->ip, friend->port); + if (friend->context->err) continue; + e = NULL; + if (clusterManagerNodeLoadInfo(friend, 0, &e)) { + if (friend->flags & (CLUSTER_MANAGER_FLAG_NOADDR | + CLUSTER_MANAGER_FLAG_DISCONNECT | + CLUSTER_MANAGER_FLAG_FAIL)) continue; + listAddNodeTail(cluster_manager.nodes, friend); + + } else fprintf(stderr,"[ERR] Unable to load info 
for node %s:%d\n", + friend->ip, friend->port); + } + listRelease(node->friends); + node->friends = NULL; + } + return 1; +} + +int clusterManagerSlotCompare(const void *slot1, const void *slot2) { + const char **i1 = (const char **)slot1; + const char **i2 = (const char **)slot2; + return strcmp(*i1, *i2); +} + +static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { + sds signature = NULL; + int node_count = 0, i = 0, name_len = 0; + redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); + if (reply == NULL || reply->type == REDIS_REPLY_ERROR) + goto cleanup; + char *lines = reply->str, *p, *line; + char **node_configs = NULL; + while ((p = strstr(lines, "\n")) != NULL) { + i = 0; + *p = '\0'; + line = lines; + lines = p + 1; + char *nodename = NULL; + int tot_size = 0; + while ((p = strchr(line, ' ')) != NULL) { + *p = '\0'; + char *token = line; + line = p + 1; + if (i == 0) { + nodename = token; + tot_size = p - token; + name_len = tot_size; + } else if (i == 8) break; + i++; + } + if (i != 8) continue; + if (nodename == NULL) continue; + int remaining = strlen(line); + if (remaining == 0) continue; + char **slots = NULL; + int c = 0; + //TODO: just while(remaining) && assign p inside the block + while ((p = strchr(line, ' ')) != NULL || remaining) { + if (p == NULL) p = line + remaining; + int size = (p - line); + remaining -= size; + tot_size += size; + char *slotsdef = line; + *p = '\0'; + if (remaining) line = p + 1; + else line = p; + if (slotsdef[0] != '[') { + c++; + slots = zrealloc(slots, (c * sizeof(char *))); + slots[c - 1] = slotsdef; + } + } + if (c > 0) { + if (c > 1) + qsort(slots, c, sizeof(char *), clusterManagerSlotCompare); + node_count++; + node_configs = + zrealloc(node_configs, (node_count * sizeof(char *))); + tot_size += (sizeof(char) * (c - 1)); + char *cfg = zmalloc((sizeof(char) * tot_size) + 1); + memcpy(cfg, nodename, name_len); + char *sp = cfg + name_len; + *(sp++) = ':'; + for (i = 0; i < c; i++) { + 
if (i > 0) *(sp++) = '|'; + int slen = strlen(slots[i]); + memcpy(sp, slots[i], slen); + sp += slen; + } + *(sp++) = '\0'; + node_configs[node_count - 1] = cfg; + } + zfree(slots); + } + if (node_count > 0) { + if (node_count > 1) { + qsort(node_configs, node_count, sizeof(char *), + clusterManagerSlotCompare); + } + signature = sdsempty(); + for (i = 0; i < node_count; i++) { + if (i > 0) signature = sdscatprintf(signature, "%c", '|'); + signature = sdscatfmt(signature, "%s", node_configs[i]); + } + } +cleanup: + if (reply != NULL) freeReplyObject(reply); + for (i = 0; i < node_count; i++) zfree(node_configs[i]); + zfree(node_configs); + return signature; +} + +static int clusterManagerIsConfigConsistent(void) { + if (cluster_manager.nodes == NULL) return 0; + int consistent = 0; + sds first_cfg = NULL; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + sds cfg = clusterManagerGetConfigSignature(node); + if (cfg == NULL) { + consistent = 0; + break; + } + if (first_cfg == NULL) first_cfg = cfg; + else { + consistent = !sdscmp(first_cfg, cfg); + sdsfree(cfg); + if (!consistent) break; + } + } + if (first_cfg != NULL) sdsfree(first_cfg); + return consistent; +} + +static void clusterManagerCheckCluster(int quiet) { + listNode *ln = listFirst(cluster_manager.nodes); + if (!ln) return; + clusterManagerNode *node = ln->value; + printf(">>> Performing Cluster Check (using node %s:%d)\n", + node->ip, node->port); + if (!quiet) clusterManagerShowNodes(); + if (!clusterManagerIsConfigConsistent()) + printf("[ERR] Nodes don't agree about configuration!\n"); //TODO: in redis-trib this error is added to @errors array + else + printf("[OK] All nodes agree about slots configuration.\n"); + //TODO:check_open_slots + //TODO:check_slots_coverage +} + static void clusterManagerMode(clusterManagerCommandProc *proc) { int argc = config.cluster_manager_command.argc; char **argv = 
config.cluster_manager_command.argv; - if (!proc(argc, argv)) { - sdsfree(config.hostip); - sdsfree(config.mb_delim); - exit(1); - } + cluster_manager.nodes = NULL; + if (!proc(argc, argv)) goto cluster_manager_err; + freeClusterManager(); exit(0); +cluster_manager_err: + freeClusterManager(); + sdsfree(config.hostip); + sdsfree(config.mb_delim); + exit(1); +} + +/* Cluster Manager Commands */ + +static int clusterManagerCommandCreate(int argc, char **argv) { + printf("Cluster Manager: Creating Cluster\n"); + int i, j; + cluster_manager.nodes = listCreate(); + for (i = 0; i < argc; i++) { + char *addr = argv[i]; + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c == NULL) { + fprintf(stderr, "Invalid address format: %s\n", addr); + return 0; + } + *c = '\0'; + char *ip = addr; + int port = atoi(++c); + clusterManagerNode *node = clusterManagerNewNode(ip, port); + node->context = redisConnect(ip, port); + if (node->context->err) { + fprintf(stderr,"Could not connect to Redis at "); + fprintf(stderr,"%s:%d: %s\n", ip, port, node->context->errstr); + freeClusterManagerNode(node); + return 0; + } + char *err = NULL; + if (!clusterManagerNodeIsCluster(node, &err)) { + char *msg = (err ? err : "is not configured as a cluster node."); + fprintf(stderr, "[ERR] Node %s:%d %s\n", ip, port, msg); + if (err) zfree(err); + freeClusterManagerNode(node); + return 0; + } + err = NULL; + if (!clusterManagerNodeLoadInfo(node, 0, &err)) { + if (err) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + zfree(err); + } + freeClusterManagerNode(node); + return 0; + } + err = NULL; + if (!clusterManagerNodeIsEmpty(node, &err)) { + char *msg; + if (err) msg = err; + else { + msg = " is not empty. 
Either the node already knows other " + "nodes (check with CLUSTER NODES) or contains some " + "key in database 0."; + } + fprintf(stderr, "[ERR] Node %s:%d %s\n", ip, port, msg); + if (err) zfree(err); + freeClusterManagerNode(node); + return 0; + } + listAddNodeTail(cluster_manager.nodes, node); + } + int node_len = cluster_manager.nodes->len; + int replicas = config.cluster_manager_command.replicas; + int masters_count = CLUSTER_MANAGER_MASTERS_COUNT(node_len, replicas); + if (masters_count < 3) { + fprintf(stderr, + "*** ERROR: Invalid configuration for cluster creation.\n"); + fprintf(stderr, + "*** Redis Cluster requires at least 3 master nodes.\n"); + fprintf(stderr, + "*** This is not possible with %d nodes and %d replicas per node.", + node_len, replicas); + fprintf(stderr, "\n*** At least %d nodes are required.\n", + (3 * (replicas + 1))); + return 0; + } + printf(">>> Performing hash slots allocation on %d nodes...\n", node_len); + int interleaved_len = 0, ips_len = 0; + clusterManagerNode **interleaved = zcalloc(node_len*sizeof(**interleaved)); + char **ips = zcalloc(node_len * sizeof(char*)); + clusterManagerNodeArray *ip_nodes = zcalloc(node_len * sizeof(*ip_nodes)); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + int found = 0; + for (i = 0; i < ips_len; i++) { + char *ip = ips[i]; + if (!strcmp(ip, n->ip)) { + found = 1; + break; + } + } + if (!found) { + ips[ips_len++] = n->ip; + } + clusterManagerNodeArray *node_array = &(ip_nodes[i]); + if (node_array->nodes == NULL) + CLUSTER_MANAGER_NODEARRAY_INIT(node_array, node_len); + CLUSTER_MANAGER_NODEARRAY_ADD(node_array, n); + } + while (interleaved_len < node_len) { + for (i = 0; i < ips_len; i++) { + clusterManagerNodeArray *node_array = &(ip_nodes[i]); + if (node_array->count > 0) { + clusterManagerNode *n; + CLUSTER_MANAGER_NODEARRAY_SHIFT(node_array, n); + interleaved[interleaved_len++] = n; + 
} + } + } + clusterManagerNode **masters = interleaved; + interleaved += masters_count; + interleaved_len -= masters_count; + float slots_per_node = CLUSTER_MANAGER_SLOTS / (float) masters_count; + long first = 0; + float cursor = 0.0f; + for (i = 0; i < masters_count; i++) { + clusterManagerNode *master = masters[i]; + long last = lround(cursor + slots_per_node - 1); + if (last > CLUSTER_MANAGER_SLOTS || i == (masters_count - 1)) + last = CLUSTER_MANAGER_SLOTS - 1; + if (last < first) last = first; + printf("Master[%d] -> Slots %lu - %lu\n", i, first, last); + master->slots_count = 0; + for (j = first; j <= last; j++) { + master->slots[j] = 1; + master->slots_count++; + } + master->dirty = 1; + first = last + 1; + cursor += slots_per_node; + } + + int assign_unused = 0, available_count = interleaved_len; +assign_replicas: + for (i = 0; i < masters_count; i++) { + clusterManagerNode *master = masters[i]; + int assigned_replicas = 0; + while (assigned_replicas < replicas) { + if (available_count == 0) break; + clusterManagerNode *found = NULL, *slave = NULL; + int firstNodeIdx = -1; + for (j = 0; j < interleaved_len; j++) { + clusterManagerNode *n = interleaved[j]; + if (n == NULL) continue; + if (strcmp(n->ip, master->ip)) { + found = n; + interleaved[j] = NULL; + break; + } + if (firstNodeIdx < 0) firstNodeIdx = j; + } + if (found) slave = found; + else if (firstNodeIdx >= 0) { + slave = interleaved[firstNodeIdx]; + interleaved_len -= (interleaved - (interleaved + firstNodeIdx)); + interleaved += (firstNodeIdx + 1); + } + if (slave != NULL) { + assigned_replicas++; + available_count--; + slave->replicate = sdsnew(master->name); + slave->dirty = 1; + } else break; + printf("Adding replica %s:%d to %s:%d\n", slave->ip, slave->port, + master->ip, master->port); + if (assign_unused) break; + } + } + if (!assign_unused && available_count > 0) { + assign_unused = 1; + printf("Adding extra replicas...\n"); + goto assign_replicas; + } + for (i = 0; i < ips_len; i++) { + 
clusterManagerNodeArray *node_array = ip_nodes + i; + CLUSTER_MANAGER_NODEARRAY_RESET(node_array); + } + clusterManagerOptimizeAntiAffinity(ip_nodes, ips_len); + clusterManagerShowNodes(); + printf("Can I set the above configuration? %s", "(type 'yes' to accept): "); + fflush(stdout); + char buf[4]; + int nread = read(fileno(stdin),buf,4); + buf[3] = '\0'; + if (nread != 0 && !strcmp("yes", buf)) { + printf("\nFlushing configuration!\n"); + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + char *err = NULL; + int flushed = clusterManagerFlushNodeConfig(node, &err); + if (!flushed && node->dirty && !node->replicate) { + if (err != NULL) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + zfree(err); + } + goto cmd_err; + } + } + printf(">>> Nodes configuration updated\n"); + printf(">>> Assign a different config epoch to each node\n"); + int config_epoch = 1; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + redisReply *reply = NULL; + reply = CLUSTER_MANAGER_COMMAND(node, + "cluster set-config-epoch %d", + config_epoch++); + if (reply != NULL) freeReplyObject(reply); + } + printf(">>> Sending CLUSTER MEET messages to join the cluster\n"); + clusterManagerNode *first = NULL; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + if (first == NULL) { + first = node; + continue; + } + redisReply *reply = NULL; + reply = CLUSTER_MANAGER_COMMAND(node, "cluster meet %s %d", + first->ip, first->port); + if (reply != NULL) freeReplyObject(reply); + } + // Give one second for the join to start, in order to avoid that + // waiting for cluster join will find all the nodes agree about + // the config as they are still empty with unassigned slots. + sleep(1); + clusterManagerWaitForClusterJoin(); + // Useful for the replicas //TODO: create a function for this? 
+ listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + if (!node->dirty) continue; + char *err = NULL; + int flushed = clusterManagerFlushNodeConfig(node, &err); + if (!flushed && !node->replicate) { + if (err != NULL) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + zfree(err); + } + goto cmd_err; + } + } + // Reset Nodes + listRewind(cluster_manager.nodes, &li); + clusterManagerNode *first_node = NULL; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + if (!first_node) first_node = node; + else freeClusterManagerNode(node); + } + listEmpty(cluster_manager.nodes); + if (!clusterManagerLoadInfoFromNode(first_node, 0)) goto cmd_err; //TODO: msg? + clusterManagerCheckCluster(0); + } + /* Free everything */ + zfree(masters); + zfree(ips); + for (i = 0; i < node_len; i++) { + clusterManagerNodeArray *node_array = ip_nodes + i; + CLUSTER_MANAGER_NODEARRAY_FREE(node_array); + } + zfree(ip_nodes); + return 1; +cmd_err: + zfree(masters); + zfree(ips); + for (i = 0; i < node_len; i++) { + clusterManagerNodeArray *node_array = ip_nodes + i; + CLUSTER_MANAGER_NODEARRAY_FREE(node_array); + } + zfree(ip_nodes); + return 0; +} + +static int clusterManagerCommandInfo(int argc, char **argv) { + int port = 0; + char *ip = NULL; + if (argc == 1) { + char *addr = argv[0]; + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c != NULL) { + *c = '\0'; + ip = addr; + port = atoi(++c); + } else goto invalid_args; + } else { + ip = argv[0]; + port = atoi(argv[1]); + } + if (!ip || !port) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + clusterManagerShowInfo(); + return 1; +invalid_args: + fprintf(stderr, "Invalid arguments: you need to pass either a valid " + "address (ie. 120.0.0.1:7000) or space separated IP " + "and port (ie. 
120.0.0.1 7000)\n"); + return 0; +} + +static int clusterManagerCommandCheck(int argc, char **argv) { + int port = 0; + char *ip = NULL; + if (argc == 1) { + char *addr = argv[0]; + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c != NULL) { + *c = '\0'; + ip = addr; + port = atoi(++c); + } else goto invalid_args; + } else { + ip = argv[0]; + port = atoi(argv[1]); + } + if (!ip || !port) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + clusterManagerShowInfo(); + clusterManagerCheckCluster(0); + return 1; +invalid_args: + fprintf(stderr, "Invalid arguments: you need to pass either a valid " + "address (ie. 120.0.0.1:7000) or space separated IP " + "and port (ie. 120.0.0.1 7000)\n"); + return 0; +} + +static int clusterManagerCommandHelp(int argc, char **argv) { + UNUSED(argc); + UNUSED(argv); + int commands_count = sizeof(clusterManagerCommands) / + sizeof(clusterManagerCommandDef); + int i = 0, j; + fprintf(stderr, "Cluster Manager Commands:\n"); + for (; i < commands_count; i++) { + clusterManagerCommandDef *def = &(clusterManagerCommands[i]); + int namelen = strlen(def->name), padlen = 15 - namelen; + fprintf(stderr, " %s", def->name); + for (j = 0; j < padlen; j++) fprintf(stderr, " "); + fprintf(stderr, "%s\n", (def->args ? 
def->args : "")); + //TODO: if (def->options) + } + return 0; } /*------------------------------------------------------------------------------ From c761124e19121c99a579cda98a8c235f67cfa062 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 31 Jan 2018 17:57:16 +0100 Subject: [PATCH 03/66] Added check for open slots (clusterManagerCheckCluster) --- src/redis-cli.c | 162 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 143 insertions(+), 19 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index ef917cca5..456751f58 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -74,6 +74,13 @@ (reconnectingRedisCommand(n->context, __VA_ARGS__)) #define CLUSTER_MANAGER_NODE_INFO(n) (CLUSTER_MANAGER_COMMAND(n, "INFO")) +#define CLUSTER_MANAGER_ERROR(err) do { \ + if (cluster_manager.errors == NULL) \ + cluster_manager.errors = listCreate(); \ + listAddNodeTail(cluster_manager.errors, err); \ + fprintf(stderr, "%s\n", (char *) err); \ +} while(0) + #define CLUSTER_MANAGER_RESET_SLOTS(n) do { \ memset(n->slots, 0, sizeof(n->slots)); \ n->slots_count = 0; \ @@ -137,7 +144,14 @@ int spectrum_palette_mono[] = {0,233,234,235,237,239,241,243,245,247,249,251,253 int *spectrum_palette; int spectrum_palette_size; -/* Cluster Manager command info */ +/* Dict Helpers */ + +static uint64_t dictSdsHash(const void *key); +static int dictSdsKeyCompare(void *privdata, const void *key1, + const void *key2); +static void dictSdsDestructor(void *privdata, void *val); + +/* Cluster Manager Command Info */ typedef struct clusterManagerCommand { char *name; int argc; @@ -196,6 +210,7 @@ static struct config { static struct clusterManager { list *nodes; + list *errors; } cluster_manager; typedef struct clusterManagerNode { @@ -212,6 +227,10 @@ typedef struct clusterManagerNode { uint8_t slots[CLUSTER_MANAGER_SLOTS]; int slots_count; list *friends; + sds *migrating; + sds *importing; + int migrating_count; + int importing_count; } clusterManagerNode; typedef struct 
clusterManagerNodeArray { @@ -221,6 +240,15 @@ typedef struct clusterManagerNodeArray { int count; } clusterManagerNodeArray; +static dictType clusterManagerDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + NULL, /* key destructor */ + dictSdsDestructor /* val destructor */ +}; + static clusterManagerNode *clusterManagerNewNode(char *ip, int port); static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, @@ -1810,13 +1838,22 @@ static void freeClusterManagerNode(clusterManagerNode *node) { if (node->replicate != NULL) sdsfree(node->replicate); if ((node->flags & CLUSTER_MANAGER_FLAG_FRIEND) && node->ip) sdsfree(node->ip); + int i; + if (node->migrating != NULL) { + for (i = 0; i < node->migrating_count; i++) sdsfree(node->migrating[i]); + zfree(node->migrating); + } + if (node->importing != NULL) { + for (i = 0; i < node->importing_count; i++) sdsfree(node->importing[i]); + zfree(node->importing); + } zfree(node); } static void freeClusterManager(void) { + listIter li; + listNode *ln; if (cluster_manager.nodes != NULL) { - listIter li; - listNode *ln; listRewind(cluster_manager.nodes,&li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; @@ -1825,9 +1862,18 @@ static void freeClusterManager(void) { listRelease(cluster_manager.nodes); cluster_manager.nodes = NULL; } + if (cluster_manager.errors != NULL) { + listRewind(cluster_manager.errors,&li); + while ((ln = listNext(&li)) != NULL) { + sds err = ln->value; + sdsfree(err); + } + listRelease(cluster_manager.errors); + cluster_manager.errors = NULL; + } } -static clusterManagerNode *clusterManagerNewNode(char * ip, int port) { +static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { clusterManagerNode *node = zmalloc(sizeof(*node)); node->context = NULL; node->name = NULL; @@ -1840,6 +1886,10 @@ static 
clusterManagerNode *clusterManagerNewNode(char * ip, int port) { node->replicate = NULL; node->dirty = 0; node->friends = NULL; + node->migrating = NULL; + node->importing = NULL; + node->migrating_count = 0; + node->importing_count = 0; CLUSTER_MANAGER_RESET_SLOTS(node); return node; } @@ -1902,17 +1952,9 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, int node_len = cluster_manager.nodes->len; *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); clusterManagerNode **offending_p = *offending; - dictType dtype = { - dictSdsHash, /* hash function */ - NULL, /* key dup */ - NULL, /* val dup */ - dictSdsKeyCompare, /* key compare */ - NULL, /* key destructor */ - dictSdsDestructor /* val destructor */ - }; for (i = 0; i < ip_len; i++) { clusterManagerNodeArray *node_array = &(ipnodes[i]); - dict *related = dictCreate(&dtype, NULL); + dict *related = dictCreate(&clusterManagerDictType, NULL); char *ip = NULL; for (j = 0; j < node_array->len; j++) { clusterManagerNode *node = node_array->nodes[j]; @@ -2291,7 +2333,32 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (remaining) line = p + 1; else line = p; if (slotsdef[0] == '[') { - //TODO: migrating/importing + slotsdef++; + if ((p = strstr(slotsdef, "->-"))) { // Migrating + *p = '\0'; + p += 3; + sds slot = sdsnew(slotsdef); + sds dst = sdsnew(p); + node->migrating_count += 2; + node->migrating = zrealloc(node->migrating, + (node->migrating_count * sizeof(sds))); + node->migrating[node->migrating_count - 2] = + slot; + node->migrating[node->migrating_count - 1] = + dst; + } else if ((p = strstr(slotsdef, "-<-"))) {//Importing + *p = '\0'; + p += 3; + sds slot = sdsnew(slotsdef); + sds src = sdsnew(p); + node->importing_count += 2; + node->importing = zrealloc(node->importing, + (node->importing_count * sizeof(sds))); + node->importing[node->importing_count - 2] = + slot; + node->importing[node->importing_count - 1] = + src; + } } else if ((p = 
strchr(slotsdef, '-')) != NULL) { int start, stop; *p = '\0'; @@ -2529,11 +2596,68 @@ static void clusterManagerCheckCluster(int quiet) { printf(">>> Performing Cluster Check (using node %s:%d)\n", node->ip, node->port); if (!quiet) clusterManagerShowNodes(); - if (!clusterManagerIsConfigConsistent()) - printf("[ERR] Nodes don't agree about configuration!\n"); //TODO: in redis-trib this error is added to @errors array - else - printf("[OK] All nodes agree about slots configuration.\n"); - //TODO:check_open_slots + if (!clusterManagerIsConfigConsistent()) { + sds err = sdsnew("[ERR] Nodes don't agree about configuration!"); + CLUSTER_MANAGER_ERROR(err); + } else printf("[OK] All nodes agree about slots configuration.\n"); + // Check open slots + listIter li; + listRewind(cluster_manager.nodes, &li); + int i; + dict *open_slots = NULL; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->migrating != NULL) { + if (open_slots == NULL) + open_slots = dictCreate(&clusterManagerDictType, NULL); + sds errstr = sdsempty(); + errstr = sdscatprintf(errstr, + "[WARNING] Node %s:%d has slots in " + "migrating state ", + n->ip, + n->port); + for (i = 0; i < n->migrating_count; i += 2) { + sds slot = n->migrating[i]; + dictAdd(open_slots, slot, n->migrating[i + 1]); + char *fmt = (i > 0 ? ",%S" : "%S"); + errstr = sdscatfmt(errstr, fmt, slot); + } + errstr = sdscat(errstr, "."); + CLUSTER_MANAGER_ERROR(errstr); + } + if (n->importing != NULL) { + if (open_slots == NULL) + open_slots = dictCreate(&clusterManagerDictType, NULL); + sds errstr = sdsempty(); + errstr = sdscatprintf(errstr, + "[WARNING] Node %s:%d has slots in " + "importing state ", + n->ip, + n->port); + for (i = 0; i < n->importing_count; i += 2) { + sds slot = n->importing[i]; + dictAdd(open_slots, slot, n->importing[i + 1]); + char *fmt = (i > 0 ? 
",%S" : "%S"); + errstr = sdscatfmt(errstr, fmt, slot); + } + errstr = sdscat(errstr, "."); + CLUSTER_MANAGER_ERROR(errstr); + } + } + if (open_slots != NULL) { + dictIterator *iter = dictGetIterator(open_slots); + dictEntry *entry; + sds errstr = sdsnew("[WARNING] The following slots are open: "); + i = 0; + while ((entry = dictNext(iter)) != NULL) { + sds slot = (sds) dictGetKey(entry); + char *fmt = (i++ > 0 ? ",%S" : "%S"); + errstr = sdscatfmt(errstr, fmt, slot); + } + fprintf(stderr, "%s.\n", (char *) errstr); + sdsfree(errstr); + dictRelease(open_slots); + } //TODO:check_slots_coverage } From 06ca2f203e530dab8c240854eb53e316d9167642 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 31 Jan 2018 19:25:02 +0100 Subject: [PATCH 04/66] - Cluster Manager: fixed various memory leaks - Cluster Manager: fixed flags assignment in clusterManagerNodeLoadInfo --- src/redis-cli.c | 54 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 456751f58..4c30067b3 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2310,12 +2310,6 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, } if (!flags) goto node_cmd_err; int myself = (strstr(flags, "myself") != NULL); - if (strstr(flags, "noaddr") != NULL) - node->flags |= CLUSTER_MANAGER_FLAG_NOADDR; - if (strstr(flags, "disconnected") != NULL) - node->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT; - if (strstr(flags, "fail") != NULL) - node->flags |= CLUSTER_MANAGER_FLAG_FAIL; clusterManagerNode *currentNode = NULL; if (myself) { node->flags |= CLUSTER_MANAGER_FLAG_MYSELF; @@ -2396,10 +2390,22 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (node->friends == NULL) node->friends = listCreate(); listAddNodeTail(node->friends, currentNode); } - if (name != NULL) currentNode->name = sdsnew(name); + if (name != NULL) { + if (currentNode->name) sdsfree(currentNode->name); + 
currentNode->name = sdsnew(name); + } + if (strstr(flags, "noaddr") != NULL) + currentNode->flags |= CLUSTER_MANAGER_FLAG_NOADDR; + if (strstr(flags, "disconnected") != NULL) + currentNode->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT; + if (strstr(flags, "fail") != NULL) + currentNode->flags |= CLUSTER_MANAGER_FLAG_FAIL; if (strstr(flags, "slave") != NULL) { currentNode->flags |= CLUSTER_MANAGER_FLAG_SLAVE; - if (master_id != NULL) currentNode->replicate = sdsnew(master_id); + if (master_id != NULL) { + if (currentNode->replicate) sdsfree(currentNode->replicate); + currentNode->replicate = sdsnew(master_id); + } } if (config_epoch != NULL) currentNode->current_epoch = atoll(config_epoch); @@ -2442,27 +2448,39 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { freeClusterManagerNode(node); return 0; } + listIter li; + listNode *ln; + if (cluster_manager.nodes != NULL) { + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) + freeClusterManagerNode((clusterManagerNode *) ln->value); + listRelease(cluster_manager.nodes); + } cluster_manager.nodes = listCreate(); listAddNodeTail(cluster_manager.nodes, node); if (node->friends != NULL) { - listIter li; - listNode *ln; listRewind(node->friends, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *friend = ln->value; - if (!friend->ip || !friend->port) continue; + if (!friend->ip || !friend->port) goto invalid_friend; if (!friend->context) friend->context = redisConnect(friend->ip, friend->port); - if (friend->context->err) continue; + if (friend->context->err) goto invalid_friend; e = NULL; if (clusterManagerNodeLoadInfo(friend, 0, &e)) { if (friend->flags & (CLUSTER_MANAGER_FLAG_NOADDR | CLUSTER_MANAGER_FLAG_DISCONNECT | - CLUSTER_MANAGER_FLAG_FAIL)) continue; + CLUSTER_MANAGER_FLAG_FAIL)) + goto invalid_friend; listAddNodeTail(cluster_manager.nodes, friend); - - } else fprintf(stderr,"[ERR] Unable to load info for node %s:%d\n", - friend->ip, 
friend->port); + } else { + fprintf(stderr,"[ERR] Unable to load info for node %s:%d\n", + friend->ip, friend->port); + goto invalid_friend; + } + continue; +invalid_friend: + freeClusterManagerNode(friend); } listRelease(node->friends); node->friends = NULL; @@ -2601,6 +2619,7 @@ static void clusterManagerCheckCluster(int quiet) { CLUSTER_MANAGER_ERROR(err); } else printf("[OK] All nodes agree about slots configuration.\n"); // Check open slots + printf(">>> Check for open slots...\n"); listIter li; listRewind(cluster_manager.nodes, &li); int i; @@ -2836,6 +2855,7 @@ assign_replicas: if (slave != NULL) { assigned_replicas++; available_count--; + if (slave->replicate) sdsfree(slave->replicate); slave->replicate = sdsnew(master->name); slave->dirty = 1; } else break; @@ -2873,7 +2893,7 @@ assign_replicas: zfree(err); } goto cmd_err; - } + } else if (err != NULL) zfree(err); } printf(">>> Nodes configuration updated\n"); printf(">>> Assign a different config epoch to each node\n"); From a659068dcc9667da96797362b000c1afe844ed4d Mon Sep 17 00:00:00 2001 From: artix Date: Thu, 1 Feb 2018 17:43:36 +0100 Subject: [PATCH 05/66] Cluster Manager: slots coverage check. 
--- src/redis-cli.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 4c30067b3..51eb137e8 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2607,6 +2607,24 @@ static int clusterManagerIsConfigConsistent(void) { return consistent; } +static int clusterManagerGetCoveredSlots(char *all_slots) { + if (cluster_manager.nodes == NULL) return 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + int totslots = 0, i; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + if (node->slots[i] && !all_slots[i]) { + all_slots[i] = 1; + totslots++; + } + } + } + return totslots; +} + static void clusterManagerCheckCluster(int quiet) { listNode *ln = listFirst(cluster_manager.nodes); if (!ln) return; @@ -2677,7 +2695,19 @@ static void clusterManagerCheckCluster(int quiet) { sdsfree(errstr); dictRelease(open_slots); } - //TODO:check_slots_coverage + printf(">>> Check slots coverage...\n"); + char slots[CLUSTER_MANAGER_SLOTS]; + memset(slots, 0, CLUSTER_MANAGER_SLOTS); + int coverage = clusterManagerGetCoveredSlots(slots); + if (coverage == CLUSTER_MANAGER_SLOTS) + printf("[OK] All %d slots covered.\n", CLUSTER_MANAGER_SLOTS); + else { + sds err = sdsempty(); + err = sdscatprintf(err, "[ERR] Not all %d slots are " + "covered by nodes.\n", + CLUSTER_MANAGER_SLOTS); + CLUSTER_MANAGER_ERROR(err); + } } static void clusterManagerMode(clusterManagerCommandProc *proc) { From b4db8f5f68ad44b9c6d1cb86a90ba458d66b73b4 Mon Sep 17 00:00:00 2001 From: artix Date: Thu, 1 Feb 2018 20:09:30 +0100 Subject: [PATCH 06/66] Cluster Manager: reply error catch for MEET command --- src/redis-cli.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 51eb137e8..b5c80a5e8 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2949,7 +2949,16 @@ 
assign_replicas: redisReply *reply = NULL; reply = CLUSTER_MANAGER_COMMAND(node, "cluster meet %s %d", first->ip, first->port); - if (reply != NULL) freeReplyObject(reply); + int is_err = 0; + if (reply != NULL) { + if ((is_err = reply->type == REDIS_REPLY_ERROR)) + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, reply->str); + freeReplyObject(reply); + } else { + is_err = 1; + fprintf(stderr, "Failed to send CLUSTER MEET command.\n"); + } + if (is_err) goto cmd_err; } // Give one second for the join to start, in order to avoid that // waiting for cluster join will find all the nodes agree about From 1aa1a6e130c4da66f3c16649ecc11780c1461f40 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 7 Feb 2018 11:29:25 +0100 Subject: [PATCH 07/66] Cluster Manager: cluster is considered consistent if only one node has been found --- src/redis-cli.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index b5c80a5e8..7128dd979 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2584,7 +2584,10 @@ cleanup: static int clusterManagerIsConfigConsistent(void) { if (cluster_manager.nodes == NULL) return 0; - int consistent = 0; + int consistent = (listLength(cluster_manager.nodes) <= 1); + // If the Cluster has only one node, it's always consistent + // Does it make sense? 
+ if (consistent) return 1; sds first_cfg = NULL; listIter li; listNode *ln; From 48f404ab60522a44b7600b8f03a7a5f1213d2f67 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 7 Feb 2018 12:02:56 +0100 Subject: [PATCH 08/66] ClusterManager: added replicas count to clusterManagerNode --- src/redis-cli.c | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 7128dd979..de7ba2511 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -223,9 +223,11 @@ typedef struct clusterManagerNode { time_t ping_recv; int flags; sds replicate; + list replicas; int dirty; uint8_t slots[CLUSTER_MANAGER_SLOTS]; int slots_count; + int replicas_count; list *friends; sds *migrating; sds *importing; @@ -250,6 +252,7 @@ static dictType clusterManagerDictType = { }; static clusterManagerNode *clusterManagerNewNode(char *ip, int port); +static clusterManagerNode *clusterManagerNodeByName(const char *name); static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err); @@ -265,6 +268,7 @@ static void clusterManagerShowInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); static void clusterManagerWaitForClusterJoin(void); static void clusterManagerCheckCluster(int quiet); + typedef int clusterManagerCommandProc(int argc, char **argv); typedef struct clusterManagerCommandDef { char *name; @@ -1890,10 +1894,31 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { node->importing = NULL; node->migrating_count = 0; node->importing_count = 0; + node->replicas_count = 0; CLUSTER_MANAGER_RESET_SLOTS(node); return node; } +static clusterManagerNode *clusterManagerNodeByName(const char *name) { + if (cluster_manager.nodes == NULL) return NULL; + clusterManagerNode *found = NULL; + sds lcname = sdsempty(); + lcname = sdscpy(lcname, name); + sdstolower(lcname); 
+ listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->name && !sdscmp(n->name, lcname)) { + found = n; + break; + } + } + sdsfree(lcname); + return found; +} + static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); int is_err = 0; @@ -2119,7 +2144,9 @@ static sds clusterManagerNodeInfo(clusterManagerNode *node) { } if (node->replicate != NULL) info = sdscatfmt(info, "\n replicates %S", node->replicate); - //else if () {} //TODO: add replicas info + else if (node->replicas_count) + info = sdscatfmt(info, "\n %U additional replica(s)", + node->replicas_count); return info; } @@ -2485,6 +2512,18 @@ invalid_friend: listRelease(node->friends); node->friends = NULL; } + // Count replicas for each node + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->replicate != NULL) { + clusterManagerNode *master = clusterManagerNodeByName(n->replicate); + if (master == NULL) { + printf("*** WARNING: %s:%d claims to be slave of unknown " + "node ID %s.\n", n->ip, n->port, n->replicate); + } else master->replicas_count++; + } + } return 1; } From ffdf3c3e2f70ed7c85036dfd50fbf3aade0a852f Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 9 Feb 2018 13:02:37 +0100 Subject: [PATCH 09/66] Cluster Manager: CLUSTER_MANAGER_NODE_CONNECT macro --- src/redis-cli.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index de7ba2511..fd3bdf988 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -70,6 +70,8 @@ #define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) #define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) (nodes/(replicas + 1)) +#define CLUSTER_MANAGER_NODE_CONNECT(n) \ + (n->context = redisConnect(n->ip, n->port)); 
#define CLUSTER_MANAGER_COMMAND(n,...) \ (reconnectingRedisCommand(n->context, __VA_ARGS__)) #define CLUSTER_MANAGER_NODE_INFO(n) (CLUSTER_MANAGER_COMMAND(n, "INFO")) @@ -2449,7 +2451,7 @@ node_cmd_err: static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { if (node->context == NULL) - node->context = redisConnect(node->ip, node->port); + CLUSTER_MANAGER_NODE_CONNECT(node); if (node->context->err) { fprintf(stderr,"Could not connect to Redis at "); fprintf(stderr,"%s:%d: %s\n", node->ip, node->port, @@ -2491,7 +2493,7 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { clusterManagerNode *friend = ln->value; if (!friend->ip || !friend->port) goto invalid_friend; if (!friend->context) - friend->context = redisConnect(friend->ip, friend->port); + CLUSTER_MANAGER_NODE_CONNECT(friend); if (friend->context->err) goto invalid_friend; e = NULL; if (clusterManagerNodeLoadInfo(friend, 0, &e)) { @@ -2785,7 +2787,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { char *ip = addr; int port = atoi(++c); clusterManagerNode *node = clusterManagerNewNode(ip, port); - node->context = redisConnect(ip, port); + CLUSTER_MANAGER_NODE_CONNECT(node); if (node->context->err) { fprintf(stderr,"Could not connect to Redis at "); fprintf(stderr,"%s:%d: %s\n", ip, port, node->context->errstr); From d123d0c65438069a8f77f2bc95a7e818f81cfceb Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 13 Feb 2018 12:00:06 +0100 Subject: [PATCH 10/66] Cluster Manager: 'call' command. 
--- src/redis-cli.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/src/redis-cli.c b/src/redis-cli.c index fd3bdf988..308bd08c6 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -286,6 +286,7 @@ static int clusterManagerIsConfigConsistent(void); static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); /* User preferences. */ @@ -1802,6 +1803,8 @@ clusterManagerCommandDef clusterManagerCommands[] = { "cluster-replicas"}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, + {"call", clusterManagerCommandCall, -2, + "host:port command arg arg .. arg", NULL}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} }; @@ -2449,6 +2452,11 @@ node_cmd_err: return 0; } +/* Retrieves info about the cluster using argument 'node' as the starting + * point. All nodes will be loaded inside the cluster_manager.nodes list. + * Warning: if something goes wrong, it will free the starting node before + * returning 0. 
*/ + static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { if (node->context == NULL) CLUSTER_MANAGER_NODE_CONNECT(node); @@ -3115,6 +3123,56 @@ invalid_args: return 0; } +static int clusterManagerCommandCall(int argc, char **argv) { + int port = 0; + char *ip = NULL; + char *addr = argv[0]; + char *c = strrchr(addr, '@'); + int i; + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c != NULL) { + *c = '\0'; + ip = addr; + port = atoi(++c); + } else { + fprintf(stderr, + "Invalid arguments: first agrumnt must be host:port.\n"); + return 0; + } + clusterManagerNode *refnode = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + argc--; + argv++; + size_t *argvlen = zmalloc(argc*sizeof(size_t)); + printf(">>> Calling"); + for (i = 0; i < argc; i++) { + argvlen[i] = strlen(argv[i]); + printf(" %s", argv[i]); + } + printf("\n"); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (!n->context) CLUSTER_MANAGER_NODE_CONNECT(n); + redisReply *reply = NULL; + redisAppendCommandArgv(n->context, argc, (const char **) argv, argvlen); + int status = redisGetReply(n->context, (void **)(&reply)); + if (status != REDIS_OK || reply == NULL ) + printf("%s:%d: Failed!\n", n->ip, n->port); //TODO: better message? 
+ else { + sds formatted_reply = cliFormatReplyTTY(reply, ""); + printf("%s:%d: %s\n", n->ip, n->port, (char *) formatted_reply); + sdsfree(formatted_reply); + } + if (reply != NULL) freeReplyObject(reply); + } + zfree(argvlen); + return 1; +} + static int clusterManagerCommandHelp(int argc, char **argv) { UNUSED(argc); UNUSED(argv); From 169e706519d14606087f24d26307de8c69c2bed9 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 14 Feb 2018 17:54:46 +0100 Subject: [PATCH 11/66] Cluster Manager: improved cleanup/error handling in various functions --- src/redis-cli.c | 101 +++++++++++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 45 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 308bd08c6..280e6c9e3 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2220,7 +2220,7 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) { redisReply *reply = NULL; void *_reply = NULL; - int is_err = 0; + int is_err = 0, success = 1; int argc; sds *argv = NULL; size_t *argvlen = NULL; @@ -2235,39 +2235,44 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) added++; } } - if (!added) goto node_cmd_err; + if (!added) { + success = 0; + goto cleanup; + } argv = cliSplitArgs(cmd, &argc); - if (argc == 0 || argv == NULL) goto node_cmd_err; + if (argc == 0 || argv == NULL) { + success = 0; + goto cleanup; + } argvlen = zmalloc(argc*sizeof(size_t)); for (i = 0; i < argc; i++) argvlen[i] = sdslen(argv[i]); redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen); - if (redisGetReply(node->context, &_reply) != REDIS_OK) goto node_cmd_err; + if (redisGetReply(node->context, &_reply) != REDIS_OK) { + success = 1; + goto cleanup; + } reply = (redisReply*) _reply; if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { if (is_err && err != NULL) { *err = zmalloc((reply->len + 1) * sizeof(char)); strcpy(*err, reply->str); } - goto node_cmd_err; + success = 0; + goto cleanup; } - 
sdsfree(cmd); - zfree(argvlen); - sdsfreesplitres(argv,argc); - freeReplyObject(reply); - return 1; -node_cmd_err: +cleanup: sdsfree(cmd); zfree(argvlen); if (argv != NULL) sdsfreesplitres(argv,argc); if (reply != NULL) freeReplyObject(reply); - return 0; + return success; } static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { if (!node->dirty) return 0; redisReply *reply = NULL; - int is_err = 0; + int is_err = 0, success = 1; *err = NULL; if (node->replicate != NULL) { reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER REPLICATE %s", @@ -2277,18 +2282,20 @@ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { *err = zmalloc((reply->len + 1) * sizeof(char)); strcpy(*err, reply->str); } - goto node_cmd_err; + success = 0; + goto cleanup; } } else { int added = clusterManagerAddSlots(node, err); - if (!added || *err != NULL) goto node_cmd_err; + if (!added || *err != NULL) { + success = 0; + goto cleanup; + } } node->dirty = 0; - freeReplyObject(reply); - return 1; -node_cmd_err: - freeReplyObject(reply); - return 0; +cleanup: + if (reply != NULL) freeReplyObject(reply); + return success; } static void clusterManagerWaitForClusterJoin(void) { @@ -2305,14 +2312,15 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err) { redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); - int is_err = 0; + int is_err = 0, success = 1; *err = NULL; if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { if (is_err && err != NULL) { *err = zmalloc((reply->len + 1) * sizeof(char)); strcpy(*err, reply->str); } - goto node_cmd_err; + success = 0; + goto cleanup; } int getfriends = (opts & CLUSTER_MANAGER_OPT_GETFRIENDS); char *lines = reply->str, *p, *line; @@ -2340,7 +2348,10 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, } if (i == 8) break; // Slots } - if (!flags) goto node_cmd_err; + if (!flags) { + success = 0; + goto cleanup; + } int 
myself = (strstr(flags, "myself") != NULL); clusterManagerNode *currentNode = NULL; if (myself) { @@ -2406,14 +2417,16 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (addr == NULL) { // TODO: find a better err message fprintf(stderr, "Error: invalid CLUSTER NODES reply\n"); - goto node_cmd_err; + success = 0; + goto cleanup; } char *c = strrchr(addr, '@'); if (c != NULL) *c = '\0'; c = strrchr(addr, ':'); if (c == NULL) { fprintf(stderr, "Error: invalid CLUSTER NODES reply\n"); - goto node_cmd_err; + success = 0; + goto cleanup; } *c = '\0'; int port = atoi(++c); @@ -2445,11 +2458,9 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (ping_recv != NULL) currentNode->ping_recv = atoll(ping_recv); if (!getfriends && myself) break; } - freeReplyObject(reply); - return 1; -node_cmd_err: - freeReplyObject(reply); - return 0; +cleanup: + if (reply) freeReplyObject(reply); + return success; } /* Retrieves info about the cluster using argument 'node' as the starting @@ -2780,7 +2791,7 @@ cluster_manager_err: static int clusterManagerCommandCreate(int argc, char **argv) { printf("Cluster Manager: Creating Cluster\n"); - int i, j; + int i, j, success = 1; cluster_manager.nodes = listCreate(); for (i = 0; i < argc; i++) { char *addr = argv[i]; @@ -2974,7 +2985,8 @@ assign_replicas: CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); zfree(err); } - goto cmd_err; + success = 0; + goto cleanup; } else if (err != NULL) zfree(err); } printf(">>> Nodes configuration updated\n"); @@ -3010,7 +3022,10 @@ assign_replicas: is_err = 1; fprintf(stderr, "Failed to send CLUSTER MEET command.\n"); } - if (is_err) goto cmd_err; + if (is_err) { + success = 0; + goto cleanup; + } } // Give one second for the join to start, in order to avoid that // waiting for cluster join will find all the nodes agree about @@ -3029,7 +3044,8 @@ assign_replicas: CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); zfree(err); } - goto cmd_err; + success = 0; + 
goto cleanup; } } // Reset Nodes @@ -3041,9 +3057,13 @@ assign_replicas: else freeClusterManagerNode(node); } listEmpty(cluster_manager.nodes); - if (!clusterManagerLoadInfoFromNode(first_node, 0)) goto cmd_err; //TODO: msg? + if (!clusterManagerLoadInfoFromNode(first_node, 0)) { + success = 0; + goto cleanup; //TODO: msg? + } clusterManagerCheckCluster(0); } +cleanup: /* Free everything */ zfree(masters); zfree(ips); @@ -3052,16 +3072,7 @@ assign_replicas: CLUSTER_MANAGER_NODEARRAY_FREE(node_array); } zfree(ip_nodes); - return 1; -cmd_err: - zfree(masters); - zfree(ips); - for (i = 0; i < node_len; i++) { - clusterManagerNodeArray *node_array = ip_nodes + i; - CLUSTER_MANAGER_NODEARRAY_FREE(node_array); - } - zfree(ip_nodes); - return 0; + return success; } static int clusterManagerCommandInfo(int argc, char **argv) { From bc96805e06d7f4f473a50b436d68028d5a9c29c9 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 14 Feb 2018 19:29:28 +0100 Subject: [PATCH 12/66] Cluster Manager: colorized output --- src/redis-cli.c | 130 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 95 insertions(+), 35 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 280e6c9e3..6ea44f83f 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -67,6 +67,7 @@ #define REDIS_CLI_HISTFILE_DEFAULT ".rediscli_history" #define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE" #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" + #define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) #define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) (nodes/(replicas + 1)) @@ -80,7 +81,7 @@ if (cluster_manager.errors == NULL) \ cluster_manager.errors = listCreate(); \ listAddNodeTail(cluster_manager.errors, err); \ - fprintf(stderr, "%s\n", (char *) err); \ + clusterManagerLogErr("%s\n", (char *) err); \ } while(0) #define CLUSTER_MANAGER_RESET_SLOTS(n) do { \ @@ -124,7 +125,20 @@ } while(0) #define CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err) \ - 
fprintf(stderr,"Node %s:%d replied with error:\n%s\n", n->ip, n->port, err); + clusterManagerLogErr("Node %s:%d replied with error:\n%s\n", \ + n->ip, n->port, err); + +#define clusterManagerLogInfo(...) \ + clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_INFO,__VA_ARGS__) + +#define clusterManagerLogErr(...) \ + clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_ERR,__VA_ARGS__) + +#define clusterManagerLogWarn(...) \ + clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_WARN,__VA_ARGS__) + +#define clusterManagerLogOk(...) \ + clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_SUCCESS,__VA_ARGS__) #define CLUSTER_MANAGER_FLAG_MYSELF 1 << 0 #define CLUSTER_MANAGER_FLAG_SLAVE 1 << 1 @@ -133,7 +147,22 @@ #define CLUSTER_MANAGER_FLAG_DISCONNECT 1 << 4 #define CLUSTER_MANAGER_FLAG_FAIL 1 << 5 -#define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 +#define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0 +#define CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1 +#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 + +#define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 + +#define CLUSTER_MANAGER_LOG_LVL_INFO 1 +#define CLUSTER_MANAGER_LOG_LVL_WARN 2 +#define CLUSTER_MANAGER_LOG_LVL_ERR 3 +#define CLUSTER_MANAGER_LOG_LVL_SUCCESS 4 + +#define LOG_COLOR_BOLD "29;1m" +#define LOG_COLOR_RED "31;1m" +#define LOG_COLOR_GREEN "32;1m" +#define LOG_COLOR_YELLOW "33;1m" +#define LOG_COLOR_RESET "0m" /* --latency-dist palettes. */ int spectrum_palette_color_size = 19; @@ -270,6 +299,7 @@ static void clusterManagerShowInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); static void clusterManagerWaitForClusterJoin(void); static void clusterManagerCheckCluster(int quiet); +static void clusterManagerLog(int level, const char* fmt, ...); typedef int clusterManagerCommandProc(int argc, char **argv); typedef struct clusterManagerCommandDef { @@ -1267,6 +1297,7 @@ static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { cmd->name = cmdname; cmd->argc = argc; cmd->argv = argc ? 
argv : NULL; + if (isColorTerm()) cmd->flags |= CLUSTER_MANAGER_CMD_FLAG_COLOR; } static int parseOptions(int argc, char **argv) { @@ -2042,7 +2073,8 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, clusterManagerNode **offenders = NULL, **aux; int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); if (score == 0) goto cleanup; - printf(">>> Trying to optimize slaves allocation for anti-affinity\n"); + clusterManagerLogInfo(">>> Trying to optimize slaves allocation " + "for anti-affinity\n"); int node_len = cluster_manager.nodes->len; int maxiter = 500 * node_len; srand(time(NULL)); @@ -2091,12 +2123,15 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, zfree(aux), aux = NULL; score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); char *msg; - if (score == 0) msg = "[OK] Perfect anti-affinity obtained!"; + int perfect = (score == 0); + int log_level = (perfect ? CLUSTER_MANAGER_LOG_LVL_SUCCESS : + CLUSTER_MANAGER_LOG_LVL_WARN); + if (perfect) msg = "[OK] Perfect anti-affinity obtained!"; else if (score >= 10000) msg = ("[WARNING] Some slaves are in the same host as their master"); else msg=("[WARNING] Some slaves of the same master are in the same host"); - printf("%s\n", msg); + clusterManagerLog(log_level, "%s\n", msg); cleanup: zfree(offenders); zfree(aux); @@ -2211,7 +2246,7 @@ static void clusterManagerShowInfo(void) { keys += dbsize; } } - printf("[OK] %d keys in %d masters.\n", keys, masters); + clusterManagerLogOk("[OK] %d keys in %d masters.\n", keys, masters); float keys_per_slot = keys / (float) CLUSTER_MANAGER_SLOTS; printf("%.2f keys per slot on average.\n", keys_per_slot); } @@ -2482,7 +2517,7 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { char *e = NULL; if (!clusterManagerNodeIsCluster(node, &e)) { char *msg = (e ? 
e : "is not configured as a cluster node."); - fprintf(stderr, "[ERR] Node %s:%d %s\n", node->ip, node->port, msg); + clusterManagerLogErr("[ERR] Node %s:%d %s\n",node->ip,node->port,msg); if (e) zfree(e); freeClusterManagerNode(node); return 0; @@ -2522,8 +2557,9 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { goto invalid_friend; listAddNodeTail(cluster_manager.nodes, friend); } else { - fprintf(stderr,"[ERR] Unable to load info for node %s:%d\n", - friend->ip, friend->port); + clusterManagerLogErr("[ERR] Unable to load info for " + "node %s:%d\n", + friend->ip, friend->port); goto invalid_friend; } continue; @@ -2692,15 +2728,18 @@ static void clusterManagerCheckCluster(int quiet) { listNode *ln = listFirst(cluster_manager.nodes); if (!ln) return; clusterManagerNode *node = ln->value; - printf(">>> Performing Cluster Check (using node %s:%d)\n", - node->ip, node->port); + clusterManagerLogInfo(">>> Performing Cluster Check (using node %s:%d)\n", + node->ip, node->port); if (!quiet) clusterManagerShowNodes(); if (!clusterManagerIsConfigConsistent()) { sds err = sdsnew("[ERR] Nodes don't agree about configuration!"); CLUSTER_MANAGER_ERROR(err); - } else printf("[OK] All nodes agree about slots configuration.\n"); + } else { + clusterManagerLogOk("[OK] All nodes agree about slots " + "configuration.\n"); + } // Check open slots - printf(">>> Check for open slots...\n"); + clusterManagerLogInfo(">>> Check for open slots...\n"); listIter li; listRewind(cluster_manager.nodes, &li); int i; @@ -2754,17 +2793,18 @@ static void clusterManagerCheckCluster(int quiet) { char *fmt = (i++ > 0 ? 
",%S" : "%S"); errstr = sdscatfmt(errstr, fmt, slot); } - fprintf(stderr, "%s.\n", (char *) errstr); + clusterManagerLogErr("%s.\n", (char *) errstr); sdsfree(errstr); dictRelease(open_slots); } - printf(">>> Check slots coverage...\n"); + clusterManagerLogInfo(">>> Check slots coverage...\n"); char slots[CLUSTER_MANAGER_SLOTS]; memset(slots, 0, CLUSTER_MANAGER_SLOTS); int coverage = clusterManagerGetCoveredSlots(slots); - if (coverage == CLUSTER_MANAGER_SLOTS) - printf("[OK] All %d slots covered.\n", CLUSTER_MANAGER_SLOTS); - else { + if (coverage == CLUSTER_MANAGER_SLOTS) { + clusterManagerLogOk("[OK] All %d slots covered.\n", + CLUSTER_MANAGER_SLOTS); + } else { sds err = sdsempty(); err = sdscatprintf(err, "[ERR] Not all %d slots are " "covered by nodes.\n", @@ -2773,6 +2813,26 @@ static void clusterManagerCheckCluster(int quiet) { } } +static void clusterManagerLog(int level, const char* fmt, ...) { + int use_colors = + (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COLOR); + if (use_colors) { + printf("\033["); + switch (level) { + case CLUSTER_MANAGER_LOG_LVL_INFO: printf(LOG_COLOR_BOLD); break; + case CLUSTER_MANAGER_LOG_LVL_WARN: printf(LOG_COLOR_YELLOW); break; + case CLUSTER_MANAGER_LOG_LVL_ERR: printf(LOG_COLOR_RED); break; + case CLUSTER_MANAGER_LOG_LVL_SUCCESS: printf(LOG_COLOR_GREEN); break; + default: printf(LOG_COLOR_RESET); break; + } + } + va_list ap; + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); + if (use_colors) printf("\033[" LOG_COLOR_RESET); +} + static void clusterManagerMode(clusterManagerCommandProc *proc) { int argc = config.cluster_manager_command.argc; char **argv = config.cluster_manager_command.argv; @@ -2790,7 +2850,6 @@ cluster_manager_err: /* Cluster Manager Commands */ static int clusterManagerCommandCreate(int argc, char **argv) { - printf("Cluster Manager: Creating Cluster\n"); int i, j, success = 1; cluster_manager.nodes = listCreate(); for (i = 0; i < argc; i++) { @@ -2816,7 +2875,7 @@ static int 
clusterManagerCommandCreate(int argc, char **argv) { char *err = NULL; if (!clusterManagerNodeIsCluster(node, &err)) { char *msg = (err ? err : "is not configured as a cluster node."); - fprintf(stderr, "[ERR] Node %s:%d %s\n", ip, port, msg); + clusterManagerLogErr("[ERR] Node %s:%d %s\n", ip, port, msg); if (err) zfree(err); freeClusterManagerNode(node); return 0; @@ -2835,11 +2894,11 @@ static int clusterManagerCommandCreate(int argc, char **argv) { char *msg; if (err) msg = err; else { - msg = " is not empty. Either the node already knows other " + msg = "is not empty. Either the node already knows other " "nodes (check with CLUSTER NODES) or contains some " "key in database 0."; } - fprintf(stderr, "[ERR] Node %s:%d %s\n", ip, port, msg); + clusterManagerLogErr("[ERR] Node %s:%d %s\n", ip, port, msg); if (err) zfree(err); freeClusterManagerNode(node); return 0; @@ -2850,18 +2909,17 @@ static int clusterManagerCommandCreate(int argc, char **argv) { int replicas = config.cluster_manager_command.replicas; int masters_count = CLUSTER_MANAGER_MASTERS_COUNT(node_len, replicas); if (masters_count < 3) { - fprintf(stderr, - "*** ERROR: Invalid configuration for cluster creation.\n"); - fprintf(stderr, - "*** Redis Cluster requires at least 3 master nodes.\n"); - fprintf(stderr, + clusterManagerLogErr( + "*** ERROR: Invalid configuration for cluster creation.\n" + "*** Redis Cluster requires at least 3 master nodes.\n" "*** This is not possible with %d nodes and %d replicas per node.", node_len, replicas); - fprintf(stderr, "\n*** At least %d nodes are required.\n", - (3 * (replicas + 1))); + clusterManagerLogErr("\n*** At least %d nodes are required.\n", + 3 * (replicas + 1)); return 0; } - printf(">>> Performing hash slots allocation on %d nodes...\n", node_len); + clusterManagerLogInfo(">>> Performing hash slots allocation " + "on %d nodes...\n", node_len); int interleaved_len = 0, ips_len = 0; clusterManagerNode **interleaved = 
zcalloc(node_len*sizeof(**interleaved)); char **ips = zcalloc(node_len * sizeof(char*)); @@ -2989,8 +3047,9 @@ assign_replicas: goto cleanup; } else if (err != NULL) zfree(err); } - printf(">>> Nodes configuration updated\n"); - printf(">>> Assign a different config epoch to each node\n"); + clusterManagerLogInfo(">>> Nodes configuration updated\n"); + clusterManagerLogInfo(">>> Assign a different config epoch to " + "each node\n"); int config_epoch = 1; listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { @@ -3001,7 +3060,8 @@ assign_replicas: config_epoch++); if (reply != NULL) freeReplyObject(reply); } - printf(">>> Sending CLUSTER MEET messages to join the cluster\n"); + clusterManagerLogInfo(">>> Sending CLUSTER MEET messages to join " + "the cluster\n"); clusterManagerNode *first = NULL; listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { @@ -3156,7 +3216,7 @@ static int clusterManagerCommandCall(int argc, char **argv) { argc--; argv++; size_t *argvlen = zmalloc(argc*sizeof(size_t)); - printf(">>> Calling"); + clusterManagerLogInfo(">>> Calling"); for (i = 0; i < argc; i++) { argvlen[i] = strlen(argv[i]); printf(" %s", argv[i]); From f3882021c07aabc57b006c2efd2365721185fa0c Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 20 Feb 2018 12:01:13 +0100 Subject: [PATCH 13/66] - Fixed bug in clusterManagerGetAntiAffinityScore - Code improvements --- src/redis-cli.c | 57 ++++++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 6ea44f83f..b222f5a88 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -505,7 +505,6 @@ static int dictSdsKeyCompare(void *privdata, const void *key1, static void dictSdsDestructor(void *privdata, void *val) { DICT_NOTUSED(privdata); - sdsfree(val); } @@ -2008,11 +2007,13 @@ result: static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, int ip_len, clusterManagerNode 
***offending, int *offending_len) { - assert(offending != NULL); int score = 0, i, j; int node_len = cluster_manager.nodes->len; - *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); - clusterManagerNode **offending_p = *offending; + clusterManagerNode **offending_p = NULL; + if (offending != NULL) { + *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); + offending_p = *offending; + } for (i = 0; i < ip_len; i++) { clusterManagerNodeArray *node_array = &(ipnodes[i]); dict *related = dictCreate(&clusterManagerDictType, NULL); @@ -2021,23 +2022,21 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, clusterManagerNode *node = node_array->nodes[j]; if (node == NULL) continue; if (!ip) ip = node->ip; - sds types; - if (!node->replicate) { - assert(node->name != NULL); - dictEntry *entry = dictFind(related, node->name); - if (entry) types = (sds) dictGetVal(entry); - else types = sdsempty(); - types = sdscatprintf(types, "m%s", types); - dictReplace(related, node->name, types); - } else { - dictEntry *entry = dictFind(related, node->replicate); - if (entry) types = (sds) dictGetVal(entry); - else { - types = sdsempty(); - dictAdd(related, node->replicate, types); - } - sdscat(types, "s"); + sds types, otypes; + // We always use the Master ID as key + sds key = (!node->replicate ? node->name : node->replicate); + assert(key != NULL); + dictEntry *entry = dictFind(related, key); + if (entry) otypes = (sds) dictGetVal(entry); + else { + otypes = sdsempty(); + dictAdd(related, key, otypes); } + // Master type 'm' is always set as the first character of the + // types string. 
+ if (!node->replicate) types = sdscatprintf(otypes, "m%s", otypes); + else types = sdscat(otypes, "s"); + if (types != otypes) dictReplace(related, key, types); } dictIterator *iter = dictGetIterator(related); dictEntry *entry; @@ -2048,6 +2047,7 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, if (typeslen < 2) continue; if (types[0] == 'm') score += (10000 * (typeslen - 1)); else score += (1 * typeslen); + if (offending == NULL) continue; listIter li; listNode *ln; listRewind(cluster_manager.nodes, &li); @@ -2056,11 +2056,12 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, if (n->replicate == NULL) continue; if (!strcmp(n->replicate, name) && !strcmp(n->ip, ip)) { *(offending_p++) = n; + if (offending_len != NULL) (*offending_len)++; break; } } } - if (offending_len != NULL) *offending_len = offending_p - *offending; + //if (offending_len != NULL) *offending_len = offending_p - *offending; dictReleaseIterator(iter); dictRelease(related); } @@ -2070,8 +2071,8 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, int ip_len) { - clusterManagerNode **offenders = NULL, **aux; - int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); + clusterManagerNode **offenders = NULL; + int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, NULL, NULL); if (score == 0) goto cleanup; clusterManagerLogInfo(">>> Trying to optimize slaves allocation " "for anti-affinity\n"); @@ -2088,7 +2089,8 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, &offending_len); if (score == 0) break; int rand_idx = rand() % offending_len; - clusterManagerNode *first = offenders[rand_idx], *second; + clusterManagerNode *first = offenders[rand_idx], + *second = NULL; clusterManagerNode **other_replicas = zcalloc((node_len - 1) * sizeof(*other_replicas)); int 
other_replicas_count = 0; @@ -2110,9 +2112,8 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, *second_master = second->replicate; first->replicate = second_master, first->dirty = 1; second->replicate = first_master, second->dirty = 1; - zfree(aux), aux = NULL; int new_score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, - &aux, NULL); + NULL, NULL); if (new_score > score) { first->replicate = first_master; second->replicate = second_master; @@ -2120,8 +2121,7 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, zfree(other_replicas); maxiter--; } - zfree(aux), aux = NULL; - score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); + score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, NULL, NULL); char *msg; int perfect = (score == 0); int log_level = (perfect ? CLUSTER_MANAGER_LOG_LVL_SUCCESS : @@ -2134,7 +2134,6 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, clusterManagerLog(log_level, "%s\n", msg); cleanup: zfree(offenders); - zfree(aux); } static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { From d5861925496e67996e4f523dd314f79d41c2fe28 Mon Sep 17 00:00:00 2001 From: artix Date: Thu, 22 Feb 2018 18:32:39 +0100 Subject: [PATCH 14/66] Cluster Manager: - Almost all Cluster Manager related code moved to the same section. - Many macroes converted to functions - Added various comments - Little code restyling --- src/redis-cli.c | 460 ++++++++++++++++++++++++++++-------------------- 1 file changed, 271 insertions(+), 189 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index b222f5a88..b72c31cff 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -75,54 +75,8 @@ (n->context = redisConnect(n->ip, n->port)); #define CLUSTER_MANAGER_COMMAND(n,...) 
\ (reconnectingRedisCommand(n->context, __VA_ARGS__)) -#define CLUSTER_MANAGER_NODE_INFO(n) (CLUSTER_MANAGER_COMMAND(n, "INFO")) -#define CLUSTER_MANAGER_ERROR(err) do { \ - if (cluster_manager.errors == NULL) \ - cluster_manager.errors = listCreate(); \ - listAddNodeTail(cluster_manager.errors, err); \ - clusterManagerLogErr("%s\n", (char *) err); \ -} while(0) - -#define CLUSTER_MANAGER_RESET_SLOTS(n) do { \ - memset(n->slots, 0, sizeof(n->slots)); \ - n->slots_count = 0; \ -} while(0) - -#define CLUSTER_MANAGER_NODEARRAY_INIT(array, alloc_len) do { \ - array->nodes = zcalloc(alloc_len * sizeof(clusterManagerNode*));\ - array->alloc = array->nodes; \ - array->len = alloc_len; \ - array->count = 0; \ -} while(0) - -#define CLUSTER_MANAGER_NODEARRAY_RESET(array) do { \ - if (array->nodes > array->alloc) { \ - array->len = array->nodes - array->alloc; \ - array->nodes = array->alloc; \ - array->count = 0; \ - int i = 0; \ - for(; i < array->len; i++) { \ - if (array->nodes[i] != NULL) array->count++;\ - } \ - } \ -} while(0) - -#define CLUSTER_MANAGER_NODEARRAY_FREE(array) zfree(array->alloc) - -#define CLUSTER_MANAGER_NODEARRAY_SHIFT(array, nodeptr) do {\ - assert(array->nodes < (array->nodes + array->len)); \ - if (*array->nodes != NULL) array->count--; \ - nodeptr = *array->nodes; \ - array->nodes++; \ - array->len--; \ -} while(0) - -#define CLUSTER_MANAGER_NODEARRAY_ADD(array, nodeptr) do { \ - assert(array->nodes < (array->nodes + array->len)); \ - assert(nodeptr != NULL); \ - array->nodes[array->count++] = nodeptr; \ -} while(0) +#define CLUSTER_MANAGER_NODE_ARRAY_FREE(array) zfree(array->alloc) #define CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err) \ clusterManagerLogErr("Node %s:%d replied with error:\n%s\n", \ @@ -190,6 +144,7 @@ typedef struct clusterManagerCommand { int flags; int replicas; } clusterManagerCommand; +static void createClusterManagerCommand(char *cmdname, int argc, char **argv); static redisContext *context; @@ -237,88 +192,6 @@ static struct 
config { clusterManagerCommand cluster_manager_command; } config; -/* Cluster Manager */ - -static struct clusterManager { - list *nodes; - list *errors; -} cluster_manager; - -typedef struct clusterManagerNode { - redisContext *context; - sds name; - char *ip; - int port; - uint64_t current_epoch; - time_t ping_sent; - time_t ping_recv; - int flags; - sds replicate; - list replicas; - int dirty; - uint8_t slots[CLUSTER_MANAGER_SLOTS]; - int slots_count; - int replicas_count; - list *friends; - sds *migrating; - sds *importing; - int migrating_count; - int importing_count; -} clusterManagerNode; - -typedef struct clusterManagerNodeArray { - clusterManagerNode **nodes; - clusterManagerNode **alloc; - int len; - int count; -} clusterManagerNodeArray; - -static dictType clusterManagerDictType = { - dictSdsHash, /* hash function */ - NULL, /* key dup */ - NULL, /* val dup */ - dictSdsKeyCompare, /* key compare */ - NULL, /* key destructor */ - dictSdsDestructor /* val destructor */ -}; - -static clusterManagerNode *clusterManagerNewNode(char *ip, int port); -static clusterManagerNode *clusterManagerNodeByName(const char *name); -static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); -static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, - char **err); -static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts); -static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err); -static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, - int ip_len, clusterManagerNode ***offending, int *offending_len); -static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, - int ip_len); -static sds clusterManagerNodeInfo(clusterManagerNode *node); -static void clusterManagerShowNodes(void); -static void clusterManagerShowInfo(void); -static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); -static void clusterManagerWaitForClusterJoin(void); 
-static void clusterManagerCheckCluster(int quiet); -static void clusterManagerLog(int level, const char* fmt, ...); - -typedef int clusterManagerCommandProc(int argc, char **argv); -typedef struct clusterManagerCommandDef { - char *name; - clusterManagerCommandProc *proc; - int arity; - char *args; - char *options; -} clusterManagerCommandDef; -static int clusterManagerIsConfigConsistent(void); - -/* Cluster Manager commands. */ - -static int clusterManagerCommandCreate(int argc, char **argv); -static int clusterManagerCommandInfo(int argc, char **argv); -static int clusterManagerCommandCheck(int argc, char **argv); -static int clusterManagerCommandCall(int argc, char **argv); -static int clusterManagerCommandHelp(int argc, char **argv); - /* User preferences. */ static struct pref { int hints; @@ -1291,14 +1164,6 @@ static redisReply *reconnectingRedisCommand(redisContext *c, const char *fmt, .. * User interface *--------------------------------------------------------------------------- */ -static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { - clusterManagerCommand *cmd = &config.cluster_manager_command; - cmd->name = cmdname; - cmd->argc = argc; - cmd->argv = argc ? argv : NULL; - if (isColorTerm()) cmd->flags |= CLUSTER_MANAGER_CMD_FLAG_COLOR; -} - static int parseOptions(int argc, char **argv) { int i; @@ -1828,6 +1693,100 @@ static int evalMode(int argc, char **argv) { * Cluster Manager mode *--------------------------------------------------------------------------- */ +/* The Cluster Manager global structure */ +static struct clusterManager { + list *nodes; /* List of nodes in the configuration.
*/ + list *errors; +} cluster_manager; + +typedef struct clusterManagerNode { + redisContext *context; + sds name; + char *ip; + int port; + uint64_t current_epoch; + time_t ping_sent; + time_t ping_recv; + int flags; + sds replicate; /* Master ID if node is a slave */ + list replicas; + int dirty; /* Node has changes that can be flushed */ + uint8_t slots[CLUSTER_MANAGER_SLOTS]; + int slots_count; + int replicas_count; + list *friends; + sds *migrating; + sds *importing; + int migrating_count; + int importing_count; +} clusterManagerNode; + +/* Data structure used to represent a sequence of nodes. */ +typedef struct clusterManagerNodeArray { + clusterManagerNode **nodes; /* Actual nodes array */ + clusterManagerNode **alloc; /* Pointer to the allocated memory */ + int len; /* Actual length of the array */ + int count; /* Non-NULL nodes count */ +} clusterManagerNodeArray; + +static dictType clusterManagerDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + NULL, /* key destructor */ + dictSdsDestructor /* val destructor */ +}; + +typedef int clusterManagerCommandProc(int argc, char **argv); + +/* Cluster Manager helper functions */ + +static clusterManagerNode *clusterManagerNewNode(char *ip, int port); +static clusterManagerNode *clusterManagerNodeByName(const char *name); +static void clusterManagerNodeResetSlots(clusterManagerNode *node); +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); +static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, + char **err); +static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts); +static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err); +static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, + int ip_count, clusterManagerNode ***offending, int *offending_len); +static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray 
*ipnodes, + int ip_count); +static sds clusterManagerNodeInfo(clusterManagerNode *node); +static void clusterManagerShowNodes(void); +static void clusterManagerShowInfo(void); +static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); +static void clusterManagerWaitForClusterJoin(void); +static void clusterManagerCheckCluster(int quiet); +static void clusterManagerLog(int level, const char* fmt, ...); +static int clusterManagerIsConfigConsistent(void); +static void clusterManagerOnError(sds err); +static void clusterManagerNodeArrayInit(clusterManagerNodeArray *array, + int len); +static void clusterManagerNodeArrayReset(clusterManagerNodeArray *array); +static void clusterManagerNodeArrayShift(clusterManagerNodeArray *array, + clusterManagerNode **nodeptr); +static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, + clusterManagerNode *node); + +/* Cluster Manager commands. */ + +static int clusterManagerCommandCreate(int argc, char **argv); +static int clusterManagerCommandInfo(int argc, char **argv); +static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandCall(int argc, char **argv); +static int clusterManagerCommandHelp(int argc, char **argv); + +typedef struct clusterManagerCommandDef { + char *name; + clusterManagerCommandProc *proc; + int arity; + char *args; + char *options; +} clusterManagerCommandDef; + clusterManagerCommandDef clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", "cluster-replicas"}, @@ -1838,6 +1797,16 @@ clusterManagerCommandDef clusterManagerCommands[] = { {"help", clusterManagerCommandHelp, 0, NULL, NULL} }; + +static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { + clusterManagerCommand *cmd = &config.cluster_manager_command; + cmd->name = cmdname; + cmd->argc = argc; + cmd->argv = argc ? 
argv : NULL; + if (isColorTerm()) cmd->flags |= CLUSTER_MANAGER_CMD_FLAG_COLOR; +} + + static clusterManagerCommandProc *validateClusterManagerCommand(void) { int i, commands_count = sizeof(clusterManagerCommands) / sizeof(clusterManagerCommandDef); @@ -1930,7 +1899,7 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { node->migrating_count = 0; node->importing_count = 0; node->replicas_count = 0; - CLUSTER_MANAGER_RESET_SLOTS(node); + clusterManagerNodeResetSlots(node); return node; } @@ -1954,41 +1923,49 @@ static clusterManagerNode *clusterManagerNodeByName(const char *name) { return found; } -static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { - redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); - int is_err = 0; - *err = NULL; - if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { +static void clusterManagerNodeResetSlots(clusterManagerNode *node) { + memset(node->slots, 0, sizeof(node->slots)); + node->slots_count = 0; +} + +static redisReply *clusterManagerGetNodeRedisInfo(clusterManagerNode *node, + char **err) +{ + redisReply *info = CLUSTER_MANAGER_COMMAND(node, "INFO"); + if (err != NULL) *err = NULL; + if (info == NULL) return NULL; + if (info->type == REDIS_REPLY_ERROR) { + if (err != NULL) { *err = zmalloc((info->len + 1) * sizeof(char)); strcpy(*err, info->str); } freeReplyObject(info); - return 0; + return NULL; } + return info; +} + +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { + redisReply *info = clusterManagerGetNodeRedisInfo(node, err); + if (info == NULL) return 0; int is_cluster = (int) getLongInfoField(info->str, "cluster_enabled"); freeReplyObject(info); return is_cluster; } +/* Checks whether the node is empty. 
Node is considered not-empty if it has + * some key or if it already knows other nodes */ static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) { - redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); + redisReply *info = clusterManagerGetNodeRedisInfo(node, err); int is_err = 0, is_empty = 1; - *err = NULL; - if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { - *err = zmalloc((info->len + 1) * sizeof(char)); - strcpy(*err, info->str); - } - is_empty = 0; - goto result; - } + if (info == NULL) return 0; if (strstr(info->str, "db0:") != NULL) { is_empty = 0; goto result; } freeReplyObject(info); info = CLUSTER_MANAGER_COMMAND(node, "CLUSTER INFO"); + if (err != NULL) *err = NULL; if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { if (is_err && err != NULL) { *err = zmalloc((info->len + 1) * sizeof(char)); @@ -2004,8 +1981,37 @@ result: return is_empty; } +/* Return the anti-affinity score, which is a measure of the amount of + * violations of anti-affinity in the current cluster layout, that is, how + * badly the masters and slaves are distributed in the different IP + * addresses so that slaves of the same master are not in the master + * host and are also in different hosts. + * + * The score is calculated as follows: + * + * SAME_AS_MASTER = 10000 * each slave in the same IP of its master. + * SAME_AS_SLAVE = 1 * each slave having the same IP as another slave + of the same master. + * FINAL_SCORE = SAME_AS_MASTER + SAME_AS_SLAVE + * + * So a greater score means a worse anti-affinity level, while zero + * means perfect anti-affinity. + * + * The anti-affinity optimizer will try to get a score as low as + * possible. Since we do not want to sacrifice the fact that slaves should + * not be in the same host as the master, we assign 10000 times the score + * to this violation, so that we'll optimize for the second factor only + * if it does not impact the first one.
+ * + * The ipnodes argument is an array of clusterManagerNodeArray, one for + * each IP, while ip_count is the total number of IPs in the configuration. + * + * The function returns the above score, and the list of + * offending slaves can be stored into the 'offending' argument, + * so that the optimizer can try changing the configuration of the + * slaves violating the anti-affinity goals. */ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, - int ip_len, clusterManagerNode ***offending, int *offending_len) + int ip_count, clusterManagerNode ***offending, int *offending_len) { int score = 0, i, j; int node_len = cluster_manager.nodes->len; @@ -2014,7 +2020,10 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); offending_p = *offending; } - for (i = 0; i < ip_len; i++) { + /* For each set of nodes in the same host, split by + * related nodes (masters and slaves which are involved in + * replication of each other) */ + for (i = 0; i < ip_count; i++) { clusterManagerNodeArray *node_array = &(ipnodes[i]); dict *related = dictCreate(&clusterManagerDictType, NULL); char *ip = NULL; @@ -2038,6 +2047,8 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, else types = sdscat(otypes, "s"); if (types != otypes) dictReplace(related, key, types); } + /* Now it's trivial to check, for each related group having the + * same host, what is their local score. */ dictIterator *iter = dictGetIterator(related); dictEntry *entry; while ((entry = dictNext(iter)) != NULL) { @@ -2048,6 +2059,7 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, if (types[0] == 'm') score += (10000 * (typeslen - 1)); else score += (1 * typeslen); if (offending == NULL) continue; + /* Populate the list of offending nodes. 
*/ listIter li; listNode *ln; listRewind(cluster_manager.nodes, &li); @@ -2069,15 +2081,16 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, } static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, - int ip_len) + int ip_count) { clusterManagerNode **offenders = NULL; - int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, NULL, NULL); + int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_count, + NULL, NULL); if (score == 0) goto cleanup; clusterManagerLogInfo(">>> Trying to optimize slaves allocation " "for anti-affinity\n"); int node_len = cluster_manager.nodes->len; - int maxiter = 500 * node_len; + int maxiter = 500 * node_len; // Effort is proportional to cluster size... srand(time(NULL)); while (maxiter > 0) { int offending_len = 0; @@ -2085,9 +2098,14 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, zfree(offenders); offenders = NULL; } - score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &offenders, + score = clusterManagerGetAntiAffinityScore(ipnodes, + ip_count, + &offenders, &offending_len); - if (score == 0) break; + if (score == 0) break; // Optimal anti affinity reached + /* We'll try to randomly swap a slave's assigned master causing + * an affinity problem with another random slave, to see if we + * can improve the affinity. */ int rand_idx = rand() % offending_len; clusterManagerNode *first = offenders[rand_idx], *second = NULL; @@ -2112,8 +2130,12 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, *second_master = second->replicate; first->replicate = second_master, first->dirty = 1; second->replicate = first_master, second->dirty = 1; - int new_score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, + int new_score = clusterManagerGetAntiAffinityScore(ipnodes, + ip_count, NULL, NULL); + /* If the change actually makes thing worse, revert. 
Otherwise + * leave as it is because the best solution may need a few + * combined swaps. */ if (new_score > score) { first->replicate = first_master; second->replicate = second_master; @@ -2121,7 +2143,7 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, zfree(other_replicas); maxiter--; } - score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, NULL, NULL); + score = clusterManagerGetAntiAffinityScore(ipnodes, ip_count, NULL, NULL); char *msg; int perfect = (score == 0); int log_level = (perfect ? CLUSTER_MANAGER_LOG_LVL_SUCCESS : @@ -2136,6 +2158,7 @@ cleanup: zfree(offenders); } +/* Return a representable string of the node's slots */ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { sds slots = sdsempty(); int first_range_idx = -1, last_slot_idx = -1, i; @@ -2303,11 +2326,13 @@ cleanup: return success; } +/* Flush the dirty node configuration by calling replicate for slaves or + * adding the slots for masters. */ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { if (!node->dirty) return 0; redisReply *reply = NULL; int is_err = 0, success = 1; - *err = NULL; + if (err != NULL) *err = NULL; if (node->replicate != NULL) { reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER REPLICATE %s", node->replicate); @@ -2317,14 +2342,15 @@ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { strcpy(*err, reply->str); } success = 0; + /* If the cluster has not joined yet it is possible that + * the slave does not know the master node yet. So on errors + * we return ASAP leaving the dirty flag set, to flush the + * config later.
*/ goto cleanup; } } else { int added = clusterManagerAddSlots(node, err); - if (!added || *err != NULL) { - success = 0; - goto cleanup; - } + if (!added || *err != NULL) success = 0; } node->dirty = 0; cleanup: @@ -2342,6 +2368,11 @@ static void clusterManagerWaitForClusterJoin(void) { printf("\n"); } +/* Load node's cluster configuration by calling "CLUSTER NODES" command. + * Node's configuration (name, replicate, slots, ...) is then updated. + * If CLUSTER_MANAGER_OPT_GETFRIENDS flag is set into 'opts' argument, + * and node already knows other nodes, the node's friends list is populated + * with the other nodes info. */ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err) { @@ -2391,7 +2422,7 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (myself) { node->flags |= CLUSTER_MANAGER_FLAG_MYSELF; currentNode = node; - CLUSTER_MANAGER_RESET_SLOTS(node); + clusterManagerNodeResetSlots(node); if (i == 8) { int remaining = strlen(line); //TODO: just while(remaining) && assign p inside the block @@ -2501,7 +2532,6 @@ cleanup: * point. All nodes will be loaded inside the cluster_manager.nodes list. * Warning: if something goes wrong, it will free the starting node before * returning 0. */ - static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { if (node->context == NULL) CLUSTER_MANAGER_NODE_CONNECT(node); @@ -2681,7 +2711,6 @@ static int clusterManagerIsConfigConsistent(void) { if (cluster_manager.nodes == NULL) return 0; int consistent = (listLength(cluster_manager.nodes) <= 1); // If the Cluster has only one node, it's always consistent - // Does it make sense? 
if (consistent) return 1; sds first_cfg = NULL; listIter li; @@ -2705,6 +2734,13 @@ static int clusterManagerIsConfigConsistent(void) { return consistent; } +static void clusterManagerOnError(sds err) { + if (cluster_manager.errors == NULL) + cluster_manager.errors = listCreate(); + listAddNodeTail(cluster_manager.errors, err); + clusterManagerLogErr("%s\n", (char *) err); +} + static int clusterManagerGetCoveredSlots(char *all_slots) { if (cluster_manager.nodes == NULL) return 0; listIter li; @@ -2732,7 +2768,7 @@ static void clusterManagerCheckCluster(int quiet) { if (!quiet) clusterManagerShowNodes(); if (!clusterManagerIsConfigConsistent()) { sds err = sdsnew("[ERR] Nodes don't agree about configuration!"); - CLUSTER_MANAGER_ERROR(err); + clusterManagerOnError(err); } else { clusterManagerLogOk("[OK] All nodes agree about slots " "configuration.\n"); @@ -2761,7 +2797,7 @@ static void clusterManagerCheckCluster(int quiet) { errstr = sdscatfmt(errstr, fmt, slot); } errstr = sdscat(errstr, "."); - CLUSTER_MANAGER_ERROR(errstr); + clusterManagerOnError(errstr); } if (n->importing != NULL) { if (open_slots == NULL) @@ -2779,7 +2815,7 @@ static void clusterManagerCheckCluster(int quiet) { errstr = sdscatfmt(errstr, fmt, slot); } errstr = sdscat(errstr, "."); - CLUSTER_MANAGER_ERROR(errstr); + clusterManagerOnError(errstr); } } if (open_slots != NULL) { @@ -2808,7 +2844,7 @@ static void clusterManagerCheckCluster(int quiet) { err = sdscatprintf(err, "[ERR] Not all %d slots are " "covered by nodes.\n", CLUSTER_MANAGER_SLOTS); - CLUSTER_MANAGER_ERROR(err); + clusterManagerOnError(err); } } @@ -2832,6 +2868,53 @@ static void clusterManagerLog(int level, const char* fmt, ...) 
{ if (use_colors) printf("\033[" LOG_COLOR_RESET); } +static void clusterManagerNodeArrayInit(clusterManagerNodeArray *array, + int alloc_len) +{ + array->nodes = zcalloc(alloc_len * sizeof(clusterManagerNode*)); + array->alloc = array->nodes; + array->len = alloc_len; + array->count = 0; +} + +/* Reset array->nodes to the original array allocation and re-count non-NULL + * nodes. */ +static void clusterManagerNodeArrayReset(clusterManagerNodeArray *array) { + if (array->nodes > array->alloc) { + array->len = array->nodes - array->alloc; + array->nodes = array->alloc; + array->count = 0; + int i = 0; + for(; i < array->len; i++) { + if (array->nodes[i] != NULL) array->count++; + } + } +} + +/* Shift array->nodes and store the shifted node into 'nodeptr'. */ +static void clusterManagerNodeArrayShift(clusterManagerNodeArray *array, + clusterManagerNode **nodeptr) +{ + assert(array->nodes < (array->nodes + array->len)); + /* If the first node to be shifted is not NULL, decrement count. */ + if (*array->nodes != NULL) array->count--; + /* Store the first node to be shifted into 'nodeptr'. */ + *nodeptr = *array->nodes; + /* Shift the nodes array and decrement length. 
*/ + array->nodes++; + array->len--; +} + +static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, + clusterManagerNode *node) +{ + assert(array->nodes < (array->nodes + array->len)); + assert(node != NULL); + assert(array->count < array->len); + array->nodes[array->count++] = node; +} + +/* Execute redis-cli in Cluster Manager mode */ static void clusterManagerMode(clusterManagerCommandProc *proc) { int argc = config.cluster_manager_command.argc; char **argv = config.cluster_manager_command.argv; @@ -2919,7 +3002,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { } clusterManagerLogInfo(">>> Performing hash slots allocation " "on %d nodes...\n", node_len); - int interleaved_len = 0, ips_len = 0; + int interleaved_len = 0, ip_count = 0; clusterManagerNode **interleaved = zcalloc(node_len*sizeof(**interleaved)); char **ips = zcalloc(node_len * sizeof(char*)); clusterManagerNodeArray *ip_nodes = zcalloc(node_len * sizeof(*ip_nodes)); @@ -2929,7 +3012,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; int found = 0; - for (i = 0; i < ips_len; i++) { + for (i = 0; i < ip_count; i++) { char *ip = ips[i]; if (!strcmp(ip, n->ip)) { found = 1; @@ -2937,19 +3020,19 @@ static int clusterManagerCommandCreate(int argc, char **argv) { } } if (!found) { - ips[ips_len++] = n->ip; + ips[ip_count++] = n->ip; } clusterManagerNodeArray *node_array = &(ip_nodes[i]); if (node_array->nodes == NULL) - CLUSTER_MANAGER_NODEARRAY_INIT(node_array, node_len); - CLUSTER_MANAGER_NODEARRAY_ADD(node_array, n); + clusterManagerNodeArrayInit(node_array, node_len); + clusterManagerNodeArrayAdd(node_array, n); } while (interleaved_len < node_len) { - for (i = 0; i < ips_len; i++) { + for (i = 0; i < ip_count; i++) { clusterManagerNodeArray *node_array = &(ip_nodes[i]); if (node_array->count > 0) { - clusterManagerNode *n; - CLUSTER_MANAGER_NODEARRAY_SHIFT(node_array, n); + 
clusterManagerNode *n = NULL; + clusterManagerNodeArrayShift(node_array, &n); interleaved[interleaved_len++] = n; } } @@ -3019,11 +3102,11 @@ assign_replicas: printf("Adding extra replicas...\n"); goto assign_replicas; } - for (i = 0; i < ips_len; i++) { + for (i = 0; i < ip_count; i++) { clusterManagerNodeArray *node_array = ip_nodes + i; - CLUSTER_MANAGER_NODEARRAY_RESET(node_array); + clusterManagerNodeArrayReset(node_array); } - clusterManagerOptimizeAntiAffinity(ip_nodes, ips_len); + clusterManagerOptimizeAntiAffinity(ip_nodes, ip_count); clusterManagerShowNodes(); printf("Can I set the above configuration? %s", "(type 'yes' to accept): "); fflush(stdout); @@ -3031,7 +3114,6 @@ assign_replicas: int nread = read(fileno(stdin),buf,4); buf[3] = '\0'; if (nread != 0 && !strcmp("yes", buf)) { - printf("\nFlushing configuration!\n"); listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; @@ -3128,7 +3210,7 @@ cleanup: zfree(ips); for (i = 0; i < node_len; i++) { clusterManagerNodeArray *node_array = ip_nodes + i; - CLUSTER_MANAGER_NODEARRAY_FREE(node_array); + CLUSTER_MANAGER_NODE_ARRAY_FREE(node_array); } zfree(ip_nodes); return success; From 3c665bf6278e5899988b9739f39784848b8cc683 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 28 Feb 2018 10:44:11 +0100 Subject: [PATCH 15/66] Cluster Manager: reshard command, fixed slots parsing bug and other minor bugs. 
--- src/redis-cli.c | 655 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 593 insertions(+), 62 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index b72c31cff..68ae7cfa6 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -69,6 +69,13 @@ #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" #define CLUSTER_MANAGER_SLOTS 16384 +#define CLUSTER_MANAGER_MIGRATE_TIMEOUT 60000 +#define CLUSTER_MANAGER_MIGRATE_PIPELINE 10 + +#define CLUSTER_MANAGER_INVALID_HOST_ARG \ + "Invalid arguments: you need to pass either a valid " \ + "address (ie. 120.0.0.1:7000) or space separated IP " \ + "and port (ie. 120.0.0.1 7000)\n" #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) #define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) (nodes/(replicas + 1)) #define CLUSTER_MANAGER_NODE_CONNECT(n) \ @@ -103,9 +110,14 @@ #define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0 #define CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1 +#define CLUSTER_MANAGER_CMD_FLAG_YES 1 << 2 #define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 #define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 +#define CLUSTER_MANAGER_OPT_COLD 1 << 1 +#define CLUSTER_MANAGER_OPT_UPDATE 1 << 2 +#define CLUSTER_MANAGER_OPT_QUIET 1 << 6 +#define CLUSTER_MANAGER_OPT_VERBOSE 1 << 7 #define CLUSTER_MANAGER_LOG_LVL_INFO 1 #define CLUSTER_MANAGER_LOG_LVL_WARN 2 @@ -143,6 +155,11 @@ typedef struct clusterManagerCommand { char **argv; int flags; int replicas; + char *from; + char *to; + int slots; + int timeout; + int pipeline; } clusterManagerCommand; static void createClusterManagerCommand(char *cmdname, int argc, char **argv); @@ -1261,6 +1278,19 @@ static int parseOptions(int argc, char **argv) { usage(); } else if (!strcmp(argv[i],"--cluster-replicas") && !lastarg) { config.cluster_manager_command.replicas = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-from") && !lastarg) { + config.cluster_manager_command.from = argv[++i]; + } else if (!strcmp(argv[i],"--cluster-to") && !lastarg) { + 
config.cluster_manager_command.to = argv[++i]; + } else if (!strcmp(argv[i],"--cluster-slots") && !lastarg) { + config.cluster_manager_command.slots = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-timeout") && !lastarg) { + config.cluster_manager_command.timeout = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-pipeline") && !lastarg) { + config.cluster_manager_command.pipeline = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-yes")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_YES; } else if (!strcmp(argv[i],"-v") || !strcmp(argv[i], "--version")) { sds version = cliVersion(); printf("redis-cli %s\n", version); @@ -1358,7 +1388,7 @@ static void usage(void) { " --ldb-sync-mode Like --ldb but uses the synchronous Lua debugger, in\n" " this mode the server is blocked and script changes are\n" " are not rolled back from the server memory.\n" -" --cluster [args...]\n" +" --cluster [args...] [opts...]\n" " Cluster Manager command and arguments (see below).\n" " --help Output this help and exit.\n" " --version Output version and exit.\n" @@ -1729,6 +1759,12 @@ typedef struct clusterManagerNodeArray { int count; /* Non-NULL nodes count */ } clusterManagerNodeArray; +/* Used for reshard table. 
*/ +typedef struct clusterManagerReshardTableItem { + clusterManagerNode *source; + int slot; +} clusterManagerReshardTableItem; + static dictType clusterManagerDictType = { dictSdsHash, /* hash function */ NULL, /* key dup */ @@ -1754,7 +1790,7 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, int ip_count, clusterManagerNode ***offending, int *offending_len); static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, int ip_count); -static sds clusterManagerNodeInfo(clusterManagerNode *node); +static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent); static void clusterManagerShowNodes(void); static void clusterManagerShowInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); @@ -1776,6 +1812,7 @@ static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandReshard(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); @@ -1789,9 +1826,11 @@ typedef struct clusterManagerCommandDef { clusterManagerCommandDef clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", - "cluster-replicas"}, - {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, + "replicas "}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, + {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, + {"reshard", clusterManagerCommandReshard, -1, "host:port", + "from ,to ,slots ,yes,timeout ,pipeline "}, {"call", clusterManagerCommandCall, -2, "host:port command arg arg .. 
arg", NULL}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} @@ -1829,6 +1868,38 @@ static clusterManagerCommandProc *validateClusterManagerCommand(void) { return proc; } +/* Get host ip and port from command arguments. If only one argument has + * been provided it must be in the form of 'ip:port', elsewhere + * the first argument must be the ip and the second one the port. + * If host and port can be detected, it returns 1 and it stores host and + * port into variables referenced by'ip_ptr' and 'port_ptr' pointers, + * elsewhere it returns 0. */ +static int getClusterHostFromCmdArgs(int argc, char **argv, + char **ip_ptr, int *port_ptr) { + int port = 0; + char *ip = NULL; + if (argc == 1) { + char *addr = argv[0]; + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c != NULL) { + *c = '\0'; + ip = addr; + port = atoi(++c); + } else return 0; + } else { + ip = argv[0]; + port = atoi(argv[1]); + } + if (!ip || !port) return 0; + else { + *ip_ptr = ip; + *port_ptr = port; + } + return 1; +} + static void freeClusterManagerNode(clusterManagerNode *node) { if (node->context != NULL) redisFree(node->context); if (node->friends != NULL) { @@ -2188,8 +2259,12 @@ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { return slots; } -static sds clusterManagerNodeInfo(clusterManagerNode *node) { +static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent) { sds info = sdsempty(); + sds spaces = sdsempty(); + int i; + for (i = 0; i < indent; i++) spaces = sdscat(spaces, " "); + if (indent) info = sdscat(info, spaces); int is_master = !(node->flags & CLUSTER_MANAGER_FLAG_SLAVE); char *role = (is_master ? 
"M" : "S"); sds slots = NULL; @@ -2198,17 +2273,18 @@ static sds clusterManagerNodeInfo(clusterManagerNode *node) { else { slots = clusterManagerNodeSlotsString(node); info = sdscatfmt(info, "%s: %S %s:%u\n" - " slots:%S (%u slots) " + "%s slots:%S (%u slots) " "", //TODO: flags string - role, node->name, node->ip, node->port, + role, node->name, node->ip, node->port, spaces, slots, node->slots_count); sdsfree(slots); } if (node->replicate != NULL) - info = sdscatfmt(info, "\n replicates %S", node->replicate); + info = sdscatfmt(info, "\n%s replicates %S", spaces, node->replicate); else if (node->replicas_count) - info = sdscatfmt(info, "\n %U additional replica(s)", - node->replicas_count); + info = sdscatfmt(info, "\n%s %U additional replica(s)", + spaces, node->replicas_count); + sdsfree(spaces); return info; } @@ -2218,7 +2294,7 @@ static void clusterManagerShowNodes(void) { listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; - sds info = clusterManagerNodeInfo(node); + sds info = clusterManagerNodeInfo(node, 0); printf("%s\n", info); sdsfree(info); } @@ -2306,7 +2382,7 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) argvlen[i] = sdslen(argv[i]); redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen); if (redisGetReply(node->context, &_reply) != REDIS_OK) { - success = 1; + success = 0; goto cleanup; } reply = (redisReply*) _reply; @@ -2326,6 +2402,193 @@ cleanup: return success; } +/* Set slot status to "importing" or "migrating" */ +static int clusterManagerSetSlot(clusterManagerNode *node1, + clusterManagerNode *node2, + int slot, const char *mode, char **err) { + redisReply *reply = CLUSTER_MANAGER_COMMAND(node1, "CLUSTER " + "SETSLOT %d %s %s", + slot, mode, + (char *) node2->name); + if (err != NULL) *err = NULL; + if (!reply) return 0; + if (reply->type == REDIS_REPLY_ERROR) { + if (err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + 
strcpy(*err, reply->str); + } + return 0; + } + return 1; +} + +static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, + clusterManagerNode *target, + int slot, int timeout, + int pipeline, int verbose, + char **err) +{ + int success = 1; + while (1) { + redisReply *reply = NULL, *migrate_reply = NULL; + char **argv = NULL; + size_t *argv_len = NULL; + reply = CLUSTER_MANAGER_COMMAND(source, "CLUSTER " + "GETKEYSINSLOT %d %d", slot, + pipeline); + success = (reply != NULL); + if (!success) return 0; + if (reply->type == REDIS_REPLY_ERROR) { + success = 0; + if (err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + strcpy(*err, reply->str); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + } + goto next; + } + assert(reply->type == REDIS_REPLY_ARRAY); + size_t count = reply->elements; + if (count == 0) { + freeReplyObject(reply); + break; + } + char *dots = (verbose ? zmalloc((count+1) * sizeof(char)) : NULL); + /* Calling MIGRATE command. */ + size_t argc = count + 8; + argv = zcalloc(argc * sizeof(char *)); + argv_len = zcalloc(argc * sizeof(size_t)); + char portstr[255]; + char timeoutstr[255]; + snprintf(portstr, 10, "%d", target->port); + snprintf(timeoutstr, 10, "%d", timeout); + argv[0] = "MIGRATE"; + argv_len[0] = 7; + argv[1] = target->ip; + argv_len[1] = strlen(target->ip); + argv[2] = portstr; + argv_len[2] = strlen(portstr); + argv[3] = ""; + argv_len[3] = 0; + argv[4] = "0"; + argv_len[4] = 1; + argv[5] = timeoutstr; + argv_len[5] = strlen(timeoutstr); + argv[6] = "REPLACE"; + argv_len[6] = 7; + argv[7] = "KEYS"; + argv_len[7] = 4; + for (size_t i = 0; i < count; i++) { + redisReply *entry = reply->element[i]; + size_t idx = i + 8; + assert(entry->type == REDIS_REPLY_STRING); + argv[idx] = (char *) sdsnew(entry->str); + argv_len[idx] = entry->len; + if (verbose) dots[i] = '.'; + } + if (verbose) dots[count] = '\0'; + void *_reply = NULL; + redisAppendCommandArgv(source->context,argc, + (const char**)argv,argv_len); + 
success = (redisGetReply(source->context, &_reply) == REDIS_OK); + for (size_t i = 0; i < count; i++) sdsfree(argv[i + 8]); + if (!success) goto next; + migrate_reply = (redisReply *) _reply; + if (migrate_reply->type == REDIS_REPLY_ERROR) { + // TODO: Implement fix. + success = 0; + if (err != NULL) { + *err = zmalloc((migrate_reply->len + 1) * sizeof(char)); + strcpy(*err, migrate_reply->str); + printf("\n"); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + } + goto next; + } + if (verbose) { + printf("%s", dots); + fflush(stdout); + } +next: + if (reply != NULL) freeReplyObject(reply); + if (migrate_reply != NULL) freeReplyObject(migrate_reply); + zfree(argv); + zfree(argv_len); + if (!success) break; + } + return success; +} + +/* Move slots between source and target nodes using MIGRATE. + * + * Options: + * CLUSTER_MANAGER_OPT_VERBOSE -- Print a dot for every moved key. + * CLUSTER_MANAGER_OPT_COLD -- Move keys without opening slots / + * reconfiguring the nodes. + * CLUSTER_MANAGER_OPT_UPDATE -- Update node->slots for source/target nodes. + * CLUSTER_MANAGER_OPT_QUIET -- Don't print info messages. 
+*/ +static int clusterManagerMoveSlot(clusterManagerNode *source, + clusterManagerNode *target, + int slot, int opts, char**err) +{ + if (!(opts & CLUSTER_MANAGER_OPT_QUIET)) { + printf("Moving slot %d from %s:%d to %s:%d: ", slot, source->ip, + source->port, target->ip, target->port); + fflush(stdout); + } + if (err != NULL) *err = NULL; + int pipeline = config.cluster_manager_command.pipeline, + timeout = config.cluster_manager_command.timeout, + print_dots = (opts & CLUSTER_MANAGER_OPT_VERBOSE), + option_cold = (opts & CLUSTER_MANAGER_OPT_COLD), + success = 1; + if (!option_cold) { + success = clusterManagerSetSlot(target, source, slot, + "importing", err); + if (!success) return 0; + success = clusterManagerSetSlot(source, target, slot, + "migrating", err); + if (!success) return 0; + } + success = clusterManagerMigrateKeysInSlot(source, target, slot, timeout, + pipeline, print_dots, err); + if (!(opts & CLUSTER_MANAGER_OPT_QUIET)) printf("\n"); + if (!success) return 0; + /* Set the new node as the owner of the slot in all the known nodes. */ + if (!option_cold) { + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER " + "SETSLOT %d %s %s", + slot, "node", + target->name); + success = (r != NULL); + if (!success) return 0; + if (r->type == REDIS_REPLY_ERROR) { + success = 0; + if (err != NULL) { + *err = zmalloc((r->len + 1) * sizeof(char)); + strcpy(*err, r->str); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err); + } + } + freeReplyObject(r); + if (!success) return 0; + } + } + /* Update the node logical config */ + if (opts & CLUSTER_MANAGER_OPT_UPDATE) { + source->slots[slot] = 0; + target->slots[slot] = 1; + } + return 1; +} + /* Flush the dirty node configuration by calling replicate for slaves or * adding the slots for masters. 
*/ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { @@ -2425,20 +2688,24 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, clusterManagerNodeResetSlots(node); if (i == 8) { int remaining = strlen(line); - //TODO: just while(remaining) && assign p inside the block - while ((p = strchr(line, ' ')) != NULL || remaining) { + while (remaining > 0) { + p = strchr(line, ' '); if (p == NULL) p = line + remaining; remaining -= (p - line); char *slotsdef = line; *p = '\0'; - if (remaining) line = p + 1; - else line = p; + if (remaining) { + line = p + 1; + remaining--; + } else line = p; if (slotsdef[0] == '[') { slotsdef++; if ((p = strstr(slotsdef, "->-"))) { // Migrating *p = '\0'; p += 3; + char *closing_bracket = strchr(p, ']'); + if (closing_bracket) *closing_bracket = '\0'; sds slot = sdsnew(slotsdef); sds dst = sdsnew(p); node->migrating_count += 2; @@ -2451,6 +2718,8 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, } else if ((p = strstr(slotsdef, "-<-"))) {//Importing *p = '\0'; p += 3; + char *closing_bracket = strchr(p, ']'); + if (closing_bracket) *closing_bracket = '\0'; sds slot = sdsnew(slotsdef); sds src = sdsnew(p); node->importing_count += 2; @@ -2605,8 +2874,9 @@ invalid_friend: if (n->replicate != NULL) { clusterManagerNode *master = clusterManagerNodeByName(n->replicate); if (master == NULL) { - printf("*** WARNING: %s:%d claims to be slave of unknown " - "node ID %s.\n", n->ip, n->port, n->replicate); + clusterManagerLogWarn("*** WARNING: %s:%d claims to be " + "slave of unknown node ID %s.\n", + n->ip, n->port, n->replicate); } else master->replicas_count++; } } @@ -2619,6 +2889,12 @@ int clusterManagerSlotCompare(const void *slot1, const void *slot2) { return strcmp(*i1, *i2); } +int clusterManagerSlotCountCompareDesc(const void *n1, const void *n2) { + clusterManagerNode *node1 = *((clusterManagerNode **) n1); + clusterManagerNode *node2 = *((clusterManagerNode 
**) n2); + return node2->slots_count - node1->slots_count; +} + static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { sds signature = NULL; int node_count = 0, i = 0, name_len = 0; @@ -2651,16 +2927,18 @@ static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { if (remaining == 0) continue; char **slots = NULL; int c = 0; - //TODO: just while(remaining) && assign p inside the block - while ((p = strchr(line, ' ')) != NULL || remaining) { + while (remaining > 0) { + p = strchr(line, ' '); if (p == NULL) p = line + remaining; int size = (p - line); remaining -= size; tot_size += size; char *slotsdef = line; *p = '\0'; - if (remaining) line = p + 1; - else line = p; + if (remaining) { + line = p + 1; + remaining--; + } else line = p; if (slotsdef[0] != '[') { c++; slots = zrealloc(slots, (c * sizeof(char *))); @@ -2792,7 +3070,7 @@ static void clusterManagerCheckCluster(int quiet) { n->port); for (i = 0; i < n->migrating_count; i += 2) { sds slot = n->migrating[i]; - dictAdd(open_slots, slot, n->migrating[i + 1]); + dictAdd(open_slots, slot, sdsdup(n->migrating[i + 1])); char *fmt = (i > 0 ? ",%S" : "%S"); errstr = sdscatfmt(errstr, fmt, slot); } @@ -2810,7 +3088,7 @@ static void clusterManagerCheckCluster(int quiet) { n->port); for (i = 0; i < n->importing_count; i += 2) { sds slot = n->importing[i]; - dictAdd(open_slots, slot, n->importing[i + 1]); + dictAdd(open_slots, slot, sdsdup(n->importing[i + 1])); char *fmt = (i > 0 ? 
",%S" : "%S"); errstr = sdscatfmt(errstr, fmt, slot); } @@ -2848,6 +3126,76 @@ static void clusterManagerCheckCluster(int quiet) { } } +static clusterManagerNode *clusterNodeForResharding(char *id, + clusterManagerNode *target, + int *raise_err) +{ + clusterManagerNode *node = NULL; + const char *invalid_node_msg = "*** The specified node is not known or " + "not a master, please retry.\n"; + node = clusterManagerNodeByName(id); + *raise_err = 0; + if (!node || node->flags & CLUSTER_MANAGER_FLAG_SLAVE) { + clusterManagerLogErr(invalid_node_msg); + *raise_err = 1; + return NULL; + } else if (node != NULL && target != NULL) { + if (!strcmp(node->name, target->name)) { + clusterManagerLogErr( "*** It is not possible to use " + "the target node as " + "source node.\n"); + return NULL; + } + } + return node; +} + +static list *clusterManagerComputeReshardTable(list *sources, int numslots) { + list *moved = listCreate(); + int src_count = listLength(sources), i = 0, tot_slots = 0, j; + clusterManagerNode **sorted = zmalloc(src_count * sizeof(**sorted)); + listIter li; + listNode *ln; + listRewind(sources, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + tot_slots += node->slots_count; + sorted[i++] = node; + } + qsort(sorted, src_count, sizeof(clusterManagerNode *), + clusterManagerSlotCountCompareDesc); + for (i = 0; i < src_count; i++) { + clusterManagerNode *node = sorted[i]; + float n = ((float) numslots / tot_slots * node->slots_count); + if (i == 0) n = ceil(n); + else n = floor(n); + int max = (int) n, count = 0; + for (j = 0; j < CLUSTER_MANAGER_SLOTS; j++) { + int slot = node->slots[j]; + if (!slot) continue; + if (count >= max || (int)listLength(moved) >= numslots) break; + clusterManagerReshardTableItem *item = zmalloc(sizeof(item)); + item->source = node; + item->slot = j; + listAddNodeTail(moved, item); + count++; + } + } + zfree(sorted); + return moved; +} + +static void clusterManagerShowReshardTable(list *table) { + 
listIter li; + listNode *ln; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + clusterManagerNode *n = item->source; + printf(" Moving slot %d from %s\n", item->slot, (char *) n->name); + } +} + static void clusterManagerLog(int level, const char* fmt, ...) { int use_colors = (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COLOR); @@ -3219,59 +3567,218 @@ cleanup: static int clusterManagerCommandInfo(int argc, char **argv) { int port = 0; char *ip = NULL; - if (argc == 1) { - char *addr = argv[0]; - char *c = strrchr(addr, '@'); - if (c != NULL) *c = '\0'; - c = strrchr(addr, ':'); - if (c != NULL) { - *c = '\0'; - ip = addr; - port = atoi(++c); - } else goto invalid_args; - } else { - ip = argv[0]; - port = atoi(argv[1]); - } - if (!ip || !port) goto invalid_args; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; clusterManagerShowInfo(); return 1; invalid_args: - fprintf(stderr, "Invalid arguments: you need to pass either a valid " - "address (ie. 120.0.0.1:7000) or space separated IP " - "and port (ie. 
120.0.0.1 7000)\n"); + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); return 0; } static int clusterManagerCommandCheck(int argc, char **argv) { int port = 0; char *ip = NULL; - if (argc == 1) { - char *addr = argv[0]; - char *c = strrchr(addr, '@'); - if (c != NULL) *c = '\0'; - c = strrchr(addr, ':'); - if (c != NULL) { - *c = '\0'; - ip = addr; - port = atoi(++c); - } else goto invalid_args; - } else { - ip = argv[0]; - port = atoi(argv[1]); - } - if (!ip || !port) goto invalid_args; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; clusterManagerShowInfo(); clusterManagerCheckCluster(0); return 1; invalid_args: - fprintf(stderr, "Invalid arguments: you need to pass either a valid " - "address (ie. 120.0.0.1:7000) or space separated IP " - "and port (ie. 120.0.0.1 7000)\n"); + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + +static int clusterManagerCommandReshard(int argc, char **argv) { + int port = 0; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + clusterManagerCheckCluster(0); + if (cluster_manager.errors && listLength(cluster_manager.errors) > 0) { + fflush(stdout); + fprintf(stderr, + "*** Please fix your cluster problems before resharding\n"); + return 0; + } + int slots = config.cluster_manager_command.slots; + if (!slots) { + while (slots <= 0 || slots > CLUSTER_MANAGER_SLOTS) { + printf("How many slots do you want to move (from 1 to %d)? 
", + CLUSTER_MANAGER_SLOTS); + fflush(stdout); + char buf[6]; + int nread = read(fileno(stdin),buf,6); + if (!nread) continue; //TODO: nread < 0 + int last_idx = nread - 1; + if (buf[last_idx] != '\n') { + int ch; + while ((ch = getchar()) != '\n' && ch != EOF) {} + } + buf[last_idx] = '\0'; + slots = atoi(buf); + } + } + char buf[255]; + char *to = config.cluster_manager_command.to, + *from = config.cluster_manager_command.from; + while (to == NULL) { + printf("What is the receiving node ID? "); + fflush(stdout); + int nread = read(fileno(stdin),buf,255); + if (!nread) continue; //TODO: nread < 0 + int last_idx = nread - 1; + if (buf[last_idx] != '\n') { + int ch; + while ((ch = getchar()) != '\n' && ch != EOF) {} + } + buf[last_idx] = '\0'; + if (strlen(buf) > 0) to = buf; + } + int raise_err = 0; + clusterManagerNode *target = clusterNodeForResharding(to, NULL, &raise_err); + if (target == NULL) return 0; + list *sources = listCreate(); + list *table = NULL; + int all = 0, result = 1; + if (from == NULL) { + printf("Please enter all the source node IDs.\n"); + printf(" Type 'all' to use all the nodes as source nodes for " + "the hash slots.\n"); + printf(" Type 'done' once you entered all the source nodes IDs.\n"); + while (1) { + printf("Source node #%lu: ", listLength(sources) + 1); + fflush(stdout); + int nread = read(fileno(stdin),buf,255); + if (!nread) continue; //TODO: nread < 0 + int last_idx = nread - 1; + if (buf[last_idx] != '\n') { + int ch; + while ((ch = getchar()) != '\n' && ch != EOF) {} + } + buf[last_idx] = '\0'; + if (!strcmp(buf, "done")) break; + else if (!strcmp(buf, "all")) { + all = 1; + break; + } else { + clusterManagerNode *src = + clusterNodeForResharding(buf, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } + } + } + } else { + char *p; + while((p = strchr(from, ',')) != NULL) { + *p = '\0'; + if (!strcmp(from, "all")) { + all = 1; + break; + } else { + 
clusterManagerNode *src = + clusterNodeForResharding(from, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } + } + from = p + 1; + } + /* Check if there's still another source to process. */ + if (!all && strlen(from) > 0) { + clusterManagerNode *src = + clusterNodeForResharding(from, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } + } + } + listIter li; + listNode *ln; + if (all) { + listEmpty(sources); + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + if (!sdscmp(n->name, target->name)) continue; + listAddNodeTail(sources, n); + } + } + if (listLength(sources) == 0) { + fprintf(stderr, "*** No source nodes given, operation aborted.\n"); + result = 0; + goto cleanup; + } + printf("\nReady to move %d slots.\n", slots); + printf(" Source nodes:\n"); + listRewind(sources, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *src = ln->value; + sds info = clusterManagerNodeInfo(src, 4); + printf("%s\n", info); + sdsfree(info); + } + printf(" Destination node:\n"); + sds info = clusterManagerNodeInfo(target, 4); + printf("%s\n", info); + sdsfree(info); + table = clusterManagerComputeReshardTable(sources, slots); + printf(" Resharding plan:\n"); + clusterManagerShowReshardTable(table); + if (!(config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_YES)) + { + printf("Do you want to proceed with the proposed " + "reshard plan (yes/no)? 
"); + fflush(stdout); + char buf[4]; + int nread = read(fileno(stdin),buf,4); + buf[3] = '\0'; + if (nread <= 0 || strcmp("yes", buf) != 0) { + result = 0; + goto cleanup; + } + } + int opts = CLUSTER_MANAGER_OPT_VERBOSE; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + char *err = NULL; + result = clusterManagerMoveSlot(item->source, target, item->slot, + opts, &err); + if (!result) { + if (err != NULL) { + clusterManagerLogErr("\n%s\n", err); + zfree(err); + } + goto cleanup; + } + } +cleanup: + listRelease(sources); + if (table) { + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + zfree(item); + } + listRelease(table); + } + return result; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); return 0; } @@ -3332,13 +3839,32 @@ static int clusterManagerCommandHelp(int argc, char **argv) { sizeof(clusterManagerCommandDef); int i = 0, j; fprintf(stderr, "Cluster Manager Commands:\n"); + int padding = 15; for (; i < commands_count; i++) { clusterManagerCommandDef *def = &(clusterManagerCommands[i]); - int namelen = strlen(def->name), padlen = 15 - namelen; + int namelen = strlen(def->name), padlen = padding - namelen; fprintf(stderr, " %s", def->name); for (j = 0; j < padlen; j++) fprintf(stderr, " "); fprintf(stderr, "%s\n", (def->args ? 
def->args : "")); - //TODO: if (def->options) + if (def->options != NULL) { + int optslen = strlen(def->options); + char *p = def->options, *eos = p + optslen; + char *comma = NULL; + while ((comma = strchr(p, ',')) != NULL) { + int deflen = (int)(comma - p); + char buf[255]; + memcpy(buf, p, deflen); + buf[deflen] = '\0'; + for (j = 0; j < padding; j++) fprintf(stderr, " "); + fprintf(stderr, " --cluster-%s\n", buf); + p = comma + 1; + if (p >= eos) break; + } + if (p < eos) { + for (j = 0; j < padding; j++) fprintf(stderr, " "); + fprintf(stderr, " --cluster-%s\n", p); + } + } } return 0; } @@ -4640,6 +5166,11 @@ int main(int argc, char **argv) { config.cluster_manager_command.argv = NULL; config.cluster_manager_command.flags = 0; config.cluster_manager_command.replicas = 0; + config.cluster_manager_command.from = NULL; + config.cluster_manager_command.to = NULL; + config.cluster_manager_command.slots = 0; + config.cluster_manager_command.timeout = CLUSTER_MANAGER_MIGRATE_TIMEOUT; + config.cluster_manager_command.pipeline = CLUSTER_MANAGER_MIGRATE_PIPELINE; pref.hints = 1; spectrum_palette = spectrum_palette_color; From 9fe244f1e2a77b736420d368dda39ff29e02e104 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 28 Feb 2018 11:49:10 +0100 Subject: [PATCH 16/66] Fixed memory write error in clusterManagerGetConfigSignature --- src/redis-cli.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 68ae7cfa6..366c36fad 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2295,7 +2295,7 @@ static void clusterManagerShowNodes(void) { while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; sds info = clusterManagerNodeInfo(node, 0); - printf("%s\n", info); + printf("%s\n", (char *) info); sdsfree(info); } } @@ -2916,8 +2916,8 @@ static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { line = p + 1; if (i == 0) { nodename = token; - tot_size = p - token; - name_len = tot_size; + 
tot_size = (p - token); + name_len = tot_size++; // Make room for ':' in tot_size } else if (i == 8) break; i++; } @@ -2951,6 +2951,7 @@ static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { node_count++; node_configs = zrealloc(node_configs, (node_count * sizeof(char *))); + /* Make room for '|' separators. */ tot_size += (sizeof(char) * (c - 1)); char *cfg = zmalloc((sizeof(char) * tot_size) + 1); memcpy(cfg, nodename, name_len); @@ -3760,7 +3761,7 @@ static int clusterManagerCommandReshard(int argc, char **argv) { opts, &err); if (!result) { if (err != NULL) { - clusterManagerLogErr("\n%s\n", err); + //clusterManagerLogErr("\n%s\n", err); zfree(err); } goto cleanup; From e5ffa66b1fee11582d8c09ab4ea9cad0d392a8e2 Mon Sep 17 00:00:00 2001 From: Artix Date: Wed, 28 Feb 2018 15:21:08 +0100 Subject: [PATCH 17/66] Cluster Manager: fixed some memory error --- src/redis-cli.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 366c36fad..64ec48b5d 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2412,14 +2412,19 @@ static int clusterManagerSetSlot(clusterManagerNode *node1, (char *) node2->name); if (err != NULL) *err = NULL; if (!reply) return 0; + int success = 1; if (reply->type == REDIS_REPLY_ERROR) { + success = 0; if (err != NULL) { *err = zmalloc((reply->len + 1) * sizeof(char)); strcpy(*err, reply->str); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node1, err); } - return 0; + goto cleanup; } - return 1; +cleanup: + freeReplyObject(reply); + return success; } static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, @@ -3175,7 +3180,7 @@ static list *clusterManagerComputeReshardTable(list *sources, int numslots) { int slot = node->slots[j]; if (!slot) continue; if (count >= max || (int)listLength(moved) >= numslots) break; - clusterManagerReshardTableItem *item = zmalloc(sizeof(item)); + clusterManagerReshardTableItem *item = zmalloc(sizeof(*item)); item->source = node; 
item->slot = j; listAddNodeTail(moved, item); From 2ede60d23625c84f21a3002c504c0f34f7c5a67e Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 2 Mar 2018 17:06:50 +0100 Subject: [PATCH 18/66] ClusterManager: fixed --cluster-from 'all' parsing --- src/redis-cli.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 64ec48b5d..fe73f4a46 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -3137,12 +3137,12 @@ static clusterManagerNode *clusterNodeForResharding(char *id, int *raise_err) { clusterManagerNode *node = NULL; - const char *invalid_node_msg = "*** The specified node is not known or " - "not a master, please retry.\n"; + const char *invalid_node_msg = "*** The specified node (%s) is not known " + "or not a master, please retry.\n"; node = clusterManagerNodeByName(id); *raise_err = 0; if (!node || node->flags & CLUSTER_MANAGER_FLAG_SLAVE) { - clusterManagerLogErr(invalid_node_msg); + clusterManagerLogErr(invalid_node_msg, id); *raise_err = 1; return NULL; } else if (node != NULL && target != NULL) { @@ -3700,12 +3700,15 @@ static int clusterManagerCommandReshard(int argc, char **argv) { } /* Check if there's still another source to process. 
*/ if (!all && strlen(from) > 0) { - clusterManagerNode *src = - clusterNodeForResharding(from, target, &raise_err); - if (src != NULL) listAddNodeTail(sources, src); - else if (raise_err) { - result = 0; - goto cleanup; + if (!strcmp(from, "all")) all = 1; + if (!all) { + clusterManagerNode *src = + clusterNodeForResharding(from, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } } } } From 919b80c019361b731c81b44faffb4a2cbf663c15 Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 6 Mar 2018 13:06:04 +0200 Subject: [PATCH 19/66] clusterManagerAddSlots: changed the way ADDSLOTS command is built --- src/redis-cli.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index fe73f4a46..e2b1fb2f5 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2354,32 +2354,28 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) redisReply *reply = NULL; void *_reply = NULL; int is_err = 0, success = 1; - int argc; - sds *argv = NULL; - size_t *argvlen = NULL; + /* First two args are used for the command itself. */ + int argc = node->slots_count + 2; + sds *argv = zmalloc(argc * sizeof(*argv)); + size_t *argvlen = zmalloc(argc * sizeof(*argvlen)); + argv[0] = "CLUSTER"; + argv[1] = "ADDSLOTS"; + argvlen[0] = 7; + argvlen[1] = 8; *err = NULL; - sds cmd = sdsnew("CLUSTER ADDSLOTS "); - int i, added = 0; + int i, argv_idx = 2; for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { - int last_slot = (i == (CLUSTER_MANAGER_SLOTS - 1)); + if (argv_idx >= argc) break; if (node->slots[i]) { - char *fmt = (!last_slot ? 
"%u " : "%u"); - cmd = sdscatfmt(cmd, fmt, i); - added++; + argv[argv_idx] = sdsfromlonglong((long long) i); + argvlen[argv_idx] = sdslen(argv[argv_idx]); + argv_idx++; } } - if (!added) { + if (!argv_idx) { success = 0; goto cleanup; } - argv = cliSplitArgs(cmd, &argc); - if (argc == 0 || argv == NULL) { - success = 0; - goto cleanup; - } - argvlen = zmalloc(argc*sizeof(size_t)); - for (i = 0; i < argc; i++) - argvlen[i] = sdslen(argv[i]); redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen); if (redisGetReply(node->context, &_reply) != REDIS_OK) { success = 0; @@ -2395,9 +2391,11 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) goto cleanup; } cleanup: - sdsfree(cmd); zfree(argvlen); - if (argv != NULL) sdsfreesplitres(argv,argc); + if (argv != NULL) { + for (i = 2; i < argc; i++) sdsfree(argv[i]); + zfree(argv); + } if (reply != NULL) freeReplyObject(reply); return success; } From 4c5419aa91193f52efc95edaee49eb96676fb85a Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 23 Mar 2018 16:46:43 +0100 Subject: [PATCH 20/66] Cluster Manager: rebalance command --- src/redis-cli.c | 297 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 286 insertions(+), 11 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index e2b1fb2f5..69ba39acc 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -71,6 +71,7 @@ #define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MIGRATE_TIMEOUT 60000 #define CLUSTER_MANAGER_MIGRATE_PIPELINE 10 +#define CLUSTER_MANAGER_REBALANCE_THRESHOLD 2 #define CLUSTER_MANAGER_INVALID_HOST_ARG \ "Invalid arguments: you need to pass either a valid " \ @@ -108,10 +109,13 @@ #define CLUSTER_MANAGER_FLAG_DISCONNECT 1 << 4 #define CLUSTER_MANAGER_FLAG_FAIL 1 << 5 -#define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0 -#define CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1 -#define CLUSTER_MANAGER_CMD_FLAG_YES 1 << 2 -#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 +#define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0 
+#define CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1 +#define CLUSTER_MANAGER_CMD_FLAG_YES 1 << 2 +#define CLUSTER_MANAGER_CMD_FLAG_AUTOWEIGHTS 1 << 3 +#define CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER 1 << 4 +#define CLUSTER_MANAGER_CMD_FLAG_SIMULATE 1 << 5 +#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 #define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 #define CLUSTER_MANAGER_OPT_COLD 1 << 1 @@ -157,9 +161,12 @@ typedef struct clusterManagerCommand { int replicas; char *from; char *to; + char **weight; + int weight_argc; int slots; int timeout; int pipeline; + float threshold; } clusterManagerCommand; static void createClusterManagerCommand(char *cmdname, int argc, char **argv); @@ -206,6 +213,7 @@ static struct config { int eval_ldb_end; /* Lua debugging session ended. */ int enable_ldb_on_eval; /* Handle manual SCRIPT DEBUG + EVAL commands. */ int last_cmd_type; + int verbose; clusterManagerCommand cluster_manager_command; } config; @@ -1266,6 +1274,8 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"-d") && !lastarg) { sdsfree(config.mb_delim); config.mb_delim = sdsnew(argv[++i]); + } else if (!strcmp(argv[i],"--verbose")) { + config.verbose = 1; } else if (!strcmp(argv[i],"--cluster") && !lastarg) { if (CLUSTER_MANAGER_MODE()) usage(); char *cmd = argv[++i]; @@ -1282,15 +1292,35 @@ static int parseOptions(int argc, char **argv) { config.cluster_manager_command.from = argv[++i]; } else if (!strcmp(argv[i],"--cluster-to") && !lastarg) { config.cluster_manager_command.to = argv[++i]; + } else if (!strcmp(argv[i],"--cluster-weight") && !lastarg) { + int widx = i + 1; + char **weight = argv + widx; + int wargc = 0; + for (; widx < argc; widx++) { + if (strstr(argv[widx], "--") == argv[widx]) break; + wargc++; + } + if (wargc > 0) { + config.cluster_manager_command.weight = weight; + config.cluster_manager_command.weight_argc = wargc; + } } else if (!strcmp(argv[i],"--cluster-slots") && !lastarg) { config.cluster_manager_command.slots = 
atoi(argv[++i]); } else if (!strcmp(argv[i],"--cluster-timeout") && !lastarg) { config.cluster_manager_command.timeout = atoi(argv[++i]); } else if (!strcmp(argv[i],"--cluster-pipeline") && !lastarg) { config.cluster_manager_command.pipeline = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-threshold") && !lastarg) { + config.cluster_manager_command.threshold = atof(argv[++i]); } else if (!strcmp(argv[i],"--cluster-yes")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_YES; + } else if (!strcmp(argv[i],"--cluster-simulate")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_SIMULATE; + } else if (!strcmp(argv[i],"--cluster-use-empty-masters")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; } else if (!strcmp(argv[i],"-v") || !strcmp(argv[i], "--version")) { sds version = cliVersion(); printf("redis-cli %s\n", version); @@ -1390,6 +1420,7 @@ static void usage(void) { " are not rolled back from the server memory.\n" " --cluster [args...] [opts...]\n" " Cluster Manager command and arguments (see below).\n" +" --verbose Verbose mode.\n" " --help Output this help and exit.\n" " --version Output version and exit.\n" "\n" @@ -1749,6 +1780,8 @@ typedef struct clusterManagerNode { sds *importing; int migrating_count; int importing_count; + float weight; /* Weight used by rebalance */ + int balance; /* Used by rebalance */ } clusterManagerNode; /* Data structure used to represent a sequence of nodes. 
*/ @@ -1780,6 +1813,7 @@ typedef int clusterManagerCommandProc(int argc, char **argv); static clusterManagerNode *clusterManagerNewNode(char *ip, int port); static clusterManagerNode *clusterManagerNodeByName(const char *name); +static clusterManagerNode *clusterManagerNodeByAbbreviatedName(const char *n); static void clusterManagerNodeResetSlots(clusterManagerNode *node); static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, @@ -1813,6 +1847,7 @@ static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandReshard(int argc, char **argv); +static int clusterManagerCommandRebalance(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); @@ -1831,6 +1866,9 @@ clusterManagerCommandDef clusterManagerCommands[] = { {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, {"reshard", clusterManagerCommandReshard, -1, "host:port", "from ,to ,slots ,yes,timeout ,pipeline "}, + {"rebalance", clusterManagerCommandRebalance, -1, "host:port", + "weight ,use-empty-masters," + "timeout ,simulate,pipeline ,threshold "}, {"call", clusterManagerCommandCall, -2, "host:port command arg arg .. arg", NULL}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} @@ -1970,10 +2008,13 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { node->migrating_count = 0; node->importing_count = 0; node->replicas_count = 0; + node->weight = 1.0f; + node->balance = 0; clusterManagerNodeResetSlots(node); return node; } +/* Return the node with the specified ID or NULL. 
*/ static clusterManagerNode *clusterManagerNodeByName(const char *name) { if (cluster_manager.nodes == NULL) return NULL; clusterManagerNode *found = NULL; @@ -1994,6 +2035,32 @@ static clusterManagerNode *clusterManagerNodeByName(const char *name) { return found; } +/* Like get_node_by_name but the specified name can be just the first + * part of the node ID as long as the prefix in unique across the + * cluster. + */ +static clusterManagerNode *clusterManagerNodeByAbbreviatedName(const char*name) +{ + if (cluster_manager.nodes == NULL) return NULL; + clusterManagerNode *found = NULL; + sds lcname = sdsempty(); + lcname = sdscpy(lcname, name); + sdstolower(lcname); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->name && + strstr(n->name, lcname) == n->name) { + found = n; + break; + } + } + sdsfree(lcname); + return found; +} + static void clusterManagerNodeResetSlots(clusterManagerNode *node) { memset(node->slots, 0, sizeof(node->slots)); node->slots_count = 0; @@ -2898,6 +2965,12 @@ int clusterManagerSlotCountCompareDesc(const void *n1, const void *n2) { return node2->slots_count - node1->slots_count; } +int clusterManagerCompareNodeBalance(const void *n1, const void *n2) { + clusterManagerNode *node1 = *((clusterManagerNode **) n1); + clusterManagerNode *node2 = *((clusterManagerNode **) n2); + return node1->balance - node2->balance; +} + static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { sds signature = NULL; int node_count = 0, i = 0, name_len = 0; @@ -3200,6 +3273,19 @@ static void clusterManagerShowReshardTable(list *table) { } } +static void clusterManagerReleaseReshardTable(list *table) { + if (table != NULL) { + listIter li; + listNode *ln; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + zfree(item); + } + listRelease(table); + } +} + static void 
clusterManagerLog(int level, const char* fmt, ...) { int use_colors = (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COLOR); @@ -3775,14 +3861,199 @@ static int clusterManagerCommandReshard(int argc, char **argv) { } cleanup: listRelease(sources); - if (table) { - listRewind(table, &li); - while ((ln = listNext(&li)) != NULL) { - clusterManagerReshardTableItem *item = ln->value; - zfree(item); - } - listRelease(table); + clusterManagerReleaseReshardTable(table); + return result; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + +static int clusterManagerCommandRebalance(int argc, char **argv) { + int port = 0; + char *ip = NULL; + clusterManagerNode **weightedNodes = NULL; + list *involved = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + int result = 1, i; + if (config.cluster_manager_command.weight != NULL) { + for (i = 0; i < config.cluster_manager_command.weight_argc; i++) { + char *name = config.cluster_manager_command.weight[i]; + char *p = strchr(name, '='); + if (p == NULL) { + result = 0; + goto cleanup; + } + *p = '\0'; + float w = atof(++p); + clusterManagerNode *n = clusterManagerNodeByAbbreviatedName(name); + if (n == NULL) { + clusterManagerLogErr("*** No such master node %s\n", name); + result = 0; + goto cleanup; + } + n->weight = w; + } } + float total_weight = 0; + int nodes_involved = 0; + int use_empty = config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; + + involved = listCreate(); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + /* Compute the total cluster weight. 
*/ + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + if (!use_empty && n->slots_count == 0) { + n->weight = 0; + continue; + } + total_weight += n->weight; + nodes_involved++; + listAddNodeTail(involved, n); + } + weightedNodes = zmalloc(nodes_involved * + sizeof(clusterManagerNode *)); + if (weightedNodes == NULL) goto cleanup; + /* Check cluster, only proceed if it looks sane. */ + clusterManagerCheckCluster(1); + if (cluster_manager.errors && listLength(cluster_manager.errors) > 0) { + clusterManagerLogErr("*** Please fix your cluster problems " + "before rebalancing" ); + result = 0; + goto cleanup; + } + /* Calculate the slots balance for each node. It's the number of + * slots the node should lose (if positive) or gain (if negative) + * in order to be balanced. */ + int threshold_reached = 0, total_balance = 0; + float threshold = config.cluster_manager_command.threshold; + i = 0; + listRewind(involved, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + weightedNodes[i++] = n; + int expected = (((float)CLUSTER_MANAGER_SLOTS / total_weight) * + (int) n->weight); + n->balance = n->slots_count - expected; + total_balance += n->balance; + /* Compute the percentage of difference between the + * expected number of slots and the real one, to see + * if it's over the threshold specified by the user. */ + int over_threshold = 0; + if (config.cluster_manager_command.threshold > 0) { + if (n->slots_count > 0) { + float err_perc = fabs((100-(100.0*expected/n->slots_count))); + if (err_perc > threshold) over_threshold = 1; + } else if (expected > 1) { + over_threshold = 1; + } + } + if (over_threshold) threshold_reached = 1; + } + if (!threshold_reached) { + clusterManagerLogErr("*** No rebalancing needed! 
" + "All nodes are within the %.2f%% threshold.\n", + config.cluster_manager_command.threshold); + result = 0; + goto cleanup; + } + /* Because of rounding, it is possible that the balance of all nodes + * summed does not give 0. Make sure that nodes that have to provide + * slots are always matched by nodes receiving slots. */ + while (total_balance > 0) { + listRewind(involved, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->balance < 0 && total_balance > 0) { + n->balance--; + total_balance--; + } + } + } + /* Sort nodes by their slots balance. */ + qsort(weightedNodes, nodes_involved, sizeof(clusterManagerNode *), + clusterManagerCompareNodeBalance); + clusterManagerLogInfo(">>> Rebalancing across %d nodes. " + "Total weight = %.2f\n", + nodes_involved, total_weight); + if (config.verbose) { + for (i = 0; i < nodes_involved; i++) { + clusterManagerNode *n = weightedNodes[i]; + printf("%s:%d balance is %d slots\n", n->ip, n->port, n->balance); + } + } + /* Now we have at the start of the 'sn' array nodes that should get + * slots, at the end nodes that must give slots. + * We take two indexes, one at the start, and one at the end, + * incrementing or decrementing the indexes accordingly til we + * find nodes that need to get/provide slots. */ + int dst_idx = 0; + int src_idx = nodes_involved - 1; + int simulate = config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_SIMULATE; + while (dst_idx < src_idx) { + clusterManagerNode *dst = weightedNodes[dst_idx]; + clusterManagerNode *src = weightedNodes[src_idx]; + int db = abs(dst->balance); + int sb = abs(src->balance); + int numslots = (db < sb ? db : sb); + if (numslots > 0) { + printf("Moving %d slots from %s:%d to %s:%d\n", numslots, + src->ip, + src->port, + dst->ip, + dst->port); + /* Actaully move the slots. 
*/ + list *lsrc = listCreate(), *table = NULL; + listAddNodeTail(lsrc, src); + table = clusterManagerComputeReshardTable(lsrc, numslots); + listRelease(lsrc); + int table_len = (int) listLength(table); + if (!table || table_len != numslots) { + clusterManagerLogErr("*** Assertio failed: Reshard table " + "!= number of slots"); + result = 0; + goto end_move; + } + if (simulate) { + for (i = 0; i < table_len; i++) printf("#"); + } else { + int opts = CLUSTER_MANAGER_OPT_QUIET | + CLUSTER_MANAGER_OPT_UPDATE; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + result = clusterManagerMoveSlot(item->source, + dst, + item->slot, + opts, NULL); + if (!result) goto end_move; + printf("#"); + fflush(stdout); + } + + } + printf("\n"); +end_move: + clusterManagerReleaseReshardTable(table); + if (!result) goto cleanup; + } + /* Update nodes balance. */ + dst->balance += numslots; + src->balance -= numslots; + if (dst->balance == 0) dst_idx++; + if (src->balance == 0) src_idx --; + } +cleanup: + if (involved != NULL) listRelease(involved); + if (weightedNodes != NULL) zfree(weightedNodes); return result; invalid_args: fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); @@ -5168,6 +5439,7 @@ int main(int argc, char **argv) { config.eval_ldb_sync = 0; config.enable_ldb_on_eval = 0; config.last_cmd_type = -1; + config.verbose = 0; config.cluster_manager_command.name = NULL; config.cluster_manager_command.argc = 0; config.cluster_manager_command.argv = NULL; @@ -5175,9 +5447,12 @@ int main(int argc, char **argv) { config.cluster_manager_command.replicas = 0; config.cluster_manager_command.from = NULL; config.cluster_manager_command.to = NULL; + config.cluster_manager_command.weight = NULL; config.cluster_manager_command.slots = 0; config.cluster_manager_command.timeout = CLUSTER_MANAGER_MIGRATE_TIMEOUT; config.cluster_manager_command.pipeline = CLUSTER_MANAGER_MIGRATE_PIPELINE; + 
config.cluster_manager_command.threshold = + CLUSTER_MANAGER_REBALANCE_THRESHOLD; pref.hints = 1; spectrum_palette = spectrum_palette_color; From 420fc2e42aebe9452c97327b85eaad8cb713d1ac Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 6 Apr 2018 18:02:40 +0200 Subject: [PATCH 21/66] Cluster Manager: fix command. --- src/redis-cli.c | 715 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 642 insertions(+), 73 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 69ba39acc..8af1130c3 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -151,6 +151,7 @@ static uint64_t dictSdsHash(const void *key); static int dictSdsKeyCompare(void *privdata, const void *key1, const void *key2); static void dictSdsDestructor(void *privdata, void *val); +static void dictListDestructor(void *privdata, void *val); /* Cluster Manager Command Info */ typedef struct clusterManagerCommand { @@ -406,6 +407,12 @@ static void dictSdsDestructor(void *privdata, void *val) sdsfree(val); } +void dictListDestructor(void *privdata, void *val) +{ + DICT_NOTUSED(privdata); + listRelease((list*)val); +} + /* _serverAssert is needed by dict */ void _serverAssert(const char *estr, const char *file, int line) { fprintf(stderr, "=== ASSERTION FAILED ==="); @@ -1446,6 +1453,15 @@ static void usage(void) { exit(1); } +static int confirmWithYes(char *msg) { + printf("%s (type 'yes' to accept): ", msg); + fflush(stdout); + char buf[4]; + int nread = read(fileno(stdin),buf,4); + buf[3] = '\0'; + return (nread != 0 && !strcmp("yes", buf)); +} + /* Turn the plain C strings into Sds strings */ static char **convertToSds(int count, char** args) { int j; @@ -1751,7 +1767,7 @@ static int evalMode(int argc, char **argv) { } /*------------------------------------------------------------------------------ - * Cluster Manager mode + * Cluster Manager *--------------------------------------------------------------------------- */ /* The Cluster Manager global structure */ @@ -1760,6 +1776,9 @@ 
static struct clusterManager { list *errors; } cluster_manager; +/* Used by clusterManagerFixSlotsCoverage */ +dict *clusterManagerUncoveredSlots = NULL; + typedef struct clusterManagerNode { redisContext *context; sds name; @@ -1776,10 +1795,12 @@ typedef struct clusterManagerNode { int slots_count; int replicas_count; list *friends; - sds *migrating; - sds *importing; - int migrating_count; - int importing_count; + sds *migrating; /* An array of sds where even strings are slots and odd + * strings are the destination node IDs. */ + sds *importing; /* An array of sds where even strings are slots and odd + * strings are the source node IDs. */ + int migrating_count; /* Length of the migrating array (migrating slots*2) */ + int importing_count; /* Length of the importing array (importing slots*2) */ float weight; /* Weight used by rebalance */ int balance; /* Used by rebalance */ } clusterManagerNode; @@ -1829,7 +1850,7 @@ static void clusterManagerShowNodes(void); static void clusterManagerShowInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); static void clusterManagerWaitForClusterJoin(void); -static void clusterManagerCheckCluster(int quiet); +static int clusterManagerCheckCluster(int quiet); static void clusterManagerLog(int level, const char* fmt, ...); static int clusterManagerIsConfigConsistent(void); static void clusterManagerOnError(sds err); @@ -1846,6 +1867,7 @@ static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandFix(int argc, char **argv); static int clusterManagerCommandReshard(int argc, char **argv); static int clusterManagerCommandRebalance(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); @@ -1863,6 +1885,7 @@ clusterManagerCommandDef 
clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", "replicas "}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, + {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, {"reshard", clusterManagerCommandReshard, -1, "host:port", "from ,to ,slots ,yes,timeout ,pipeline "}, @@ -1988,6 +2011,8 @@ static void freeClusterManager(void) { listRelease(cluster_manager.errors); cluster_manager.errors = NULL; } + if (clusterManagerUncoveredSlots != NULL) + dictRelease(clusterManagerUncoveredSlots); } static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { @@ -2013,6 +2038,38 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { clusterManagerNodeResetSlots(node); return node; } +/* Check whether reply is NULL or its type is REDIS_REPLY_ERROR. In the + * latest case, if 'err' arg is not NULL, it gets allocated with a copy + * of reply error (it's up to the caller function to free it), elsewhere + * the error is directly printed. */ +static int clusterManagerCheckRedisReply(clusterManagerNode *n, + redisReply *r, char **err) +{ + int is_err = 0; + if (!r || (is_err = (r->type == REDIS_REPLY_ERROR))) { + if (is_err) { + if (err != NULL) { + *err = zmalloc((r->len + 1) * sizeof(char)); + strcpy(*err, r->str); + } else CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, r->str); + } + return 0; + } + return 1; +} + +static void clusterManagerRemoveNodeFromList(list *nodelist, + clusterManagerNode *node) { + listIter li; + listNode *ln; + listRewind(nodelist, &li); + while ((ln = listNext(&li)) != NULL) { + if (node == ln->value) { + listDelNode(nodelist, ln); + break; + } + } +} /* Return the node with the specified ID or NULL. 
*/ static clusterManagerNode *clusterManagerNodeByName(const char *name) { @@ -2470,10 +2527,10 @@ cleanup: /* Set slot status to "importing" or "migrating" */ static int clusterManagerSetSlot(clusterManagerNode *node1, clusterManagerNode *node2, - int slot, const char *mode, char **err) { + int slot, const char *status, char **err) { redisReply *reply = CLUSTER_MANAGER_COMMAND(node1, "CLUSTER " "SETSLOT %d %s %s", - slot, mode, + slot, status, (char *) node2->name); if (err != NULL) *err = NULL; if (!reply) return 0; @@ -2492,6 +2549,70 @@ cleanup: return success; } +/* Migrate keys taken from reply->elements. It returns the reply from the + * MIGRATE command, or NULL if something goes wrong. If the argument 'dots' + * is not NULL, a dot will be printed for every migrated key. */ +static redisReply *clusterManagerMigrateKeysInReply(clusterManagerNode *source, + clusterManagerNode *target, + redisReply *reply, + int replace, int timeout, + char *dots) +{ + redisReply *migrate_reply = NULL; + char **argv = NULL; + size_t *argv_len = NULL; + int c = (replace ? 
8 : 7); + size_t argc = c + reply->elements; + size_t i, offset = 6; // Keys Offset + argv = zcalloc(argc * sizeof(char *)); + argv_len = zcalloc(argc * sizeof(size_t)); + char portstr[255]; + char timeoutstr[255]; + snprintf(portstr, 10, "%d", target->port); + snprintf(timeoutstr, 10, "%d", timeout); + argv[0] = "MIGRATE"; + argv_len[0] = 7; + argv[1] = target->ip; + argv_len[1] = strlen(target->ip); + argv[2] = portstr; + argv_len[2] = strlen(portstr); + argv[3] = ""; + argv_len[3] = 0; + argv[4] = "0"; + argv_len[4] = 1; + argv[5] = timeoutstr; + argv_len[5] = strlen(timeoutstr); + if (replace) { + argv[offset] = "REPLACE"; + argv_len[offset] = 7; + offset++; + } + argv[offset] = "KEYS"; + argv_len[offset] = 4; + offset++; + for (i = 0; i < reply->elements; i++) { + redisReply *entry = reply->element[i]; + size_t idx = i + offset; + assert(entry->type == REDIS_REPLY_STRING); + argv[idx] = (char *) sdsnew(entry->str); + argv_len[idx] = entry->len; + if (dots) dots[i] = '.'; + } + if (dots) dots[reply->elements] = '\0'; + void *_reply = NULL; + redisAppendCommandArgv(source->context,argc, + (const char**)argv,argv_len); + int success = (redisGetReply(source->context, &_reply) == REDIS_OK); + for (i = 0; i < reply->elements; i++) sdsfree(argv[i + offset]); + if (!success) goto cleanup; + migrate_reply = (redisReply *) _reply; +cleanup: + zfree(argv); + zfree(argv_len); + return migrate_reply; +} + +/* Migrate all keys in the given slot from source to target.*/ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, clusterManagerNode *target, int slot, int timeout, @@ -2499,10 +2620,11 @@ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, char **err) { int success = 1; + int do_fix = (config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_FIX); while (1) { + char *dots = NULL; redisReply *reply = NULL, *migrate_reply = NULL; - char **argv = NULL; - size_t *argv_len = NULL; reply = CLUSTER_MANAGER_COMMAND(source, 
"CLUSTER " "GETKEYSINSLOT %d %d", slot, pipeline); @@ -2523,57 +2645,37 @@ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, freeReplyObject(reply); break; } - char *dots = (verbose ? zmalloc((count+1) * sizeof(char)) : NULL); + if (verbose) dots = zmalloc((count+1) * sizeof(char)); /* Calling MIGRATE command. */ - size_t argc = count + 8; - argv = zcalloc(argc * sizeof(char *)); - argv_len = zcalloc(argc * sizeof(size_t)); - char portstr[255]; - char timeoutstr[255]; - snprintf(portstr, 10, "%d", target->port); - snprintf(timeoutstr, 10, "%d", timeout); - argv[0] = "MIGRATE"; - argv_len[0] = 7; - argv[1] = target->ip; - argv_len[1] = strlen(target->ip); - argv[2] = portstr; - argv_len[2] = strlen(portstr); - argv[3] = ""; - argv_len[3] = 0; - argv[4] = "0"; - argv_len[4] = 1; - argv[5] = timeoutstr; - argv_len[5] = strlen(timeoutstr); - argv[6] = "REPLACE"; - argv_len[6] = 7; - argv[7] = "KEYS"; - argv_len[7] = 4; - for (size_t i = 0; i < count; i++) { - redisReply *entry = reply->element[i]; - size_t idx = i + 8; - assert(entry->type == REDIS_REPLY_STRING); - argv[idx] = (char *) sdsnew(entry->str); - argv_len[idx] = entry->len; - if (verbose) dots[i] = '.'; - } - if (verbose) dots[count] = '\0'; - void *_reply = NULL; - redisAppendCommandArgv(source->context,argc, - (const char**)argv,argv_len); - success = (redisGetReply(source->context, &_reply) == REDIS_OK); - for (size_t i = 0; i < count; i++) sdsfree(argv[i + 8]); - if (!success) goto next; - migrate_reply = (redisReply *) _reply; + migrate_reply = clusterManagerMigrateKeysInReply(source, target, + reply, 0, timeout, + dots); + if (migrate_reply == NULL) goto next; if (migrate_reply->type == REDIS_REPLY_ERROR) { - // TODO: Implement fix. 
- success = 0; - if (err != NULL) { - *err = zmalloc((migrate_reply->len + 1) * sizeof(char)); - strcpy(*err, migrate_reply->str); - printf("\n"); - CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + if (do_fix && strstr(migrate_reply->str, "BUSYKEY")) { + clusterManagerLogWarn("*** Target key exists. " + "Replacing it for FIX.\n"); + freeReplyObject(migrate_reply); + /* Try to migrate keys adding REPLACE option. */ + migrate_reply = clusterManagerMigrateKeysInReply(source, + target, + reply, + 1, timeout, + NULL); + success = (migrate_reply != NULL && + migrate_reply->type != REDIS_REPLY_ERROR); + } else success = 0; + if (!success) { + if (migrate_reply != NULL) { + if (err) { + *err = zmalloc((migrate_reply->len + 1) * sizeof(char)); + strcpy(*err, migrate_reply->str); + } + printf("\n"); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + } + goto next; } - goto next; } if (verbose) { printf("%s", dots); @@ -2582,8 +2684,7 @@ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, next: if (reply != NULL) freeReplyObject(reply); if (migrate_reply != NULL) freeReplyObject(migrate_reply); - zfree(argv); - zfree(argv_len); + if (dots) zfree(dots); if (!success) break; } return success; @@ -2729,6 +2830,7 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char *name = NULL, *addr = NULL, *flags = NULL, *master_id = NULL, *ping_sent = NULL, *ping_recv = NULL, *config_epoch = NULL, *link_status = NULL; + UNUSED(link_status); int i = 0; while ((p = strchr(line, ' ')) != NULL) { *p = '\0'; @@ -2974,11 +3076,11 @@ int clusterManagerCompareNodeBalance(const void *n1, const void *n2) { static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { sds signature = NULL; int node_count = 0, i = 0, name_len = 0; + char **node_configs = NULL; redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); if (reply == NULL || reply->type == REDIS_REPLY_ERROR) goto cleanup; char *lines = reply->str, *p, *line; - char 
**node_configs = NULL; while ((p = strstr(lines, "\n")) != NULL) { i = 0; *p = '\0'; @@ -3057,8 +3159,10 @@ static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { } cleanup: if (reply != NULL) freeReplyObject(reply); - for (i = 0; i < node_count; i++) zfree(node_configs[i]); - zfree(node_configs); + if (node_configs != NULL) { + for (i = 0; i < node_count; i++) zfree(node_configs[i]); + zfree(node_configs); + } return signature; } @@ -3114,9 +3218,453 @@ static int clusterManagerGetCoveredSlots(char *all_slots) { return totslots; } -static void clusterManagerCheckCluster(int quiet) { +static void clusterManagerPrintSlotsList(list *slots) { + listIter li; + listNode *ln; + listRewind(slots, &li); + sds first = NULL; + while ((ln = listNext(&li)) != NULL) { + sds slot = ln->value; + if (!first) first = slot; + else printf(", "); + printf("%s", slot); + } + printf("\n"); +} + +/* Return the node, among 'nodes' with the greatest number of keys + * in the specified slot. */ +static clusterManagerNode * clusterManagerGetNodeWithMostKeysInSlot(list *nodes, + int slot, + char **err) +{ + clusterManagerNode *node = NULL; + int numkeys = 0; + listIter li; + listNode *ln; + listRewind(nodes, &li); + if (err) *err = NULL; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + redisReply *r = + CLUSTER_MANAGER_COMMAND(n, "CLUSTER COUNTKEYSINSLOTi %d", slot); + int success = clusterManagerCheckRedisReply(n, r, err); + if (success) { + if (r->integer > numkeys || node == NULL) { + numkeys = r->integer; + node = n; + } + } + if (r != NULL) freeReplyObject(r); + /* If the reply contains errors */ + if (!success) { + if (err != NULL && *err != NULL) + CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err); + node = NULL; + break; + } + } + return node; +} + +static int clusterManagerFixSlotsCoverage(char *all_slots) { + int i, fixed = 0; + list *none = NULL, *single = NULL, *multi = 
NULL; + clusterManagerLogInfo(">>> Fixing slots coverage...\n"); + printf("List of not covered slots: \n"); + int uncovered_count = 0; + sds log = sdsempty(); + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + int covered = all_slots[i]; + if (!covered) { + sds key = sdsfromlonglong((long long) i); + if (uncovered_count++ > 0) printf(","); + printf("%s", (char *) key); + list *slot_nodes = listCreate(); + sds slot_nodes_str = sdsempty(); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + redisReply *reply = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER GETKEYSINSLOT %d %d", i, 1); + if (!clusterManagerCheckRedisReply(n, reply, NULL)) { + fixed = -1; + if (reply) freeReplyObject(reply); + goto cleanup; + } + assert(reply->type == REDIS_REPLY_ARRAY); + if (reply->elements > 0) { + listAddNodeTail(slot_nodes, n); + if (listLength(slot_nodes) > 1) + slot_nodes_str = sdscat(slot_nodes_str, ", "); + slot_nodes_str = sdscatfmt(slot_nodes_str, + "%s:%u", n->ip, n->port); + } + freeReplyObject(reply); + } + log = sdscatfmt(log, "\nSlot %S has keys in %u nodes: %S", + key, listLength(slot_nodes), slot_nodes_str); + sdsfree(slot_nodes_str); + dictAdd(clusterManagerUncoveredSlots, key, slot_nodes); + } + } + printf("\n%s\n", log); + /* For every slot, take action depending on the actual condition: + * 1) No node has keys for this slot. + * 2) A single node has keys for this slot. + * 3) Multiple nodes have keys for this slot. 
*/ + none = listCreate(); + single = listCreate(); + multi = listCreate(); + dictIterator *iter = dictGetIterator(clusterManagerUncoveredSlots); + dictEntry *entry; + while ((entry = dictNext(iter)) != NULL) { + sds slot = (sds) dictGetKey(entry); + list *nodes = (list *) dictGetVal(entry); + switch (listLength(nodes)){ + case 0: listAddNodeTail(none, slot); break; + case 1: listAddNodeTail(single, slot); break; + default: listAddNodeTail(multi, slot); break; + } + } + dictReleaseIterator(iter); + + /* Handle case "1": keys in no node. */ + if (listLength(none) > 0) { + printf("The following uncovered slots have no keys " + "across the cluster:\n"); + clusterManagerPrintSlotsList(none); + if (confirmWithYes("Fix these slots by covering with a random node?")){ + srand(time(NULL)); + listIter li; + listNode *ln; + listRewind(none, &li); + while ((ln = listNext(&li)) != NULL) { + sds slot = ln->value; + long idx = (long) (rand() % listLength(cluster_manager.nodes)); + listNode *node_n = listIndex(cluster_manager.nodes, idx); + assert(node_n != NULL); + clusterManagerNode *n = node_n->value; + clusterManagerLogInfo(">>> Covering slot %s with %s:%d\n", + slot, n->ip, n->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER ADDSLOTS %s", slot); + if (!clusterManagerCheckRedisReply(n, r, NULL)) fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + fixed++; + } + } + } + + /* Handle case "2": keys only in one node. 
*/ + if (listLength(single) > 0) { + printf("The following uncovered slots have keys in just one node:\n"); + clusterManagerPrintSlotsList(single); + if (confirmWithYes("Fix these slots by covering with those nodes?")){ + listIter li; + listNode *ln; + listRewind(single, &li); + while ((ln = listNext(&li)) != NULL) { + sds slot = ln->value; + dictEntry *entry = dictFind(clusterManagerUncoveredSlots, slot); + assert(entry != NULL); + list *nodes = (list *) dictGetVal(entry); + listNode *fn = listFirst(nodes); + assert(fn != NULL); + clusterManagerNode *n = fn->value; + clusterManagerLogInfo(">>> Covering slot %s with %s:%d\n", + slot, n->ip, n->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER ADDSLOTS %s", slot); + if (!clusterManagerCheckRedisReply(n, r, NULL)) fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + fixed++; + } + } + } + + /* Handle case "3": keys in multiple nodes. */ + if (listLength(multi) > 0) { + printf("The folowing uncovered slots have keys in multiple nodes:\n"); + clusterManagerPrintSlotsList(multi); + if (confirmWithYes("Fix these slots by moving keys " + "into a single node?")) { + listIter li; + listNode *ln; + listRewind(multi, &li); + while ((ln = listNext(&li)) != NULL) { + sds slot = ln->value; + dictEntry *entry = dictFind(clusterManagerUncoveredSlots, slot); + assert(entry != NULL); + list *nodes = (list *) dictGetVal(entry); + int s = atoi(slot); + clusterManagerNode *target = + clusterManagerGetNodeWithMostKeysInSlot(nodes, s, NULL); + if (target == NULL) { + fixed = -1; + goto cleanup; + } + clusterManagerLogInfo(">>> Covering slot %s moving keys " + "to %s:%d\n", slot, + target->ip, target->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(target, + "CLUSTER ADDSLOTS %s", slot); + if (!clusterManagerCheckRedisReply(target, r, NULL)) fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + r = CLUSTER_MANAGER_COMMAND(target, + "CLUSTER SETSLOT %s %s", slot, "STABLE"); + if 
(!clusterManagerCheckRedisReply(target, r, NULL)) fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + listIter nli; + listNode *nln; + listRewind(nodes, &nli); + while ((nln = listNext(&nli)) != NULL) { + clusterManagerNode *src = nln->value; + if (src == target) continue; + /* Set the source node in 'importing' state + * (even if we will actually migrate keys away) + * in order to avoid receiving redirections + * for MIGRATE. */ + redisReply *r = CLUSTER_MANAGER_COMMAND(src, + "CLUSTER SETSLOT %s %s %s", slot, + "IMPORTING", target->name); + if (!clusterManagerCheckRedisReply(target, r, NULL)) + fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + int opts = CLUSTER_MANAGER_OPT_VERBOSE | + CLUSTER_MANAGER_OPT_COLD; + if (!clusterManagerMoveSlot(src, target, s, opts, NULL)) { + fixed = -1; + goto cleanup; + } + } + fixed++; + } + } + } +cleanup: + sdsfree(log); + if (none) listRelease(none); + if (single) listRelease(single); + if (multi) listRelease(multi); + return fixed; +} + +/* Slot 'slot' was found to be in importing or migrating state in one or + * more nodes. This function fixes this condition by migrating keys where + * it seems more sensible. */ +static int clusterManagerFixOpenSlot(int slot) { + clusterManagerLogInfo(">>> Fixing open slot %d\n", slot); + /* Try to obtain the current slot owner, according to the current + * nodes configuration. 
*/ + int success = 1; + list *owners = listCreate(); + list *migrating = listCreate(); + list *importing = listCreate(); + sds migrating_str = sdsempty(); + sds importing_str = sdsempty(); + clusterManagerNode *owner = NULL; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->slots[slot]) { + if (owner == NULL) owner = n; + listAddNodeTail(owners, n); + } + } + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->migrating) { + for (int i = 0; i < n->migrating_count; i += 2) { + sds migrating_slot = n->migrating[i]; + if (atoi(migrating_slot) == slot) { + char *sep = (listLength(migrating) == 0 ? "" : ","); + migrating_str = sdscatfmt(migrating_str, "%s%S:%u", + sep, n->ip, n->port); + listAddNodeTail(migrating, n); + break; + } + } + } + if (n->importing) { + for (int i = 0; i < n->importing_count; i += 2) { + sds importing_slot = n->importing[i]; + if (atoi(importing_slot) == slot) { + char *sep = (listLength(importing) == 0 ? "" : ","); + importing_str = sdscatfmt(importing_str, "%s%S:%u", + sep, n->ip, n->port); + listAddNodeTail(importing, n); + break; + } + } + } + } + printf("Set as migrating in: %s\n", migrating_str); + printf("Set as importing in: %s\n", importing_str); + /* If there is no slot owner, set as owner the slot with the biggest + * number of keys, among the set of migrating / importing nodes. */ + if (owner == NULL) { + clusterManagerLogInfo(">>> Nobody claims ownership, " + "selecting an owner...\n"); + owner = clusterManagerGetNodeWithMostKeysInSlot(cluster_manager.nodes, + slot, NULL); + // If we still don't have an owner, we can't fix it. + if (owner == NULL) { + clusterManagerLogErr("[ERR] Can't select a slot owner. 
" + "Impossible to fix.\n"); + success = 0; + goto cleanup; + } + + // Use ADDSLOTS to assign the slot. + printf("*** Configuring %s:%d as the slot owner\n", owner->ip, + owner->port); + redisReply *reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER " + "SETSLOT %d %s", + slot, "STABLE"); + success = clusterManagerCheckRedisReply(owner, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER ADDSLOTS %d", slot); + success = clusterManagerCheckRedisReply(owner, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + /* Make sure this information will propagate. Not strictly needed + * since there is no past owner, so all the other nodes will accept + * whatever epoch this node will claim the slot with. */ + reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER BUMPEPOCH"); + success = clusterManagerCheckRedisReply(owner, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + /* Remove the owner from the list of migrating/importing + * nodes. */ + clusterManagerRemoveNodeFromList(migrating, owner); + clusterManagerRemoveNodeFromList(importing, owner); + } + /* If there are multiple owners of the slot, we need to fix it + * so that a single node is the owner and all the other nodes + * are in importing state. Later the fix can be handled by one + * of the base cases above. + * + * Note that this case also covers multiple nodes having the slot + * in migrating state, since migrating is a valid state only for + * slot owners. 
*/ + if (listLength(owners) > 1) { + owner = clusterManagerGetNodeWithMostKeysInSlot(owners, slot, NULL); + listRewind(owners, &li); + redisReply *reply = NULL; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n == owner) continue; + reply = CLUSTER_MANAGER_COMMAND(n, "CLUSTER DELSLOT %d", slot); + success = clusterManagerCheckRedisReply(n, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + success = clusterManagerSetSlot(n, owner, slot, "importing", NULL); + if (!success) goto cleanup; + clusterManagerRemoveNodeFromList(importing, n); //Avoid duplicates + listAddNodeTail(importing, n); + } + reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER BUMPEPOCH"); + success = clusterManagerCheckRedisReply(owner, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + } + int move_opts = CLUSTER_MANAGER_OPT_VERBOSE; + /* Case 1: The slot is in migrating state in one slot, and in + * importing state in 1 slot. That's trivial to address. */ + if (listLength(migrating) == 1 && listLength(importing) == 1) { + clusterManagerNode *src = listFirst(migrating)->value; + clusterManagerNode *dst = listFirst(importing)->value; + success = clusterManagerMoveSlot(src, dst, slot, move_opts, NULL); + } + /* Case 2: There are multiple nodes that claim the slot as importing, + * they probably got keys about the slot after a restart so opened + * the slot. In this case we just move all the keys to the owner + * according to the configuration. 
*/ + else if (listLength(migrating) == 0 && listLength(importing) > 0) { + clusterManagerLogInfo(">>> Moving all the %d slot keys to its " + "owner %s:%d\n", slot, owner->ip, owner->port); + move_opts |= CLUSTER_MANAGER_OPT_COLD; + listRewind(importing, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n == owner) continue; + success = clusterManagerMoveSlot(n, owner, slot, move_opts, NULL); + if (!success) goto cleanup; + clusterManagerLogInfo(">>> Setting %d as STABLE in " + "%s:%d\n", slot, n->ip, n->port); + + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER SETSLOT %d %s", + slot, "STABLE"); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) goto cleanup; + } + } else { + int try_to_close_slot = (listLength(importing) == 0 && + listLength(migrating) == 1); + if (try_to_close_slot) { + clusterManagerNode *n = listFirst(migrating)->value; + redisReply *r = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER GETKEYSINSLOT %d %d", slot, 10); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) { + if (success) try_to_close_slot = (r->elements == 0); + freeReplyObject(r); + } + if (!success) goto cleanup; + } + /* Case 3: There are no slots claiming to be in importing state, but + * there is a migrating node that actually don't have any key. We + * can just close the slot, probably a reshard interrupted in the middle. */ + if (try_to_close_slot) { + clusterManagerNode *n = listFirst(migrating)->value; + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER SETSLOT %d %s", + slot, "STABLE"); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) goto cleanup; + } else { + success = 0; + clusterManagerLogErr("[ERR] Sorry, redis-cli can't fix this slot " + "yet (work in progress). 
Slot is set as " + "migrating in %s, as importing in %s, " + "owner is %s:%d\n", migrating_str, + importing_str, owner->ip, owner->port); + } + } +cleanup: + listRelease(owners); + listRelease(migrating); + listRelease(importing); + sdsfree(migrating_str); + sdsfree(importing_str); + return success; +} + +static int clusterManagerCheckCluster(int quiet) { listNode *ln = listFirst(cluster_manager.nodes); - if (!ln) return; + if (!ln) return 0; + int result = 1; + int do_fix = config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_FIX; clusterManagerNode *node = ln->value; clusterManagerLogInfo(">>> Performing Cluster Check (using node %s:%d)\n", node->ip, node->port); @@ -3124,6 +3672,7 @@ static void clusterManagerCheckCluster(int quiet) { if (!clusterManagerIsConfigConsistent()) { sds err = sdsnew("[ERR] Nodes don't agree about configuration!"); clusterManagerOnError(err); + result = 0; } else { clusterManagerLogOk("[OK] All nodes agree about slots " "configuration.\n"); @@ -3174,6 +3723,7 @@ static void clusterManagerCheckCluster(int quiet) { } } if (open_slots != NULL) { + result = 0; dictIterator *iter = dictGetIterator(open_slots); dictEntry *entry; sds errstr = sdsnew("[WARNING] The following slots are open: "); @@ -3185,6 +3735,17 @@ static void clusterManagerCheckCluster(int quiet) { } clusterManagerLogErr("%s.\n", (char *) errstr); sdsfree(errstr); + if (do_fix) { + // Fix open slots. 
+ dictReleaseIterator(iter); + iter = dictGetIterator(open_slots); + while ((entry = dictNext(iter)) != NULL) { + sds slot = (sds) dictGetKey(entry); + result = clusterManagerFixOpenSlot(atoi(slot)); + if (!result) break; + } + } + dictReleaseIterator(iter); dictRelease(open_slots); } clusterManagerLogInfo(">>> Check slots coverage...\n"); @@ -3200,7 +3761,16 @@ static void clusterManagerCheckCluster(int quiet) { "covered by nodes.\n", CLUSTER_MANAGER_SLOTS); clusterManagerOnError(err); + result = 0; + if (do_fix/* && result*/) { + dictType dtype = clusterManagerDictType; + dtype.valDestructor = dictListDestructor; + clusterManagerUncoveredSlots = dictCreate(&dtype, NULL); + int fixed = clusterManagerFixSlotsCoverage(slots); + if (fixed > 0) result = 1; + } } + return result; } static clusterManagerNode *clusterNodeForResharding(char *id, @@ -3546,12 +4116,7 @@ assign_replicas: } clusterManagerOptimizeAntiAffinity(ip_nodes, ip_count); clusterManagerShowNodes(); - printf("Can I set the above configuration? 
%s", "(type 'yes' to accept): "); - fflush(stdout); - char buf[4]; - int nread = read(fileno(stdin),buf,4); - buf[3] = '\0'; - if (nread != 0 && !strcmp("yes", buf)) { + if (confirmWithYes("Can I set the above configuration?")) { listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; @@ -3674,13 +4239,17 @@ static int clusterManagerCommandCheck(int argc, char **argv) { clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; clusterManagerShowInfo(); - clusterManagerCheckCluster(0); - return 1; + return clusterManagerCheckCluster(0); invalid_args: fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); return 0; } +static int clusterManagerCommandFix(int argc, char **argv) { + config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_FIX; + return clusterManagerCommandCheck(argc, argv); +} + static int clusterManagerCommandReshard(int argc, char **argv) { int port = 0; char *ip = NULL; From d36ff0f73e4d9ee67484961c8b141b6dc11fbb2b Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 10 Apr 2018 16:25:25 +0200 Subject: [PATCH 22/66] Cluster Manager: import command --- src/Makefile | 2 +- src/redis-cli.c | 216 +++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 195 insertions(+), 23 deletions(-) diff --git a/src/Makefile b/src/Makefile index a5e0e231a..a64454dad 100644 --- a/src/Makefile +++ b/src/Makefile @@ -146,7 +146,7 @@ REDIS_SERVER_NAME=redis-server REDIS_SENTINEL_NAME=redis-sentinel REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o 
hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.c REDIS_CLI_NAME=redis-cli -REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o +REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o crc16.o REDIS_BENCHMARK_NAME=redis-benchmark REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o zmalloc.o redis-benchmark.o REDIS_CHECK_RDB_NAME=redis-check-rdb diff --git a/src/redis-cli.c b/src/redis-cli.c index 8af1130c3..96bde3568 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -74,7 +74,7 @@ #define CLUSTER_MANAGER_REBALANCE_THRESHOLD 2 #define CLUSTER_MANAGER_INVALID_HOST_ARG \ - "Invalid arguments: you need to pass either a valid " \ + "[ERR] Invalid arguments: you need to pass either a valid " \ "address (ie. 120.0.0.1:7000) or space separated IP " \ "and port (ie. 
120.0.0.1 7000)\n" #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) @@ -115,7 +115,9 @@ #define CLUSTER_MANAGER_CMD_FLAG_AUTOWEIGHTS 1 << 3 #define CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER 1 << 4 #define CLUSTER_MANAGER_CMD_FLAG_SIMULATE 1 << 5 -#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 +#define CLUSTER_MANAGER_CMD_FLAG_REPLACE 1 << 6 +#define CLUSTER_MANAGER_CMD_FLAG_COPY 1 << 7 +#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 8 #define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 #define CLUSTER_MANAGER_OPT_COLD 1 << 1 @@ -237,6 +239,8 @@ static long getLongInfoField(char *info, char *field); * Utility functions *--------------------------------------------------------------------------- */ +uint16_t crc16(const char *buf, int len); + static long long ustime(void) { struct timeval tv; long long ust; @@ -1325,6 +1329,12 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"--cluster-simulate")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_SIMULATE; + } else if (!strcmp(argv[i],"--cluster-replace")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_REPLACE; + } else if (!strcmp(argv[i],"--cluster-copy")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_COPY; } else if (!strcmp(argv[i],"--cluster-use-empty-masters")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; @@ -1870,6 +1880,7 @@ static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandFix(int argc, char **argv); static int clusterManagerCommandReshard(int argc, char **argv); static int clusterManagerCommandRebalance(int argc, char **argv); +static int clusterManagerCommandImport(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); @@ -1892,6 +1903,8 @@ clusterManagerCommandDef clusterManagerCommands[] = { {"rebalance", 
clusterManagerCommandRebalance, -1, "host:port", "weight ,use-empty-masters," "timeout ,simulate,pipeline ,threshold "}, + {"import", clusterManagerCommandImport, 1, "host:port", + "from ,copy,replace"}, {"call", clusterManagerCommandCall, -2, "host:port command arg arg .. arg", NULL}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} @@ -2383,6 +2396,37 @@ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { return slots; } +/* ----------------------------------------------------------------------------- + * Key space handling + * -------------------------------------------------------------------------- */ + +/* We have 16384 hash slots. The hash slot of a given key is obtained + * as the least significant 14 bits of the crc16 of the key. + * + * However if the key contains the {...} pattern, only the part between + * { and } is hashed. This may be useful in the future to force certain + * keys to be in the same node (assuming no resharding is in progress). */ +static unsigned int keyHashSlot(char *key, int keylen) { + int s, e; /* start-end indexes of { and } */ + + for (s = 0; s < keylen; s++) + if (key[s] == '{') break; + + /* No '{' ? Hash the whole key. This is the base case. */ + if (s == keylen) return crc16(key,keylen) & 0x3FFF; + + /* '{' found? Check if we have the corresponding '}'. */ + for (e = s+1; e < keylen; e++) + if (key[e] == '}') break; + + /* No '}' or nothing between {} ? Hash the whole key. */ + if (e == keylen || e == s+1) return crc16(key,keylen) & 0x3FFF; + + /* If we are here there is both a { and a } on its right. Hash + * what is in the middle between { and }. */ + return crc16(key+s+1,e-s-1) & 0x3FFF; +} + static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent) { sds info = sdsempty(); sds spaces = sdsempty(); @@ -3533,8 +3577,8 @@ static int clusterManagerFixOpenSlot(int slot) { } // Use ADDSLOTS to assign the slot. 
- printf("*** Configuring %s:%d as the slot owner\n", owner->ip, - owner->port); + clusterManagerLogWarn("*** Configuring %s:%d as the slot owner\n", + owner->ip, owner->port); redisReply *reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER " "SETSLOT %d %s", slot, "STABLE"); @@ -4527,7 +4571,7 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { if (over_threshold) threshold_reached = 1; } if (!threshold_reached) { - clusterManagerLogErr("*** No rebalancing needed! " + clusterManagerLogWarn("*** No rebalancing needed! " "All nodes are within the %.2f%% threshold.\n", config.cluster_manager_command.threshold); result = 0; @@ -4586,7 +4630,7 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { listRelease(lsrc); int table_len = (int) listLength(table); if (!table || table_len != numslots) { - clusterManagerLogErr("*** Assertio failed: Reshard table " + clusterManagerLogErr("*** Assertion failed: Reshard table " "!= number of slots"); result = 0; goto end_move; @@ -4629,23 +4673,148 @@ invalid_args: return 0; } -static int clusterManagerCommandCall(int argc, char **argv) { - int port = 0; - char *ip = NULL; - char *addr = argv[0]; - char *c = strrchr(addr, '@'); - int i; - if (c != NULL) *c = '\0'; - c = strrchr(addr, ':'); - if (c != NULL) { - *c = '\0'; - ip = addr; - port = atoi(++c); - } else { - fprintf(stderr, - "Invalid arguments: first agrumnt must be host:port.\n"); - return 0; +static int clusterManagerCommandImport(int argc, char **argv) { + int success = 1; + int port = 0, src_port = 0; + char *ip = NULL, *src_ip = NULL; + char *invalid_args_msg = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) { + invalid_args_msg = CLUSTER_MANAGER_INVALID_HOST_ARG; + goto invalid_args; } + if (config.cluster_manager_command.from == NULL) { + invalid_args_msg = "[ERR] Option '--cluster-from' is required for " + "subcommand 'import'.\n"; + goto invalid_args; + } + char *src_host[] = {config.cluster_manager_command.from}; + if 
(!getClusterHostFromCmdArgs(1, src_host, &src_ip, &src_port)) { + invalid_args_msg = "[ERR] Invalid --cluster-from host. You need to " + "pass a valid address (ie. 120.0.0.1:7000).\n"; + goto invalid_args; + } + clusterManagerLogInfo(">>> Importing data from %s:%d to cluster %s:%d\n", + src_ip, src_port, ip, port); + + clusterManagerNode *refnode = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + char *reply_err = NULL; + redisReply *src_reply = NULL; + // Connect to the source node. + redisContext *src_ctx = redisConnect(src_ip, src_port); + if (src_ctx->err) { + success = 0; + fprintf(stderr,"Could not connect to Redis at %s:%d: %s.\n", src_ip, + src_port, src_ctx->errstr); + goto cleanup; + } + src_reply = reconnectingRedisCommand(src_ctx, "INFO"); + if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) { + if (src_reply && src_reply->str) reply_err = src_reply->str; + success = 0; + goto cleanup; + } + if (getLongInfoField(src_reply->str, "cluster_enabled")) { + clusterManagerLogErr("[ERR] The source node should not be a " + "cluster node.\n"); + success = 0; + goto cleanup; + } + freeReplyObject(src_reply); + src_reply = reconnectingRedisCommand(src_ctx, "DBSIZE"); + if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) { + if (src_reply && src_reply->str) reply_err = src_reply->str; + success = 0; + goto cleanup; + } + int size = src_reply->integer, i; + clusterManagerLogWarn("*** Importing %d keys from DB 0\n", size); + + // Build a slot -> node map + clusterManagerNode *slots_map[CLUSTER_MANAGER_SLOTS]; + memset(slots_map, 0, sizeof(slots_map) / sizeof(clusterManagerNode *)); + listIter li; + listNode *ln; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->slots_count == 0) continue; + if (n->slots[i]) { + slots_map[i] = n; + 
break; + } + } + } + + char cmdfmt[50] = "MIGRATE %s %d %s %d %d"; + if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COPY) + strcat(cmdfmt, " %s"); + if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_REPLACE) + strcat(cmdfmt, " %s"); + + /* Use SCAN to iterate over the keys, migrating to the + * right node as needed. */ + int cursor = -999, timeout = config.cluster_manager_command.timeout; + while (cursor != 0) { + if (cursor < 0) cursor = 0; + freeReplyObject(src_reply); + src_reply = reconnectingRedisCommand(src_ctx, "SCAN %d COUNT %d", + cursor, 1000); + if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) { + if (src_reply && src_reply->str) reply_err = src_reply->str; + success = 0; + goto cleanup; + } + assert(src_reply->type == REDIS_REPLY_ARRAY); + assert(src_reply->elements >= 2); + assert(src_reply->element[1]->type == REDIS_REPLY_ARRAY); + if (src_reply->element[0]->type == REDIS_REPLY_STRING) + cursor = atoi(src_reply->element[0]->str); + else if (src_reply->element[0]->type == REDIS_REPLY_INTEGER) + cursor = src_reply->element[0]->integer; + int keycount = src_reply->element[1]->elements; + for (i = 0; i < keycount; i++) { + redisReply *kr = src_reply->element[1]->element[i]; + assert(kr->type == REDIS_REPLY_STRING); + char *key = kr->str; + uint16_t slot = keyHashSlot(key, kr->len); + clusterManagerNode *target = slots_map[slot]; + printf("Migrating %s to %s:%d: ", key, target->ip, target->port); + redisReply *r = reconnectingRedisCommand(src_ctx, cmdfmt, + target->ip, target->port, + key, 0, timeout, + "COPY", "REPLACE"); + if (!r || r->type == REDIS_REPLY_ERROR) { + if (r && r->str) { + clusterManagerLogErr("Source %s:%d replied with " + "error:\n%s\n", src_ip, src_port, + r->str); + } + success = 0; + } + freeReplyObject(r); + if (!success) goto cleanup; + clusterManagerLogOk("OK\n"); + } + } +cleanup: + if (reply_err) + clusterManagerLogErr("Source %s:%d replied with error:\n%s\n", + src_ip, src_port, 
reply_err); + if (src_ctx) redisFree(src_ctx); + if (src_reply) freeReplyObject(src_reply); + return success; +invalid_args: + fprintf(stderr, "%s", invalid_args_msg); + return 0; +} + +static int clusterManagerCommandCall(int argc, char **argv) { + int port = 0, i; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *refnode = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; argc--; @@ -4677,6 +4846,9 @@ static int clusterManagerCommandCall(int argc, char **argv) { } zfree(argvlen); return 1; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; } static int clusterManagerCommandHelp(int argc, char **argv) { From e68fb5bc099d7efa3397e91a4be42c09824c8c74 Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 10 Apr 2018 16:53:24 +0200 Subject: [PATCH 23/66] Cluster Manager: added clusterManagerCheckCluster to import command --- src/redis-cli.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/redis-cli.c b/src/redis-cli.c index 96bde3568..34072b74d 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -4698,6 +4698,7 @@ static int clusterManagerCommandImport(int argc, char **argv) { clusterManagerNode *refnode = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + if (!clusterManagerCheckCluster(0)) return 0; char *reply_err = NULL; redisReply *src_reply = NULL; // Connect to the source node. From f2594671aa46f6cbbd577b15f1d1426a2a962fcf Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 11 Apr 2018 17:08:53 +0200 Subject: [PATCH 24/66] Cluster Manager: add-node command. 
--- src/redis-cli.c | 168 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 154 insertions(+), 14 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 34072b74d..c0d80801d 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -165,6 +165,7 @@ typedef struct clusterManagerCommand { char *from; char *to; char **weight; + char *master_id; int weight_argc; int slots; int timeout; @@ -1299,6 +1300,8 @@ static int parseOptions(int argc, char **argv) { usage(); } else if (!strcmp(argv[i],"--cluster-replicas") && !lastarg) { config.cluster_manager_command.replicas = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-master-id") && !lastarg) { + config.cluster_manager_command.master_id = argv[++i]; } else if (!strcmp(argv[i],"--cluster-from") && !lastarg) { config.cluster_manager_command.from = argv[++i]; } else if (!strcmp(argv[i],"--cluster-to") && !lastarg) { @@ -1335,6 +1338,9 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"--cluster-copy")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_COPY; + } else if (!strcmp(argv[i],"--cluster-slave")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_SLAVE; } else if (!strcmp(argv[i],"--cluster-use-empty-masters")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; @@ -1847,6 +1853,8 @@ static clusterManagerNode *clusterManagerNodeByName(const char *name); static clusterManagerNode *clusterManagerNodeByAbbreviatedName(const char *n); static void clusterManagerNodeResetSlots(clusterManagerNode *node); static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); +static void clusterManagerPrintNotClusterNodeError(clusterManagerNode *node, + char *err); static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err); static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts); @@ -1875,6 +1883,7 @@ static void 
clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, /* Cluster Manager commands. */ static int clusterManagerCommandCreate(int argc, char **argv); +static int clusterManagerCommandAddNode(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandFix(int argc, char **argv); @@ -1895,6 +1904,8 @@ typedef struct clusterManagerCommandDef { clusterManagerCommandDef clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", "replicas "}, + {"add-node", clusterManagerCommandAddNode, 2, + "new_host:new_port existing_host:existing_port", "slave,master-id "}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, @@ -3030,8 +3041,7 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { opts |= CLUSTER_MANAGER_OPT_GETFRIENDS; char *e = NULL; if (!clusterManagerNodeIsCluster(node, &e)) { - char *msg = (e ? e : "is not configured as a cluster node."); - clusterManagerLogErr("[ERR] Node %s:%d %s\n",node->ip,node->port,msg); + clusterManagerPrintNotClusterNodeError(node, e); if (e) zfree(e); freeClusterManagerNode(node); return 0; @@ -3313,6 +3323,27 @@ static clusterManagerNode * clusterManagerGetNodeWithMostKeysInSlot(list *nodes, return node; } +/* This function returns the master that has the least number of replicas + * in the cluster. If there are multiple masters with the same smaller + * number of replicas, one at random is returned. 
*/ + +static clusterManagerNode *clusterManagerNodeWithLeastReplicas() { + clusterManagerNode *node = NULL; + int lowest_count = 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (node->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (node == NULL || n->replicas_count < lowest_count) { + node = n; + lowest_count = n->replicas_count; + } + } + return node; +} + static int clusterManagerFixSlotsCoverage(char *all_slots) { int i, fixed = 0; list *none = NULL, *single = NULL, *multi = NULL; @@ -3966,6 +3997,26 @@ static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, array->nodes[array->count++] = node; } +static void clusterManagerPrintNotEmptyNodeError(clusterManagerNode *node, + char *err) +{ + char *msg; + if (err) msg = err; + else { + msg = "is not empty. Either the node already knows other " + "nodes (check with CLUSTER NODES) or contains some " + "key in database 0."; + } + clusterManagerLogErr("[ERR] Node %s:%d %s\n", node->ip, node->port, msg); +} + +static void clusterManagerPrintNotClusterNodeError(clusterManagerNode *node, + char *err) +{ + char *msg = (err ? err : "is not configured as a cluster node."); + clusterManagerLogErr("[ERR] Node %s:%d %s\n", node->ip, node->port, msg); +} + /* Execute redis-cli in Cluster Manager mode */ static void clusterManagerMode(clusterManagerCommandProc *proc) { int argc = config.cluster_manager_command.argc; @@ -4008,8 +4059,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { } char *err = NULL; if (!clusterManagerNodeIsCluster(node, &err)) { - char *msg = (err ? 
err : "is not configured as a cluster node."); - clusterManagerLogErr("[ERR] Node %s:%d %s\n", ip, port, msg); + clusterManagerPrintNotClusterNodeError(node, err); if (err) zfree(err); freeClusterManagerNode(node); return 0; @@ -4025,14 +4075,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { } err = NULL; if (!clusterManagerNodeIsEmpty(node, &err)) { - char *msg; - if (err) msg = err; - else { - msg = "is not empty. Either the node already knows other " - "nodes (check with CLUSTER NODES) or contains some " - "key in database 0."; - } - clusterManagerLogErr("[ERR] Node %s:%d %s\n", ip, port, msg); + clusterManagerPrintNotEmptyNodeError(node, err); if (err) zfree(err); freeClusterManagerNode(node); return 0; @@ -4263,6 +4306,104 @@ cleanup: return success; } +static int clusterManagerCommandAddNode(int argc, char **argv) { + int success = 1; + redisReply *reply = NULL; + char *ref_ip = NULL, *ip = NULL; + int ref_port = 0, port = 0; + if (!getClusterHostFromCmdArgs(argc - 1, argv + 1, &ref_ip, &ref_port)) + goto invalid_args; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) + goto invalid_args; + clusterManagerLogInfo(">>> Adding node %s:%d to cluster %s:%d\n", ip, port, + ref_ip, ref_port); + // Check the existing cluster + clusterManagerNode *refnode = clusterManagerNewNode(ref_ip, ref_port); + if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + if (!clusterManagerCheckCluster(0)) return 0; + + /* If --cluster-master-id was specified, try to resolve it now so that we + * abort before starting with the node configuration. 
*/ + clusterManagerNode *master_node = NULL; + if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_SLAVE) { + char *master_id = config.cluster_manager_command.master_id; + if (master_id != NULL) { + master_node = clusterManagerNodeByName(master_id); + if (master_node == NULL) { + clusterManagerLogErr("[ERR] No such master ID %s\n", master_id); + return 0; + } + } else { + master_node = clusterManagerNodeWithLeastReplicas(); + assert(master_node != NULL); + printf("Automatically selected master %s:%d\n", master_node->ip, + master_node->port); + } + } + + // Add the new node + clusterManagerNode *new_node = clusterManagerNewNode(ip, port); + int added = 0; + CLUSTER_MANAGER_NODE_CONNECT(new_node); + if (new_node->context->err) { + clusterManagerLogErr("[ERR] Sorry, can't connect to node %s:%d\n", + ip, port); + success = 0; + goto cleanup; + } + char *err = NULL; + if (!(success = clusterManagerNodeIsCluster(new_node, &err))) { + clusterManagerPrintNotClusterNodeError(new_node, err); + if (err) zfree(err); + goto cleanup; + } + if (!clusterManagerNodeLoadInfo(new_node, 0, &err)) { + if (err) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(new_node, err); + zfree(err); + } + success = 0; + goto cleanup; + } + if (!(success = clusterManagerNodeIsEmpty(new_node, &err))) { + clusterManagerPrintNotEmptyNodeError(new_node, err); + if (err) zfree(err); + goto cleanup; + } + clusterManagerNode *first = listFirst(cluster_manager.nodes)->value; + listAddNodeTail(cluster_manager.nodes, new_node); + added = 1; + + // Send CLUSTER MEET command to the new node + clusterManagerLogInfo(">>> Send CLUSTER MEET to node %s:%d to make it " + "join the cluster.\n", ip, port); + reply = CLUSTER_MANAGER_COMMAND(new_node, "CLUSTER MEET %s %d", + first->ip, first->port); + if (!(success = clusterManagerCheckRedisReply(new_node, reply, NULL))) + goto cleanup; + + /* Additional configuration is needed if the node is added as a slave. 
*/ + if (master_node) { + sleep(1); + clusterManagerWaitForClusterJoin(); + clusterManagerLogInfo(">>> Configure node as replica of %s:%d.\n", + master_node->ip, master_node->port); + freeReplyObject(reply); + reply = CLUSTER_MANAGER_COMMAND(new_node, "CLUSTER REPLICATE %s", + master_node->name); + if (!(success = clusterManagerCheckRedisReply(new_node, reply, NULL))) + goto cleanup; + } + clusterManagerLogOk("[OK] New node added correctly.\n"); +cleanup: + if (!added && new_node) freeClusterManagerNode(new_node); + if (reply) freeReplyObject(reply); + return success; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + static int clusterManagerCommandInfo(int argc, char **argv) { int port = 0; char *ip = NULL; @@ -4531,8 +4672,7 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { nodes_involved++; listAddNodeTail(involved, n); } - weightedNodes = zmalloc(nodes_involved * - sizeof(clusterManagerNode *)); + weightedNodes = zmalloc(nodes_involved * sizeof(clusterManagerNode *)); if (weightedNodes == NULL) goto cleanup; /* Check cluster, only proceed if it looks sane. */ clusterManagerCheckCluster(1); From 436164436d034b78e285dff84b8486ef4c9cacb4 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 11 Apr 2018 18:22:44 +0200 Subject: [PATCH 25/66] - Cluster Manager: del-node command. 
- Cluster Manager: fixed bug in clusterManagerNodeWithLeastReplicas --- src/redis-cli.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index c0d80801d..daad385dd 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1884,6 +1884,7 @@ static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandAddNode(int argc, char **argv); +static int clusterManagerCommandDeleteNode(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandFix(int argc, char **argv); @@ -1906,6 +1907,7 @@ clusterManagerCommandDef clusterManagerCommands[] = { "replicas "}, {"add-node", clusterManagerCommandAddNode, 2, "new_host:new_port existing_host:existing_port", "slave,master-id "}, + {"del-node", clusterManagerCommandDeleteNode, 2, "host:port node_id",NULL}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, @@ -3335,7 +3337,7 @@ static clusterManagerNode *clusterManagerNodeWithLeastReplicas() { listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; - if (node->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; if (node == NULL || n->replicas_count < lowest_count) { node = n; lowest_count = n->replicas_count; @@ -4404,6 +4406,73 @@ invalid_args: return 0; } +static int clusterManagerCommandDeleteNode(int argc, char **argv) { + UNUSED(argc); + int success = 1; + int port = 0; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args; + char *node_id = argv[1]; + clusterManagerLogInfo(">>> Removing node 
%s from cluster %s:%d\n", + node_id, ip, port); + clusterManagerNode *ref_node = clusterManagerNewNode(ip, port); + clusterManagerNode *node = NULL; + + // Load cluster information + if (!clusterManagerLoadInfoFromNode(ref_node, 0)) return 0; + + // Check if the node exists and is not empty + node = clusterManagerNodeByName(node_id); + if (node == NULL) { + clusterManagerLogErr("[ERR] No such node ID %s\n", node_id); + return 0; + } + if (node->slots_count != 0) { + clusterManagerLogErr("[ERR] Node %s:%d is not empty! Reshard data " + "away and try again.\n", node->ip, node->port); + return 0; + } + + // Send CLUSTER FORGET to all the nodes but the node to remove + clusterManagerLogInfo(">>> Sending CLUSTER FORGET messages to the " + "cluster...\n"); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n == node) continue; + if (n->replicate && !strcasecmp(n->replicate, node_id)) { + // Reconfigure the slave to replicate with some other node + clusterManagerNode *master = clusterManagerNodeWithLeastReplicas(); + //TODO: check whether master could be the same as node + assert(master != NULL); + clusterManagerLogInfo(">>> %s:%d as replica of %s:%d\n", + n->ip, n->port, master->ip, master->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER REPLICATE %s", + master->name); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) return 0; + } + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER FORGET %s", + node_id); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) return 0; + } + + // Finally shutdown the node + clusterManagerLogInfo(">>> SHUTDOWN the node.\n"); + redisReply *r = redisCommand(node->context, "SHUTDOWN"); + success = clusterManagerCheckRedisReply(node, r, NULL); + if (r) freeReplyObject(r); + return success; +invalid_args: + fprintf(stderr, 
CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + static int clusterManagerCommandInfo(int argc, char **argv) { int port = 0; char *ip = NULL; @@ -5026,6 +5095,9 @@ static int clusterManagerCommandHelp(int argc, char **argv) { } } } + fprintf(stderr, "\nFor check, fix, reshard, del-node, set-timeout you " + "can specify the host and port of any working node in " + "the cluster.\n\n"); return 0; } From 1113d2d6d4414481e0169eca02aa961be822009c Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 13 Apr 2018 16:09:22 +0200 Subject: [PATCH 26/66] Cluster Manager: set-timeout command --- src/redis-cli.c | 70 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 6 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index daad385dd..e7600b91c 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1890,6 +1890,7 @@ static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandFix(int argc, char **argv); static int clusterManagerCommandReshard(int argc, char **argv); static int clusterManagerCommandRebalance(int argc, char **argv); +static int clusterManagerCommandSetTimeout(int argc, char **argv); static int clusterManagerCommandImport(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); @@ -1905,21 +1906,23 @@ typedef struct clusterManagerCommandDef { clusterManagerCommandDef clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... 
hostN:portN", "replicas "}, - {"add-node", clusterManagerCommandAddNode, 2, - "new_host:new_port existing_host:existing_port", "slave,master-id "}, - {"del-node", clusterManagerCommandDeleteNode, 2, "host:port node_id",NULL}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, - {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, + {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"reshard", clusterManagerCommandReshard, -1, "host:port", "from ,to ,slots ,yes,timeout ,pipeline "}, {"rebalance", clusterManagerCommandRebalance, -1, "host:port", "weight ,use-empty-masters," "timeout ,simulate,pipeline ,threshold "}, - {"import", clusterManagerCommandImport, 1, "host:port", - "from ,copy,replace"}, + {"add-node", clusterManagerCommandAddNode, 2, + "new_host:new_port existing_host:existing_port", "slave,master-id "}, + {"del-node", clusterManagerCommandDeleteNode, 2, "host:port node_id",NULL}, {"call", clusterManagerCommandCall, -2, "host:port command arg arg .. 
arg", NULL}, + {"set-timeout", clusterManagerCommandSetTimeout, 2, + "host:port milliseconds", NULL}, + {"import", clusterManagerCommandImport, 1, "host:port", + "from ,copy,replace"}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} }; @@ -4882,6 +4885,61 @@ invalid_args: return 0; } +static int clusterManagerCommandSetTimeout(int argc, char **argv) { + UNUSED(argc); + int port = 0; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args; + int timeout = atoi(argv[1]); + if (timeout < 100) { + fprintf(stderr, "Setting a node timeout of less than 100 " + "milliseconds is a bad idea.\n"); + return 0; + } + // Load cluster information + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + int ok_count = 0, err_count = 0; + + clusterManagerLogInfo(">>> Reconfiguring node timeout in every " + "cluster node...\n"); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + char *err = NULL; + redisReply *reply = CLUSTER_MANAGER_COMMAND(n, "CONFIG %s %s %d", + "SET", + "cluster-node-timeout", + timeout); + if (reply == NULL) goto reply_err; + int ok = clusterManagerCheckRedisReply(n, reply, &err); + freeReplyObject(reply); + if (!ok) goto reply_err; + reply = CLUSTER_MANAGER_COMMAND(n, "CONFIG %s", "REWRITE"); + if (reply == NULL) goto reply_err; + ok = clusterManagerCheckRedisReply(n, reply, &err); + freeReplyObject(reply); + if (!ok) goto reply_err; + clusterManagerLogWarn("*** New timeout set for %s:%d\n", n->ip, + n->port); + ok_count++; + continue; +reply_err: + if (err == NULL) err = ""; + clusterManagerLogErr("ERR setting node-timeot for %s:%d: %s\n", n->ip, + n->port, err); + err_count++; + } + clusterManagerLogInfo(">>> New node timeout set. 
%d OK, %d ERR.\n", + ok_count, err_count); + return 1; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + static int clusterManagerCommandImport(int argc, char **argv) { int success = 1; int port = 0, src_port = 0; From 65735d60abc23046ced77f5f095a713320a64c04 Mon Sep 17 00:00:00 2001 From: artix Date: Thu, 19 Apr 2018 18:52:01 +0200 Subject: [PATCH 27/66] Cluster Manager: code improvements and more comments added. --- src/redis-cli.c | 66 +++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 35 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index e7600b91c..c0283b28c 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -68,7 +68,7 @@ #define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE" #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" -#define CLUSTER_MANAGER_SLOTS 16384 +#define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MIGRATE_TIMEOUT 60000 #define CLUSTER_MANAGER_MIGRATE_PIPELINE 10 #define CLUSTER_MANAGER_REBALANCE_THRESHOLD 2 @@ -172,6 +172,7 @@ typedef struct clusterManagerCommand { int pipeline; float threshold; } clusterManagerCommand; + static void createClusterManagerCommand(char *cmdname, int argc, char **argv); @@ -1788,7 +1789,7 @@ static int evalMode(int argc, char **argv) { /* The Cluster Manager global structure */ static struct clusterManager { - list *nodes; /* List of nodes int he configuration. */ + list *nodes; /* List of nodes in the configuration. */ list *errors; } cluster_manager; @@ -1821,7 +1822,7 @@ typedef struct clusterManagerNode { int balance; /* Used by rebalance */ } clusterManagerNode; -/* Data structure used to represent a sequence of nodes. */ +/* Data structure used to represent a sequence of cluster nodes. 
*/ typedef struct clusterManagerNodeArray { clusterManagerNode **nodes; /* Actual nodes array */ clusterManagerNode **alloc; /* Pointer to the allocated memory */ @@ -1829,7 +1830,7 @@ typedef struct clusterManagerNodeArray { int count; /* Non-NULL nodes count */ } clusterManagerNodeArray; -/* Used for reshard table. */ +/* Used for the reshard table. */ typedef struct clusterManagerReshardTableItem { clusterManagerNode *source; int slot; @@ -1865,7 +1866,7 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, int ip_count); static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent); static void clusterManagerShowNodes(void); -static void clusterManagerShowInfo(void); +static void clusterManagerShowClusterInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); static void clusterManagerWaitForClusterJoin(void); static int clusterManagerCheckCluster(int quiet); @@ -2067,8 +2068,9 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { clusterManagerNodeResetSlots(node); return node; } + /* Check whether reply is NULL or its type is REDIS_REPLY_ERROR. In the - * latest case, if 'err' arg is not NULL, it gets allocated with a copy + * latest case, if the 'err' arg is not NULL, it gets allocated with a copy * of reply error (it's up to the caller function to free it), elsewhere * the error is directly printed. */ static int clusterManagerCheckRedisReply(clusterManagerNode *n, @@ -2100,7 +2102,7 @@ static void clusterManagerRemoveNodeFromList(list *nodelist, } } -/* Return the node with the specified ID or NULL. */ +/* Return the node with the specified name (ID) or NULL. 
*/ static clusterManagerNode *clusterManagerNodeByName(const char *name) { if (cluster_manager.nodes == NULL) return NULL; clusterManagerNode *found = NULL; @@ -2121,7 +2123,7 @@ static clusterManagerNode *clusterManagerNodeByName(const char *name) { return found; } -/* Like get_node_by_name but the specified name can be just the first +/* Like clusterManagerNodeByName but the specified name can be just the first * part of the node ID as long as the prefix in unique across the * cluster. */ @@ -2152,6 +2154,7 @@ static void clusterManagerNodeResetSlots(clusterManagerNode *node) { node->slots_count = 0; } +/* Call "INFO" redis command on the specified node and return the reply. */ static redisReply *clusterManagerGetNodeRedisInfo(clusterManagerNode *node, char **err) { @@ -2181,7 +2184,7 @@ static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { * some key or if it already knows other nodes */ static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) { redisReply *info = clusterManagerGetNodeRedisInfo(node, err); - int is_err = 0, is_empty = 1; + int is_empty = 1; if (info == NULL) return 0; if (strstr(info->str, "db0:") != NULL) { is_empty = 0; @@ -2190,11 +2193,7 @@ static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) { freeReplyObject(info); info = CLUSTER_MANAGER_COMMAND(node, "CLUSTER INFO"); if (err != NULL) *err = NULL; - if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { - *err = zmalloc((info->len + 1) * sizeof(char)); - strcpy(*err, info->str); - } + if (!clusterManagerCheckRedisReply(node, info, err)) { is_empty = 0; goto result; } @@ -2422,7 +2421,7 @@ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { * However if the key contains the {...} pattern, only the part between * { and } is hashed. This may be useful in the future to force certain * keys to be in the same node (assuming no resharding is in progress). 
*/ -static unsigned int keyHashSlot(char *key, int keylen) { +static unsigned int clusterManagerKeyHashSlot(char *key, int keylen) { int s, e; /* start-end indexes of { and } */ for (s = 0; s < keylen; s++) @@ -2443,6 +2442,7 @@ static unsigned int keyHashSlot(char *key, int keylen) { return crc16(key+s+1,e-s-1) & 0x3FFF; } +/* Return a string representation of the cluster node. */ static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent) { sds info = sdsempty(); sds spaces = sdsempty(); @@ -2484,7 +2484,7 @@ static void clusterManagerShowNodes(void) { } } -static void clusterManagerShowInfo(void) { +static void clusterManagerShowClusterInfo(void) { int masters = 0; int keys = 0; listIter li; @@ -2533,11 +2533,12 @@ static void clusterManagerShowInfo(void) { printf("%.2f keys per slot on average.\n", keys_per_slot); } +/* Flush dirty slots configuration of the node by calling CLUSTER ADDSLOTS */ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) { redisReply *reply = NULL; void *_reply = NULL; - int is_err = 0, success = 1; + int success = 1; /* First two args are used for the command itself. */ int argc = node->slots_count + 2; sds *argv = zmalloc(argc * sizeof(*argv)); @@ -2566,14 +2567,7 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) goto cleanup; } reply = (redisReply*) _reply; - if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { - *err = zmalloc((reply->len + 1) * sizeof(char)); - strcpy(*err, reply->str); - } - success = 0; - goto cleanup; - } + success = clusterManagerCheckRedisReply(node, reply, err); cleanup: zfree(argvlen); if (argv != NULL) { @@ -2821,7 +2815,7 @@ static int clusterManagerMoveSlot(clusterManagerNode *source, } /* Flush the dirty node configuration by calling replicate for slaves or - * adding the slots for masters. */ + * adding the slots defined in the masters. 
*/ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { if (!node->dirty) return 0; redisReply *reply = NULL; @@ -2852,6 +2846,7 @@ cleanup: return success; } +/* Wait until the cluster configuration is consistent. */ static void clusterManagerWaitForClusterJoin(void) { printf("Waiting for the cluster to join\n"); while(!clusterManagerIsConfigConsistent()) { @@ -2871,13 +2866,9 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err) { redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); - int is_err = 0, success = 1; + int success = 1; *err = NULL; - if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { - *err = zmalloc((reply->len + 1) * sizeof(char)); - strcpy(*err, reply->str); - } + if (!clusterManagerCheckRedisReply(node, reply, err)) { success = 0; goto cleanup; } @@ -3114,6 +3105,7 @@ invalid_friend: return 1; } +/* Compare functions used by various sorting operations. */ int clusterManagerSlotCompare(const void *slot1, const void *slot2) { const char **i1 = (const char **)slot1; const char **i2 = (const char **)slot2; @@ -3252,6 +3244,7 @@ static int clusterManagerIsConfigConsistent(void) { return consistent; } +/* Add the error string to cluster_manager.errors and print it. */ static void clusterManagerOnError(sds err) { if (cluster_manager.errors == NULL) cluster_manager.errors = listCreate(); @@ -3259,6 +3252,9 @@ static void clusterManagerOnError(sds err) { clusterManagerLogErr("%s\n", (char *) err); } +/* Check the slots coverage of the cluster. The 'all_slots' argument must be + * and array of 16384 bytes. Every covered slot will be set to 1 in the + * 'all_slots' array. 
The function returns the total number if covered slots.*/ static int clusterManagerGetCoveredSlots(char *all_slots) { if (cluster_manager.nodes == NULL) return 0; listIter li; @@ -4482,7 +4478,7 @@ static int clusterManagerCommandInfo(int argc, char **argv) { if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; - clusterManagerShowInfo(); + clusterManagerShowClusterInfo(); return 1; invalid_args: fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); @@ -4495,7 +4491,7 @@ static int clusterManagerCommandCheck(int argc, char **argv) { if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; - clusterManagerShowInfo(); + clusterManagerShowClusterInfo(); return clusterManagerCheckCluster(0); invalid_args: fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); @@ -5047,7 +5043,7 @@ static int clusterManagerCommandImport(int argc, char **argv) { redisReply *kr = src_reply->element[1]->element[i]; assert(kr->type == REDIS_REPLY_STRING); char *key = kr->str; - uint16_t slot = keyHashSlot(key, kr->len); + uint16_t slot = clusterManagerKeyHashSlot(key, kr->len); clusterManagerNode *target = slots_map[slot]; printf("Migrating %s to %s:%d: ", key, target->ip, target->port); redisReply *r = reconnectingRedisCommand(src_ctx, cmdfmt, From 67c1df9d85224e8c673710e2fb2580d36c43f90e Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 20 Apr 2018 18:08:30 +0200 Subject: [PATCH 28/66] Cluster Manager: fixed bug when parsing CLUSTER NODES reply (clusterManagerNodeLoadInfo) --- src/redis-cli.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index c0283b28c..b55cf93e8 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2922,6 +2922,7 @@ static int 
clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, line = p + 1; remaining--; } else line = p; + char *dash = NULL; if (slotsdef[0] == '[') { slotsdef++; if ((p = strstr(slotsdef, "->-"))) { // Migrating @@ -2953,7 +2954,8 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, node->importing[node->importing_count - 1] = src; } - } else if ((p = strchr(slotsdef, '-')) != NULL) { + } else if ((dash = strchr(slotsdef, '-')) != NULL) { + p = dash; int start, stop; *p = '\0'; start = atoi(slotsdef); @@ -5078,7 +5080,7 @@ invalid_args: static int clusterManagerCommandCall(int argc, char **argv) { int port = 0, i; char *ip = NULL; - if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args; clusterManagerNode *refnode = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; argc--; From 55c0b5f7adea5f75e2366bd54d85ae25e9c2e3e9 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 20 Apr 2018 19:25:08 +0200 Subject: [PATCH 29/66] Cluster Manager: fixed expected slots calculation (rebalance) Cluster Manager: fixed argument parsing after --cluster-weight --- src/redis-cli.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index b55cf93e8..36531f884 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1318,6 +1318,7 @@ static int parseOptions(int argc, char **argv) { if (wargc > 0) { config.cluster_manager_command.weight = weight; config.cluster_manager_command.weight_argc = wargc; + i += wargc; } } else if (!strcmp(argv[i],"--cluster-slots") && !lastarg) { config.cluster_manager_command.slots = atoi(argv[++i]); @@ -4724,7 +4725,6 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { int nodes_involved = 0; int use_empty = config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; - involved = listCreate(); listIter li; listNode 
*ln; @@ -4762,15 +4762,15 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; weightedNodes[i++] = n; - int expected = (((float)CLUSTER_MANAGER_SLOTS / total_weight) * - (int) n->weight); + int expected = (int) (((float)CLUSTER_MANAGER_SLOTS / total_weight) * + n->weight); n->balance = n->slots_count - expected; total_balance += n->balance; /* Compute the percentage of difference between the * expected number of slots and the real one, to see * if it's over the threshold specified by the user. */ int over_threshold = 0; - if (config.cluster_manager_command.threshold > 0) { + if (threshold > 0) { if (n->slots_count > 0) { float err_perc = fabs((100-(100.0*expected/n->slots_count))); if (err_perc > threshold) over_threshold = 1; @@ -4784,7 +4784,6 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { clusterManagerLogWarn("*** No rebalancing needed! " "All nodes are within the %.2f%% threshold.\n", config.cluster_manager_command.threshold); - result = 0; goto cleanup; } /* Because of rounding, it is possible that the balance of all nodes From 9398cfc01efb7106f1bf8e33b7ae30643e1cd4bd Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 20 Apr 2018 19:29:42 +0200 Subject: [PATCH 30/66] Cluster tests now using redis-cli instead of redis-trib --- tests/cluster/tests/04-resharding.tcl | 10 +++++----- tests/cluster/tests/12-replica-migration-2.tcl | 14 +++++++------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/cluster/tests/04-resharding.tcl b/tests/cluster/tests/04-resharding.tcl index 0ccbf717d..68fba135e 100644 --- a/tests/cluster/tests/04-resharding.tcl +++ b/tests/cluster/tests/04-resharding.tcl @@ -73,12 +73,12 @@ test "Cluster consistency during live resharding" { flush stdout set target [dict get [get_myself [randomInt 5]] id] set tribpid [lindex [exec \ - ../../../src/redis-trib.rb reshard \ - --from all \ - --to $target \ - --slots 100 
\ - --yes \ + ../../../src/redis-cli --cluster reshard \ 127.0.0.1:[get_instance_attrib redis 0 port] \ + --cluster-from all \ + --cluster-to $target \ + --cluster-slots 100 \ + --cluster-yes \ | [info nameofexecutable] \ ../tests/helpers/onlydots.tcl \ &] 0] diff --git a/tests/cluster/tests/12-replica-migration-2.tcl b/tests/cluster/tests/12-replica-migration-2.tcl index 48ecd1d50..3d8b7b04b 100644 --- a/tests/cluster/tests/12-replica-migration-2.tcl +++ b/tests/cluster/tests/12-replica-migration-2.tcl @@ -31,9 +31,9 @@ test "Each master should have at least two replicas attached" { set master0_id [dict get [get_myself 0] id] test "Resharding all the master #0 slots away from it" { set output [exec \ - ../../../src/redis-trib.rb rebalance \ - --weight ${master0_id}=0 \ - 127.0.0.1:[get_instance_attrib redis 0 port] >@ stdout] + ../../../src/redis-cli --cluster rebalance \ + 127.0.0.1:[get_instance_attrib redis 0 port] \ + --cluster-weight ${master0_id}=0 >@ stdout ] } test "Master #0 should lose its replicas" { @@ -49,10 +49,10 @@ test "Resharding back some slot to master #0" { # new resharding. 
after 10000 set output [exec \ - ../../../src/redis-trib.rb rebalance \ - --weight ${master0_id}=.01 \ - --use-empty-masters \ - 127.0.0.1:[get_instance_attrib redis 0 port] >@ stdout] + ../../../src/redis-cli --cluster rebalance \ + 127.0.0.1:[get_instance_attrib redis 0 port] \ + --cluster-weight ${master0_id}=.01 \ + --cluster-use-empty-masters >@ stdout] } test "Master #0 should re-acquire one or more replicas" { From a617374a6550b39f82037d8a16b3770473bfe4ed Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 12 Jan 2018 11:06:24 +0100 Subject: [PATCH 31/66] Cluster Manager mode --- src/redis-cli.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 108 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index d80973e75..92467a6bf 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -65,6 +65,7 @@ #define REDIS_CLI_HISTFILE_DEFAULT ".rediscli_history" #define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE" #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" +#define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) /* --latency-dist palettes. */ int spectrum_palette_color_size = 19; @@ -77,6 +78,16 @@ int spectrum_palette_mono[] = {0,233,234,235,237,239,241,243,245,247,249,251,253 int *spectrum_palette; int spectrum_palette_size; +/* Cluster Manager command info */ +struct clusterManagerCommand { + char *name; + int argc; + char **argv; + int flags; + int replicas; +}; + + static redisContext *context; static struct config { char *hostip; @@ -119,8 +130,29 @@ static struct config { int eval_ldb_end; /* Lua debugging session ended. */ int enable_ldb_on_eval; /* Handle manual SCRIPT DEBUG + EVAL commands. */ int last_cmd_type; + struct clusterManagerCommand cluster_manager_command; } config; +/* Cluster Manager commands. 
*/ +typedef int clusterManagerCommandProc(int argc, char **argv); +static struct clusterManagerCommandDef { + char *name; + clusterManagerCommandProc *proc; + int arity; +}; + +static int clusterManagerCommandCreate(int argc, char **argv) { + printf("CLUSTER: create\n"); + printf("Arguments: %d\n", argc); + printf("Replicas: %d\n", config.cluster_manager_command.replicas); + fprintf(stderr, "Not implemented yet!\n"); + return 0; +} + +struct clusterManagerCommandDef clusterManagerCommands[] = { + {"create", clusterManagerCommandCreate, -2} +}; + /* User preferences. */ static struct pref { int hints; @@ -1061,6 +1093,13 @@ static redisReply *reconnectingRedisCommand(redisContext *c, const char *fmt, .. * User interface *--------------------------------------------------------------------------- */ +static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { + struct clusterManagerCommand *cmd = &config.cluster_manager_command; + cmd->name = cmdname; + cmd->argc = argc; + cmd->argv = argc ? 
argv : NULL; +} + static int parseOptions(int argc, char **argv) { int i; @@ -1146,6 +1185,18 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"-d") && !lastarg) { sdsfree(config.mb_delim); config.mb_delim = sdsnew(argv[++i]); + } else if (!strcmp(argv[i],"--cluster") && !lastarg) { + if (CLUSTER_MANAGER_MODE()) usage(); + char *cmd = argv[++i]; + int j = i; + for (; j < argc; j++) if (argv[j][0] == '-') break; + j--; + createClusterManagerCommand(cmd, j - i, argv + i); + i = j; + } else if (!strcmp(argv[i],"--cluster") && lastarg) { + usage(); + } else if (!strcmp(argv[i],"--cluster-replicas") && !lastarg) { + config.cluster_manager_command.replicas = atoi(argv[++i]); } else if (!strcmp(argv[i],"-v") || !strcmp(argv[i], "--version")) { sds version = cliVersion(); printf("redis-cli %s\n", version); @@ -1243,9 +1294,13 @@ static void usage(void) { " --ldb-sync-mode Like --ldb but uses the synchronous Lua debugger, in\n" " this mode the server is blocked and script changes are\n" " are not rolled back from the server memory.\n" +" --cluster [args...]\n" +" Cluster Manager command and arguments (see below).\n" " --help Output this help and exit.\n" " --version Output version and exit.\n" "\n" +"Cluster Manager Commands:\n" +"\n" "Examples:\n" " cat /etc/passwd | redis-cli -x set mypasswd\n" " redis-cli get mypasswd\n" @@ -1569,6 +1624,43 @@ static int evalMode(int argc, char **argv) { return retval; } +/*------------------------------------------------------------------------------ + * Cluster Manager mode + *--------------------------------------------------------------------------- */ + +static clusterManagerCommandProc *validateClusterManagerCommand(void) { + int i, commands_count = sizeof(clusterManagerCommands) / + sizeof(struct clusterManagerCommandDef); + clusterManagerCommandProc *proc = NULL; + char *cmdname = config.cluster_manager_command.name; + int argc = config.cluster_manager_command.argc; + for (i = 0; i < commands_count; 
i++) { + struct clusterManagerCommandDef cmddef = clusterManagerCommands[i]; + if (!strcmp(cmddef.name, cmdname)) { + if ((cmddef.arity > 0 && argc != cmddef.arity) || + (cmddef.arity < 0 && argc < (cmddef.arity * -1))) { + fprintf(stderr, "[ERR] Wrong number of arguments for " + "specified --cluster sub command\n"); + return NULL; + } + proc = cmddef.proc; + } + } + if (!proc) fprintf(stderr, "Unknown --cluster subcommand\n"); + return proc; +} + +static void clusterManagerMode(clusterManagerCommandProc *proc) { + int argc = config.cluster_manager_command.argc; + char **argv = config.cluster_manager_command.argv; + if (!proc(argc, argv)) { + sdsfree(config.hostip); + sdsfree(config.mb_delim); + exit(1); + } + exit(0); +} + /*------------------------------------------------------------------------------ * Latency and latency history modes *--------------------------------------------------------------------------- */ @@ -2862,7 +2954,11 @@ int main(int argc, char **argv) { config.eval_ldb_sync = 0; config.enable_ldb_on_eval = 0; config.last_cmd_type = -1; - + config.cluster_manager_command.name = NULL; + config.cluster_manager_command.argc = 0; + config.cluster_manager_command.argv = NULL; + config.cluster_manager_command.flags = 0; + config.cluster_manager_command.replicas = 0; pref.hints = 1; spectrum_palette = spectrum_palette_color; @@ -2878,6 +2974,17 @@ int main(int argc, char **argv) { argc -= firstarg; argv += firstarg; + /* Cluster Manager mode */ + if (CLUSTER_MANAGER_MODE()) { + clusterManagerCommandProc *proc = validateClusterManagerCommand(); + if (!proc) { + sdsfree(config.hostip); + sdsfree(config.mb_delim); + exit(1); + } + clusterManagerMode(proc); + } + /* Latency mode */ if (config.latency_mode) { if (cliConnect(0) == REDIS_ERR) exit(1); From 7e715e35dc6f95315785416dec58053c473be221 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 31 Jan 2018 16:26:21 +0100 Subject: [PATCH 32/66] Cluster Manager: 'create', 'info' and 'check' commands --- 
src/Makefile | 2 +- src/redis-cli.c | 1297 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 1272 insertions(+), 27 deletions(-) diff --git a/src/Makefile b/src/Makefile index 3f6ac4541..14112aa1f 100644 --- a/src/Makefile +++ b/src/Makefile @@ -146,7 +146,7 @@ REDIS_SERVER_NAME=redis-server REDIS_SENTINEL_NAME=redis-sentinel REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o REDIS_CLI_NAME=redis-cli -REDIS_CLI_OBJ=anet.o adlist.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o +REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o REDIS_BENCHMARK_NAME=redis-benchmark REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o zmalloc.o redis-benchmark.o REDIS_CHECK_RDB_NAME=redis-check-rdb diff --git a/src/redis-cli.c b/src/redis-cli.c index 92467a6bf..9943d5753 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -41,13 +41,15 @@ #include #include #include -#include +#include #include #include #include #include #include /* use sds.h from hiredis, so that only one set of sds functions will be present in the binary */ +#include "dict.h" +#include "adlist.h" #include "zmalloc.h" #include "linenoise.h" #include "help.h" @@ -65,7 +67,64 @@ #define REDIS_CLI_HISTFILE_DEFAULT ".rediscli_history" #define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE" #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" +#define 
CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) +#define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) (nodes/(replicas + 1)) +#define CLUSTER_MANAGER_COMMAND(n,...) \ + (reconnectingRedisCommand(n->context, __VA_ARGS__)) +#define CLUSTER_MANAGER_NODE_INFO(n) (CLUSTER_MANAGER_COMMAND(n, "INFO")) + +#define CLUSTER_MANAGER_RESET_SLOTS(n) do { \ + memset(n->slots, 0, sizeof(n->slots)); \ + n->slots_count = 0; \ +} while(0) + +#define CLUSTER_MANAGER_NODEARRAY_INIT(array, alloc_len) do { \ + array->nodes = zcalloc(alloc_len * sizeof(clusterManagerNode*));\ + array->alloc = array->nodes; \ + array->len = alloc_len; \ + array->count = 0; \ +} while(0) + +#define CLUSTER_MANAGER_NODEARRAY_RESET(array) do { \ + if (array->nodes > array->alloc) { \ + array->len = array->nodes - array->alloc; \ + array->nodes = array->alloc; \ + array->count = 0; \ + int i = 0; \ + for(; i < array->len; i++) { \ + if (array->nodes[i] != NULL) array->count++;\ + } \ + } \ +} while(0) + +#define CLUSTER_MANAGER_NODEARRAY_FREE(array) zfree(array->alloc) + +#define CLUSTER_MANAGER_NODEARRAY_SHIFT(array, nodeptr) do {\ + assert(array->nodes < (array->nodes + array->len)); \ + if (*array->nodes != NULL) array->count--; \ + nodeptr = *array->nodes; \ + array->nodes++; \ + array->len--; \ +} while(0) + +#define CLUSTER_MANAGER_NODEARRAY_ADD(array, nodeptr) do { \ + assert(array->nodes < (array->nodes + array->len)); \ + assert(nodeptr != NULL); \ + array->nodes[array->count++] = nodeptr; \ +} while(0) + +#define CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err) \ + fprintf(stderr,"Node %s:%d replied with error:\n%s\n", n->ip, n->port, err); + +#define CLUSTER_MANAGER_FLAG_MYSELF 1 << 0 +#define CLUSTER_MANAGER_FLAG_SLAVE 1 << 1 +#define CLUSTER_MANAGER_FLAG_FRIEND 1 << 2 +#define CLUSTER_MANAGER_FLAG_NOADDR 1 << 3 +#define CLUSTER_MANAGER_FLAG_DISCONNECT 1 << 4 +#define CLUSTER_MANAGER_FLAG_FAIL 1 << 5 + +#define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 
0 /* --latency-dist palettes. */ int spectrum_palette_color_size = 19; @@ -79,13 +138,13 @@ int *spectrum_palette; int spectrum_palette_size; /* Cluster Manager command info */ -struct clusterManagerCommand { +typedef struct clusterManagerCommand { char *name; int argc; char **argv; int flags; int replicas; -}; +} clusterManagerCommand; static redisContext *context; @@ -130,28 +189,70 @@ static struct config { int eval_ldb_end; /* Lua debugging session ended. */ int enable_ldb_on_eval; /* Handle manual SCRIPT DEBUG + EVAL commands. */ int last_cmd_type; - struct clusterManagerCommand cluster_manager_command; + clusterManagerCommand cluster_manager_command; } config; -/* Cluster Manager commands. */ +/* Cluster Manager */ + +static struct clusterManager { + list *nodes; +} cluster_manager; + +typedef struct clusterManagerNode { + redisContext *context; + sds name; + char *ip; + int port; + uint64_t current_epoch; + time_t ping_sent; + time_t ping_recv; + int flags; + sds replicate; + int dirty; + uint8_t slots[CLUSTER_MANAGER_SLOTS]; + int slots_count; + list *friends; +} clusterManagerNode; + +typedef struct clusterManagerNodeArray { + clusterManagerNode **nodes; + clusterManagerNode **alloc; + int len; + int count; +} clusterManagerNodeArray; + +static clusterManagerNode *clusterManagerNewNode(char *ip, int port); +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); +static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, + char **err); +static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts); +static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err); +static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, + int ip_len, clusterManagerNode ***offending, int *offending_len); +static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, + int ip_len); +static sds clusterManagerNodeInfo(clusterManagerNode *node); +static void 
clusterManagerShowNodes(void); +static void clusterManagerShowInfo(void); +static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); +static void clusterManagerWaitForClusterJoin(void); +static void clusterManagerCheckCluster(int quiet); typedef int clusterManagerCommandProc(int argc, char **argv); -static struct clusterManagerCommandDef { +typedef struct clusterManagerCommandDef { char *name; clusterManagerCommandProc *proc; int arity; -}; + char *args; + char *options; +} clusterManagerCommandDef; +static int clusterManagerIsConfigConsistent(void); -static int clusterManagerCommandCreate(int argc, char **argv) { - printf("CLUSTER: create\n"); - printf("Arguments: %d\n", argc); - printf("Replicas: %d\n", config.cluster_manager_command.replicas); - fprintf(stderr, "Not implemented yet!\n"); - return 0; -} +/* Cluster Manager commands. */ -struct clusterManagerCommandDef clusterManagerCommands[] = { - {"create", clusterManagerCommandCreate, -2} -}; +static int clusterManagerCommandCreate(int argc, char **argv); +static int clusterManagerCommandInfo(int argc, char **argv); +static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandHelp(int argc, char **argv); /* User preferences. 
*/ static struct pref { @@ -165,6 +266,9 @@ char *redisGitSHA1(void); char *redisGitDirty(void); static int cliConnect(int force); +static char *getInfoField(char *info, char *field); +static long getLongInfoField(char *info, char *field); + /*------------------------------------------------------------------------------ * Utility functions *--------------------------------------------------------------------------- */ @@ -317,6 +421,36 @@ static void parseRedisUri(const char *uri) { config.dbnum = atoi(curr); } +static uint64_t dictSdsHash(const void *key) { + return dictGenHashFunction((unsigned char*)key, sdslen((char*)key)); +} + +static int dictSdsKeyCompare(void *privdata, const void *key1, + const void *key2) +{ + int l1,l2; + DICT_NOTUSED(privdata); + + l1 = sdslen((sds)key1); + l2 = sdslen((sds)key2); + if (l1 != l2) return 0; + return memcmp(key1, key2, l1) == 0; +} + +static void dictSdsDestructor(void *privdata, void *val) +{ + DICT_NOTUSED(privdata); + + sdsfree(val); +} + +/* _serverAssert is needed by dict */ +void _serverAssert(const char *estr, const char *file, int line) { + fprintf(stderr, "=== ASSERTION FAILED ==="); + fprintf(stderr, "==> %s:%d '%s' is not true",file,line,estr); + *((char*)-1) = 'x'; +} + /*------------------------------------------------------------------------------ * Help functions *--------------------------------------------------------------------------- */ @@ -1094,7 +1228,7 @@ static redisReply *reconnectingRedisCommand(redisContext *c, const char *fmt, .. *--------------------------------------------------------------------------- */ static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { - struct clusterManagerCommand *cmd = &config.cluster_manager_command; + clusterManagerCommand *cmd = &config.cluster_manager_command; cmd->name = cmdname; cmd->argc = argc; cmd->argv = argc ? 
argv : NULL; @@ -1191,7 +1325,7 @@ static int parseOptions(int argc, char **argv) { int j = i; for (; j < argc; j++) if (argv[j][0] == '-') break; j--; - createClusterManagerCommand(cmd, j - i, argv + i); + createClusterManagerCommand(cmd, j - i, argv + i + 1); i = j; } else if (!strcmp(argv[i],"--cluster") && lastarg) { usage(); @@ -1300,6 +1434,7 @@ static void usage(void) { " --version Output version and exit.\n" "\n" "Cluster Manager Commands:\n" +" Use --cluster help to list all available cluster manager commands.\n" "\n" "Examples:\n" " cat /etc/passwd | redis-cli -x set mypasswd\n" @@ -1628,14 +1763,22 @@ static int evalMode(int argc, char **argv) { * Cluster Manager mode *--------------------------------------------------------------------------- */ +clusterManagerCommandDef clusterManagerCommands[] = { + {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", + "cluster-replicas"}, + {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, + {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, + {"help", clusterManagerCommandHelp, 0, NULL, NULL} +}; + static clusterManagerCommandProc *validateClusterManagerCommand(void) { int i, commands_count = sizeof(clusterManagerCommands) / - sizeof(struct clusterManagerCommandDef); + sizeof(clusterManagerCommandDef); clusterManagerCommandProc *proc = NULL; char *cmdname = config.cluster_manager_command.name; int argc = config.cluster_manager_command.argc; for (i = 0; i < commands_count; i++) { - struct clusterManagerCommandDef cmddef = clusterManagerCommands[i]; + clusterManagerCommandDef cmddef = clusterManagerCommands[i]; if (!strcmp(cmddef.name, cmdname)) { if ((cmddef.arity > 0 && argc != cmddef.arity) || (cmddef.arity < 0 && argc < (cmddef.arity * -1))) { @@ -1650,15 +1793,1117 @@ static clusterManagerCommandProc *validateClusterManagerCommand(void) { return proc; } +static void freeClusterManagerNode(clusterManagerNode *node) { + if (node->context != NULL) redisFree(node->context); 
+ if (node->friends != NULL) { + listIter li; + listNode *ln; + listRewind(node->friends,&li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *fn = ln->value; + freeClusterManagerNode(fn); + } + listRelease(node->friends); + node->friends = NULL; + } + if (node->name != NULL) sdsfree(node->name); + if (node->replicate != NULL) sdsfree(node->replicate); + if ((node->flags & CLUSTER_MANAGER_FLAG_FRIEND) && node->ip) + sdsfree(node->ip); + zfree(node); +} + +static void freeClusterManager(void) { + if (cluster_manager.nodes != NULL) { + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes,&li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + freeClusterManagerNode(n); + } + listRelease(cluster_manager.nodes); + cluster_manager.nodes = NULL; + } +} + +static clusterManagerNode *clusterManagerNewNode(char * ip, int port) { + clusterManagerNode *node = zmalloc(sizeof(*node)); + node->context = NULL; + node->name = NULL; + node->ip = ip; + node->port = port; + node->current_epoch = 0; + node->ping_sent = 0; + node->ping_recv = 0; + node->flags = 0; + node->replicate = NULL; + node->dirty = 0; + node->friends = NULL; + CLUSTER_MANAGER_RESET_SLOTS(node); + return node; +} + +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { + redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); + int is_err = 0; + *err = NULL; + if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((info->len + 1) * sizeof(char)); + strcpy(*err, info->str); + } + freeReplyObject(info); + return 0; + } + int is_cluster = (int) getLongInfoField(info->str, "cluster_enabled"); + freeReplyObject(info); + return is_cluster; +} + +static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) { + redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); + int is_err = 0, is_empty = 1; + *err = NULL; + if (info == NULL || (is_err = (info->type == 
REDIS_REPLY_ERROR))) {
+        if (is_err && err != NULL) {
+            *err = zmalloc((info->len + 1) * sizeof(char));
+            strcpy(*err, info->str);
+        }
+        is_empty = 0;
+        goto result;
+    }
+    if (strstr(info->str, "db0:") != NULL) {
+        is_empty = 0;
+        goto result;
+    }
+    freeReplyObject(info);
+    info = CLUSTER_MANAGER_COMMAND(node, "CLUSTER INFO");
+    if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) {
+        if (is_err && err != NULL) {
+            *err = zmalloc((info->len + 1) * sizeof(char));
+            strcpy(*err, info->str);
+        }
+        is_empty = 0;
+        goto result;
+    }
+    long known_nodes = getLongInfoField(info->str, "cluster_known_nodes");
+    is_empty = (known_nodes == 1);
+result:
+    freeReplyObject(info);
+    return is_empty;
+}
+
+/* Compute the anti-affinity score of the current master/replica layout.
+ *
+ * 'ipnodes' is an array of 'ip_len' node arrays (presumably one array per
+ * host address -- the address of the first non-NULL node of each array is
+ * used as the address of the whole group). For every group a dictionary
+ * keyed by master ID is built; its value is a "types" string holding an 'm'
+ * (always as first character) when the master itself is in the group, plus
+ * one 's' for every replica of that master found in the group.
+ *
+ * Every types string of length >= 2 adds to the score:
+ *   - 10000 * (length - 1) when it starts with 'm' (replicas sharing the
+ *     group with their own master);
+ *   - length otherwise (several replicas of the same master in the group).
+ *
+ * For each such entry the first node of cluster_manager.nodes that
+ * replicates that master and has the group's address is appended to
+ * '*offending'. '*offending' is allocated here with zcalloc() and must be
+ * released by the caller with zfree(). When 'offending_len' is not NULL it
+ * receives the number of collected nodes.
+ *
+ * Returns the score; 0 means no offending placement was found. */
+static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes,
+    int ip_len, clusterManagerNode ***offending, int *offending_len)
+{
+    assert(offending != NULL);
+    int score = 0, i, j;
+    int node_len = cluster_manager.nodes->len;
+    *offending = zcalloc(node_len * sizeof(clusterManagerNode*));
+    clusterManagerNode **offending_p = *offending;
+    dictType dtype = {
+        dictSdsHash,               /* hash function */
+        NULL,                      /* key dup */
+        NULL,                      /* val dup */
+        dictSdsKeyCompare,         /* key compare */
+        NULL,                      /* key destructor */
+        dictSdsDestructor          /* val destructor */
+    };
+    for (i = 0; i < ip_len; i++) {
+        clusterManagerNodeArray *node_array = &(ipnodes[i]);
+        dict *related = dictCreate(&dtype, NULL);
+        char *ip = NULL;
+        for (j = 0; j < node_array->len; j++) {
+            clusterManagerNode *node = node_array->nodes[j];
+            if (node == NULL) continue;
+            if (!ip) ip = node->ip;
+            /* The master ID is always the key, no matter whether this node
+             * is the master itself or one of its replicas. */
+            sds key = (!node->replicate ? node->name : node->replicate);
+            assert(key != NULL);
+            dictEntry *entry = dictFind(related, key);
+            /* Work on a private copy of the stored value: dictReplace()
+             * runs the value destructor on the old string, so modifying it
+             * in place and storing it again would leave the dictionary
+             * pointing to freed memory. */
+            sds types = entry ? sdsdup((sds) dictGetVal(entry)) : sdsempty();
+            if (node->replicate) types = sdscat(types, "s");
+            else {
+                /* 'm' must be the first character of the types string.
+                 * Build a new string instead of formatting 'types' into
+                 * itself. */
+                sds s = sdscatsds(sdsnew("m"), types);
+                sdsfree(types);
+                types = s;
+            }
+            dictReplace(related, key, types);
+        }
+        dictIterator *iter = dictGetIterator(related);
+        dictEntry *entry;
+        while ((entry = dictNext(iter)) != NULL) {
+            sds types = (sds) dictGetVal(entry);
+            sds name = (sds) dictGetKey(entry);
+            int typeslen = sdslen(types);
+            if (typeslen < 2) continue;
+            if (types[0] == 'm') score += (10000 * (typeslen - 1));
+            else score += (1 * typeslen);
+            listIter li;
+            listNode *ln;
+            listRewind(cluster_manager.nodes, &li);
+            while ((ln = listNext(&li)) != NULL) {
+                clusterManagerNode *n = ln->value;
+                if (n->replicate == NULL) continue;
+                if (!strcmp(n->replicate, name) && !strcmp(n->ip, ip)) {
+                    *(offending_p++) = n;
+                    break;
+                }
+            }
+        }
+        if (offending_len != NULL) *offending_len = offending_p - *offending;
+        dictReleaseIterator(iter);
+        dictRelease(related);
+    }
+    return score;
+}
+
+static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes,
+    int ip_len)
+{
+    clusterManagerNode **offenders = NULL, **aux;
+    int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL);
+    if (score == 0) goto cleanup;
+    printf(">>> Trying to optimize slaves allocation for anti-affinity\n");
+    int node_len = cluster_manager.nodes->len;
+    int maxiter = 500 * node_len;
+    srand(time(NULL));
+    while (maxiter > 0) {
+        int offending_len = 0;
+        if (offenders != NULL) {
+            zfree(offenders);
+            offenders = NULL;
+        }
+        score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &offenders,
+                                                   &offending_len);
+        if (score == 0) break;
+        int rand_idx = rand() % offending_len;
+        clusterManagerNode *first = offenders[rand_idx], *second;
+        clusterManagerNode **other_replicas = zcalloc((node_len - 1) *
+                                                      sizeof(*other_replicas));
+        int other_replicas_count = 0;
+        listIter li;
+        listNode *ln;
+        listRewind(cluster_manager.nodes, &li);
+        while ((ln = listNext(&li)) != NULL) {
+            clusterManagerNode *n = ln->value;
+            if (n != first && n->replicate != NULL)
+
other_replicas[other_replicas_count++] = n; + } + if (other_replicas_count == 0) { + zfree(other_replicas); + break; + } + rand_idx = rand() % other_replicas_count; + second = other_replicas[rand_idx]; + char *first_master = first->replicate, + *second_master = second->replicate; + first->replicate = second_master, first->dirty = 1; + second->replicate = first_master, second->dirty = 1; + zfree(aux), aux = NULL; + int new_score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, + &aux, NULL); + if (new_score > score) { + first->replicate = first_master; + second->replicate = second_master; + } + zfree(other_replicas); + maxiter--; + } + zfree(aux), aux = NULL; + score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); + char *msg; + if (score == 0) msg = "[OK] Perfect anti-affinity obtained!"; + else if (score >= 10000) + msg = ("[WARNING] Some slaves are in the same host as their master"); + else + msg=("[WARNING] Some slaves of the same master are in the same host"); + printf("%s\n", msg); +cleanup: + zfree(offenders); + zfree(aux); +} + +static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { + sds slots = sdsempty(); + int first_range_idx = -1, last_slot_idx = -1, i; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + int has_slot = node->slots[i]; + if (has_slot) { + if (first_range_idx == -1) { + if (sdslen(slots)) slots = sdscat(slots, ","); + first_range_idx = i; + slots = sdscatfmt(slots, "[%u", i); + } + last_slot_idx = i; + } else { + if (last_slot_idx >= 0) { + if (first_range_idx == last_slot_idx) + slots = sdscat(slots, "]"); + else slots = sdscatfmt(slots, "-%u]", last_slot_idx); + } + last_slot_idx = -1; + first_range_idx = -1; + } + } + if (last_slot_idx >= 0) { + if (first_range_idx == last_slot_idx) slots = sdscat(slots, "]"); + else slots = sdscatfmt(slots, "-%u]", last_slot_idx); + } + return slots; +} + +static sds clusterManagerNodeInfo(clusterManagerNode *node) { + sds info = sdsempty(); + int is_master = 
!(node->flags & CLUSTER_MANAGER_FLAG_SLAVE);
+    char *role = (is_master ? "M" : "S");
+    sds slots = NULL;
+    if (node->dirty && node->replicate != NULL)
+        info = sdscatfmt(info, "S: %S %s:%u", node->name, node->ip, node->port);
+    else {
+        slots = clusterManagerNodeSlotsString(node);
+        info = sdscatfmt(info, "%s: %S %s:%u\n"
+                               " slots:%S (%u slots) "
+                               "", //TODO: flags string
+                               role, node->name, node->ip, node->port,
+                               slots, node->slots_count);
+        sdsfree(slots);
+    }
+    if (node->replicate != NULL)
+        info = sdscatfmt(info, "\n replicates %S", node->replicate);
+    //else if () {} //TODO: add replicas info
+    return info;
+}
+
+/* Print the description returned by clusterManagerNodeInfo() for every
+ * node in cluster_manager.nodes, one node after the other. */
+static void clusterManagerShowNodes(void) {
+    listIter li;
+    listNode *ln;
+    listRewind(cluster_manager.nodes, &li);
+    while ((ln = listNext(&li)) != NULL) {
+        clusterManagerNode *node = ln->value;
+        sds info = clusterManagerNodeInfo(node);
+        printf("%s\n", info);
+        sdsfree(info);
+    }
+}
+
+/* Print a summary line for every named node of cluster_manager.nodes that
+ * is not flagged as slave: address, the first 8 characters of its ID, the
+ * number of keys reported by DBSIZE, its slots count and the number of
+ * slave nodes replicating it. The totals are printed at the end.
+ *
+ * If DBSIZE fails on a node (no reply, or a reply that is not an integer)
+ * the error is printed and the function returns without printing totals. */
+static void clusterManagerShowInfo(void) {
+    int masters = 0;
+    int keys = 0;
+    listIter li;
+    listNode *ln;
+    listRewind(cluster_manager.nodes, &li);
+    while ((ln = listNext(&li)) != NULL) {
+        clusterManagerNode *node = ln->value;
+        if (!(node->flags & CLUSTER_MANAGER_FLAG_SLAVE)) {
+            if (!node->name) continue;
+            int replicas = 0;
+            int dbsize = -1;
+            char name[9];
+            memcpy(name, node->name, 8);
+            name[8] = '\0';
+            listIter ri;
+            listNode *rn;
+            listRewind(cluster_manager.nodes, &ri);
+            while ((rn = listNext(&ri)) != NULL) {
+                clusterManagerNode *n = rn->value;
+                if (n == node || !(n->flags & CLUSTER_MANAGER_FLAG_SLAVE))
+                    continue;
+                if (n->replicate && !strcmp(n->replicate, node->name))
+                    replicas++;
+            }
+            redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "DBSIZE");
+            /* Both conditions must hold: a NULL reply cannot be
+             * dereferenced, and only an integer reply carries the key
+             * count. Anything else leaves dbsize at -1 and is reported as
+             * an error below. */
+            if (reply != NULL && reply->type == REDIS_REPLY_INTEGER)
+                dbsize = reply->integer;
+            if (dbsize < 0) {
+                char *err = "";
+                if (reply != NULL && reply->type == REDIS_REPLY_ERROR)
+                    err = reply->str;
+                CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err);
+                if (reply != NULL) freeReplyObject(reply);
+
return; + }; + if (reply != NULL) freeReplyObject(reply); + printf("%s:%d (%s...) -> %d keys | %d slots | %d slaves.\n", + node->ip, node->port, name, dbsize, + node->slots_count, replicas); + masters++; + keys += dbsize; + } + } + printf("[OK] %d keys in %d masters.\n", keys, masters); + float keys_per_slot = keys / (float) CLUSTER_MANAGER_SLOTS; + printf("%.2f keys per slot on average.\n", keys_per_slot); +} + +static int clusterManagerAddSlots(clusterManagerNode *node, char**err) +{ + redisReply *reply = NULL; + void *_reply = NULL; + int is_err = 0; + int argc; + sds *argv = NULL; + size_t *argvlen = NULL; + *err = NULL; + sds cmd = sdsnew("CLUSTER ADDSLOTS "); + int i, added = 0; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + int last_slot = (i == (CLUSTER_MANAGER_SLOTS - 1)); + if (node->slots[i]) { + char *fmt = (!last_slot ? "%u " : "%u"); + cmd = sdscatfmt(cmd, fmt, i); + added++; + } + } + if (!added) goto node_cmd_err; + argv = cliSplitArgs(cmd, &argc); + if (argc == 0 || argv == NULL) goto node_cmd_err; + argvlen = zmalloc(argc*sizeof(size_t)); + for (i = 0; i < argc; i++) + argvlen[i] = sdslen(argv[i]); + redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen); + if (redisGetReply(node->context, &_reply) != REDIS_OK) goto node_cmd_err; + reply = (redisReply*) _reply; + if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + strcpy(*err, reply->str); + } + goto node_cmd_err; + } + sdsfree(cmd); + zfree(argvlen); + sdsfreesplitres(argv,argc); + freeReplyObject(reply); + return 1; +node_cmd_err: + sdsfree(cmd); + zfree(argvlen); + if (argv != NULL) sdsfreesplitres(argv,argc); + if (reply != NULL) freeReplyObject(reply); + return 0; +} + +static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { + if (!node->dirty) return 0; + redisReply *reply = NULL; + int is_err = 0; + *err = NULL; + if (node->replicate != NULL) { + 
reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER REPLICATE %s",
+                                        node->replicate);
+        if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) {
+            if (is_err && err != NULL) {
+                *err = zmalloc((reply->len + 1) * sizeof(char));
+                strcpy(*err, reply->str);
+            }
+            goto node_cmd_err;
+        }
+    } else {
+        int added = clusterManagerAddSlots(node, err);
+        if (!added || *err != NULL) goto node_cmd_err;
+    }
+    node->dirty = 0;
+    freeReplyObject(reply);
+    return 1;
+node_cmd_err:
+    freeReplyObject(reply);
+    return 0;
+}
+
+/* Block until clusterManagerIsConfigConsistent() reports success, printing
+ * a dot and sleeping one second between attempts. */
+static void clusterManagerWaitForClusterJoin(void) {
+    printf("Waiting for the cluster to join\n");
+    while(!clusterManagerIsConfigConsistent()) {
+        printf(".");
+        fflush(stdout);
+        sleep(1);
+    }
+    printf("\n");
+}
+
+/* Send CLUSTER NODES to 'node' and fill the node from the reply.
+ *
+ * The reply is parsed in place, one line per node. The first eight space
+ * separated fields are: name, address, flags, master ID, ping sent, ping
+ * received, config epoch and link status; whatever follows is the list of
+ * slot definitions.
+ *
+ * The line flagged "myself" updates 'node' itself (slots included). Any
+ * other line is ignored unless 'opts' contains
+ * CLUSTER_MANAGER_OPT_GETFRIENDS, in which case a new node flagged
+ * CLUSTER_MANAGER_FLAG_FRIEND is created for it and appended to
+ * node->friends.
+ *
+ * '*err' is set to NULL, or to a zmalloc()ed copy of the error message when
+ * the server replies with an error (the caller must zfree() it).
+ * Returns 1 on success, 0 on failure. */
+static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts,
+                                      char **err)
+{
+    redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES");
+    int is_err = 0;
+    *err = NULL;
+    if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) {
+        if (is_err && err != NULL) {
+            *err = zmalloc((reply->len + 1) * sizeof(char));
+            strcpy(*err, reply->str);
+        }
+        goto node_cmd_err;
+    }
+    int getfriends = (opts & CLUSTER_MANAGER_OPT_GETFRIENDS);
+    char *lines = reply->str, *p, *line;
+    while ((p = strstr(lines, "\n")) != NULL) {
+        *p = '\0';
+        line = lines;
+        lines = p + 1;
+        char *name = NULL, *addr = NULL, *flags = NULL, *master_id = NULL,
+             *ping_sent = NULL, *ping_recv = NULL, *config_epoch = NULL,
+             *link_status = NULL;
+        int i = 0;
+        while ((p = strchr(line, ' ')) != NULL) {
+            *p = '\0';
+            char *token = line;
+            line = p + 1;
+            switch(i++){
+            case 0: name = token; break;
+            case 1: addr = token; break;
+            case 2: flags = token; break;
+            case 3: master_id = token; break;
+            case 4: ping_sent = token; break;
+            case 5: ping_recv = token; break;
+            case 6: config_epoch = token; break;
+            case 7: link_status = token; break;
+            }
+            if (i == 8) break; // Slots
+        }
+        if (!flags) goto node_cmd_err;
+        int myself = (strstr(flags, "myself") != NULL);
+        clusterManagerNode *currentNode = NULL;
+        if (myself) {
+            node->flags |= CLUSTER_MANAGER_FLAG_MYSELF;
+            currentNode = node;
+            CLUSTER_MANAGER_RESET_SLOTS(node);
+            if (i == 8) {
+                /* 'remaining' is the number of bytes of the slots section
+                 * that still have to be parsed, separators included. */
+                int remaining = strlen(line);
+                while (remaining) {
+                    p = strchr(line, ' ');
+                    if (p == NULL) p = line + remaining;
+                    remaining -= (p - line);
+                    char *slotsdef = line;
+                    *p = '\0';
+                    if (remaining) {
+                        /* Skip the separator and account for it, otherwise
+                         * line + remaining would point past the end of
+                         * this line on the next iteration. */
+                        line = p + 1;
+                        remaining--;
+                    } else line = p;
+                    /* Use a dedicated pointer for the '-' search so that
+                     * 'p' keeps marking the end of the current token. */
+                    char *dash;
+                    if (slotsdef[0] == '[') {
+                        //TODO: migrating/importing
+                    } else if ((dash = strchr(slotsdef, '-')) != NULL) {
+                        int start, stop;
+                        *dash = '\0';
+                        start = atoi(slotsdef);
+                        stop = atoi(dash + 1);
+                        node->slots_count += (stop - (start - 1));
+                        while (start <= stop) node->slots[start++] = 1;
+                    } else if (p > slotsdef) {
+                        node->slots[atoi(slotsdef)] = 1;
+                        node->slots_count++;
+                    }
+                }
+            }
+            node->dirty = 0;
+        } else if (!getfriends) {
+            if (!(node->flags & CLUSTER_MANAGER_FLAG_MYSELF)) continue;
+            else break;
+        } else {
+            if (addr == NULL) {
+                // TODO: find a better err message
+                fprintf(stderr, "Error: invalid CLUSTER NODES reply\n");
+                goto node_cmd_err;
+            }
+            char *c = strrchr(addr, '@');
+            if (c != NULL) *c = '\0';
+            c = strrchr(addr, ':');
+            if (c == NULL) {
+                fprintf(stderr, "Error: invalid CLUSTER NODES reply\n");
+                goto node_cmd_err;
+            }
+            *c = '\0';
+            int port = atoi(++c);
+            currentNode = clusterManagerNewNode(sdsnew(addr), port);
+            currentNode->flags |= CLUSTER_MANAGER_FLAG_FRIEND;
+            if (node->friends == NULL) node->friends = listCreate();
+            listAddNodeTail(node->friends, currentNode);
+        }
+        if (name != NULL) currentNode->name = sdsnew(name);
+        /* The status flags belong to the node described by this line, so
+         * they are stored on currentNode and not on the queried node. */
+        if (strstr(flags, "noaddr") != NULL)
+            currentNode->flags |= CLUSTER_MANAGER_FLAG_NOADDR;
+        if (strstr(flags, "disconnected") != NULL)
+            currentNode->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT;
+        if (strstr(flags, "fail") != NULL)
+            currentNode->flags |= CLUSTER_MANAGER_FLAG_FAIL;
+        if (strstr(flags, "slave") != NULL) {
+            currentNode->flags |=
CLUSTER_MANAGER_FLAG_SLAVE; + if (master_id != NULL) currentNode->replicate = sdsnew(master_id); + } + if (config_epoch != NULL) + currentNode->current_epoch = atoll(config_epoch); + if (ping_sent != NULL) currentNode->ping_sent = atoll(ping_sent); + if (ping_recv != NULL) currentNode->ping_recv = atoll(ping_recv); + if (!getfriends && myself) break; + } + freeReplyObject(reply); + return 1; +node_cmd_err: + freeReplyObject(reply); + return 0; +} + +static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { + if (node->context == NULL) + node->context = redisConnect(node->ip, node->port); + if (node->context->err) { + fprintf(stderr,"Could not connect to Redis at "); + fprintf(stderr,"%s:%d: %s\n", node->ip, node->port, + node->context->errstr); + freeClusterManagerNode(node); + return 0; + } + opts |= CLUSTER_MANAGER_OPT_GETFRIENDS; + char *e = NULL; + if (!clusterManagerNodeIsCluster(node, &e)) { + char *msg = (e ? e : "is not configured as a cluster node."); + fprintf(stderr, "[ERR] Node %s:%d %s\n", node->ip, node->port, msg); + if (e) zfree(e); + freeClusterManagerNode(node); + return 0; + } + e = NULL; + if (!clusterManagerNodeLoadInfo(node, opts, &e)) { + if (e) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, e); + zfree(e); + } + freeClusterManagerNode(node); + return 0; + } + cluster_manager.nodes = listCreate(); + listAddNodeTail(cluster_manager.nodes, node); + if (node->friends != NULL) { + listIter li; + listNode *ln; + listRewind(node->friends, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *friend = ln->value; + if (!friend->ip || !friend->port) continue; + if (!friend->context) + friend->context = redisConnect(friend->ip, friend->port); + if (friend->context->err) continue; + e = NULL; + if (clusterManagerNodeLoadInfo(friend, 0, &e)) { + if (friend->flags & (CLUSTER_MANAGER_FLAG_NOADDR | + CLUSTER_MANAGER_FLAG_DISCONNECT | + CLUSTER_MANAGER_FLAG_FAIL)) continue; + listAddNodeTail(cluster_manager.nodes, friend); + 
+            } else fprintf(stderr,"[ERR] Unable to load info for node %s:%d\n",
+                           friend->ip, friend->port);
+        }
+        listRelease(node->friends);
+        node->friends = NULL;
+    }
+    return 1;
+}
+
+/* qsort() comparison callback: both arguments point to a C string pointer
+ * and the strings are compared with strcmp(). */
+int clusterManagerSlotCompare(const void *slot1, const void *slot2) {
+    const char **i1 = (const char **)slot1;
+    const char **i2 = (const char **)slot2;
+    return strcmp(*i1, *i2);
+}
+
+/* Build a signature of the slots configuration as seen by 'node'.
+ *
+ * CLUSTER NODES is sent to the node and every line of the reply that has
+ * slot definitions after its eighth field is turned into a string made of
+ * the node name, a ':' and the sorted slot definitions separated by '|'
+ * (definitions starting with '[' are skipped). The per-node strings are
+ * sorted and joined with '|'.
+ *
+ * Returns a new sds string the caller must release with sdsfree(), or NULL
+ * when the command fails or no node with slots is found. */
+static sds clusterManagerGetConfigSignature(clusterManagerNode *node) {
+    sds signature = NULL;
+    int node_count = 0, i = 0, name_len = 0;
+    /* Initialized before the first 'goto cleanup': the cleanup code passes
+     * it to zfree() unconditionally. */
+    char **node_configs = NULL;
+    redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES");
+    if (reply == NULL || reply->type == REDIS_REPLY_ERROR)
+        goto cleanup;
+    char *lines = reply->str, *p, *line;
+    while ((p = strstr(lines, "\n")) != NULL) {
+        i = 0;
+        *p = '\0';
+        line = lines;
+        lines = p + 1;
+        char *nodename = NULL;
+        int tot_size = 0;
+        while ((p = strchr(line, ' ')) != NULL) {
+            *p = '\0';
+            char *token = line;
+            line = p + 1;
+            if (i == 0) {
+                nodename = token;
+                name_len = p - token;
+                /* One extra byte for the ':' written right after the node
+                 * name when the config string is assembled. */
+                tot_size = name_len + 1;
+            }
+            /* Stop right after the eighth field, so that 'line' is left
+             * pointing to the first slot definition instead of having it
+             * consumed as a ninth field. */
+            if (++i == 8) break;
+        }
+        if (i != 8) continue;
+        if (nodename == NULL) continue;
+        /* 'remaining' is the number of bytes of the slots section that
+         * still have to be parsed, separators included. */
+        int remaining = strlen(line);
+        if (remaining == 0) continue;
+        char **slots = NULL;
+        int c = 0;
+        while (remaining) {
+            p = strchr(line, ' ');
+            if (p == NULL) p = line + remaining;
+            int size = (p - line);
+            remaining -= size;
+            tot_size += size;
+            char *slotsdef = line;
+            *p = '\0';
+            if (remaining) {
+                /* Skip the separator and account for it, otherwise
+                 * line + remaining would point past the end of this line
+                 * on the next iteration. */
+                line = p + 1;
+                remaining--;
+            } else line = p;
+            if (slotsdef[0] != '[') {
+                c++;
+                slots = zrealloc(slots, (c * sizeof(char *)));
+                slots[c - 1] = slotsdef;
+            }
+        }
+        if (c > 0) {
+            if (c > 1)
+                qsort(slots, c, sizeof(char *), clusterManagerSlotCompare);
+            node_count++;
+            node_configs =
+                zrealloc(node_configs, (node_count * sizeof(char *)));
+            /* Room for the '|' separators between slot definitions; the
+             * + 1 below is for the terminating null byte. */
+            tot_size += (sizeof(char) * (c - 1));
+            char *cfg = zmalloc((sizeof(char) * tot_size) + 1);
+            memcpy(cfg, nodename, name_len);
+            char *sp = cfg +
name_len; + *(sp++) = ':'; + for (i = 0; i < c; i++) { + if (i > 0) *(sp++) = '|'; + int slen = strlen(slots[i]); + memcpy(sp, slots[i], slen); + sp += slen; + } + *(sp++) = '\0'; + node_configs[node_count - 1] = cfg; + } + zfree(slots); + } + if (node_count > 0) { + if (node_count > 1) { + qsort(node_configs, node_count, sizeof(char *), + clusterManagerSlotCompare); + } + signature = sdsempty(); + for (i = 0; i < node_count; i++) { + if (i > 0) signature = sdscatprintf(signature, "%c", '|'); + signature = sdscatfmt(signature, "%s", node_configs[i]); + } + } +cleanup: + if (reply != NULL) freeReplyObject(reply); + for (i = 0; i < node_count; i++) zfree(node_configs[i]); + zfree(node_configs); + return signature; +} + +static int clusterManagerIsConfigConsistent(void) { + if (cluster_manager.nodes == NULL) return 0; + int consistent = 0; + sds first_cfg = NULL; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + sds cfg = clusterManagerGetConfigSignature(node); + if (cfg == NULL) { + consistent = 0; + break; + } + if (first_cfg == NULL) first_cfg = cfg; + else { + consistent = !sdscmp(first_cfg, cfg); + sdsfree(cfg); + if (!consistent) break; + } + } + if (first_cfg != NULL) sdsfree(first_cfg); + return consistent; +} + +static void clusterManagerCheckCluster(int quiet) { + listNode *ln = listFirst(cluster_manager.nodes); + if (!ln) return; + clusterManagerNode *node = ln->value; + printf(">>> Performing Cluster Check (using node %s:%d)\n", + node->ip, node->port); + if (!quiet) clusterManagerShowNodes(); + if (!clusterManagerIsConfigConsistent()) + printf("[ERR] Nodes don't agree about configuration!\n"); //TODO: in redis-trib this error is added to @errors array + else + printf("[OK] All nodes agree about slots configuration.\n"); + //TODO:check_open_slots + //TODO:check_slots_coverage +} + static void clusterManagerMode(clusterManagerCommandProc *proc) { int 
argc = config.cluster_manager_command.argc; char **argv = config.cluster_manager_command.argv; - if (!proc(argc, argv)) { - sdsfree(config.hostip); - sdsfree(config.mb_delim); - exit(1); - } + cluster_manager.nodes = NULL; + if (!proc(argc, argv)) goto cluster_manager_err; + freeClusterManager(); exit(0); +cluster_manager_err: + freeClusterManager(); + sdsfree(config.hostip); + sdsfree(config.mb_delim); + exit(1); +} + +/* Cluster Manager Commands */ + +static int clusterManagerCommandCreate(int argc, char **argv) { + printf("Cluster Manager: Creating Cluster\n"); + int i, j; + cluster_manager.nodes = listCreate(); + for (i = 0; i < argc; i++) { + char *addr = argv[i]; + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c == NULL) { + fprintf(stderr, "Invalid address format: %s\n", addr); + return 0; + } + *c = '\0'; + char *ip = addr; + int port = atoi(++c); + clusterManagerNode *node = clusterManagerNewNode(ip, port); + node->context = redisConnect(ip, port); + if (node->context->err) { + fprintf(stderr,"Could not connect to Redis at "); + fprintf(stderr,"%s:%d: %s\n", ip, port, node->context->errstr); + freeClusterManagerNode(node); + return 0; + } + char *err = NULL; + if (!clusterManagerNodeIsCluster(node, &err)) { + char *msg = (err ? err : "is not configured as a cluster node."); + fprintf(stderr, "[ERR] Node %s:%d %s\n", ip, port, msg); + if (err) zfree(err); + freeClusterManagerNode(node); + return 0; + } + err = NULL; + if (!clusterManagerNodeLoadInfo(node, 0, &err)) { + if (err) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + zfree(err); + } + freeClusterManagerNode(node); + return 0; + } + err = NULL; + if (!clusterManagerNodeIsEmpty(node, &err)) { + char *msg; + if (err) msg = err; + else { + msg = " is not empty. 
Either the node already knows other " + "nodes (check with CLUSTER NODES) or contains some " + "key in database 0."; + } + fprintf(stderr, "[ERR] Node %s:%d %s\n", ip, port, msg); + if (err) zfree(err); + freeClusterManagerNode(node); + return 0; + } + listAddNodeTail(cluster_manager.nodes, node); + } + int node_len = cluster_manager.nodes->len; + int replicas = config.cluster_manager_command.replicas; + int masters_count = CLUSTER_MANAGER_MASTERS_COUNT(node_len, replicas); + if (masters_count < 3) { + fprintf(stderr, + "*** ERROR: Invalid configuration for cluster creation.\n"); + fprintf(stderr, + "*** Redis Cluster requires at least 3 master nodes.\n"); + fprintf(stderr, + "*** This is not possible with %d nodes and %d replicas per node.", + node_len, replicas); + fprintf(stderr, "\n*** At least %d nodes are required.\n", + (3 * (replicas + 1))); + return 0; + } + printf(">>> Performing hash slots allocation on %d nodes...\n", node_len); + int interleaved_len = 0, ips_len = 0; + clusterManagerNode **interleaved = zcalloc(node_len*sizeof(**interleaved)); + char **ips = zcalloc(node_len * sizeof(char*)); + clusterManagerNodeArray *ip_nodes = zcalloc(node_len * sizeof(*ip_nodes)); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + int found = 0; + for (i = 0; i < ips_len; i++) { + char *ip = ips[i]; + if (!strcmp(ip, n->ip)) { + found = 1; + break; + } + } + if (!found) { + ips[ips_len++] = n->ip; + } + clusterManagerNodeArray *node_array = &(ip_nodes[i]); + if (node_array->nodes == NULL) + CLUSTER_MANAGER_NODEARRAY_INIT(node_array, node_len); + CLUSTER_MANAGER_NODEARRAY_ADD(node_array, n); + } + while (interleaved_len < node_len) { + for (i = 0; i < ips_len; i++) { + clusterManagerNodeArray *node_array = &(ip_nodes[i]); + if (node_array->count > 0) { + clusterManagerNode *n; + CLUSTER_MANAGER_NODEARRAY_SHIFT(node_array, n); + interleaved[interleaved_len++] = n; + 
} + } + } + clusterManagerNode **masters = interleaved; + interleaved += masters_count; + interleaved_len -= masters_count; + float slots_per_node = CLUSTER_MANAGER_SLOTS / (float) masters_count; + long first = 0; + float cursor = 0.0f; + for (i = 0; i < masters_count; i++) { + clusterManagerNode *master = masters[i]; + long last = lround(cursor + slots_per_node - 1); + if (last > CLUSTER_MANAGER_SLOTS || i == (masters_count - 1)) + last = CLUSTER_MANAGER_SLOTS - 1; + if (last < first) last = first; + printf("Master[%d] -> Slots %lu - %lu\n", i, first, last); + master->slots_count = 0; + for (j = first; j <= last; j++) { + master->slots[j] = 1; + master->slots_count++; + } + master->dirty = 1; + first = last + 1; + cursor += slots_per_node; + } + + int assign_unused = 0, available_count = interleaved_len; +assign_replicas: + for (i = 0; i < masters_count; i++) { + clusterManagerNode *master = masters[i]; + int assigned_replicas = 0; + while (assigned_replicas < replicas) { + if (available_count == 0) break; + clusterManagerNode *found = NULL, *slave = NULL; + int firstNodeIdx = -1; + for (j = 0; j < interleaved_len; j++) { + clusterManagerNode *n = interleaved[j]; + if (n == NULL) continue; + if (strcmp(n->ip, master->ip)) { + found = n; + interleaved[j] = NULL; + break; + } + if (firstNodeIdx < 0) firstNodeIdx = j; + } + if (found) slave = found; + else if (firstNodeIdx >= 0) { + slave = interleaved[firstNodeIdx]; + interleaved_len -= (interleaved - (interleaved + firstNodeIdx)); + interleaved += (firstNodeIdx + 1); + } + if (slave != NULL) { + assigned_replicas++; + available_count--; + slave->replicate = sdsnew(master->name); + slave->dirty = 1; + } else break; + printf("Adding replica %s:%d to %s:%d\n", slave->ip, slave->port, + master->ip, master->port); + if (assign_unused) break; + } + } + if (!assign_unused && available_count > 0) { + assign_unused = 1; + printf("Adding extra replicas...\n"); + goto assign_replicas; + } + for (i = 0; i < ips_len; i++) { + 
clusterManagerNodeArray *node_array = ip_nodes + i; + CLUSTER_MANAGER_NODEARRAY_RESET(node_array); + } + clusterManagerOptimizeAntiAffinity(ip_nodes, ips_len); + clusterManagerShowNodes(); + printf("Can I set the above configuration? %s", "(type 'yes' to accept): "); + fflush(stdout); + char buf[4]; + int nread = read(fileno(stdin),buf,4); + buf[3] = '\0'; + if (nread != 0 && !strcmp("yes", buf)) { + printf("\nFlushing configuration!\n"); + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + char *err = NULL; + int flushed = clusterManagerFlushNodeConfig(node, &err); + if (!flushed && node->dirty && !node->replicate) { + if (err != NULL) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + zfree(err); + } + goto cmd_err; + } + } + printf(">>> Nodes configuration updated\n"); + printf(">>> Assign a different config epoch to each node\n"); + int config_epoch = 1; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + redisReply *reply = NULL; + reply = CLUSTER_MANAGER_COMMAND(node, + "cluster set-config-epoch %d", + config_epoch++); + if (reply != NULL) freeReplyObject(reply); + } + printf(">>> Sending CLUSTER MEET messages to join the cluster\n"); + clusterManagerNode *first = NULL; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + if (first == NULL) { + first = node; + continue; + } + redisReply *reply = NULL; + reply = CLUSTER_MANAGER_COMMAND(node, "cluster meet %s %d", + first->ip, first->port); + if (reply != NULL) freeReplyObject(reply); + } + // Give one second for the join to start, in order to avoid that + // waiting for cluster join will find all the nodes agree about + // the config as they are still empty with unassigned slots. + sleep(1); + clusterManagerWaitForClusterJoin(); + // Useful for the replicas //TODO: create a function for this? 
+ listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + if (!node->dirty) continue; + char *err = NULL; + int flushed = clusterManagerFlushNodeConfig(node, &err); + if (!flushed && !node->replicate) { + if (err != NULL) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + zfree(err); + } + goto cmd_err; + } + } + // Reset Nodes + listRewind(cluster_manager.nodes, &li); + clusterManagerNode *first_node = NULL; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + if (!first_node) first_node = node; + else freeClusterManagerNode(node); + } + listEmpty(cluster_manager.nodes); + if (!clusterManagerLoadInfoFromNode(first_node, 0)) goto cmd_err; //TODO: msg? + clusterManagerCheckCluster(0); + } + /* Free everything */ + zfree(masters); + zfree(ips); + for (i = 0; i < node_len; i++) { + clusterManagerNodeArray *node_array = ip_nodes + i; + CLUSTER_MANAGER_NODEARRAY_FREE(node_array); + } + zfree(ip_nodes); + return 1; +cmd_err: + zfree(masters); + zfree(ips); + for (i = 0; i < node_len; i++) { + clusterManagerNodeArray *node_array = ip_nodes + i; + CLUSTER_MANAGER_NODEARRAY_FREE(node_array); + } + zfree(ip_nodes); + return 0; +} + +static int clusterManagerCommandInfo(int argc, char **argv) { + int port = 0; + char *ip = NULL; + if (argc == 1) { + char *addr = argv[0]; + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c != NULL) { + *c = '\0'; + ip = addr; + port = atoi(++c); + } else goto invalid_args; + } else { + ip = argv[0]; + port = atoi(argv[1]); + } + if (!ip || !port) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + clusterManagerShowInfo(); + return 1; +invalid_args: + fprintf(stderr, "Invalid arguments: you need to pass either a valid " + "address (ie. 120.0.0.1:7000) or space separated IP " + "and port (ie. 
120.0.0.1 7000)\n"); + return 0; +} + +static int clusterManagerCommandCheck(int argc, char **argv) { + int port = 0; + char *ip = NULL; + if (argc == 1) { + char *addr = argv[0]; + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c != NULL) { + *c = '\0'; + ip = addr; + port = atoi(++c); + } else goto invalid_args; + } else { + ip = argv[0]; + port = atoi(argv[1]); + } + if (!ip || !port) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + clusterManagerShowInfo(); + clusterManagerCheckCluster(0); + return 1; +invalid_args: + fprintf(stderr, "Invalid arguments: you need to pass either a valid " + "address (ie. 120.0.0.1:7000) or space separated IP " + "and port (ie. 120.0.0.1 7000)\n"); + return 0; +} + +static int clusterManagerCommandHelp(int argc, char **argv) { + UNUSED(argc); + UNUSED(argv); + int commands_count = sizeof(clusterManagerCommands) / + sizeof(clusterManagerCommandDef); + int i = 0, j; + fprintf(stderr, "Cluster Manager Commands:\n"); + for (; i < commands_count; i++) { + clusterManagerCommandDef *def = &(clusterManagerCommands[i]); + int namelen = strlen(def->name), padlen = 15 - namelen; + fprintf(stderr, " %s", def->name); + for (j = 0; j < padlen; j++) fprintf(stderr, " "); + fprintf(stderr, "%s\n", (def->args ? 
def->args : "")); + //TODO: if (def->options) + } + return 0; } /*------------------------------------------------------------------------------ From 01c2efc5a7b359c619d67db412c12c60dcafd90f Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 31 Jan 2018 17:57:16 +0100 Subject: [PATCH 33/66] Added check for open slots (clusterManagerCheckCluster) --- src/redis-cli.c | 162 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 143 insertions(+), 19 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 9943d5753..b20cd31d1 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -74,6 +74,13 @@ (reconnectingRedisCommand(n->context, __VA_ARGS__)) #define CLUSTER_MANAGER_NODE_INFO(n) (CLUSTER_MANAGER_COMMAND(n, "INFO")) +#define CLUSTER_MANAGER_ERROR(err) do { \ + if (cluster_manager.errors == NULL) \ + cluster_manager.errors = listCreate(); \ + listAddNodeTail(cluster_manager.errors, err); \ + fprintf(stderr, "%s\n", (char *) err); \ +} while(0) + #define CLUSTER_MANAGER_RESET_SLOTS(n) do { \ memset(n->slots, 0, sizeof(n->slots)); \ n->slots_count = 0; \ @@ -137,7 +144,14 @@ int spectrum_palette_mono[] = {0,233,234,235,237,239,241,243,245,247,249,251,253 int *spectrum_palette; int spectrum_palette_size; -/* Cluster Manager command info */ +/* Dict Helpers */ + +static uint64_t dictSdsHash(const void *key); +static int dictSdsKeyCompare(void *privdata, const void *key1, + const void *key2); +static void dictSdsDestructor(void *privdata, void *val); + +/* Cluster Manager Command Info */ typedef struct clusterManagerCommand { char *name; int argc; @@ -196,6 +210,7 @@ static struct config { static struct clusterManager { list *nodes; + list *errors; } cluster_manager; typedef struct clusterManagerNode { @@ -212,6 +227,10 @@ typedef struct clusterManagerNode { uint8_t slots[CLUSTER_MANAGER_SLOTS]; int slots_count; list *friends; + sds *migrating; + sds *importing; + int migrating_count; + int importing_count; } clusterManagerNode; typedef struct 
clusterManagerNodeArray { @@ -221,6 +240,15 @@ typedef struct clusterManagerNodeArray { int count; } clusterManagerNodeArray; +static dictType clusterManagerDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + NULL, /* key destructor */ + dictSdsDestructor /* val destructor */ +}; + static clusterManagerNode *clusterManagerNewNode(char *ip, int port); static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, @@ -1810,13 +1838,22 @@ static void freeClusterManagerNode(clusterManagerNode *node) { if (node->replicate != NULL) sdsfree(node->replicate); if ((node->flags & CLUSTER_MANAGER_FLAG_FRIEND) && node->ip) sdsfree(node->ip); + int i; + if (node->migrating != NULL) { + for (i = 0; i < node->migrating_count; i++) sdsfree(node->migrating[i]); + zfree(node->migrating); + } + if (node->importing != NULL) { + for (i = 0; i < node->importing_count; i++) sdsfree(node->importing[i]); + zfree(node->importing); + } zfree(node); } static void freeClusterManager(void) { + listIter li; + listNode *ln; if (cluster_manager.nodes != NULL) { - listIter li; - listNode *ln; listRewind(cluster_manager.nodes,&li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; @@ -1825,9 +1862,18 @@ static void freeClusterManager(void) { listRelease(cluster_manager.nodes); cluster_manager.nodes = NULL; } + if (cluster_manager.errors != NULL) { + listRewind(cluster_manager.errors,&li); + while ((ln = listNext(&li)) != NULL) { + sds err = ln->value; + sdsfree(err); + } + listRelease(cluster_manager.errors); + cluster_manager.errors = NULL; + } } -static clusterManagerNode *clusterManagerNewNode(char * ip, int port) { +static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { clusterManagerNode *node = zmalloc(sizeof(*node)); node->context = NULL; node->name = NULL; @@ -1840,6 +1886,10 @@ static 
clusterManagerNode *clusterManagerNewNode(char * ip, int port) { node->replicate = NULL; node->dirty = 0; node->friends = NULL; + node->migrating = NULL; + node->importing = NULL; + node->migrating_count = 0; + node->importing_count = 0; CLUSTER_MANAGER_RESET_SLOTS(node); return node; } @@ -1902,17 +1952,9 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, int node_len = cluster_manager.nodes->len; *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); clusterManagerNode **offending_p = *offending; - dictType dtype = { - dictSdsHash, /* hash function */ - NULL, /* key dup */ - NULL, /* val dup */ - dictSdsKeyCompare, /* key compare */ - NULL, /* key destructor */ - dictSdsDestructor /* val destructor */ - }; for (i = 0; i < ip_len; i++) { clusterManagerNodeArray *node_array = &(ipnodes[i]); - dict *related = dictCreate(&dtype, NULL); + dict *related = dictCreate(&clusterManagerDictType, NULL); char *ip = NULL; for (j = 0; j < node_array->len; j++) { clusterManagerNode *node = node_array->nodes[j]; @@ -2291,7 +2333,32 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (remaining) line = p + 1; else line = p; if (slotsdef[0] == '[') { - //TODO: migrating/importing + slotsdef++; + if ((p = strstr(slotsdef, "->-"))) { // Migrating + *p = '\0'; + p += 3; + sds slot = sdsnew(slotsdef); + sds dst = sdsnew(p); + node->migrating_count += 2; + node->migrating = zrealloc(node->migrating, + (node->migrating_count * sizeof(sds))); + node->migrating[node->migrating_count - 2] = + slot; + node->migrating[node->migrating_count - 1] = + dst; + } else if ((p = strstr(slotsdef, "-<-"))) {//Importing + *p = '\0'; + p += 3; + sds slot = sdsnew(slotsdef); + sds src = sdsnew(p); + node->importing_count += 2; + node->importing = zrealloc(node->importing, + (node->importing_count * sizeof(sds))); + node->importing[node->importing_count - 2] = + slot; + node->importing[node->importing_count - 1] = + src; + } } else if ((p = 
strchr(slotsdef, '-')) != NULL) { int start, stop; *p = '\0'; @@ -2529,11 +2596,68 @@ static void clusterManagerCheckCluster(int quiet) { printf(">>> Performing Cluster Check (using node %s:%d)\n", node->ip, node->port); if (!quiet) clusterManagerShowNodes(); - if (!clusterManagerIsConfigConsistent()) - printf("[ERR] Nodes don't agree about configuration!\n"); //TODO: in redis-trib this error is added to @errors array - else - printf("[OK] All nodes agree about slots configuration.\n"); - //TODO:check_open_slots + if (!clusterManagerIsConfigConsistent()) { + sds err = sdsnew("[ERR] Nodes don't agree about configuration!"); + CLUSTER_MANAGER_ERROR(err); + } else printf("[OK] All nodes agree about slots configuration.\n"); + // Check open slots + listIter li; + listRewind(cluster_manager.nodes, &li); + int i; + dict *open_slots = NULL; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->migrating != NULL) { + if (open_slots == NULL) + open_slots = dictCreate(&clusterManagerDictType, NULL); + sds errstr = sdsempty(); + errstr = sdscatprintf(errstr, + "[WARNING] Node %s:%d has slots in " + "migrating state ", + n->ip, + n->port); + for (i = 0; i < n->migrating_count; i += 2) { + sds slot = n->migrating[i]; + dictAdd(open_slots, slot, n->migrating[i + 1]); + char *fmt = (i > 0 ? ",%S" : "%S"); + errstr = sdscatfmt(errstr, fmt, slot); + } + errstr = sdscat(errstr, "."); + CLUSTER_MANAGER_ERROR(errstr); + } + if (n->importing != NULL) { + if (open_slots == NULL) + open_slots = dictCreate(&clusterManagerDictType, NULL); + sds errstr = sdsempty(); + errstr = sdscatprintf(errstr, + "[WARNING] Node %s:%d has slots in " + "importing state ", + n->ip, + n->port); + for (i = 0; i < n->importing_count; i += 2) { + sds slot = n->importing[i]; + dictAdd(open_slots, slot, n->importing[i + 1]); + char *fmt = (i > 0 ? 
",%S" : "%S"); + errstr = sdscatfmt(errstr, fmt, slot); + } + errstr = sdscat(errstr, "."); + CLUSTER_MANAGER_ERROR(errstr); + } + } + if (open_slots != NULL) { + dictIterator *iter = dictGetIterator(open_slots); + dictEntry *entry; + sds errstr = sdsnew("[WARNING] The following slots are open: "); + i = 0; + while ((entry = dictNext(iter)) != NULL) { + sds slot = (sds) dictGetKey(entry); + char *fmt = (i++ > 0 ? ",%S" : "%S"); + errstr = sdscatfmt(errstr, fmt, slot); + } + fprintf(stderr, "%s.\n", (char *) errstr); + sdsfree(errstr); + dictRelease(open_slots); + } //TODO:check_slots_coverage } From 4ae6041f7eaf0d9ab882354a69f970f97defd635 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 31 Jan 2018 19:25:02 +0100 Subject: [PATCH 34/66] - Cluster Manager: fixed various memory leaks - Cluster Manager: fixed flags assignment in clusterManagerNodeLoadInfo --- src/redis-cli.c | 54 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index b20cd31d1..a596afca2 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2310,12 +2310,6 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, } if (!flags) goto node_cmd_err; int myself = (strstr(flags, "myself") != NULL); - if (strstr(flags, "noaddr") != NULL) - node->flags |= CLUSTER_MANAGER_FLAG_NOADDR; - if (strstr(flags, "disconnected") != NULL) - node->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT; - if (strstr(flags, "fail") != NULL) - node->flags |= CLUSTER_MANAGER_FLAG_FAIL; clusterManagerNode *currentNode = NULL; if (myself) { node->flags |= CLUSTER_MANAGER_FLAG_MYSELF; @@ -2396,10 +2390,22 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (node->friends == NULL) node->friends = listCreate(); listAddNodeTail(node->friends, currentNode); } - if (name != NULL) currentNode->name = sdsnew(name); + if (name != NULL) { + if (currentNode->name) sdsfree(currentNode->name); + 
currentNode->name = sdsnew(name); + } + if (strstr(flags, "noaddr") != NULL) + currentNode->flags |= CLUSTER_MANAGER_FLAG_NOADDR; + if (strstr(flags, "disconnected") != NULL) + currentNode->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT; + if (strstr(flags, "fail") != NULL) + currentNode->flags |= CLUSTER_MANAGER_FLAG_FAIL; if (strstr(flags, "slave") != NULL) { currentNode->flags |= CLUSTER_MANAGER_FLAG_SLAVE; - if (master_id != NULL) currentNode->replicate = sdsnew(master_id); + if (master_id != NULL) { + if (currentNode->replicate) sdsfree(currentNode->replicate); + currentNode->replicate = sdsnew(master_id); + } } if (config_epoch != NULL) currentNode->current_epoch = atoll(config_epoch); @@ -2442,27 +2448,39 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { freeClusterManagerNode(node); return 0; } + listIter li; + listNode *ln; + if (cluster_manager.nodes != NULL) { + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) + freeClusterManagerNode((clusterManagerNode *) ln->value); + listRelease(cluster_manager.nodes); + } cluster_manager.nodes = listCreate(); listAddNodeTail(cluster_manager.nodes, node); if (node->friends != NULL) { - listIter li; - listNode *ln; listRewind(node->friends, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *friend = ln->value; - if (!friend->ip || !friend->port) continue; + if (!friend->ip || !friend->port) goto invalid_friend; if (!friend->context) friend->context = redisConnect(friend->ip, friend->port); - if (friend->context->err) continue; + if (friend->context->err) goto invalid_friend; e = NULL; if (clusterManagerNodeLoadInfo(friend, 0, &e)) { if (friend->flags & (CLUSTER_MANAGER_FLAG_NOADDR | CLUSTER_MANAGER_FLAG_DISCONNECT | - CLUSTER_MANAGER_FLAG_FAIL)) continue; + CLUSTER_MANAGER_FLAG_FAIL)) + goto invalid_friend; listAddNodeTail(cluster_manager.nodes, friend); - - } else fprintf(stderr,"[ERR] Unable to load info for node %s:%d\n", - friend->ip, 
friend->port); + } else { + fprintf(stderr,"[ERR] Unable to load info for node %s:%d\n", + friend->ip, friend->port); + goto invalid_friend; + } + continue; +invalid_friend: + freeClusterManagerNode(friend); } listRelease(node->friends); node->friends = NULL; @@ -2601,6 +2619,7 @@ static void clusterManagerCheckCluster(int quiet) { CLUSTER_MANAGER_ERROR(err); } else printf("[OK] All nodes agree about slots configuration.\n"); // Check open slots + printf(">>> Check for open slots...\n"); listIter li; listRewind(cluster_manager.nodes, &li); int i; @@ -2836,6 +2855,7 @@ assign_replicas: if (slave != NULL) { assigned_replicas++; available_count--; + if (slave->replicate) sdsfree(slave->replicate); slave->replicate = sdsnew(master->name); slave->dirty = 1; } else break; @@ -2873,7 +2893,7 @@ assign_replicas: zfree(err); } goto cmd_err; - } + } else if (err != NULL) zfree(err); } printf(">>> Nodes configuration updated\n"); printf(">>> Assign a different config epoch to each node\n"); From ef8e711bafc2a3271464cbd67e1ed4a34069628a Mon Sep 17 00:00:00 2001 From: artix Date: Thu, 1 Feb 2018 17:43:36 +0100 Subject: [PATCH 35/66] Cluster Manager: slots coverage check. 
--- src/redis-cli.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index a596afca2..0dede2d9c 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2607,6 +2607,24 @@ static int clusterManagerIsConfigConsistent(void) { return consistent; } +static int clusterManagerGetCoveredSlots(char *all_slots) { + if (cluster_manager.nodes == NULL) return 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + int totslots = 0, i; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + if (node->slots[i] && !all_slots[i]) { + all_slots[i] = 1; + totslots++; + } + } + } + return totslots; +} + static void clusterManagerCheckCluster(int quiet) { listNode *ln = listFirst(cluster_manager.nodes); if (!ln) return; @@ -2677,7 +2695,19 @@ static void clusterManagerCheckCluster(int quiet) { sdsfree(errstr); dictRelease(open_slots); } - //TODO:check_slots_coverage + printf(">>> Check slots coverage...\n"); + char slots[CLUSTER_MANAGER_SLOTS]; + memset(slots, 0, CLUSTER_MANAGER_SLOTS); + int coverage = clusterManagerGetCoveredSlots(slots); + if (coverage == CLUSTER_MANAGER_SLOTS) + printf("[OK] All %d slots covered.\n", CLUSTER_MANAGER_SLOTS); + else { + sds err = sdsempty(); + err = sdscatprintf(err, "[ERR] Not all %d slots are " + "covered by nodes.\n", + CLUSTER_MANAGER_SLOTS); + CLUSTER_MANAGER_ERROR(err); + } } static void clusterManagerMode(clusterManagerCommandProc *proc) { From 9b561af4e1d3815db28975ca1710f6b717cd4f2c Mon Sep 17 00:00:00 2001 From: artix Date: Thu, 1 Feb 2018 20:09:30 +0100 Subject: [PATCH 36/66] Cluster Manager: reply error catch for MEET command --- src/redis-cli.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 0dede2d9c..83638616a 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2949,7 +2949,16 @@ 
assign_replicas: redisReply *reply = NULL; reply = CLUSTER_MANAGER_COMMAND(node, "cluster meet %s %d", first->ip, first->port); - if (reply != NULL) freeReplyObject(reply); + int is_err = 0; + if (reply != NULL) { + if ((is_err = reply->type == REDIS_REPLY_ERROR)) + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, reply->str); + freeReplyObject(reply); + } else { + is_err = 1; + fprintf(stderr, "Failed to send CLUSTER MEET command.\n"); + } + if (is_err) goto cmd_err; } // Give one second for the join to start, in order to avoid that // waiting for cluster join will find all the nodes agree about From 29e4586d40a00a4d81f41d41c597cf5d98ad708e Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 7 Feb 2018 11:29:25 +0100 Subject: [PATCH 37/66] Cluster Manager: cluster is considered consistent if only one node has been found --- src/redis-cli.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 83638616a..19c8fcddb 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2584,7 +2584,10 @@ cleanup: static int clusterManagerIsConfigConsistent(void) { if (cluster_manager.nodes == NULL) return 0; - int consistent = 0; + int consistent = (listLength(cluster_manager.nodes) <= 1); + // If the Cluster has only one node, it's always consistent + // Does it make sense? 
+ if (consistent) return 1; sds first_cfg = NULL; listIter li; listNode *ln; From a936b967fdd3bd06fa714e33803eb86d71c30e35 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 7 Feb 2018 12:02:56 +0100 Subject: [PATCH 38/66] ClusterManager: added replicas count to clusterManagerNode --- src/redis-cli.c | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 19c8fcddb..791b0dd87 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -223,9 +223,11 @@ typedef struct clusterManagerNode { time_t ping_recv; int flags; sds replicate; + list replicas; int dirty; uint8_t slots[CLUSTER_MANAGER_SLOTS]; int slots_count; + int replicas_count; list *friends; sds *migrating; sds *importing; @@ -250,6 +252,7 @@ static dictType clusterManagerDictType = { }; static clusterManagerNode *clusterManagerNewNode(char *ip, int port); +static clusterManagerNode *clusterManagerNodeByName(const char *name); static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err); @@ -265,6 +268,7 @@ static void clusterManagerShowInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); static void clusterManagerWaitForClusterJoin(void); static void clusterManagerCheckCluster(int quiet); + typedef int clusterManagerCommandProc(int argc, char **argv); typedef struct clusterManagerCommandDef { char *name; @@ -1890,10 +1894,31 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { node->importing = NULL; node->migrating_count = 0; node->importing_count = 0; + node->replicas_count = 0; CLUSTER_MANAGER_RESET_SLOTS(node); return node; } +static clusterManagerNode *clusterManagerNodeByName(const char *name) { + if (cluster_manager.nodes == NULL) return NULL; + clusterManagerNode *found = NULL; + sds lcname = sdsempty(); + lcname = sdscpy(lcname, name); + sdstolower(lcname); 
+ listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->name && !sdscmp(n->name, lcname)) { + found = n; + break; + } + } + sdsfree(lcname); + return found; +} + static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); int is_err = 0; @@ -2119,7 +2144,9 @@ static sds clusterManagerNodeInfo(clusterManagerNode *node) { } if (node->replicate != NULL) info = sdscatfmt(info, "\n replicates %S", node->replicate); - //else if () {} //TODO: add replicas info + else if (node->replicas_count) + info = sdscatfmt(info, "\n %U additional replica(s)", + node->replicas_count); return info; } @@ -2485,6 +2512,18 @@ invalid_friend: listRelease(node->friends); node->friends = NULL; } + // Count replicas for each node + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->replicate != NULL) { + clusterManagerNode *master = clusterManagerNodeByName(n->replicate); + if (master == NULL) { + printf("*** WARNING: %s:%d claims to be slave of unknown " + "node ID %s.\n", n->ip, n->port, n->replicate); + } else master->replicas_count++; + } + } return 1; } From 29b0af5a26674a939ab0b989c608003798a543dc Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 9 Feb 2018 13:02:37 +0100 Subject: [PATCH 39/66] Cluster Manager: CLUSTER_MANAGER_NODE_CONNECT macro --- src/redis-cli.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 791b0dd87..4ce3a12dc 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -70,6 +70,8 @@ #define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) #define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) (nodes/(replicas + 1)) +#define CLUSTER_MANAGER_NODE_CONNECT(n) \ + (n->context = redisConnect(n->ip, n->port)); 
#define CLUSTER_MANAGER_COMMAND(n,...) \ (reconnectingRedisCommand(n->context, __VA_ARGS__)) #define CLUSTER_MANAGER_NODE_INFO(n) (CLUSTER_MANAGER_COMMAND(n, "INFO")) @@ -2449,7 +2451,7 @@ node_cmd_err: static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { if (node->context == NULL) - node->context = redisConnect(node->ip, node->port); + CLUSTER_MANAGER_NODE_CONNECT(node); if (node->context->err) { fprintf(stderr,"Could not connect to Redis at "); fprintf(stderr,"%s:%d: %s\n", node->ip, node->port, @@ -2491,7 +2493,7 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { clusterManagerNode *friend = ln->value; if (!friend->ip || !friend->port) goto invalid_friend; if (!friend->context) - friend->context = redisConnect(friend->ip, friend->port); + CLUSTER_MANAGER_NODE_CONNECT(friend); if (friend->context->err) goto invalid_friend; e = NULL; if (clusterManagerNodeLoadInfo(friend, 0, &e)) { @@ -2785,7 +2787,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { char *ip = addr; int port = atoi(++c); clusterManagerNode *node = clusterManagerNewNode(ip, port); - node->context = redisConnect(ip, port); + CLUSTER_MANAGER_NODE_CONNECT(node); if (node->context->err) { fprintf(stderr,"Could not connect to Redis at "); fprintf(stderr,"%s:%d: %s\n", ip, port, node->context->errstr); From 581823f06820b3e23b66e4446bb863c830124f43 Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 13 Feb 2018 12:00:06 +0100 Subject: [PATCH 40/66] Cluster Manager: 'call' command. 
--- src/redis-cli.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/src/redis-cli.c b/src/redis-cli.c index 4ce3a12dc..00b5e90a0 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -286,6 +286,7 @@ static int clusterManagerIsConfigConsistent(void); static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); /* User preferences. */ @@ -1802,6 +1803,8 @@ clusterManagerCommandDef clusterManagerCommands[] = { "cluster-replicas"}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, + {"call", clusterManagerCommandCall, -2, + "host:port command arg arg .. arg", NULL}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} }; @@ -2449,6 +2452,11 @@ node_cmd_err: return 0; } +/* Retrieves info about the cluster using argument 'node' as the starting + * point. All nodes will be loaded inside the cluster_manager.nodes list. + * Warning: if something goes wrong, it will free the starting node before + * returning 0. 
*/ + static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { if (node->context == NULL) CLUSTER_MANAGER_NODE_CONNECT(node); @@ -3115,6 +3123,56 @@ invalid_args: return 0; } +static int clusterManagerCommandCall(int argc, char **argv) { + int port = 0; + char *ip = NULL; + char *addr = argv[0]; + char *c = strrchr(addr, '@'); + int i; + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c != NULL) { + *c = '\0'; + ip = addr; + port = atoi(++c); + } else { + fprintf(stderr, + "Invalid arguments: first agrumnt must be host:port.\n"); + return 0; + } + clusterManagerNode *refnode = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + argc--; + argv++; + size_t *argvlen = zmalloc(argc*sizeof(size_t)); + printf(">>> Calling"); + for (i = 0; i < argc; i++) { + argvlen[i] = strlen(argv[i]); + printf(" %s", argv[i]); + } + printf("\n"); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (!n->context) CLUSTER_MANAGER_NODE_CONNECT(n); + redisReply *reply = NULL; + redisAppendCommandArgv(n->context, argc, (const char **) argv, argvlen); + int status = redisGetReply(n->context, (void **)(&reply)); + if (status != REDIS_OK || reply == NULL ) + printf("%s:%d: Failed!\n", n->ip, n->port); //TODO: better message? 
+ else { + sds formatted_reply = cliFormatReplyTTY(reply, ""); + printf("%s:%d: %s\n", n->ip, n->port, (char *) formatted_reply); + sdsfree(formatted_reply); + } + if (reply != NULL) freeReplyObject(reply); + } + zfree(argvlen); + return 1; +} + static int clusterManagerCommandHelp(int argc, char **argv) { UNUSED(argc); UNUSED(argv); From fec06d1afc20fe99d9485eefad67d046deb5d4d0 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 14 Feb 2018 17:54:46 +0100 Subject: [PATCH 41/66] Cluster Manager: improved cleanup/error handling in various functions --- src/redis-cli.c | 101 +++++++++++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 45 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 00b5e90a0..63a4f69bd 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2220,7 +2220,7 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) { redisReply *reply = NULL; void *_reply = NULL; - int is_err = 0; + int is_err = 0, success = 1; int argc; sds *argv = NULL; size_t *argvlen = NULL; @@ -2235,39 +2235,44 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) added++; } } - if (!added) goto node_cmd_err; + if (!added) { + success = 0; + goto cleanup; + } argv = cliSplitArgs(cmd, &argc); - if (argc == 0 || argv == NULL) goto node_cmd_err; + if (argc == 0 || argv == NULL) { + success = 0; + goto cleanup; + } argvlen = zmalloc(argc*sizeof(size_t)); for (i = 0; i < argc; i++) argvlen[i] = sdslen(argv[i]); redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen); - if (redisGetReply(node->context, &_reply) != REDIS_OK) goto node_cmd_err; + if (redisGetReply(node->context, &_reply) != REDIS_OK) { + success = 1; + goto cleanup; + } reply = (redisReply*) _reply; if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { if (is_err && err != NULL) { *err = zmalloc((reply->len + 1) * sizeof(char)); strcpy(*err, reply->str); } - goto node_cmd_err; + success = 0; + goto cleanup; } - 
sdsfree(cmd); - zfree(argvlen); - sdsfreesplitres(argv,argc); - freeReplyObject(reply); - return 1; -node_cmd_err: +cleanup: sdsfree(cmd); zfree(argvlen); if (argv != NULL) sdsfreesplitres(argv,argc); if (reply != NULL) freeReplyObject(reply); - return 0; + return success; } static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { if (!node->dirty) return 0; redisReply *reply = NULL; - int is_err = 0; + int is_err = 0, success = 1; *err = NULL; if (node->replicate != NULL) { reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER REPLICATE %s", @@ -2277,18 +2282,20 @@ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { *err = zmalloc((reply->len + 1) * sizeof(char)); strcpy(*err, reply->str); } - goto node_cmd_err; + success = 0; + goto cleanup; } } else { int added = clusterManagerAddSlots(node, err); - if (!added || *err != NULL) goto node_cmd_err; + if (!added || *err != NULL) { + success = 0; + goto cleanup; + } } node->dirty = 0; - freeReplyObject(reply); - return 1; -node_cmd_err: - freeReplyObject(reply); - return 0; +cleanup: + if (reply != NULL) freeReplyObject(reply); + return success; } static void clusterManagerWaitForClusterJoin(void) { @@ -2305,14 +2312,15 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err) { redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); - int is_err = 0; + int is_err = 0, success = 1; *err = NULL; if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { if (is_err && err != NULL) { *err = zmalloc((reply->len + 1) * sizeof(char)); strcpy(*err, reply->str); } - goto node_cmd_err; + success = 0; + goto cleanup; } int getfriends = (opts & CLUSTER_MANAGER_OPT_GETFRIENDS); char *lines = reply->str, *p, *line; @@ -2340,7 +2348,10 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, } if (i == 8) break; // Slots } - if (!flags) goto node_cmd_err; + if (!flags) { + success = 0; + goto cleanup; + } int 
myself = (strstr(flags, "myself") != NULL); clusterManagerNode *currentNode = NULL; if (myself) { @@ -2406,14 +2417,16 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (addr == NULL) { // TODO: find a better err message fprintf(stderr, "Error: invalid CLUSTER NODES reply\n"); - goto node_cmd_err; + success = 0; + goto cleanup; } char *c = strrchr(addr, '@'); if (c != NULL) *c = '\0'; c = strrchr(addr, ':'); if (c == NULL) { fprintf(stderr, "Error: invalid CLUSTER NODES reply\n"); - goto node_cmd_err; + success = 0; + goto cleanup; } *c = '\0'; int port = atoi(++c); @@ -2445,11 +2458,9 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (ping_recv != NULL) currentNode->ping_recv = atoll(ping_recv); if (!getfriends && myself) break; } - freeReplyObject(reply); - return 1; -node_cmd_err: - freeReplyObject(reply); - return 0; +cleanup: + if (reply) freeReplyObject(reply); + return success; } /* Retrieves info about the cluster using argument 'node' as the starting @@ -2780,7 +2791,7 @@ cluster_manager_err: static int clusterManagerCommandCreate(int argc, char **argv) { printf("Cluster Manager: Creating Cluster\n"); - int i, j; + int i, j, success = 1; cluster_manager.nodes = listCreate(); for (i = 0; i < argc; i++) { char *addr = argv[i]; @@ -2974,7 +2985,8 @@ assign_replicas: CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); zfree(err); } - goto cmd_err; + success = 0; + goto cleanup; } else if (err != NULL) zfree(err); } printf(">>> Nodes configuration updated\n"); @@ -3010,7 +3022,10 @@ assign_replicas: is_err = 1; fprintf(stderr, "Failed to send CLUSTER MEET command.\n"); } - if (is_err) goto cmd_err; + if (is_err) { + success = 0; + goto cleanup; + } } // Give one second for the join to start, in order to avoid that // waiting for cluster join will find all the nodes agree about @@ -3029,7 +3044,8 @@ assign_replicas: CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); zfree(err); } - goto cmd_err; + success = 0; + 
goto cleanup; } } // Reset Nodes @@ -3041,9 +3057,13 @@ assign_replicas: else freeClusterManagerNode(node); } listEmpty(cluster_manager.nodes); - if (!clusterManagerLoadInfoFromNode(first_node, 0)) goto cmd_err; //TODO: msg? + if (!clusterManagerLoadInfoFromNode(first_node, 0)) { + success = 0; + goto cleanup; //TODO: msg? + } clusterManagerCheckCluster(0); } +cleanup: /* Free everything */ zfree(masters); zfree(ips); @@ -3052,16 +3072,7 @@ assign_replicas: CLUSTER_MANAGER_NODEARRAY_FREE(node_array); } zfree(ip_nodes); - return 1; -cmd_err: - zfree(masters); - zfree(ips); - for (i = 0; i < node_len; i++) { - clusterManagerNodeArray *node_array = ip_nodes + i; - CLUSTER_MANAGER_NODEARRAY_FREE(node_array); - } - zfree(ip_nodes); - return 0; + return success; } static int clusterManagerCommandInfo(int argc, char **argv) { From 513fd614d3c6e4d832f7a0b474ab84b32bb9fc20 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 14 Feb 2018 19:29:28 +0100 Subject: [PATCH 42/66] Cluster Manager: colorized output --- src/redis-cli.c | 130 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 95 insertions(+), 35 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 63a4f69bd..09ad54979 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -67,6 +67,7 @@ #define REDIS_CLI_HISTFILE_DEFAULT ".rediscli_history" #define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE" #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" + #define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) #define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) (nodes/(replicas + 1)) @@ -80,7 +81,7 @@ if (cluster_manager.errors == NULL) \ cluster_manager.errors = listCreate(); \ listAddNodeTail(cluster_manager.errors, err); \ - fprintf(stderr, "%s\n", (char *) err); \ + clusterManagerLogErr("%s\n", (char *) err); \ } while(0) #define CLUSTER_MANAGER_RESET_SLOTS(n) do { \ @@ -124,7 +125,20 @@ } while(0) #define CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err) \ - 
fprintf(stderr,"Node %s:%d replied with error:\n%s\n", n->ip, n->port, err); + clusterManagerLogErr("Node %s:%d replied with error:\n%s\n", \ + n->ip, n->port, err); + +#define clusterManagerLogInfo(...) \ + clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_INFO,__VA_ARGS__) + +#define clusterManagerLogErr(...) \ + clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_ERR,__VA_ARGS__) + +#define clusterManagerLogWarn(...) \ + clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_WARN,__VA_ARGS__) + +#define clusterManagerLogOk(...) \ + clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_SUCCESS,__VA_ARGS__) #define CLUSTER_MANAGER_FLAG_MYSELF 1 << 0 #define CLUSTER_MANAGER_FLAG_SLAVE 1 << 1 @@ -133,7 +147,22 @@ #define CLUSTER_MANAGER_FLAG_DISCONNECT 1 << 4 #define CLUSTER_MANAGER_FLAG_FAIL 1 << 5 -#define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 +#define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0 +#define CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1 +#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 + +#define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 + +#define CLUSTER_MANAGER_LOG_LVL_INFO 1 +#define CLUSTER_MANAGER_LOG_LVL_WARN 2 +#define CLUSTER_MANAGER_LOG_LVL_ERR 3 +#define CLUSTER_MANAGER_LOG_LVL_SUCCESS 4 + +#define LOG_COLOR_BOLD "29;1m" +#define LOG_COLOR_RED "31;1m" +#define LOG_COLOR_GREEN "32;1m" +#define LOG_COLOR_YELLOW "33;1m" +#define LOG_COLOR_RESET "0m" /* --latency-dist palettes. */ int spectrum_palette_color_size = 19; @@ -270,6 +299,7 @@ static void clusterManagerShowInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); static void clusterManagerWaitForClusterJoin(void); static void clusterManagerCheckCluster(int quiet); +static void clusterManagerLog(int level, const char* fmt, ...); typedef int clusterManagerCommandProc(int argc, char **argv); typedef struct clusterManagerCommandDef { @@ -1267,6 +1297,7 @@ static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { cmd->name = cmdname; cmd->argc = argc; cmd->argv = argc ? 
argv : NULL; + if (isColorTerm()) cmd->flags |= CLUSTER_MANAGER_CMD_FLAG_COLOR; } static int parseOptions(int argc, char **argv) { @@ -2042,7 +2073,8 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, clusterManagerNode **offenders = NULL, **aux; int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); if (score == 0) goto cleanup; - printf(">>> Trying to optimize slaves allocation for anti-affinity\n"); + clusterManagerLogInfo(">>> Trying to optimize slaves allocation " + "for anti-affinity\n"); int node_len = cluster_manager.nodes->len; int maxiter = 500 * node_len; srand(time(NULL)); @@ -2091,12 +2123,15 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, zfree(aux), aux = NULL; score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); char *msg; - if (score == 0) msg = "[OK] Perfect anti-affinity obtained!"; + int perfect = (score == 0); + int log_level = (perfect ? CLUSTER_MANAGER_LOG_LVL_SUCCESS : + CLUSTER_MANAGER_LOG_LVL_WARN); + if (perfect) msg = "[OK] Perfect anti-affinity obtained!"; else if (score >= 10000) msg = ("[WARNING] Some slaves are in the same host as their master"); else msg=("[WARNING] Some slaves of the same master are in the same host"); - printf("%s\n", msg); + clusterManagerLog(log_level, "%s\n", msg); cleanup: zfree(offenders); zfree(aux); @@ -2211,7 +2246,7 @@ static void clusterManagerShowInfo(void) { keys += dbsize; } } - printf("[OK] %d keys in %d masters.\n", keys, masters); + clusterManagerLogOk("[OK] %d keys in %d masters.\n", keys, masters); float keys_per_slot = keys / (float) CLUSTER_MANAGER_SLOTS; printf("%.2f keys per slot on average.\n", keys_per_slot); } @@ -2482,7 +2517,7 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { char *e = NULL; if (!clusterManagerNodeIsCluster(node, &e)) { char *msg = (e ? 
e : "is not configured as a cluster node."); - fprintf(stderr, "[ERR] Node %s:%d %s\n", node->ip, node->port, msg); + clusterManagerLogErr("[ERR] Node %s:%d %s\n",node->ip,node->port,msg); if (e) zfree(e); freeClusterManagerNode(node); return 0; @@ -2522,8 +2557,9 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { goto invalid_friend; listAddNodeTail(cluster_manager.nodes, friend); } else { - fprintf(stderr,"[ERR] Unable to load info for node %s:%d\n", - friend->ip, friend->port); + clusterManagerLogErr("[ERR] Unable to load info for " + "node %s:%d\n", + friend->ip, friend->port); goto invalid_friend; } continue; @@ -2692,15 +2728,18 @@ static void clusterManagerCheckCluster(int quiet) { listNode *ln = listFirst(cluster_manager.nodes); if (!ln) return; clusterManagerNode *node = ln->value; - printf(">>> Performing Cluster Check (using node %s:%d)\n", - node->ip, node->port); + clusterManagerLogInfo(">>> Performing Cluster Check (using node %s:%d)\n", + node->ip, node->port); if (!quiet) clusterManagerShowNodes(); if (!clusterManagerIsConfigConsistent()) { sds err = sdsnew("[ERR] Nodes don't agree about configuration!"); CLUSTER_MANAGER_ERROR(err); - } else printf("[OK] All nodes agree about slots configuration.\n"); + } else { + clusterManagerLogOk("[OK] All nodes agree about slots " + "configuration.\n"); + } // Check open slots - printf(">>> Check for open slots...\n"); + clusterManagerLogInfo(">>> Check for open slots...\n"); listIter li; listRewind(cluster_manager.nodes, &li); int i; @@ -2754,17 +2793,18 @@ static void clusterManagerCheckCluster(int quiet) { char *fmt = (i++ > 0 ? 
",%S" : "%S"); errstr = sdscatfmt(errstr, fmt, slot); } - fprintf(stderr, "%s.\n", (char *) errstr); + clusterManagerLogErr("%s.\n", (char *) errstr); sdsfree(errstr); dictRelease(open_slots); } - printf(">>> Check slots coverage...\n"); + clusterManagerLogInfo(">>> Check slots coverage...\n"); char slots[CLUSTER_MANAGER_SLOTS]; memset(slots, 0, CLUSTER_MANAGER_SLOTS); int coverage = clusterManagerGetCoveredSlots(slots); - if (coverage == CLUSTER_MANAGER_SLOTS) - printf("[OK] All %d slots covered.\n", CLUSTER_MANAGER_SLOTS); - else { + if (coverage == CLUSTER_MANAGER_SLOTS) { + clusterManagerLogOk("[OK] All %d slots covered.\n", + CLUSTER_MANAGER_SLOTS); + } else { sds err = sdsempty(); err = sdscatprintf(err, "[ERR] Not all %d slots are " "covered by nodes.\n", @@ -2773,6 +2813,26 @@ static void clusterManagerCheckCluster(int quiet) { } } +static void clusterManagerLog(int level, const char* fmt, ...) { + int use_colors = + (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COLOR); + if (use_colors) { + printf("\033["); + switch (level) { + case CLUSTER_MANAGER_LOG_LVL_INFO: printf(LOG_COLOR_BOLD); break; + case CLUSTER_MANAGER_LOG_LVL_WARN: printf(LOG_COLOR_YELLOW); break; + case CLUSTER_MANAGER_LOG_LVL_ERR: printf(LOG_COLOR_RED); break; + case CLUSTER_MANAGER_LOG_LVL_SUCCESS: printf(LOG_COLOR_GREEN); break; + default: printf(LOG_COLOR_RESET); break; + } + } + va_list ap; + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); + if (use_colors) printf("\033[" LOG_COLOR_RESET); +} + static void clusterManagerMode(clusterManagerCommandProc *proc) { int argc = config.cluster_manager_command.argc; char **argv = config.cluster_manager_command.argv; @@ -2790,7 +2850,6 @@ cluster_manager_err: /* Cluster Manager Commands */ static int clusterManagerCommandCreate(int argc, char **argv) { - printf("Cluster Manager: Creating Cluster\n"); int i, j, success = 1; cluster_manager.nodes = listCreate(); for (i = 0; i < argc; i++) { @@ -2816,7 +2875,7 @@ static int 
clusterManagerCommandCreate(int argc, char **argv) { char *err = NULL; if (!clusterManagerNodeIsCluster(node, &err)) { char *msg = (err ? err : "is not configured as a cluster node."); - fprintf(stderr, "[ERR] Node %s:%d %s\n", ip, port, msg); + clusterManagerLogErr("[ERR] Node %s:%d %s\n", ip, port, msg); if (err) zfree(err); freeClusterManagerNode(node); return 0; @@ -2835,11 +2894,11 @@ static int clusterManagerCommandCreate(int argc, char **argv) { char *msg; if (err) msg = err; else { - msg = " is not empty. Either the node already knows other " + msg = "is not empty. Either the node already knows other " "nodes (check with CLUSTER NODES) or contains some " "key in database 0."; } - fprintf(stderr, "[ERR] Node %s:%d %s\n", ip, port, msg); + clusterManagerLogErr("[ERR] Node %s:%d %s\n", ip, port, msg); if (err) zfree(err); freeClusterManagerNode(node); return 0; @@ -2850,18 +2909,17 @@ static int clusterManagerCommandCreate(int argc, char **argv) { int replicas = config.cluster_manager_command.replicas; int masters_count = CLUSTER_MANAGER_MASTERS_COUNT(node_len, replicas); if (masters_count < 3) { - fprintf(stderr, - "*** ERROR: Invalid configuration for cluster creation.\n"); - fprintf(stderr, - "*** Redis Cluster requires at least 3 master nodes.\n"); - fprintf(stderr, + clusterManagerLogErr( + "*** ERROR: Invalid configuration for cluster creation.\n" + "*** Redis Cluster requires at least 3 master nodes.\n" "*** This is not possible with %d nodes and %d replicas per node.", node_len, replicas); - fprintf(stderr, "\n*** At least %d nodes are required.\n", - (3 * (replicas + 1))); + clusterManagerLogErr("\n*** At least %d nodes are required.\n", + 3 * (replicas + 1)); return 0; } - printf(">>> Performing hash slots allocation on %d nodes...\n", node_len); + clusterManagerLogInfo(">>> Performing hash slots allocation " + "on %d nodes...\n", node_len); int interleaved_len = 0, ips_len = 0; clusterManagerNode **interleaved = 
zcalloc(node_len*sizeof(**interleaved)); char **ips = zcalloc(node_len * sizeof(char*)); @@ -2989,8 +3047,9 @@ assign_replicas: goto cleanup; } else if (err != NULL) zfree(err); } - printf(">>> Nodes configuration updated\n"); - printf(">>> Assign a different config epoch to each node\n"); + clusterManagerLogInfo(">>> Nodes configuration updated\n"); + clusterManagerLogInfo(">>> Assign a different config epoch to " + "each node\n"); int config_epoch = 1; listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { @@ -3001,7 +3060,8 @@ assign_replicas: config_epoch++); if (reply != NULL) freeReplyObject(reply); } - printf(">>> Sending CLUSTER MEET messages to join the cluster\n"); + clusterManagerLogInfo(">>> Sending CLUSTER MEET messages to join " + "the cluster\n"); clusterManagerNode *first = NULL; listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { @@ -3156,7 +3216,7 @@ static int clusterManagerCommandCall(int argc, char **argv) { argc--; argv++; size_t *argvlen = zmalloc(argc*sizeof(size_t)); - printf(">>> Calling"); + clusterManagerLogInfo(">>> Calling"); for (i = 0; i < argc; i++) { argvlen[i] = strlen(argv[i]); printf(" %s", argv[i]); From d25f04d62d33c7cfd32d83ad2d4bb55b9de83629 Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 20 Feb 2018 12:01:13 +0100 Subject: [PATCH 43/66] - Fixed bug in clusterManagerGetAntiAffinityScore - Code improvements --- src/redis-cli.c | 57 ++++++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 09ad54979..6a5279d2e 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -505,7 +505,6 @@ static int dictSdsKeyCompare(void *privdata, const void *key1, static void dictSdsDestructor(void *privdata, void *val) { DICT_NOTUSED(privdata); - sdsfree(val); } @@ -2008,11 +2007,13 @@ result: static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, int ip_len, clusterManagerNode 
***offending, int *offending_len) { - assert(offending != NULL); int score = 0, i, j; int node_len = cluster_manager.nodes->len; - *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); - clusterManagerNode **offending_p = *offending; + clusterManagerNode **offending_p = NULL; + if (offending != NULL) { + *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); + offending_p = *offending; + } for (i = 0; i < ip_len; i++) { clusterManagerNodeArray *node_array = &(ipnodes[i]); dict *related = dictCreate(&clusterManagerDictType, NULL); @@ -2021,23 +2022,21 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, clusterManagerNode *node = node_array->nodes[j]; if (node == NULL) continue; if (!ip) ip = node->ip; - sds types; - if (!node->replicate) { - assert(node->name != NULL); - dictEntry *entry = dictFind(related, node->name); - if (entry) types = (sds) dictGetVal(entry); - else types = sdsempty(); - types = sdscatprintf(types, "m%s", types); - dictReplace(related, node->name, types); - } else { - dictEntry *entry = dictFind(related, node->replicate); - if (entry) types = (sds) dictGetVal(entry); - else { - types = sdsempty(); - dictAdd(related, node->replicate, types); - } - sdscat(types, "s"); + sds types, otypes; + // We always use the Master ID as key + sds key = (!node->replicate ? node->name : node->replicate); + assert(key != NULL); + dictEntry *entry = dictFind(related, key); + if (entry) otypes = (sds) dictGetVal(entry); + else { + otypes = sdsempty(); + dictAdd(related, key, otypes); } + // Master type 'm' is always set as the first character of the + // types string. 
+ if (!node->replicate) types = sdscatprintf(otypes, "m%s", otypes); + else types = sdscat(otypes, "s"); + if (types != otypes) dictReplace(related, key, types); } dictIterator *iter = dictGetIterator(related); dictEntry *entry; @@ -2048,6 +2047,7 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, if (typeslen < 2) continue; if (types[0] == 'm') score += (10000 * (typeslen - 1)); else score += (1 * typeslen); + if (offending == NULL) continue; listIter li; listNode *ln; listRewind(cluster_manager.nodes, &li); @@ -2056,11 +2056,12 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, if (n->replicate == NULL) continue; if (!strcmp(n->replicate, name) && !strcmp(n->ip, ip)) { *(offending_p++) = n; + if (offending_len != NULL) (*offending_len)++; break; } } } - if (offending_len != NULL) *offending_len = offending_p - *offending; + //if (offending_len != NULL) *offending_len = offending_p - *offending; dictReleaseIterator(iter); dictRelease(related); } @@ -2070,8 +2071,8 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, int ip_len) { - clusterManagerNode **offenders = NULL, **aux; - int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); + clusterManagerNode **offenders = NULL; + int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, NULL, NULL); if (score == 0) goto cleanup; clusterManagerLogInfo(">>> Trying to optimize slaves allocation " "for anti-affinity\n"); @@ -2088,7 +2089,8 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, &offending_len); if (score == 0) break; int rand_idx = rand() % offending_len; - clusterManagerNode *first = offenders[rand_idx], *second; + clusterManagerNode *first = offenders[rand_idx], + *second = NULL; clusterManagerNode **other_replicas = zcalloc((node_len - 1) * sizeof(*other_replicas)); int 
other_replicas_count = 0; @@ -2110,9 +2112,8 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, *second_master = second->replicate; first->replicate = second_master, first->dirty = 1; second->replicate = first_master, second->dirty = 1; - zfree(aux), aux = NULL; int new_score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, - &aux, NULL); + NULL, NULL); if (new_score > score) { first->replicate = first_master; second->replicate = second_master; @@ -2120,8 +2121,7 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, zfree(other_replicas); maxiter--; } - zfree(aux), aux = NULL; - score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); + score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, NULL, NULL); char *msg; int perfect = (score == 0); int log_level = (perfect ? CLUSTER_MANAGER_LOG_LVL_SUCCESS : @@ -2134,7 +2134,6 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, clusterManagerLog(log_level, "%s\n", msg); cleanup: zfree(offenders); - zfree(aux); } static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { From 78817f01b6477bd6bf417244e0b08845a46ed8c9 Mon Sep 17 00:00:00 2001 From: artix Date: Thu, 22 Feb 2018 18:32:39 +0100 Subject: [PATCH 44/66] Cluster Manager: - Almost all Cluster Manager related code moved to the same section. - Many macros converted to functions - Added various comments - Little code restyling --- src/redis-cli.c | 460 ++++++++++++++++++++++++++++-------------------- 1 file changed, 271 insertions(+), 189 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 6a5279d2e..66fc4d183 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -75,54 +75,8 @@ (n->context = redisConnect(n->ip, n->port)); #define CLUSTER_MANAGER_COMMAND(n,...) 
\ (reconnectingRedisCommand(n->context, __VA_ARGS__)) -#define CLUSTER_MANAGER_NODE_INFO(n) (CLUSTER_MANAGER_COMMAND(n, "INFO")) -#define CLUSTER_MANAGER_ERROR(err) do { \ - if (cluster_manager.errors == NULL) \ - cluster_manager.errors = listCreate(); \ - listAddNodeTail(cluster_manager.errors, err); \ - clusterManagerLogErr("%s\n", (char *) err); \ -} while(0) - -#define CLUSTER_MANAGER_RESET_SLOTS(n) do { \ - memset(n->slots, 0, sizeof(n->slots)); \ - n->slots_count = 0; \ -} while(0) - -#define CLUSTER_MANAGER_NODEARRAY_INIT(array, alloc_len) do { \ - array->nodes = zcalloc(alloc_len * sizeof(clusterManagerNode*));\ - array->alloc = array->nodes; \ - array->len = alloc_len; \ - array->count = 0; \ -} while(0) - -#define CLUSTER_MANAGER_NODEARRAY_RESET(array) do { \ - if (array->nodes > array->alloc) { \ - array->len = array->nodes - array->alloc; \ - array->nodes = array->alloc; \ - array->count = 0; \ - int i = 0; \ - for(; i < array->len; i++) { \ - if (array->nodes[i] != NULL) array->count++;\ - } \ - } \ -} while(0) - -#define CLUSTER_MANAGER_NODEARRAY_FREE(array) zfree(array->alloc) - -#define CLUSTER_MANAGER_NODEARRAY_SHIFT(array, nodeptr) do {\ - assert(array->nodes < (array->nodes + array->len)); \ - if (*array->nodes != NULL) array->count--; \ - nodeptr = *array->nodes; \ - array->nodes++; \ - array->len--; \ -} while(0) - -#define CLUSTER_MANAGER_NODEARRAY_ADD(array, nodeptr) do { \ - assert(array->nodes < (array->nodes + array->len)); \ - assert(nodeptr != NULL); \ - array->nodes[array->count++] = nodeptr; \ -} while(0) +#define CLUSTER_MANAGER_NODE_ARRAY_FREE(array) zfree(array->alloc) #define CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err) \ clusterManagerLogErr("Node %s:%d replied with error:\n%s\n", \ @@ -190,6 +144,7 @@ typedef struct clusterManagerCommand { int flags; int replicas; } clusterManagerCommand; +static void createClusterManagerCommand(char *cmdname, int argc, char **argv); static redisContext *context; @@ -237,88 +192,6 @@ static struct 
config { clusterManagerCommand cluster_manager_command; } config; -/* Cluster Manager */ - -static struct clusterManager { - list *nodes; - list *errors; -} cluster_manager; - -typedef struct clusterManagerNode { - redisContext *context; - sds name; - char *ip; - int port; - uint64_t current_epoch; - time_t ping_sent; - time_t ping_recv; - int flags; - sds replicate; - list replicas; - int dirty; - uint8_t slots[CLUSTER_MANAGER_SLOTS]; - int slots_count; - int replicas_count; - list *friends; - sds *migrating; - sds *importing; - int migrating_count; - int importing_count; -} clusterManagerNode; - -typedef struct clusterManagerNodeArray { - clusterManagerNode **nodes; - clusterManagerNode **alloc; - int len; - int count; -} clusterManagerNodeArray; - -static dictType clusterManagerDictType = { - dictSdsHash, /* hash function */ - NULL, /* key dup */ - NULL, /* val dup */ - dictSdsKeyCompare, /* key compare */ - NULL, /* key destructor */ - dictSdsDestructor /* val destructor */ -}; - -static clusterManagerNode *clusterManagerNewNode(char *ip, int port); -static clusterManagerNode *clusterManagerNodeByName(const char *name); -static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); -static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, - char **err); -static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts); -static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err); -static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, - int ip_len, clusterManagerNode ***offending, int *offending_len); -static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, - int ip_len); -static sds clusterManagerNodeInfo(clusterManagerNode *node); -static void clusterManagerShowNodes(void); -static void clusterManagerShowInfo(void); -static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); -static void clusterManagerWaitForClusterJoin(void); 
-static void clusterManagerCheckCluster(int quiet); -static void clusterManagerLog(int level, const char* fmt, ...); - -typedef int clusterManagerCommandProc(int argc, char **argv); -typedef struct clusterManagerCommandDef { - char *name; - clusterManagerCommandProc *proc; - int arity; - char *args; - char *options; -} clusterManagerCommandDef; -static int clusterManagerIsConfigConsistent(void); - -/* Cluster Manager commands. */ - -static int clusterManagerCommandCreate(int argc, char **argv); -static int clusterManagerCommandInfo(int argc, char **argv); -static int clusterManagerCommandCheck(int argc, char **argv); -static int clusterManagerCommandCall(int argc, char **argv); -static int clusterManagerCommandHelp(int argc, char **argv); - /* User preferences. */ static struct pref { int hints; @@ -1291,14 +1164,6 @@ static redisReply *reconnectingRedisCommand(redisContext *c, const char *fmt, .. * User interface *--------------------------------------------------------------------------- */ -static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { - clusterManagerCommand *cmd = &config.cluster_manager_command; - cmd->name = cmdname; - cmd->argc = argc; - cmd->argv = argc ? argv : NULL; - if (isColorTerm()) cmd->flags |= CLUSTER_MANAGER_CMD_FLAG_COLOR; -} - static int parseOptions(int argc, char **argv) { int i; @@ -1828,6 +1693,100 @@ static int evalMode(int argc, char **argv) { * Cluster Manager mode *--------------------------------------------------------------------------- */ +/* The Cluster Manager global structure */ +static struct clusterManager { + list *nodes; /* List of nodes int he configuration. 
*/ + list *errors; +} cluster_manager; + +typedef struct clusterManagerNode { + redisContext *context; + sds name; + char *ip; + int port; + uint64_t current_epoch; + time_t ping_sent; + time_t ping_recv; + int flags; + sds replicate; /* Master ID if node is a slave */ + list replicas; + int dirty; /* Node has changes that can be flushed */ + uint8_t slots[CLUSTER_MANAGER_SLOTS]; + int slots_count; + int replicas_count; + list *friends; + sds *migrating; + sds *importing; + int migrating_count; + int importing_count; +} clusterManagerNode; + +/* Data structure used to represent a sequence of nodes. */ +typedef struct clusterManagerNodeArray { + clusterManagerNode **nodes; /* Actual nodes array */ + clusterManagerNode **alloc; /* Pointer to the allocated memory */ + int len; /* Actual length of the array */ + int count; /* Non-NULL nodes count */ +} clusterManagerNodeArray; + +static dictType clusterManagerDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + NULL, /* key destructor */ + dictSdsDestructor /* val destructor */ +}; + +typedef int clusterManagerCommandProc(int argc, char **argv); + +/* Cluster Manager helper functions */ + +static clusterManagerNode *clusterManagerNewNode(char *ip, int port); +static clusterManagerNode *clusterManagerNodeByName(const char *name); +static void clusterManagerNodeResetSlots(clusterManagerNode *node); +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); +static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, + char **err); +static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts); +static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err); +static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, + int ip_count, clusterManagerNode ***offending, int *offending_len); +static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray 
*ipnodes, + int ip_count); +static sds clusterManagerNodeInfo(clusterManagerNode *node); +static void clusterManagerShowNodes(void); +static void clusterManagerShowInfo(void); +static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); +static void clusterManagerWaitForClusterJoin(void); +static void clusterManagerCheckCluster(int quiet); +static void clusterManagerLog(int level, const char* fmt, ...); +static int clusterManagerIsConfigConsistent(void); +static void clusterManagerOnError(sds err); +static void clusterManagerNodeArrayInit(clusterManagerNodeArray *array, + int len); +static void clusterManagerNodeArrayReset(clusterManagerNodeArray *array); +static void clusterManagerNodeArrayShift(clusterManagerNodeArray *array, + clusterManagerNode **nodeptr); +static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, + clusterManagerNode *node); + +/* Cluster Manager commands. */ + +static int clusterManagerCommandCreate(int argc, char **argv); +static int clusterManagerCommandInfo(int argc, char **argv); +static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandCall(int argc, char **argv); +static int clusterManagerCommandHelp(int argc, char **argv); + +typedef struct clusterManagerCommandDef { + char *name; + clusterManagerCommandProc *proc; + int arity; + char *args; + char *options; +} clusterManagerCommandDef; + clusterManagerCommandDef clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", "cluster-replicas"}, @@ -1838,6 +1797,16 @@ clusterManagerCommandDef clusterManagerCommands[] = { {"help", clusterManagerCommandHelp, 0, NULL, NULL} }; + +static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { + clusterManagerCommand *cmd = &config.cluster_manager_command; + cmd->name = cmdname; + cmd->argc = argc; + cmd->argv = argc ? 
argv : NULL; + if (isColorTerm()) cmd->flags |= CLUSTER_MANAGER_CMD_FLAG_COLOR; +} + + static clusterManagerCommandProc *validateClusterManagerCommand(void) { int i, commands_count = sizeof(clusterManagerCommands) / sizeof(clusterManagerCommandDef); @@ -1930,7 +1899,7 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { node->migrating_count = 0; node->importing_count = 0; node->replicas_count = 0; - CLUSTER_MANAGER_RESET_SLOTS(node); + clusterManagerNodeResetSlots(node); return node; } @@ -1954,41 +1923,49 @@ static clusterManagerNode *clusterManagerNodeByName(const char *name) { return found; } -static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { - redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); - int is_err = 0; - *err = NULL; - if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { +static void clusterManagerNodeResetSlots(clusterManagerNode *node) { + memset(node->slots, 0, sizeof(node->slots)); + node->slots_count = 0; +} + +static redisReply *clusterManagerGetNodeRedisInfo(clusterManagerNode *node, + char **err) +{ + redisReply *info = CLUSTER_MANAGER_COMMAND(node, "INFO"); + if (err != NULL) *err = NULL; + if (info == NULL) return NULL; + if (info->type == REDIS_REPLY_ERROR) { + if (err != NULL) { *err = zmalloc((info->len + 1) * sizeof(char)); strcpy(*err, info->str); } freeReplyObject(info); - return 0; + return NULL; } + return info; +} + +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { + redisReply *info = clusterManagerGetNodeRedisInfo(node, err); + if (info == NULL) return 0; int is_cluster = (int) getLongInfoField(info->str, "cluster_enabled"); freeReplyObject(info); return is_cluster; } +/* Checks whether the node is empty. 
Node is considered not-empty if it has + * some key or if it already knows other nodes */ static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) { - redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); + redisReply *info = clusterManagerGetNodeRedisInfo(node, err); int is_err = 0, is_empty = 1; - *err = NULL; - if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { - *err = zmalloc((info->len + 1) * sizeof(char)); - strcpy(*err, info->str); - } - is_empty = 0; - goto result; - } + if (info == NULL) return 0; if (strstr(info->str, "db0:") != NULL) { is_empty = 0; goto result; } freeReplyObject(info); info = CLUSTER_MANAGER_COMMAND(node, "CLUSTER INFO"); + if (err != NULL) *err = NULL; if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { if (is_err && err != NULL) { *err = zmalloc((info->len + 1) * sizeof(char)); @@ -2004,8 +1981,37 @@ result: return is_empty; } +/* Return the anti-affinity score, which is a measure of the amount of + * violations of anti-affinity in the current cluster layout, that is, how + * badly the masters and slaves are distributed in the different IP + * addresses so that slaves of the same master are not in the master + * host and are also in different hosts. + * + * The score is calculated as follows: + * + * SAME_AS_MASTER = 10000 * each slave in the same IP of its master. + * SAME_AS_SLAVE = 1 * each slave having the same IP as another slave + of the same master. + * FINAL_SCORE = SAME_AS_MASTER + SAME_AS_SLAVE + * + * So a greater score means a worse anti-affinity level, while zero + * means perfect anti-affinity. + * + * The anti affinity optimizator will try to get a score as low as + * possible. Since we do not want to sacrifice the fact that slaves should + * not be in the same host as the master, we assign 10000 times the score + * to this violation, so that we'll optimize for the second factor only + * if it does not impact the first one. 
+ * + * The ipnodes argument is an array of clusterManagerNodeArray, one for + * each IP, while ip_count is the total number of IPs in the configuration. + * + * The function returns the above score, and the list of + * offending slaves can be stored into the 'offending' argument, + * so that the optimizer can try changing the configuration of the + * slaves violating the anti-affinity goals. */ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, - int ip_len, clusterManagerNode ***offending, int *offending_len) + int ip_count, clusterManagerNode ***offending, int *offending_len) { int score = 0, i, j; int node_len = cluster_manager.nodes->len; @@ -2014,7 +2020,10 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); offending_p = *offending; } - for (i = 0; i < ip_len; i++) { + /* For each set of nodes in the same host, split by + * related nodes (masters and slaves which are involved in + * replication of each other) */ + for (i = 0; i < ip_count; i++) { clusterManagerNodeArray *node_array = &(ipnodes[i]); dict *related = dictCreate(&clusterManagerDictType, NULL); char *ip = NULL; @@ -2038,6 +2047,8 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, else types = sdscat(otypes, "s"); if (types != otypes) dictReplace(related, key, types); } + /* Now it's trivial to check, for each related group having the + * same host, what is their local score. */ dictIterator *iter = dictGetIterator(related); dictEntry *entry; while ((entry = dictNext(iter)) != NULL) { @@ -2048,6 +2059,7 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, if (types[0] == 'm') score += (10000 * (typeslen - 1)); else score += (1 * typeslen); if (offending == NULL) continue; + /* Populate the list of offending nodes. 
*/ listIter li; listNode *ln; listRewind(cluster_manager.nodes, &li); @@ -2069,15 +2081,16 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, } static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, - int ip_len) + int ip_count) { clusterManagerNode **offenders = NULL; - int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, NULL, NULL); + int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_count, + NULL, NULL); if (score == 0) goto cleanup; clusterManagerLogInfo(">>> Trying to optimize slaves allocation " "for anti-affinity\n"); int node_len = cluster_manager.nodes->len; - int maxiter = 500 * node_len; + int maxiter = 500 * node_len; // Effort is proportional to cluster size... srand(time(NULL)); while (maxiter > 0) { int offending_len = 0; @@ -2085,9 +2098,14 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, zfree(offenders); offenders = NULL; } - score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &offenders, + score = clusterManagerGetAntiAffinityScore(ipnodes, + ip_count, + &offenders, &offending_len); - if (score == 0) break; + if (score == 0) break; // Optimal anti affinity reached + /* We'll try to randomly swap a slave's assigned master causing + * an affinity problem with another random slave, to see if we + * can improve the affinity. */ int rand_idx = rand() % offending_len; clusterManagerNode *first = offenders[rand_idx], *second = NULL; @@ -2112,8 +2130,12 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, *second_master = second->replicate; first->replicate = second_master, first->dirty = 1; second->replicate = first_master, second->dirty = 1; - int new_score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, + int new_score = clusterManagerGetAntiAffinityScore(ipnodes, + ip_count, NULL, NULL); + /* If the change actually makes thing worse, revert. 
Otherwise + * leave as it is becuase the best solution may need a few + * combined swaps. */ if (new_score > score) { first->replicate = first_master; second->replicate = second_master; @@ -2121,7 +2143,7 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, zfree(other_replicas); maxiter--; } - score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, NULL, NULL); + score = clusterManagerGetAntiAffinityScore(ipnodes, ip_count, NULL, NULL); char *msg; int perfect = (score == 0); int log_level = (perfect ? CLUSTER_MANAGER_LOG_LVL_SUCCESS : @@ -2136,6 +2158,7 @@ cleanup: zfree(offenders); } +/* Return a representable string of the node's slots */ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { sds slots = sdsempty(); int first_range_idx = -1, last_slot_idx = -1, i; @@ -2303,11 +2326,13 @@ cleanup: return success; } +/* Flush the dirty node configuration by calling replicate for slaves or + * adding the slots for masters. */ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { if (!node->dirty) return 0; redisReply *reply = NULL; int is_err = 0, success = 1; - *err = NULL; + if (err != NULL) *err = NULL; if (node->replicate != NULL) { reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER REPLICATE %s", node->replicate); @@ -2317,14 +2342,15 @@ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { strcpy(*err, reply->str); } success = 0; + /* If the cluster did not already joined it is possible that + * the slave does not know the master node yet. So on errors + * we return ASAP leaving the dirty flag set, to flush the + * config later. 
*/ goto cleanup; } } else { int added = clusterManagerAddSlots(node, err); - if (!added || *err != NULL) { - success = 0; - goto cleanup; - } + if (!added || *err != NULL) success = 0; } node->dirty = 0; cleanup: @@ -2342,6 +2368,11 @@ static void clusterManagerWaitForClusterJoin(void) { printf("\n"); } +/* Load node's cluster configuration by calling "CLUSTER NODES" command. + * Node's configuration (name, replicate, slots, ...) is then updated. + * If CLUSTER_MANAGER_OPT_GETFRIENDS flag is set into 'opts' argument, + * and node already knows other nodes, the node's friends list is populated + * with the other nodes info. */ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err) { @@ -2391,7 +2422,7 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (myself) { node->flags |= CLUSTER_MANAGER_FLAG_MYSELF; currentNode = node; - CLUSTER_MANAGER_RESET_SLOTS(node); + clusterManagerNodeResetSlots(node); if (i == 8) { int remaining = strlen(line); //TODO: just while(remaining) && assign p inside the block @@ -2501,7 +2532,6 @@ cleanup: * point. All nodes will be loaded inside the cluster_manager.nodes list. * Warning: if something goes wrong, it will free the starting node before * returning 0. */ - static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { if (node->context == NULL) CLUSTER_MANAGER_NODE_CONNECT(node); @@ -2681,7 +2711,6 @@ static int clusterManagerIsConfigConsistent(void) { if (cluster_manager.nodes == NULL) return 0; int consistent = (listLength(cluster_manager.nodes) <= 1); // If the Cluster has only one node, it's always consistent - // Does it make sense? 
if (consistent) return 1; sds first_cfg = NULL; listIter li; @@ -2705,6 +2734,13 @@ static int clusterManagerIsConfigConsistent(void) { return consistent; } +static void clusterManagerOnError(sds err) { + if (cluster_manager.errors == NULL) + cluster_manager.errors = listCreate(); + listAddNodeTail(cluster_manager.errors, err); + clusterManagerLogErr("%s\n", (char *) err); +} + static int clusterManagerGetCoveredSlots(char *all_slots) { if (cluster_manager.nodes == NULL) return 0; listIter li; @@ -2732,7 +2768,7 @@ static void clusterManagerCheckCluster(int quiet) { if (!quiet) clusterManagerShowNodes(); if (!clusterManagerIsConfigConsistent()) { sds err = sdsnew("[ERR] Nodes don't agree about configuration!"); - CLUSTER_MANAGER_ERROR(err); + clusterManagerOnError(err); } else { clusterManagerLogOk("[OK] All nodes agree about slots " "configuration.\n"); @@ -2761,7 +2797,7 @@ static void clusterManagerCheckCluster(int quiet) { errstr = sdscatfmt(errstr, fmt, slot); } errstr = sdscat(errstr, "."); - CLUSTER_MANAGER_ERROR(errstr); + clusterManagerOnError(errstr); } if (n->importing != NULL) { if (open_slots == NULL) @@ -2779,7 +2815,7 @@ static void clusterManagerCheckCluster(int quiet) { errstr = sdscatfmt(errstr, fmt, slot); } errstr = sdscat(errstr, "."); - CLUSTER_MANAGER_ERROR(errstr); + clusterManagerOnError(errstr); } } if (open_slots != NULL) { @@ -2808,7 +2844,7 @@ static void clusterManagerCheckCluster(int quiet) { err = sdscatprintf(err, "[ERR] Not all %d slots are " "covered by nodes.\n", CLUSTER_MANAGER_SLOTS); - CLUSTER_MANAGER_ERROR(err); + clusterManagerOnError(err); } } @@ -2832,6 +2868,53 @@ static void clusterManagerLog(int level, const char* fmt, ...) 
{ if (use_colors) printf("\033[" LOG_COLOR_RESET); } +static void clusterManagerNodeArrayInit(clusterManagerNodeArray *array, + int alloc_len) +{ + array->nodes = zcalloc(alloc_len * sizeof(clusterManagerNode*)); + array->alloc = array->nodes; + array->len = alloc_len; + array->count = 0; +} + +/* Reset array->nodes to the original array allocation and re-count non-NULL + * nodes. */ +static void clusterManagerNodeArrayReset(clusterManagerNodeArray *array) { + if (array->nodes > array->alloc) { + array->len = array->nodes - array->alloc; + array->nodes = array->alloc; + array->count = 0; + int i = 0; + for(; i < array->len; i++) { + if (array->nodes[i] != NULL) array->count++; + } + } +} + +/* Shift array->nodes and store the shifted node into 'nodeptr'. */ +static void clusterManagerNodeArrayShift(clusterManagerNodeArray *array, + clusterManagerNode **nodeptr) +{ + assert(array->nodes < (array->nodes + array->len)); + /* If the first node to be shifted is not NULL, decrement count. */ + if (*array->nodes != NULL) array->count--; + /* Store the first node to be shifted into 'nodeptr'. */ + *nodeptr = *array->nodes; + /* Shift the nodes array and decrement length. 
*/ + array->nodes++; + array->len--; +} + +static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, + clusterManagerNode *node) +{ + assert(array->nodes < (array->nodes + array->len)); + assert(node != NULL); + assert(array->count < array->len); + array->nodes[array->count++] = node; +} + +/* Execute redis-cli in Cluster Manager mode */ static void clusterManagerMode(clusterManagerCommandProc *proc) { int argc = config.cluster_manager_command.argc; char **argv = config.cluster_manager_command.argv; @@ -2919,7 +3002,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { } clusterManagerLogInfo(">>> Performing hash slots allocation " "on %d nodes...\n", node_len); - int interleaved_len = 0, ips_len = 0; + int interleaved_len = 0, ip_count = 0; clusterManagerNode **interleaved = zcalloc(node_len*sizeof(**interleaved)); char **ips = zcalloc(node_len * sizeof(char*)); clusterManagerNodeArray *ip_nodes = zcalloc(node_len * sizeof(*ip_nodes)); @@ -2929,7 +3012,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; int found = 0; - for (i = 0; i < ips_len; i++) { + for (i = 0; i < ip_count; i++) { char *ip = ips[i]; if (!strcmp(ip, n->ip)) { found = 1; @@ -2937,19 +3020,19 @@ static int clusterManagerCommandCreate(int argc, char **argv) { } } if (!found) { - ips[ips_len++] = n->ip; + ips[ip_count++] = n->ip; } clusterManagerNodeArray *node_array = &(ip_nodes[i]); if (node_array->nodes == NULL) - CLUSTER_MANAGER_NODEARRAY_INIT(node_array, node_len); - CLUSTER_MANAGER_NODEARRAY_ADD(node_array, n); + clusterManagerNodeArrayInit(node_array, node_len); + clusterManagerNodeArrayAdd(node_array, n); } while (interleaved_len < node_len) { - for (i = 0; i < ips_len; i++) { + for (i = 0; i < ip_count; i++) { clusterManagerNodeArray *node_array = &(ip_nodes[i]); if (node_array->count > 0) { - clusterManagerNode *n; - CLUSTER_MANAGER_NODEARRAY_SHIFT(node_array, n); + 
clusterManagerNode *n = NULL; + clusterManagerNodeArrayShift(node_array, &n); interleaved[interleaved_len++] = n; } } @@ -3019,11 +3102,11 @@ assign_replicas: printf("Adding extra replicas...\n"); goto assign_replicas; } - for (i = 0; i < ips_len; i++) { + for (i = 0; i < ip_count; i++) { clusterManagerNodeArray *node_array = ip_nodes + i; - CLUSTER_MANAGER_NODEARRAY_RESET(node_array); + clusterManagerNodeArrayReset(node_array); } - clusterManagerOptimizeAntiAffinity(ip_nodes, ips_len); + clusterManagerOptimizeAntiAffinity(ip_nodes, ip_count); clusterManagerShowNodes(); printf("Can I set the above configuration? %s", "(type 'yes' to accept): "); fflush(stdout); @@ -3031,7 +3114,6 @@ assign_replicas: int nread = read(fileno(stdin),buf,4); buf[3] = '\0'; if (nread != 0 && !strcmp("yes", buf)) { - printf("\nFlushing configuration!\n"); listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; @@ -3128,7 +3210,7 @@ cleanup: zfree(ips); for (i = 0; i < node_len; i++) { clusterManagerNodeArray *node_array = ip_nodes + i; - CLUSTER_MANAGER_NODEARRAY_FREE(node_array); + CLUSTER_MANAGER_NODE_ARRAY_FREE(node_array); } zfree(ip_nodes); return success; From c26fc9a47bbced8f51f0823d7adaccf49e744911 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 28 Feb 2018 10:44:11 +0100 Subject: [PATCH 45/66] Cluster Manager: reshard command, fixed slots parsing bug and other minor bugs. 
--- src/redis-cli.c | 655 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 593 insertions(+), 62 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 66fc4d183..fcf48a473 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -69,6 +69,13 @@ #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" #define CLUSTER_MANAGER_SLOTS 16384 +#define CLUSTER_MANAGER_MIGRATE_TIMEOUT 60000 +#define CLUSTER_MANAGER_MIGRATE_PIPELINE 10 + +#define CLUSTER_MANAGER_INVALID_HOST_ARG \ + "Invalid arguments: you need to pass either a valid " \ + "address (ie. 120.0.0.1:7000) or space separated IP " \ + "and port (ie. 120.0.0.1 7000)\n" #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) #define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) (nodes/(replicas + 1)) #define CLUSTER_MANAGER_NODE_CONNECT(n) \ @@ -103,9 +110,14 @@ #define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0 #define CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1 +#define CLUSTER_MANAGER_CMD_FLAG_YES 1 << 2 #define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 #define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 +#define CLUSTER_MANAGER_OPT_COLD 1 << 1 +#define CLUSTER_MANAGER_OPT_UPDATE 1 << 2 +#define CLUSTER_MANAGER_OPT_QUIET 1 << 6 +#define CLUSTER_MANAGER_OPT_VERBOSE 1 << 7 #define CLUSTER_MANAGER_LOG_LVL_INFO 1 #define CLUSTER_MANAGER_LOG_LVL_WARN 2 @@ -143,6 +155,11 @@ typedef struct clusterManagerCommand { char **argv; int flags; int replicas; + char *from; + char *to; + int slots; + int timeout; + int pipeline; } clusterManagerCommand; static void createClusterManagerCommand(char *cmdname, int argc, char **argv); @@ -1261,6 +1278,19 @@ static int parseOptions(int argc, char **argv) { usage(); } else if (!strcmp(argv[i],"--cluster-replicas") && !lastarg) { config.cluster_manager_command.replicas = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-from") && !lastarg) { + config.cluster_manager_command.from = argv[++i]; + } else if (!strcmp(argv[i],"--cluster-to") && !lastarg) { + 
config.cluster_manager_command.to = argv[++i]; + } else if (!strcmp(argv[i],"--cluster-slots") && !lastarg) { + config.cluster_manager_command.slots = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-timeout") && !lastarg) { + config.cluster_manager_command.timeout = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-pipeline") && !lastarg) { + config.cluster_manager_command.pipeline = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-yes")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_YES; } else if (!strcmp(argv[i],"-v") || !strcmp(argv[i], "--version")) { sds version = cliVersion(); printf("redis-cli %s\n", version); @@ -1358,7 +1388,7 @@ static void usage(void) { " --ldb-sync-mode Like --ldb but uses the synchronous Lua debugger, in\n" " this mode the server is blocked and script changes are\n" " are not rolled back from the server memory.\n" -" --cluster [args...]\n" +" --cluster [args...] [opts...]\n" " Cluster Manager command and arguments (see below).\n" " --help Output this help and exit.\n" " --version Output version and exit.\n" @@ -1729,6 +1759,12 @@ typedef struct clusterManagerNodeArray { int count; /* Non-NULL nodes count */ } clusterManagerNodeArray; +/* Used for reshard table. 
*/ +typedef struct clusterManagerReshardTableItem { + clusterManagerNode *source; + int slot; +} clusterManagerReshardTableItem; + static dictType clusterManagerDictType = { dictSdsHash, /* hash function */ NULL, /* key dup */ @@ -1754,7 +1790,7 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, int ip_count, clusterManagerNode ***offending, int *offending_len); static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, int ip_count); -static sds clusterManagerNodeInfo(clusterManagerNode *node); +static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent); static void clusterManagerShowNodes(void); static void clusterManagerShowInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); @@ -1776,6 +1812,7 @@ static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandReshard(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); @@ -1789,9 +1826,11 @@ typedef struct clusterManagerCommandDef { clusterManagerCommandDef clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", - "cluster-replicas"}, - {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, + "replicas "}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, + {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, + {"reshard", clusterManagerCommandReshard, -1, "host:port", + "from ,to ,slots ,yes,timeout ,pipeline "}, {"call", clusterManagerCommandCall, -2, "host:port command arg arg .. 
arg", NULL}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} @@ -1829,6 +1868,38 @@ static clusterManagerCommandProc *validateClusterManagerCommand(void) { return proc; } +/* Get host ip and port from command arguments. If only one argument has + * been provided it must be in the form of 'ip:port', elsewhere + * the first argument must be the ip and the second one the port. + * If host and port can be detected, it returns 1 and it stores host and + * port into variables referenced by'ip_ptr' and 'port_ptr' pointers, + * elsewhere it returns 0. */ +static int getClusterHostFromCmdArgs(int argc, char **argv, + char **ip_ptr, int *port_ptr) { + int port = 0; + char *ip = NULL; + if (argc == 1) { + char *addr = argv[0]; + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c != NULL) { + *c = '\0'; + ip = addr; + port = atoi(++c); + } else return 0; + } else { + ip = argv[0]; + port = atoi(argv[1]); + } + if (!ip || !port) return 0; + else { + *ip_ptr = ip; + *port_ptr = port; + } + return 1; +} + static void freeClusterManagerNode(clusterManagerNode *node) { if (node->context != NULL) redisFree(node->context); if (node->friends != NULL) { @@ -2188,8 +2259,12 @@ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { return slots; } -static sds clusterManagerNodeInfo(clusterManagerNode *node) { +static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent) { sds info = sdsempty(); + sds spaces = sdsempty(); + int i; + for (i = 0; i < indent; i++) spaces = sdscat(spaces, " "); + if (indent) info = sdscat(info, spaces); int is_master = !(node->flags & CLUSTER_MANAGER_FLAG_SLAVE); char *role = (is_master ? 
"M" : "S"); sds slots = NULL; @@ -2198,17 +2273,18 @@ static sds clusterManagerNodeInfo(clusterManagerNode *node) { else { slots = clusterManagerNodeSlotsString(node); info = sdscatfmt(info, "%s: %S %s:%u\n" - " slots:%S (%u slots) " + "%s slots:%S (%u slots) " "", //TODO: flags string - role, node->name, node->ip, node->port, + role, node->name, node->ip, node->port, spaces, slots, node->slots_count); sdsfree(slots); } if (node->replicate != NULL) - info = sdscatfmt(info, "\n replicates %S", node->replicate); + info = sdscatfmt(info, "\n%s replicates %S", spaces, node->replicate); else if (node->replicas_count) - info = sdscatfmt(info, "\n %U additional replica(s)", - node->replicas_count); + info = sdscatfmt(info, "\n%s %U additional replica(s)", + spaces, node->replicas_count); + sdsfree(spaces); return info; } @@ -2218,7 +2294,7 @@ static void clusterManagerShowNodes(void) { listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; - sds info = clusterManagerNodeInfo(node); + sds info = clusterManagerNodeInfo(node, 0); printf("%s\n", info); sdsfree(info); } @@ -2306,7 +2382,7 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) argvlen[i] = sdslen(argv[i]); redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen); if (redisGetReply(node->context, &_reply) != REDIS_OK) { - success = 1; + success = 0; goto cleanup; } reply = (redisReply*) _reply; @@ -2326,6 +2402,193 @@ cleanup: return success; } +/* Set slot status to "importing" or "migrating" */ +static int clusterManagerSetSlot(clusterManagerNode *node1, + clusterManagerNode *node2, + int slot, const char *mode, char **err) { + redisReply *reply = CLUSTER_MANAGER_COMMAND(node1, "CLUSTER " + "SETSLOT %d %s %s", + slot, mode, + (char *) node2->name); + if (err != NULL) *err = NULL; + if (!reply) return 0; + if (reply->type == REDIS_REPLY_ERROR) { + if (err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + 
strcpy(*err, reply->str); + } + return 0; + } + return 1; +} + +static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, + clusterManagerNode *target, + int slot, int timeout, + int pipeline, int verbose, + char **err) +{ + int success = 1; + while (1) { + redisReply *reply = NULL, *migrate_reply = NULL; + char **argv = NULL; + size_t *argv_len = NULL; + reply = CLUSTER_MANAGER_COMMAND(source, "CLUSTER " + "GETKEYSINSLOT %d %d", slot, + pipeline); + success = (reply != NULL); + if (!success) return 0; + if (reply->type == REDIS_REPLY_ERROR) { + success = 0; + if (err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + strcpy(*err, reply->str); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + } + goto next; + } + assert(reply->type == REDIS_REPLY_ARRAY); + size_t count = reply->elements; + if (count == 0) { + freeReplyObject(reply); + break; + } + char *dots = (verbose ? zmalloc((count+1) * sizeof(char)) : NULL); + /* Calling MIGRATE command. */ + size_t argc = count + 8; + argv = zcalloc(argc * sizeof(char *)); + argv_len = zcalloc(argc * sizeof(size_t)); + char portstr[255]; + char timeoutstr[255]; + snprintf(portstr, 10, "%d", target->port); + snprintf(timeoutstr, 10, "%d", timeout); + argv[0] = "MIGRATE"; + argv_len[0] = 7; + argv[1] = target->ip; + argv_len[1] = strlen(target->ip); + argv[2] = portstr; + argv_len[2] = strlen(portstr); + argv[3] = ""; + argv_len[3] = 0; + argv[4] = "0"; + argv_len[4] = 1; + argv[5] = timeoutstr; + argv_len[5] = strlen(timeoutstr); + argv[6] = "REPLACE"; + argv_len[6] = 7; + argv[7] = "KEYS"; + argv_len[7] = 4; + for (size_t i = 0; i < count; i++) { + redisReply *entry = reply->element[i]; + size_t idx = i + 8; + assert(entry->type == REDIS_REPLY_STRING); + argv[idx] = (char *) sdsnew(entry->str); + argv_len[idx] = entry->len; + if (verbose) dots[i] = '.'; + } + if (verbose) dots[count] = '\0'; + void *_reply = NULL; + redisAppendCommandArgv(source->context,argc, + (const char**)argv,argv_len); + 
success = (redisGetReply(source->context, &_reply) == REDIS_OK); + for (size_t i = 0; i < count; i++) sdsfree(argv[i + 8]); + if (!success) goto next; + migrate_reply = (redisReply *) _reply; + if (migrate_reply->type == REDIS_REPLY_ERROR) { + // TODO: Implement fix. + success = 0; + if (err != NULL) { + *err = zmalloc((migrate_reply->len + 1) * sizeof(char)); + strcpy(*err, migrate_reply->str); + printf("\n"); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + } + goto next; + } + if (verbose) { + printf("%s", dots); + fflush(stdout); + } +next: + if (reply != NULL) freeReplyObject(reply); + if (migrate_reply != NULL) freeReplyObject(migrate_reply); + zfree(argv); + zfree(argv_len); + if (!success) break; + } + return success; +} + +/* Move slots between source and target nodes using MIGRATE. + * + * Options: + * CLUSTER_MANAGER_OPT_VERBOSE -- Print a dot for every moved key. + * CLUSTER_MANAGER_OPT_COLD -- Move keys without opening slots / + * reconfiguring the nodes. + * CLUSTER_MANAGER_OPT_UPDATE -- Update node->slots for source/target nodes. + * CLUSTER_MANAGER_OPT_QUIET -- Don't print info messages. 
+*/ +static int clusterManagerMoveSlot(clusterManagerNode *source, + clusterManagerNode *target, + int slot, int opts, char**err) +{ + if (!(opts & CLUSTER_MANAGER_OPT_QUIET)) { + printf("Moving slot %d from %s:%d to %s:%d: ", slot, source->ip, + source->port, target->ip, target->port); + fflush(stdout); + } + if (err != NULL) *err = NULL; + int pipeline = config.cluster_manager_command.pipeline, + timeout = config.cluster_manager_command.timeout, + print_dots = (opts & CLUSTER_MANAGER_OPT_VERBOSE), + option_cold = (opts & CLUSTER_MANAGER_OPT_COLD), + success = 1; + if (!option_cold) { + success = clusterManagerSetSlot(target, source, slot, + "importing", err); + if (!success) return 0; + success = clusterManagerSetSlot(source, target, slot, + "migrating", err); + if (!success) return 0; + } + success = clusterManagerMigrateKeysInSlot(source, target, slot, timeout, + pipeline, print_dots, err); + if (!(opts & CLUSTER_MANAGER_OPT_QUIET)) printf("\n"); + if (!success) return 0; + /* Set the new node as the owner of the slot in all the known nodes. */ + if (!option_cold) { + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER " + "SETSLOT %d %s %s", + slot, "node", + target->name); + success = (r != NULL); + if (!success) return 0; + if (r->type == REDIS_REPLY_ERROR) { + success = 0; + if (err != NULL) { + *err = zmalloc((r->len + 1) * sizeof(char)); + strcpy(*err, r->str); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err); + } + } + freeReplyObject(r); + if (!success) return 0; + } + } + /* Update the node logical config */ + if (opts & CLUSTER_MANAGER_OPT_UPDATE) { + source->slots[slot] = 0; + target->slots[slot] = 1; + } + return 1; +} + /* Flush the dirty node configuration by calling replicate for slaves or * adding the slots for masters. 
*/ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { @@ -2425,20 +2688,24 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, clusterManagerNodeResetSlots(node); if (i == 8) { int remaining = strlen(line); - //TODO: just while(remaining) && assign p inside the block - while ((p = strchr(line, ' ')) != NULL || remaining) { + while (remaining > 0) { + p = strchr(line, ' '); if (p == NULL) p = line + remaining; remaining -= (p - line); char *slotsdef = line; *p = '\0'; - if (remaining) line = p + 1; - else line = p; + if (remaining) { + line = p + 1; + remaining--; + } else line = p; if (slotsdef[0] == '[') { slotsdef++; if ((p = strstr(slotsdef, "->-"))) { // Migrating *p = '\0'; p += 3; + char *closing_bracket = strchr(p, ']'); + if (closing_bracket) *closing_bracket = '\0'; sds slot = sdsnew(slotsdef); sds dst = sdsnew(p); node->migrating_count += 2; @@ -2451,6 +2718,8 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, } else if ((p = strstr(slotsdef, "-<-"))) {//Importing *p = '\0'; p += 3; + char *closing_bracket = strchr(p, ']'); + if (closing_bracket) *closing_bracket = '\0'; sds slot = sdsnew(slotsdef); sds src = sdsnew(p); node->importing_count += 2; @@ -2605,8 +2874,9 @@ invalid_friend: if (n->replicate != NULL) { clusterManagerNode *master = clusterManagerNodeByName(n->replicate); if (master == NULL) { - printf("*** WARNING: %s:%d claims to be slave of unknown " - "node ID %s.\n", n->ip, n->port, n->replicate); + clusterManagerLogWarn("*** WARNING: %s:%d claims to be " + "slave of unknown node ID %s.\n", + n->ip, n->port, n->replicate); } else master->replicas_count++; } } @@ -2619,6 +2889,12 @@ int clusterManagerSlotCompare(const void *slot1, const void *slot2) { return strcmp(*i1, *i2); } +int clusterManagerSlotCountCompareDesc(const void *n1, const void *n2) { + clusterManagerNode *node1 = *((clusterManagerNode **) n1); + clusterManagerNode *node2 = *((clusterManagerNode 
**) n2); + return node2->slots_count - node1->slots_count; +} + static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { sds signature = NULL; int node_count = 0, i = 0, name_len = 0; @@ -2651,16 +2927,18 @@ static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { if (remaining == 0) continue; char **slots = NULL; int c = 0; - //TODO: just while(remaining) && assign p inside the block - while ((p = strchr(line, ' ')) != NULL || remaining) { + while (remaining > 0) { + p = strchr(line, ' '); if (p == NULL) p = line + remaining; int size = (p - line); remaining -= size; tot_size += size; char *slotsdef = line; *p = '\0'; - if (remaining) line = p + 1; - else line = p; + if (remaining) { + line = p + 1; + remaining--; + } else line = p; if (slotsdef[0] != '[') { c++; slots = zrealloc(slots, (c * sizeof(char *))); @@ -2792,7 +3070,7 @@ static void clusterManagerCheckCluster(int quiet) { n->port); for (i = 0; i < n->migrating_count; i += 2) { sds slot = n->migrating[i]; - dictAdd(open_slots, slot, n->migrating[i + 1]); + dictAdd(open_slots, slot, sdsdup(n->migrating[i + 1])); char *fmt = (i > 0 ? ",%S" : "%S"); errstr = sdscatfmt(errstr, fmt, slot); } @@ -2810,7 +3088,7 @@ static void clusterManagerCheckCluster(int quiet) { n->port); for (i = 0; i < n->importing_count; i += 2) { sds slot = n->importing[i]; - dictAdd(open_slots, slot, n->importing[i + 1]); + dictAdd(open_slots, slot, sdsdup(n->importing[i + 1])); char *fmt = (i > 0 ? 
",%S" : "%S"); errstr = sdscatfmt(errstr, fmt, slot); } @@ -2848,6 +3126,76 @@ static void clusterManagerCheckCluster(int quiet) { } } +static clusterManagerNode *clusterNodeForResharding(char *id, + clusterManagerNode *target, + int *raise_err) +{ + clusterManagerNode *node = NULL; + const char *invalid_node_msg = "*** The specified node is not known or " + "not a master, please retry.\n"; + node = clusterManagerNodeByName(id); + *raise_err = 0; + if (!node || node->flags & CLUSTER_MANAGER_FLAG_SLAVE) { + clusterManagerLogErr(invalid_node_msg); + *raise_err = 1; + return NULL; + } else if (node != NULL && target != NULL) { + if (!strcmp(node->name, target->name)) { + clusterManagerLogErr( "*** It is not possible to use " + "the target node as " + "source node.\n"); + return NULL; + } + } + return node; +} + +static list *clusterManagerComputeReshardTable(list *sources, int numslots) { + list *moved = listCreate(); + int src_count = listLength(sources), i = 0, tot_slots = 0, j; + clusterManagerNode **sorted = zmalloc(src_count * sizeof(**sorted)); + listIter li; + listNode *ln; + listRewind(sources, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + tot_slots += node->slots_count; + sorted[i++] = node; + } + qsort(sorted, src_count, sizeof(clusterManagerNode *), + clusterManagerSlotCountCompareDesc); + for (i = 0; i < src_count; i++) { + clusterManagerNode *node = sorted[i]; + float n = ((float) numslots / tot_slots * node->slots_count); + if (i == 0) n = ceil(n); + else n = floor(n); + int max = (int) n, count = 0; + for (j = 0; j < CLUSTER_MANAGER_SLOTS; j++) { + int slot = node->slots[j]; + if (!slot) continue; + if (count >= max || (int)listLength(moved) >= numslots) break; + clusterManagerReshardTableItem *item = zmalloc(sizeof(item)); + item->source = node; + item->slot = j; + listAddNodeTail(moved, item); + count++; + } + } + zfree(sorted); + return moved; +} + +static void clusterManagerShowReshardTable(list *table) { + 
listIter li; + listNode *ln; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + clusterManagerNode *n = item->source; + printf(" Moving slot %d from %s\n", item->slot, (char *) n->name); + } +} + static void clusterManagerLog(int level, const char* fmt, ...) { int use_colors = (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COLOR); @@ -3219,59 +3567,218 @@ cleanup: static int clusterManagerCommandInfo(int argc, char **argv) { int port = 0; char *ip = NULL; - if (argc == 1) { - char *addr = argv[0]; - char *c = strrchr(addr, '@'); - if (c != NULL) *c = '\0'; - c = strrchr(addr, ':'); - if (c != NULL) { - *c = '\0'; - ip = addr; - port = atoi(++c); - } else goto invalid_args; - } else { - ip = argv[0]; - port = atoi(argv[1]); - } - if (!ip || !port) goto invalid_args; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; clusterManagerShowInfo(); return 1; invalid_args: - fprintf(stderr, "Invalid arguments: you need to pass either a valid " - "address (ie. 120.0.0.1:7000) or space separated IP " - "and port (ie. 
120.0.0.1 7000)\n"); + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); return 0; } static int clusterManagerCommandCheck(int argc, char **argv) { int port = 0; char *ip = NULL; - if (argc == 1) { - char *addr = argv[0]; - char *c = strrchr(addr, '@'); - if (c != NULL) *c = '\0'; - c = strrchr(addr, ':'); - if (c != NULL) { - *c = '\0'; - ip = addr; - port = atoi(++c); - } else goto invalid_args; - } else { - ip = argv[0]; - port = atoi(argv[1]); - } - if (!ip || !port) goto invalid_args; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; clusterManagerShowInfo(); clusterManagerCheckCluster(0); return 1; invalid_args: - fprintf(stderr, "Invalid arguments: you need to pass either a valid " - "address (ie. 120.0.0.1:7000) or space separated IP " - "and port (ie. 120.0.0.1 7000)\n"); + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + +static int clusterManagerCommandReshard(int argc, char **argv) { + int port = 0; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + clusterManagerCheckCluster(0); + if (cluster_manager.errors && listLength(cluster_manager.errors) > 0) { + fflush(stdout); + fprintf(stderr, + "*** Please fix your cluster problems before resharding\n"); + return 0; + } + int slots = config.cluster_manager_command.slots; + if (!slots) { + while (slots <= 0 || slots > CLUSTER_MANAGER_SLOTS) { + printf("How many slots do you want to move (from 1 to %d)? 
", + CLUSTER_MANAGER_SLOTS); + fflush(stdout); + char buf[6]; + int nread = read(fileno(stdin),buf,6); + if (!nread) continue; //TODO: nread < 0 + int last_idx = nread - 1; + if (buf[last_idx] != '\n') { + int ch; + while ((ch = getchar()) != '\n' && ch != EOF) {} + } + buf[last_idx] = '\0'; + slots = atoi(buf); + } + } + char buf[255]; + char *to = config.cluster_manager_command.to, + *from = config.cluster_manager_command.from; + while (to == NULL) { + printf("What is the receiving node ID? "); + fflush(stdout); + int nread = read(fileno(stdin),buf,255); + if (!nread) continue; //TODO: nread < 0 + int last_idx = nread - 1; + if (buf[last_idx] != '\n') { + int ch; + while ((ch = getchar()) != '\n' && ch != EOF) {} + } + buf[last_idx] = '\0'; + if (strlen(buf) > 0) to = buf; + } + int raise_err = 0; + clusterManagerNode *target = clusterNodeForResharding(to, NULL, &raise_err); + if (target == NULL) return 0; + list *sources = listCreate(); + list *table = NULL; + int all = 0, result = 1; + if (from == NULL) { + printf("Please enter all the source node IDs.\n"); + printf(" Type 'all' to use all the nodes as source nodes for " + "the hash slots.\n"); + printf(" Type 'done' once you entered all the source nodes IDs.\n"); + while (1) { + printf("Source node #%lu: ", listLength(sources) + 1); + fflush(stdout); + int nread = read(fileno(stdin),buf,255); + if (!nread) continue; //TODO: nread < 0 + int last_idx = nread - 1; + if (buf[last_idx] != '\n') { + int ch; + while ((ch = getchar()) != '\n' && ch != EOF) {} + } + buf[last_idx] = '\0'; + if (!strcmp(buf, "done")) break; + else if (!strcmp(buf, "all")) { + all = 1; + break; + } else { + clusterManagerNode *src = + clusterNodeForResharding(buf, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } + } + } + } else { + char *p; + while((p = strchr(from, ',')) != NULL) { + *p = '\0'; + if (!strcmp(from, "all")) { + all = 1; + break; + } else { + 
clusterManagerNode *src = + clusterNodeForResharding(from, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } + } + from = p + 1; + } + /* Check if there's still another source to process. */ + if (!all && strlen(from) > 0) { + clusterManagerNode *src = + clusterNodeForResharding(from, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } + } + } + listIter li; + listNode *ln; + if (all) { + listEmpty(sources); + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + if (!sdscmp(n->name, target->name)) continue; + listAddNodeTail(sources, n); + } + } + if (listLength(sources) == 0) { + fprintf(stderr, "*** No source nodes given, operation aborted.\n"); + result = 0; + goto cleanup; + } + printf("\nReady to move %d slots.\n", slots); + printf(" Source nodes:\n"); + listRewind(sources, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *src = ln->value; + sds info = clusterManagerNodeInfo(src, 4); + printf("%s\n", info); + sdsfree(info); + } + printf(" Destination node:\n"); + sds info = clusterManagerNodeInfo(target, 4); + printf("%s\n", info); + sdsfree(info); + table = clusterManagerComputeReshardTable(sources, slots); + printf(" Resharding plan:\n"); + clusterManagerShowReshardTable(table); + if (!(config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_YES)) + { + printf("Do you want to proceed with the proposed " + "reshard plan (yes/no)? 
"); + fflush(stdout); + char buf[4]; + int nread = read(fileno(stdin),buf,4); + buf[3] = '\0'; + if (nread <= 0 || strcmp("yes", buf) != 0) { + result = 0; + goto cleanup; + } + } + int opts = CLUSTER_MANAGER_OPT_VERBOSE; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + char *err = NULL; + result = clusterManagerMoveSlot(item->source, target, item->slot, + opts, &err); + if (!result) { + if (err != NULL) { + clusterManagerLogErr("\n%s\n", err); + zfree(err); + } + goto cleanup; + } + } +cleanup: + listRelease(sources); + if (table) { + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + zfree(item); + } + listRelease(table); + } + return result; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); return 0; } @@ -3332,13 +3839,32 @@ static int clusterManagerCommandHelp(int argc, char **argv) { sizeof(clusterManagerCommandDef); int i = 0, j; fprintf(stderr, "Cluster Manager Commands:\n"); + int padding = 15; for (; i < commands_count; i++) { clusterManagerCommandDef *def = &(clusterManagerCommands[i]); - int namelen = strlen(def->name), padlen = 15 - namelen; + int namelen = strlen(def->name), padlen = padding - namelen; fprintf(stderr, " %s", def->name); for (j = 0; j < padlen; j++) fprintf(stderr, " "); fprintf(stderr, "%s\n", (def->args ? 
def->args : "")); - //TODO: if (def->options) + if (def->options != NULL) { + int optslen = strlen(def->options); + char *p = def->options, *eos = p + optslen; + char *comma = NULL; + while ((comma = strchr(p, ',')) != NULL) { + int deflen = (int)(comma - p); + char buf[255]; + memcpy(buf, p, deflen); + buf[deflen] = '\0'; + for (j = 0; j < padding; j++) fprintf(stderr, " "); + fprintf(stderr, " --cluster-%s\n", buf); + p = comma + 1; + if (p >= eos) break; + } + if (p < eos) { + for (j = 0; j < padding; j++) fprintf(stderr, " "); + fprintf(stderr, " --cluster-%s\n", p); + } + } } return 0; } @@ -4641,6 +5167,11 @@ int main(int argc, char **argv) { config.cluster_manager_command.argv = NULL; config.cluster_manager_command.flags = 0; config.cluster_manager_command.replicas = 0; + config.cluster_manager_command.from = NULL; + config.cluster_manager_command.to = NULL; + config.cluster_manager_command.slots = 0; + config.cluster_manager_command.timeout = CLUSTER_MANAGER_MIGRATE_TIMEOUT; + config.cluster_manager_command.pipeline = CLUSTER_MANAGER_MIGRATE_PIPELINE; pref.hints = 1; spectrum_palette = spectrum_palette_color; From d031ac4a4275bca698f25de9a024f1c8f4049ef4 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 28 Feb 2018 11:49:10 +0100 Subject: [PATCH 46/66] Fixed memory write error in clusterManagerGetConfigSignature --- src/redis-cli.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index fcf48a473..baaa615c5 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2295,7 +2295,7 @@ static void clusterManagerShowNodes(void) { while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; sds info = clusterManagerNodeInfo(node, 0); - printf("%s\n", info); + printf("%s\n", (char *) info); sdsfree(info); } } @@ -2916,8 +2916,8 @@ static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { line = p + 1; if (i == 0) { nodename = token; - tot_size = p - token; - name_len = tot_size; + 
tot_size = (p - token); + name_len = tot_size++; // Make room for ':' in tot_size } else if (i == 8) break; i++; } @@ -2951,6 +2951,7 @@ static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { node_count++; node_configs = zrealloc(node_configs, (node_count * sizeof(char *))); + /* Make room for '|' separators. */ tot_size += (sizeof(char) * (c - 1)); char *cfg = zmalloc((sizeof(char) * tot_size) + 1); memcpy(cfg, nodename, name_len); @@ -3760,7 +3761,7 @@ static int clusterManagerCommandReshard(int argc, char **argv) { opts, &err); if (!result) { if (err != NULL) { - clusterManagerLogErr("\n%s\n", err); + //clusterManagerLogErr("\n%s\n", err); zfree(err); } goto cleanup; From dcc7d427424602a4a21374a4500b47ecbc962f5c Mon Sep 17 00:00:00 2001 From: Artix Date: Wed, 28 Feb 2018 15:21:08 +0100 Subject: [PATCH 47/66] Cluster Manager: fixed some memory error --- src/redis-cli.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index baaa615c5..317b1125e 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2412,14 +2412,19 @@ static int clusterManagerSetSlot(clusterManagerNode *node1, (char *) node2->name); if (err != NULL) *err = NULL; if (!reply) return 0; + int success = 1; if (reply->type == REDIS_REPLY_ERROR) { + success = 0; if (err != NULL) { *err = zmalloc((reply->len + 1) * sizeof(char)); strcpy(*err, reply->str); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node1, err); } - return 0; + goto cleanup; } - return 1; +cleanup: + freeReplyObject(reply); + return success; } static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, @@ -3175,7 +3180,7 @@ static list *clusterManagerComputeReshardTable(list *sources, int numslots) { int slot = node->slots[j]; if (!slot) continue; if (count >= max || (int)listLength(moved) >= numslots) break; - clusterManagerReshardTableItem *item = zmalloc(sizeof(item)); + clusterManagerReshardTableItem *item = zmalloc(sizeof(*item)); item->source = node; 
item->slot = j; listAddNodeTail(moved, item); From c45e915ff0322179f15c6423df3db4613f8c901f Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 2 Mar 2018 17:06:50 +0100 Subject: [PATCH 48/66] ClusterManager: fixed --cluster-from 'all' parsing --- src/redis-cli.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 317b1125e..8fa2d7254 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -3137,12 +3137,12 @@ static clusterManagerNode *clusterNodeForResharding(char *id, int *raise_err) { clusterManagerNode *node = NULL; - const char *invalid_node_msg = "*** The specified node is not known or " - "not a master, please retry.\n"; + const char *invalid_node_msg = "*** The specified node (%s) is not known " + "or not a master, please retry.\n"; node = clusterManagerNodeByName(id); *raise_err = 0; if (!node || node->flags & CLUSTER_MANAGER_FLAG_SLAVE) { - clusterManagerLogErr(invalid_node_msg); + clusterManagerLogErr(invalid_node_msg, id); *raise_err = 1; return NULL; } else if (node != NULL && target != NULL) { @@ -3700,12 +3700,15 @@ static int clusterManagerCommandReshard(int argc, char **argv) { } /* Check if there's still another source to process. 
*/ if (!all && strlen(from) > 0) { - clusterManagerNode *src = - clusterNodeForResharding(from, target, &raise_err); - if (src != NULL) listAddNodeTail(sources, src); - else if (raise_err) { - result = 0; - goto cleanup; + if (!strcmp(from, "all")) all = 1; + if (!all) { + clusterManagerNode *src = + clusterNodeForResharding(from, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } } } } From 05c0101164a5970d06556746a6c66cdf8589b67e Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 6 Mar 2018 13:06:04 +0200 Subject: [PATCH 49/66] clusterManagerAddSlots: changed the way ADDSLOTS command is built --- src/redis-cli.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 8fa2d7254..4f87f9067 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2354,32 +2354,28 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) redisReply *reply = NULL; void *_reply = NULL; int is_err = 0, success = 1; - int argc; - sds *argv = NULL; - size_t *argvlen = NULL; + /* First two args are used for the command itself. */ + int argc = node->slots_count + 2; + sds *argv = zmalloc(argc * sizeof(*argv)); + size_t *argvlen = zmalloc(argc * sizeof(*argvlen)); + argv[0] = "CLUSTER"; + argv[1] = "ADDSLOTS"; + argvlen[0] = 7; + argvlen[1] = 8; *err = NULL; - sds cmd = sdsnew("CLUSTER ADDSLOTS "); - int i, added = 0; + int i, argv_idx = 2; for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { - int last_slot = (i == (CLUSTER_MANAGER_SLOTS - 1)); + if (argv_idx >= argc) break; if (node->slots[i]) { - char *fmt = (!last_slot ? 
"%u " : "%u"); - cmd = sdscatfmt(cmd, fmt, i); - added++; + argv[argv_idx] = sdsfromlonglong((long long) i); + argvlen[argv_idx] = sdslen(argv[argv_idx]); + argv_idx++; } } - if (!added) { + if (!argv_idx) { success = 0; goto cleanup; } - argv = cliSplitArgs(cmd, &argc); - if (argc == 0 || argv == NULL) { - success = 0; - goto cleanup; - } - argvlen = zmalloc(argc*sizeof(size_t)); - for (i = 0; i < argc; i++) - argvlen[i] = sdslen(argv[i]); redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen); if (redisGetReply(node->context, &_reply) != REDIS_OK) { success = 0; @@ -2395,9 +2391,11 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) goto cleanup; } cleanup: - sdsfree(cmd); zfree(argvlen); - if (argv != NULL) sdsfreesplitres(argv,argc); + if (argv != NULL) { + for (i = 2; i < argc; i++) sdsfree(argv[i]); + zfree(argv); + } if (reply != NULL) freeReplyObject(reply); return success; } From 8d1f8e343e9b24ffff9f88fb47a8d6c7ae48584e Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 23 Mar 2018 16:46:43 +0100 Subject: [PATCH 50/66] Cluster Manager: rebalance command --- src/redis-cli.c | 297 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 286 insertions(+), 11 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 4f87f9067..49ba41257 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -71,6 +71,7 @@ #define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MIGRATE_TIMEOUT 60000 #define CLUSTER_MANAGER_MIGRATE_PIPELINE 10 +#define CLUSTER_MANAGER_REBALANCE_THRESHOLD 2 #define CLUSTER_MANAGER_INVALID_HOST_ARG \ "Invalid arguments: you need to pass either a valid " \ @@ -108,10 +109,13 @@ #define CLUSTER_MANAGER_FLAG_DISCONNECT 1 << 4 #define CLUSTER_MANAGER_FLAG_FAIL 1 << 5 -#define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0 -#define CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1 -#define CLUSTER_MANAGER_CMD_FLAG_YES 1 << 2 -#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 +#define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0 
+#define CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1 +#define CLUSTER_MANAGER_CMD_FLAG_YES 1 << 2 +#define CLUSTER_MANAGER_CMD_FLAG_AUTOWEIGHTS 1 << 3 +#define CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER 1 << 4 +#define CLUSTER_MANAGER_CMD_FLAG_SIMULATE 1 << 5 +#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 #define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 #define CLUSTER_MANAGER_OPT_COLD 1 << 1 @@ -157,9 +161,12 @@ typedef struct clusterManagerCommand { int replicas; char *from; char *to; + char **weight; + int weight_argc; int slots; int timeout; int pipeline; + float threshold; } clusterManagerCommand; static void createClusterManagerCommand(char *cmdname, int argc, char **argv); @@ -206,6 +213,7 @@ static struct config { int eval_ldb_end; /* Lua debugging session ended. */ int enable_ldb_on_eval; /* Handle manual SCRIPT DEBUG + EVAL commands. */ int last_cmd_type; + int verbose; clusterManagerCommand cluster_manager_command; } config; @@ -1266,6 +1274,8 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"-d") && !lastarg) { sdsfree(config.mb_delim); config.mb_delim = sdsnew(argv[++i]); + } else if (!strcmp(argv[i],"--verbose")) { + config.verbose = 1; } else if (!strcmp(argv[i],"--cluster") && !lastarg) { if (CLUSTER_MANAGER_MODE()) usage(); char *cmd = argv[++i]; @@ -1282,15 +1292,35 @@ static int parseOptions(int argc, char **argv) { config.cluster_manager_command.from = argv[++i]; } else if (!strcmp(argv[i],"--cluster-to") && !lastarg) { config.cluster_manager_command.to = argv[++i]; + } else if (!strcmp(argv[i],"--cluster-weight") && !lastarg) { + int widx = i + 1; + char **weight = argv + widx; + int wargc = 0; + for (; widx < argc; widx++) { + if (strstr(argv[widx], "--") == argv[widx]) break; + wargc++; + } + if (wargc > 0) { + config.cluster_manager_command.weight = weight; + config.cluster_manager_command.weight_argc = wargc; + } } else if (!strcmp(argv[i],"--cluster-slots") && !lastarg) { config.cluster_manager_command.slots = 
atoi(argv[++i]); } else if (!strcmp(argv[i],"--cluster-timeout") && !lastarg) { config.cluster_manager_command.timeout = atoi(argv[++i]); } else if (!strcmp(argv[i],"--cluster-pipeline") && !lastarg) { config.cluster_manager_command.pipeline = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-threshold") && !lastarg) { + config.cluster_manager_command.threshold = atof(argv[++i]); } else if (!strcmp(argv[i],"--cluster-yes")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_YES; + } else if (!strcmp(argv[i],"--cluster-simulate")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_SIMULATE; + } else if (!strcmp(argv[i],"--cluster-use-empty-masters")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; } else if (!strcmp(argv[i],"-v") || !strcmp(argv[i], "--version")) { sds version = cliVersion(); printf("redis-cli %s\n", version); @@ -1390,6 +1420,7 @@ static void usage(void) { " are not rolled back from the server memory.\n" " --cluster [args...] [opts...]\n" " Cluster Manager command and arguments (see below).\n" +" --verbose Verbose mode.\n" " --help Output this help and exit.\n" " --version Output version and exit.\n" "\n" @@ -1749,6 +1780,8 @@ typedef struct clusterManagerNode { sds *importing; int migrating_count; int importing_count; + float weight; /* Weight used by rebalance */ + int balance; /* Used by rebalance */ } clusterManagerNode; /* Data structure used to represent a sequence of nodes. 
*/ @@ -1780,6 +1813,7 @@ typedef int clusterManagerCommandProc(int argc, char **argv); static clusterManagerNode *clusterManagerNewNode(char *ip, int port); static clusterManagerNode *clusterManagerNodeByName(const char *name); +static clusterManagerNode *clusterManagerNodeByAbbreviatedName(const char *n); static void clusterManagerNodeResetSlots(clusterManagerNode *node); static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, @@ -1813,6 +1847,7 @@ static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandReshard(int argc, char **argv); +static int clusterManagerCommandRebalance(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); @@ -1831,6 +1866,9 @@ clusterManagerCommandDef clusterManagerCommands[] = { {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, {"reshard", clusterManagerCommandReshard, -1, "host:port", "from ,to ,slots ,yes,timeout ,pipeline "}, + {"rebalance", clusterManagerCommandRebalance, -1, "host:port", + "weight ,use-empty-masters," + "timeout ,simulate,pipeline ,threshold "}, {"call", clusterManagerCommandCall, -2, "host:port command arg arg .. arg", NULL}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} @@ -1970,10 +2008,13 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { node->migrating_count = 0; node->importing_count = 0; node->replicas_count = 0; + node->weight = 1.0f; + node->balance = 0; clusterManagerNodeResetSlots(node); return node; } +/* Return the node with the specified ID or NULL. 
*/ static clusterManagerNode *clusterManagerNodeByName(const char *name) { if (cluster_manager.nodes == NULL) return NULL; clusterManagerNode *found = NULL; @@ -1994,6 +2035,32 @@ static clusterManagerNode *clusterManagerNodeByName(const char *name) { return found; } +/* Like get_node_by_name but the specified name can be just the first + * part of the node ID as long as the prefix in unique across the + * cluster. + */ +static clusterManagerNode *clusterManagerNodeByAbbreviatedName(const char*name) +{ + if (cluster_manager.nodes == NULL) return NULL; + clusterManagerNode *found = NULL; + sds lcname = sdsempty(); + lcname = sdscpy(lcname, name); + sdstolower(lcname); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->name && + strstr(n->name, lcname) == n->name) { + found = n; + break; + } + } + sdsfree(lcname); + return found; +} + static void clusterManagerNodeResetSlots(clusterManagerNode *node) { memset(node->slots, 0, sizeof(node->slots)); node->slots_count = 0; @@ -2898,6 +2965,12 @@ int clusterManagerSlotCountCompareDesc(const void *n1, const void *n2) { return node2->slots_count - node1->slots_count; } +int clusterManagerCompareNodeBalance(const void *n1, const void *n2) { + clusterManagerNode *node1 = *((clusterManagerNode **) n1); + clusterManagerNode *node2 = *((clusterManagerNode **) n2); + return node1->balance - node2->balance; +} + static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { sds signature = NULL; int node_count = 0, i = 0, name_len = 0; @@ -3200,6 +3273,19 @@ static void clusterManagerShowReshardTable(list *table) { } } +static void clusterManagerReleaseReshardTable(list *table) { + if (table != NULL) { + listIter li; + listNode *ln; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + zfree(item); + } + listRelease(table); + } +} + static void 
clusterManagerLog(int level, const char* fmt, ...) { int use_colors = (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COLOR); @@ -3775,14 +3861,199 @@ static int clusterManagerCommandReshard(int argc, char **argv) { } cleanup: listRelease(sources); - if (table) { - listRewind(table, &li); - while ((ln = listNext(&li)) != NULL) { - clusterManagerReshardTableItem *item = ln->value; - zfree(item); - } - listRelease(table); + clusterManagerReleaseReshardTable(table); + return result; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + +static int clusterManagerCommandRebalance(int argc, char **argv) { + int port = 0; + char *ip = NULL; + clusterManagerNode **weightedNodes = NULL; + list *involved = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + int result = 1, i; + if (config.cluster_manager_command.weight != NULL) { + for (i = 0; i < config.cluster_manager_command.weight_argc; i++) { + char *name = config.cluster_manager_command.weight[i]; + char *p = strchr(name, '='); + if (p == NULL) { + result = 0; + goto cleanup; + } + *p = '\0'; + float w = atof(++p); + clusterManagerNode *n = clusterManagerNodeByAbbreviatedName(name); + if (n == NULL) { + clusterManagerLogErr("*** No such master node %s\n", name); + result = 0; + goto cleanup; + } + n->weight = w; + } } + float total_weight = 0; + int nodes_involved = 0; + int use_empty = config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; + + involved = listCreate(); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + /* Compute the total cluster weight. 
*/ + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + if (!use_empty && n->slots_count == 0) { + n->weight = 0; + continue; + } + total_weight += n->weight; + nodes_involved++; + listAddNodeTail(involved, n); + } + weightedNodes = zmalloc(nodes_involved * + sizeof(clusterManagerNode *)); + if (weightedNodes == NULL) goto cleanup; + /* Check cluster, only proceed if it looks sane. */ + clusterManagerCheckCluster(1); + if (cluster_manager.errors && listLength(cluster_manager.errors) > 0) { + clusterManagerLogErr("*** Please fix your cluster problems " + "before rebalancing" ); + result = 0; + goto cleanup; + } + /* Calculate the slots balance for each node. It's the number of + * slots the node should lose (if positive) or gain (if negative) + * in order to be balanced. */ + int threshold_reached = 0, total_balance = 0; + float threshold = config.cluster_manager_command.threshold; + i = 0; + listRewind(involved, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + weightedNodes[i++] = n; + int expected = (((float)CLUSTER_MANAGER_SLOTS / total_weight) * + (int) n->weight); + n->balance = n->slots_count - expected; + total_balance += n->balance; + /* Compute the percentage of difference between the + * expected number of slots and the real one, to see + * if it's over the threshold specified by the user. */ + int over_threshold = 0; + if (config.cluster_manager_command.threshold > 0) { + if (n->slots_count > 0) { + float err_perc = fabs((100-(100.0*expected/n->slots_count))); + if (err_perc > threshold) over_threshold = 1; + } else if (expected > 1) { + over_threshold = 1; + } + } + if (over_threshold) threshold_reached = 1; + } + if (!threshold_reached) { + clusterManagerLogErr("*** No rebalancing needed! 
" + "All nodes are within the %.2f%% threshold.\n", + config.cluster_manager_command.threshold); + result = 0; + goto cleanup; + } + /* Because of rounding, it is possible that the balance of all nodes + * summed does not give 0. Make sure that nodes that have to provide + * slots are always matched by nodes receiving slots. */ + while (total_balance > 0) { + listRewind(involved, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->balance < 0 && total_balance > 0) { + n->balance--; + total_balance--; + } + } + } + /* Sort nodes by their slots balance. */ + qsort(weightedNodes, nodes_involved, sizeof(clusterManagerNode *), + clusterManagerCompareNodeBalance); + clusterManagerLogInfo(">>> Rebalancing across %d nodes. " + "Total weight = %.2f\n", + nodes_involved, total_weight); + if (config.verbose) { + for (i = 0; i < nodes_involved; i++) { + clusterManagerNode *n = weightedNodes[i]; + printf("%s:%d balance is %d slots\n", n->ip, n->port, n->balance); + } + } + /* Now we have at the start of the 'sn' array nodes that should get + * slots, at the end nodes that must give slots. + * We take two indexes, one at the start, and one at the end, + * incrementing or decrementing the indexes accordingly til we + * find nodes that need to get/provide slots. */ + int dst_idx = 0; + int src_idx = nodes_involved - 1; + int simulate = config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_SIMULATE; + while (dst_idx < src_idx) { + clusterManagerNode *dst = weightedNodes[dst_idx]; + clusterManagerNode *src = weightedNodes[src_idx]; + int db = abs(dst->balance); + int sb = abs(src->balance); + int numslots = (db < sb ? db : sb); + if (numslots > 0) { + printf("Moving %d slots from %s:%d to %s:%d\n", numslots, + src->ip, + src->port, + dst->ip, + dst->port); + /* Actaully move the slots. 
*/ + list *lsrc = listCreate(), *table = NULL; + listAddNodeTail(lsrc, src); + table = clusterManagerComputeReshardTable(lsrc, numslots); + listRelease(lsrc); + int table_len = (int) listLength(table); + if (!table || table_len != numslots) { + clusterManagerLogErr("*** Assertio failed: Reshard table " + "!= number of slots"); + result = 0; + goto end_move; + } + if (simulate) { + for (i = 0; i < table_len; i++) printf("#"); + } else { + int opts = CLUSTER_MANAGER_OPT_QUIET | + CLUSTER_MANAGER_OPT_UPDATE; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + result = clusterManagerMoveSlot(item->source, + dst, + item->slot, + opts, NULL); + if (!result) goto end_move; + printf("#"); + fflush(stdout); + } + + } + printf("\n"); +end_move: + clusterManagerReleaseReshardTable(table); + if (!result) goto cleanup; + } + /* Update nodes balance. */ + dst->balance += numslots; + src->balance -= numslots; + if (dst->balance == 0) dst_idx++; + if (src->balance == 0) src_idx --; + } +cleanup: + if (involved != NULL) listRelease(involved); + if (weightedNodes != NULL) zfree(weightedNodes); return result; invalid_args: fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); @@ -5169,6 +5440,7 @@ int main(int argc, char **argv) { config.eval_ldb_sync = 0; config.enable_ldb_on_eval = 0; config.last_cmd_type = -1; + config.verbose = 0; config.cluster_manager_command.name = NULL; config.cluster_manager_command.argc = 0; config.cluster_manager_command.argv = NULL; @@ -5176,9 +5448,12 @@ int main(int argc, char **argv) { config.cluster_manager_command.replicas = 0; config.cluster_manager_command.from = NULL; config.cluster_manager_command.to = NULL; + config.cluster_manager_command.weight = NULL; config.cluster_manager_command.slots = 0; config.cluster_manager_command.timeout = CLUSTER_MANAGER_MIGRATE_TIMEOUT; config.cluster_manager_command.pipeline = CLUSTER_MANAGER_MIGRATE_PIPELINE; + 
config.cluster_manager_command.threshold = + CLUSTER_MANAGER_REBALANCE_THRESHOLD; pref.hints = 1; spectrum_palette = spectrum_palette_color; From 213f0cff93902bb069259fd9dd04e929ee10c554 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 6 Apr 2018 18:02:40 +0200 Subject: [PATCH 51/66] Cluster Manager: fix command. --- src/redis-cli.c | 715 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 642 insertions(+), 73 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 49ba41257..8d5732c20 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -151,6 +151,7 @@ static uint64_t dictSdsHash(const void *key); static int dictSdsKeyCompare(void *privdata, const void *key1, const void *key2); static void dictSdsDestructor(void *privdata, void *val); +static void dictListDestructor(void *privdata, void *val); /* Cluster Manager Command Info */ typedef struct clusterManagerCommand { @@ -406,6 +407,12 @@ static void dictSdsDestructor(void *privdata, void *val) sdsfree(val); } +void dictListDestructor(void *privdata, void *val) +{ + DICT_NOTUSED(privdata); + listRelease((list*)val); +} + /* _serverAssert is needed by dict */ void _serverAssert(const char *estr, const char *file, int line) { fprintf(stderr, "=== ASSERTION FAILED ==="); @@ -1446,6 +1453,15 @@ static void usage(void) { exit(1); } +static int confirmWithYes(char *msg) { + printf("%s (type 'yes' to accept): ", msg); + fflush(stdout); + char buf[4]; + int nread = read(fileno(stdin),buf,4); + buf[3] = '\0'; + return (nread != 0 && !strcmp("yes", buf)); +} + /* Turn the plain C strings into Sds strings */ static char **convertToSds(int count, char** args) { int j; @@ -1751,7 +1767,7 @@ static int evalMode(int argc, char **argv) { } /*------------------------------------------------------------------------------ - * Cluster Manager mode + * Cluster Manager *--------------------------------------------------------------------------- */ /* The Cluster Manager global structure */ @@ -1760,6 +1776,9 @@ 
static struct clusterManager { list *errors; } cluster_manager; +/* Used by clusterManagerFixSlotsCoverage */ +dict *clusterManagerUncoveredSlots = NULL; + typedef struct clusterManagerNode { redisContext *context; sds name; @@ -1776,10 +1795,12 @@ typedef struct clusterManagerNode { int slots_count; int replicas_count; list *friends; - sds *migrating; - sds *importing; - int migrating_count; - int importing_count; + sds *migrating; /* An array of sds where even strings are slots and odd + * strings are the destination node IDs. */ + sds *importing; /* An array of sds where even strings are slots and odd + * strings are the source node IDs. */ + int migrating_count; /* Length of the migrating array (migrating slots*2) */ + int importing_count; /* Length of the importing array (importing slots*2) */ float weight; /* Weight used by rebalance */ int balance; /* Used by rebalance */ } clusterManagerNode; @@ -1829,7 +1850,7 @@ static void clusterManagerShowNodes(void); static void clusterManagerShowInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); static void clusterManagerWaitForClusterJoin(void); -static void clusterManagerCheckCluster(int quiet); +static int clusterManagerCheckCluster(int quiet); static void clusterManagerLog(int level, const char* fmt, ...); static int clusterManagerIsConfigConsistent(void); static void clusterManagerOnError(sds err); @@ -1846,6 +1867,7 @@ static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandFix(int argc, char **argv); static int clusterManagerCommandReshard(int argc, char **argv); static int clusterManagerCommandRebalance(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); @@ -1863,6 +1885,7 @@ clusterManagerCommandDef 
clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", "replicas "}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, + {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, {"reshard", clusterManagerCommandReshard, -1, "host:port", "from ,to ,slots ,yes,timeout ,pipeline "}, @@ -1988,6 +2011,8 @@ static void freeClusterManager(void) { listRelease(cluster_manager.errors); cluster_manager.errors = NULL; } + if (clusterManagerUncoveredSlots != NULL) + dictRelease(clusterManagerUncoveredSlots); } static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { @@ -2013,6 +2038,38 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { clusterManagerNodeResetSlots(node); return node; } +/* Check whether reply is NULL or its type is REDIS_REPLY_ERROR. In the + * latter case, if 'err' arg is not NULL, it gets allocated with a copy + * of reply error (it's up to the caller function to free it), otherwise + * the error is directly printed. */ +static int clusterManagerCheckRedisReply(clusterManagerNode *n, + redisReply *r, char **err) +{ + int is_err = 0; + if (!r || (is_err = (r->type == REDIS_REPLY_ERROR))) { + if (is_err) { + if (err != NULL) { + *err = zmalloc((r->len + 1) * sizeof(char)); + strcpy(*err, r->str); + } else CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, r->str); + } + return 0; + } + return 1; +} + +static void clusterManagerRemoveNodeFromList(list *nodelist, + clusterManagerNode *node) { + listIter li; + listNode *ln; + listRewind(nodelist, &li); + while ((ln = listNext(&li)) != NULL) { + if (node == ln->value) { + listDelNode(nodelist, ln); + break; + } + } +} /* Return the node with the specified ID or NULL.
*/ static clusterManagerNode *clusterManagerNodeByName(const char *name) { @@ -2470,10 +2527,10 @@ cleanup: /* Set slot status to "importing" or "migrating" */ static int clusterManagerSetSlot(clusterManagerNode *node1, clusterManagerNode *node2, - int slot, const char *mode, char **err) { + int slot, const char *status, char **err) { redisReply *reply = CLUSTER_MANAGER_COMMAND(node1, "CLUSTER " "SETSLOT %d %s %s", - slot, mode, + slot, status, (char *) node2->name); if (err != NULL) *err = NULL; if (!reply) return 0; @@ -2492,6 +2549,70 @@ cleanup: return success; } +/* Migrate keys taken from reply->elements. It returns the reply from the + * MIGRATE command, or NULL if something goes wrong. If the argument 'dots' + * is not NULL, a dot will be printed for every migrated key. */ +static redisReply *clusterManagerMigrateKeysInReply(clusterManagerNode *source, + clusterManagerNode *target, + redisReply *reply, + int replace, int timeout, + char *dots) +{ + redisReply *migrate_reply = NULL; + char **argv = NULL; + size_t *argv_len = NULL; + int c = (replace ? 
8 : 7); + size_t argc = c + reply->elements; + size_t i, offset = 6; // Keys Offset + argv = zcalloc(argc * sizeof(char *)); + argv_len = zcalloc(argc * sizeof(size_t)); + char portstr[255]; + char timeoutstr[255]; + snprintf(portstr, 10, "%d", target->port); + snprintf(timeoutstr, 10, "%d", timeout); + argv[0] = "MIGRATE"; + argv_len[0] = 7; + argv[1] = target->ip; + argv_len[1] = strlen(target->ip); + argv[2] = portstr; + argv_len[2] = strlen(portstr); + argv[3] = ""; + argv_len[3] = 0; + argv[4] = "0"; + argv_len[4] = 1; + argv[5] = timeoutstr; + argv_len[5] = strlen(timeoutstr); + if (replace) { + argv[offset] = "REPLACE"; + argv_len[offset] = 7; + offset++; + } + argv[offset] = "KEYS"; + argv_len[offset] = 4; + offset++; + for (i = 0; i < reply->elements; i++) { + redisReply *entry = reply->element[i]; + size_t idx = i + offset; + assert(entry->type == REDIS_REPLY_STRING); + argv[idx] = (char *) sdsnew(entry->str); + argv_len[idx] = entry->len; + if (dots) dots[i] = '.'; + } + if (dots) dots[reply->elements] = '\0'; + void *_reply = NULL; + redisAppendCommandArgv(source->context,argc, + (const char**)argv,argv_len); + int success = (redisGetReply(source->context, &_reply) == REDIS_OK); + for (i = 0; i < reply->elements; i++) sdsfree(argv[i + offset]); + if (!success) goto cleanup; + migrate_reply = (redisReply *) _reply; +cleanup: + zfree(argv); + zfree(argv_len); + return migrate_reply; +} + +/* Migrate all keys in the given slot from source to target.*/ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, clusterManagerNode *target, int slot, int timeout, @@ -2499,10 +2620,11 @@ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, char **err) { int success = 1; + int do_fix = (config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_FIX); while (1) { + char *dots = NULL; redisReply *reply = NULL, *migrate_reply = NULL; - char **argv = NULL; - size_t *argv_len = NULL; reply = CLUSTER_MANAGER_COMMAND(source, 
"CLUSTER " "GETKEYSINSLOT %d %d", slot, pipeline); @@ -2523,57 +2645,37 @@ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, freeReplyObject(reply); break; } - char *dots = (verbose ? zmalloc((count+1) * sizeof(char)) : NULL); + if (verbose) dots = zmalloc((count+1) * sizeof(char)); /* Calling MIGRATE command. */ - size_t argc = count + 8; - argv = zcalloc(argc * sizeof(char *)); - argv_len = zcalloc(argc * sizeof(size_t)); - char portstr[255]; - char timeoutstr[255]; - snprintf(portstr, 10, "%d", target->port); - snprintf(timeoutstr, 10, "%d", timeout); - argv[0] = "MIGRATE"; - argv_len[0] = 7; - argv[1] = target->ip; - argv_len[1] = strlen(target->ip); - argv[2] = portstr; - argv_len[2] = strlen(portstr); - argv[3] = ""; - argv_len[3] = 0; - argv[4] = "0"; - argv_len[4] = 1; - argv[5] = timeoutstr; - argv_len[5] = strlen(timeoutstr); - argv[6] = "REPLACE"; - argv_len[6] = 7; - argv[7] = "KEYS"; - argv_len[7] = 4; - for (size_t i = 0; i < count; i++) { - redisReply *entry = reply->element[i]; - size_t idx = i + 8; - assert(entry->type == REDIS_REPLY_STRING); - argv[idx] = (char *) sdsnew(entry->str); - argv_len[idx] = entry->len; - if (verbose) dots[i] = '.'; - } - if (verbose) dots[count] = '\0'; - void *_reply = NULL; - redisAppendCommandArgv(source->context,argc, - (const char**)argv,argv_len); - success = (redisGetReply(source->context, &_reply) == REDIS_OK); - for (size_t i = 0; i < count; i++) sdsfree(argv[i + 8]); - if (!success) goto next; - migrate_reply = (redisReply *) _reply; + migrate_reply = clusterManagerMigrateKeysInReply(source, target, + reply, 0, timeout, + dots); + if (migrate_reply == NULL) goto next; if (migrate_reply->type == REDIS_REPLY_ERROR) { - // TODO: Implement fix. 
- success = 0; - if (err != NULL) { - *err = zmalloc((migrate_reply->len + 1) * sizeof(char)); - strcpy(*err, migrate_reply->str); - printf("\n"); - CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + if (do_fix && strstr(migrate_reply->str, "BUSYKEY")) { + clusterManagerLogWarn("*** Target key exists. " + "Replacing it for FIX.\n"); + freeReplyObject(migrate_reply); + /* Try to migrate keys adding REPLACE option. */ + migrate_reply = clusterManagerMigrateKeysInReply(source, + target, + reply, + 1, timeout, + NULL); + success = (migrate_reply != NULL && + migrate_reply->type != REDIS_REPLY_ERROR); + } else success = 0; + if (!success) { + if (migrate_reply != NULL) { + if (err) { + *err = zmalloc((migrate_reply->len + 1) * sizeof(char)); + strcpy(*err, migrate_reply->str); + } + printf("\n"); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + } + goto next; } - goto next; } if (verbose) { printf("%s", dots); @@ -2582,8 +2684,7 @@ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, next: if (reply != NULL) freeReplyObject(reply); if (migrate_reply != NULL) freeReplyObject(migrate_reply); - zfree(argv); - zfree(argv_len); + if (dots) zfree(dots); if (!success) break; } return success; @@ -2729,6 +2830,7 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char *name = NULL, *addr = NULL, *flags = NULL, *master_id = NULL, *ping_sent = NULL, *ping_recv = NULL, *config_epoch = NULL, *link_status = NULL; + UNUSED(link_status); int i = 0; while ((p = strchr(line, ' ')) != NULL) { *p = '\0'; @@ -2974,11 +3076,11 @@ int clusterManagerCompareNodeBalance(const void *n1, const void *n2) { static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { sds signature = NULL; int node_count = 0, i = 0, name_len = 0; + char **node_configs = NULL; redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); if (reply == NULL || reply->type == REDIS_REPLY_ERROR) goto cleanup; char *lines = reply->str, *p, *line; - char 
**node_configs = NULL; while ((p = strstr(lines, "\n")) != NULL) { i = 0; *p = '\0'; @@ -3057,8 +3159,10 @@ static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { } cleanup: if (reply != NULL) freeReplyObject(reply); - for (i = 0; i < node_count; i++) zfree(node_configs[i]); - zfree(node_configs); + if (node_configs != NULL) { + for (i = 0; i < node_count; i++) zfree(node_configs[i]); + zfree(node_configs); + } return signature; } @@ -3114,9 +3218,453 @@ static int clusterManagerGetCoveredSlots(char *all_slots) { return totslots; } -static void clusterManagerCheckCluster(int quiet) { +static void clusterManagerPrintSlotsList(list *slots) { + listIter li; + listNode *ln; + listRewind(slots, &li); + sds first = NULL; + while ((ln = listNext(&li)) != NULL) { + sds slot = ln->value; + if (!first) first = slot; + else printf(", "); + printf("%s", slot); + } + printf("\n"); +} + +/* Return the node, among 'nodes' with the greatest number of keys + * in the specified slot. */ +static clusterManagerNode * clusterManagerGetNodeWithMostKeysInSlot(list *nodes, + int slot, + char **err) +{ + clusterManagerNode *node = NULL; + int numkeys = 0; + listIter li; + listNode *ln; + listRewind(nodes, &li); + if (err) *err = NULL; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + redisReply *r = + CLUSTER_MANAGER_COMMAND(n, "CLUSTER COUNTKEYSINSLOTi %d", slot); + int success = clusterManagerCheckRedisReply(n, r, err); + if (success) { + if (r->integer > numkeys || node == NULL) { + numkeys = r->integer; + node = n; + } + } + if (r != NULL) freeReplyObject(r); + /* If the reply contains errors */ + if (!success) { + if (err != NULL && *err != NULL) + CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err); + node = NULL; + break; + } + } + return node; +} + +static int clusterManagerFixSlotsCoverage(char *all_slots) { + int i, fixed = 0; + list *none = NULL, *single = NULL, *multi = 
NULL; + clusterManagerLogInfo(">>> Fixing slots coverage...\n"); + printf("List of not covered slots: \n"); + int uncovered_count = 0; + sds log = sdsempty(); + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + int covered = all_slots[i]; + if (!covered) { + sds key = sdsfromlonglong((long long) i); + if (uncovered_count++ > 0) printf(","); + printf("%s", (char *) key); + list *slot_nodes = listCreate(); + sds slot_nodes_str = sdsempty(); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + redisReply *reply = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER GETKEYSINSLOT %d %d", i, 1); + if (!clusterManagerCheckRedisReply(n, reply, NULL)) { + fixed = -1; + if (reply) freeReplyObject(reply); + goto cleanup; + } + assert(reply->type == REDIS_REPLY_ARRAY); + if (reply->elements > 0) { + listAddNodeTail(slot_nodes, n); + if (listLength(slot_nodes) > 1) + slot_nodes_str = sdscat(slot_nodes_str, ", "); + slot_nodes_str = sdscatfmt(slot_nodes_str, + "%s:%u", n->ip, n->port); + } + freeReplyObject(reply); + } + log = sdscatfmt(log, "\nSlot %S has keys in %u nodes: %S", + key, listLength(slot_nodes), slot_nodes_str); + sdsfree(slot_nodes_str); + dictAdd(clusterManagerUncoveredSlots, key, slot_nodes); + } + } + printf("\n%s\n", log); + /* For every slot, take action depending on the actual condition: + * 1) No node has keys for this slot. + * 2) A single node has keys for this slot. + * 3) Multiple nodes have keys for this slot. 
*/ + none = listCreate(); + single = listCreate(); + multi = listCreate(); + dictIterator *iter = dictGetIterator(clusterManagerUncoveredSlots); + dictEntry *entry; + while ((entry = dictNext(iter)) != NULL) { + sds slot = (sds) dictGetKey(entry); + list *nodes = (list *) dictGetVal(entry); + switch (listLength(nodes)){ + case 0: listAddNodeTail(none, slot); break; + case 1: listAddNodeTail(single, slot); break; + default: listAddNodeTail(multi, slot); break; + } + } + dictReleaseIterator(iter); + + /* Handle case "1": keys in no node. */ + if (listLength(none) > 0) { + printf("The following uncovered slots have no keys " + "across the cluster:\n"); + clusterManagerPrintSlotsList(none); + if (confirmWithYes("Fix these slots by covering with a random node?")){ + srand(time(NULL)); + listIter li; + listNode *ln; + listRewind(none, &li); + while ((ln = listNext(&li)) != NULL) { + sds slot = ln->value; + long idx = (long) (rand() % listLength(cluster_manager.nodes)); + listNode *node_n = listIndex(cluster_manager.nodes, idx); + assert(node_n != NULL); + clusterManagerNode *n = node_n->value; + clusterManagerLogInfo(">>> Covering slot %s with %s:%d\n", + slot, n->ip, n->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER ADDSLOTS %s", slot); + if (!clusterManagerCheckRedisReply(n, r, NULL)) fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + fixed++; + } + } + } + + /* Handle case "2": keys only in one node. 
*/ + if (listLength(single) > 0) { + printf("The following uncovered slots have keys in just one node:\n"); + clusterManagerPrintSlotsList(single); + if (confirmWithYes("Fix these slots by covering with those nodes?")){ + listIter li; + listNode *ln; + listRewind(single, &li); + while ((ln = listNext(&li)) != NULL) { + sds slot = ln->value; + dictEntry *entry = dictFind(clusterManagerUncoveredSlots, slot); + assert(entry != NULL); + list *nodes = (list *) dictGetVal(entry); + listNode *fn = listFirst(nodes); + assert(fn != NULL); + clusterManagerNode *n = fn->value; + clusterManagerLogInfo(">>> Covering slot %s with %s:%d\n", + slot, n->ip, n->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER ADDSLOTS %s", slot); + if (!clusterManagerCheckRedisReply(n, r, NULL)) fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + fixed++; + } + } + } + + /* Handle case "3": keys in multiple nodes. */ + if (listLength(multi) > 0) { + printf("The folowing uncovered slots have keys in multiple nodes:\n"); + clusterManagerPrintSlotsList(multi); + if (confirmWithYes("Fix these slots by moving keys " + "into a single node?")) { + listIter li; + listNode *ln; + listRewind(multi, &li); + while ((ln = listNext(&li)) != NULL) { + sds slot = ln->value; + dictEntry *entry = dictFind(clusterManagerUncoveredSlots, slot); + assert(entry != NULL); + list *nodes = (list *) dictGetVal(entry); + int s = atoi(slot); + clusterManagerNode *target = + clusterManagerGetNodeWithMostKeysInSlot(nodes, s, NULL); + if (target == NULL) { + fixed = -1; + goto cleanup; + } + clusterManagerLogInfo(">>> Covering slot %s moving keys " + "to %s:%d\n", slot, + target->ip, target->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(target, + "CLUSTER ADDSLOTS %s", slot); + if (!clusterManagerCheckRedisReply(target, r, NULL)) fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + r = CLUSTER_MANAGER_COMMAND(target, + "CLUSTER SETSLOT %s %s", slot, "STABLE"); + if 
(!clusterManagerCheckRedisReply(target, r, NULL)) fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + listIter nli; + listNode *nln; + listRewind(nodes, &nli); + while ((nln = listNext(&nli)) != NULL) { + clusterManagerNode *src = nln->value; + if (src == target) continue; + /* Set the source node in 'importing' state + * (even if we will actually migrate keys away) + * in order to avoid receiving redirections + * for MIGRATE. */ + redisReply *r = CLUSTER_MANAGER_COMMAND(src, + "CLUSTER SETSLOT %s %s %s", slot, + "IMPORTING", target->name); + if (!clusterManagerCheckRedisReply(target, r, NULL)) + fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + int opts = CLUSTER_MANAGER_OPT_VERBOSE | + CLUSTER_MANAGER_OPT_COLD; + if (!clusterManagerMoveSlot(src, target, s, opts, NULL)) { + fixed = -1; + goto cleanup; + } + } + fixed++; + } + } + } +cleanup: + sdsfree(log); + if (none) listRelease(none); + if (single) listRelease(single); + if (multi) listRelease(multi); + return fixed; +} + +/* Slot 'slot' was found to be in importing or migrating state in one or + * more nodes. This function fixes this condition by migrating keys where + * it seems more sensible. */ +static int clusterManagerFixOpenSlot(int slot) { + clusterManagerLogInfo(">>> Fixing open slot %d\n", slot); + /* Try to obtain the current slot owner, according to the current + * nodes configuration. 
*/ + int success = 1; + list *owners = listCreate(); + list *migrating = listCreate(); + list *importing = listCreate(); + sds migrating_str = sdsempty(); + sds importing_str = sdsempty(); + clusterManagerNode *owner = NULL; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->slots[slot]) { + if (owner == NULL) owner = n; + listAddNodeTail(owners, n); + } + } + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->migrating) { + for (int i = 0; i < n->migrating_count; i += 2) { + sds migrating_slot = n->migrating[i]; + if (atoi(migrating_slot) == slot) { + char *sep = (listLength(migrating) == 0 ? "" : ","); + migrating_str = sdscatfmt(migrating_str, "%s%S:%u", + sep, n->ip, n->port); + listAddNodeTail(migrating, n); + break; + } + } + } + if (n->importing) { + for (int i = 0; i < n->importing_count; i += 2) { + sds importing_slot = n->importing[i]; + if (atoi(importing_slot) == slot) { + char *sep = (listLength(importing) == 0 ? "" : ","); + importing_str = sdscatfmt(importing_str, "%s%S:%u", + sep, n->ip, n->port); + listAddNodeTail(importing, n); + break; + } + } + } + } + printf("Set as migrating in: %s\n", migrating_str); + printf("Set as importing in: %s\n", importing_str); + /* If there is no slot owner, set as owner the node with the biggest + * number of keys, among the set of migrating / importing nodes. */ + if (owner == NULL) { + clusterManagerLogInfo(">>> Nobody claims ownership, " + "selecting an owner...\n"); + owner = clusterManagerGetNodeWithMostKeysInSlot(cluster_manager.nodes, + slot, NULL); + // If we still don't have an owner, we can't fix it. + if (owner == NULL) { + clusterManagerLogErr("[ERR] Can't select a slot owner.
" + "Impossible to fix.\n"); + success = 0; + goto cleanup; + } + + // Use ADDSLOTS to assign the slot. + printf("*** Configuring %s:%d as the slot owner\n", owner->ip, + owner->port); + redisReply *reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER " + "SETSLOT %d %s", + slot, "STABLE"); + success = clusterManagerCheckRedisReply(owner, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER ADDSLOTS %d", slot); + success = clusterManagerCheckRedisReply(owner, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + /* Make sure this information will propagate. Not strictly needed + * since there is no past owner, so all the other nodes will accept + * whatever epoch this node will claim the slot with. */ + reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER BUMPEPOCH"); + success = clusterManagerCheckRedisReply(owner, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + /* Remove the owner from the list of migrating/importing + * nodes. */ + clusterManagerRemoveNodeFromList(migrating, owner); + clusterManagerRemoveNodeFromList(importing, owner); + } + /* If there are multiple owners of the slot, we need to fix it + * so that a single node is the owner and all the other nodes + * are in importing state. Later the fix can be handled by one + * of the base cases above. + * + * Note that this case also covers multiple nodes having the slot + * in migrating state, since migrating is a valid state only for + * slot owners. 
*/ + if (listLength(owners) > 1) { + owner = clusterManagerGetNodeWithMostKeysInSlot(owners, slot, NULL); + listRewind(owners, &li); + redisReply *reply = NULL; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n == owner) continue; + reply = CLUSTER_MANAGER_COMMAND(n, "CLUSTER DELSLOT %d", slot); + success = clusterManagerCheckRedisReply(n, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + success = clusterManagerSetSlot(n, owner, slot, "importing", NULL); + if (!success) goto cleanup; + clusterManagerRemoveNodeFromList(importing, n); //Avoid duplicates + listAddNodeTail(importing, n); + } + reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER BUMPEPOCH"); + success = clusterManagerCheckRedisReply(owner, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + } + int move_opts = CLUSTER_MANAGER_OPT_VERBOSE; + /* Case 1: The slot is in migrating state in one slot, and in + * importing state in 1 slot. That's trivial to address. */ + if (listLength(migrating) == 1 && listLength(importing) == 1) { + clusterManagerNode *src = listFirst(migrating)->value; + clusterManagerNode *dst = listFirst(importing)->value; + success = clusterManagerMoveSlot(src, dst, slot, move_opts, NULL); + } + /* Case 2: There are multiple nodes that claim the slot as importing, + * they probably got keys about the slot after a restart so opened + * the slot. In this case we just move all the keys to the owner + * according to the configuration. 
*/ + else if (listLength(migrating) == 0 && listLength(importing) > 0) { + clusterManagerLogInfo(">>> Moving all the %d slot keys to its " + "owner %s:%d\n", slot, owner->ip, owner->port); + move_opts |= CLUSTER_MANAGER_OPT_COLD; + listRewind(importing, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n == owner) continue; + success = clusterManagerMoveSlot(n, owner, slot, move_opts, NULL); + if (!success) goto cleanup; + clusterManagerLogInfo(">>> Setting %d as STABLE in " + "%s:%d\n", slot, n->ip, n->port); + + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER SETSLOT %d %s", + slot, "STABLE"); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) goto cleanup; + } + } else { + int try_to_close_slot = (listLength(importing) == 0 && + listLength(migrating) == 1); + if (try_to_close_slot) { + clusterManagerNode *n = listFirst(migrating)->value; + redisReply *r = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER GETKEYSINSLOT %d %d", slot, 10); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) { + if (success) try_to_close_slot = (r->elements == 0); + freeReplyObject(r); + } + if (!success) goto cleanup; + } + /* Case 3: There are no slots claiming to be in importing state, but + * there is a migrating node that actually don't have any key. We + * can just close the slot, probably a reshard interrupted in the middle. */ + if (try_to_close_slot) { + clusterManagerNode *n = listFirst(migrating)->value; + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER SETSLOT %d %s", + slot, "STABLE"); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) goto cleanup; + } else { + success = 0; + clusterManagerLogErr("[ERR] Sorry, redis-cli can't fix this slot " + "yet (work in progress). 
Slot is set as " + "migrating in %s, as importing in %s, " + "owner is %s:%d\n", migrating_str, + importing_str, owner->ip, owner->port); + } + } +cleanup: + listRelease(owners); + listRelease(migrating); + listRelease(importing); + sdsfree(migrating_str); + sdsfree(importing_str); + return success; +} + +static int clusterManagerCheckCluster(int quiet) { listNode *ln = listFirst(cluster_manager.nodes); - if (!ln) return; + if (!ln) return 0; + int result = 1; + int do_fix = config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_FIX; clusterManagerNode *node = ln->value; clusterManagerLogInfo(">>> Performing Cluster Check (using node %s:%d)\n", node->ip, node->port); @@ -3124,6 +3672,7 @@ static void clusterManagerCheckCluster(int quiet) { if (!clusterManagerIsConfigConsistent()) { sds err = sdsnew("[ERR] Nodes don't agree about configuration!"); clusterManagerOnError(err); + result = 0; } else { clusterManagerLogOk("[OK] All nodes agree about slots " "configuration.\n"); @@ -3174,6 +3723,7 @@ static void clusterManagerCheckCluster(int quiet) { } } if (open_slots != NULL) { + result = 0; dictIterator *iter = dictGetIterator(open_slots); dictEntry *entry; sds errstr = sdsnew("[WARNING] The following slots are open: "); @@ -3185,6 +3735,17 @@ static void clusterManagerCheckCluster(int quiet) { } clusterManagerLogErr("%s.\n", (char *) errstr); sdsfree(errstr); + if (do_fix) { + // Fix open slots. 
+ dictReleaseIterator(iter); + iter = dictGetIterator(open_slots); + while ((entry = dictNext(iter)) != NULL) { + sds slot = (sds) dictGetKey(entry); + result = clusterManagerFixOpenSlot(atoi(slot)); + if (!result) break; + } + } + dictReleaseIterator(iter); dictRelease(open_slots); } clusterManagerLogInfo(">>> Check slots coverage...\n"); @@ -3200,7 +3761,16 @@ static void clusterManagerCheckCluster(int quiet) { "covered by nodes.\n", CLUSTER_MANAGER_SLOTS); clusterManagerOnError(err); + result = 0; + if (do_fix/* && result*/) { + dictType dtype = clusterManagerDictType; + dtype.valDestructor = dictListDestructor; + clusterManagerUncoveredSlots = dictCreate(&dtype, NULL); + int fixed = clusterManagerFixSlotsCoverage(slots); + if (fixed > 0) result = 1; + } } + return result; } static clusterManagerNode *clusterNodeForResharding(char *id, @@ -3546,12 +4116,7 @@ assign_replicas: } clusterManagerOptimizeAntiAffinity(ip_nodes, ip_count); clusterManagerShowNodes(); - printf("Can I set the above configuration? 
%s", "(type 'yes' to accept): "); - fflush(stdout); - char buf[4]; - int nread = read(fileno(stdin),buf,4); - buf[3] = '\0'; - if (nread != 0 && !strcmp("yes", buf)) { + if (confirmWithYes("Can I set the above configuration?")) { listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; @@ -3674,13 +4239,17 @@ static int clusterManagerCommandCheck(int argc, char **argv) { clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; clusterManagerShowInfo(); - clusterManagerCheckCluster(0); - return 1; + return clusterManagerCheckCluster(0); invalid_args: fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); return 0; } +static int clusterManagerCommandFix(int argc, char **argv) { + config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_FIX; + return clusterManagerCommandCheck(argc, argv); +} + static int clusterManagerCommandReshard(int argc, char **argv) { int port = 0; char *ip = NULL; From 1239139d082a7917f2bd0ae4150ee9a3a9b8db9c Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 10 Apr 2018 16:25:25 +0200 Subject: [PATCH 52/66] Cluster Manager: import command --- src/Makefile | 2 +- src/redis-cli.c | 216 +++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 195 insertions(+), 23 deletions(-) diff --git a/src/Makefile b/src/Makefile index 14112aa1f..269a70933 100644 --- a/src/Makefile +++ b/src/Makefile @@ -146,7 +146,7 @@ REDIS_SERVER_NAME=redis-server REDIS_SENTINEL_NAME=redis-sentinel REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o 
hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o REDIS_CLI_NAME=redis-cli -REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o +REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o crc16.o REDIS_BENCHMARK_NAME=redis-benchmark REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o zmalloc.o redis-benchmark.o REDIS_CHECK_RDB_NAME=redis-check-rdb diff --git a/src/redis-cli.c b/src/redis-cli.c index 8d5732c20..08a356eb1 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -74,7 +74,7 @@ #define CLUSTER_MANAGER_REBALANCE_THRESHOLD 2 #define CLUSTER_MANAGER_INVALID_HOST_ARG \ - "Invalid arguments: you need to pass either a valid " \ + "[ERR] Invalid arguments: you need to pass either a valid " \ "address (ie. 120.0.0.1:7000) or space separated IP " \ "and port (ie. 
120.0.0.1 7000)\n" #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) @@ -115,7 +115,9 @@ #define CLUSTER_MANAGER_CMD_FLAG_AUTOWEIGHTS 1 << 3 #define CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER 1 << 4 #define CLUSTER_MANAGER_CMD_FLAG_SIMULATE 1 << 5 -#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 +#define CLUSTER_MANAGER_CMD_FLAG_REPLACE 1 << 6 +#define CLUSTER_MANAGER_CMD_FLAG_COPY 1 << 7 +#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 8 #define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 #define CLUSTER_MANAGER_OPT_COLD 1 << 1 @@ -237,6 +239,8 @@ static long getLongInfoField(char *info, char *field); * Utility functions *--------------------------------------------------------------------------- */ +uint16_t crc16(const char *buf, int len); + static long long ustime(void) { struct timeval tv; long long ust; @@ -1325,6 +1329,12 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"--cluster-simulate")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_SIMULATE; + } else if (!strcmp(argv[i],"--cluster-replace")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_REPLACE; + } else if (!strcmp(argv[i],"--cluster-copy")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_COPY; } else if (!strcmp(argv[i],"--cluster-use-empty-masters")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; @@ -1870,6 +1880,7 @@ static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandFix(int argc, char **argv); static int clusterManagerCommandReshard(int argc, char **argv); static int clusterManagerCommandRebalance(int argc, char **argv); +static int clusterManagerCommandImport(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); @@ -1892,6 +1903,8 @@ clusterManagerCommandDef clusterManagerCommands[] = { {"rebalance", 
clusterManagerCommandRebalance, -1, "host:port", "weight ,use-empty-masters," "timeout ,simulate,pipeline ,threshold "}, + {"import", clusterManagerCommandImport, 1, "host:port", + "from ,copy,replace"}, {"call", clusterManagerCommandCall, -2, "host:port command arg arg .. arg", NULL}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} @@ -2383,6 +2396,37 @@ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { return slots; } +/* ----------------------------------------------------------------------------- + * Key space handling + * -------------------------------------------------------------------------- */ + +/* We have 16384 hash slots. The hash slot of a given key is obtained + * as the least significant 14 bits of the crc16 of the key. + * + * However if the key contains the {...} pattern, only the part between + * { and } is hashed. This may be useful in the future to force certain + * keys to be in the same node (assuming no resharding is in progress). */ +static unsigned int keyHashSlot(char *key, int keylen) { + int s, e; /* start-end indexes of { and } */ + + for (s = 0; s < keylen; s++) + if (key[s] == '{') break; + + /* No '{' ? Hash the whole key. This is the base case. */ + if (s == keylen) return crc16(key,keylen) & 0x3FFF; + + /* '{' found? Check if we have the corresponding '}'. */ + for (e = s+1; e < keylen; e++) + if (key[e] == '}') break; + + /* No '}' or nothing between {} ? Hash the whole key. */ + if (e == keylen || e == s+1) return crc16(key,keylen) & 0x3FFF; + + /* If we are here there is both a { and a } on its right. Hash + * what is in the middle between { and }. */ + return crc16(key+s+1,e-s-1) & 0x3FFF; +} + static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent) { sds info = sdsempty(); sds spaces = sdsempty(); @@ -3533,8 +3577,8 @@ static int clusterManagerFixOpenSlot(int slot) { } // Use ADDSLOTS to assign the slot. 
- printf("*** Configuring %s:%d as the slot owner\n", owner->ip, - owner->port); + clusterManagerLogWarn("*** Configuring %s:%d as the slot owner\n", + owner->ip, owner->port); redisReply *reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER " "SETSLOT %d %s", slot, "STABLE"); @@ -4527,7 +4571,7 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { if (over_threshold) threshold_reached = 1; } if (!threshold_reached) { - clusterManagerLogErr("*** No rebalancing needed! " + clusterManagerLogWarn("*** No rebalancing needed! " "All nodes are within the %.2f%% threshold.\n", config.cluster_manager_command.threshold); result = 0; @@ -4586,7 +4630,7 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { listRelease(lsrc); int table_len = (int) listLength(table); if (!table || table_len != numslots) { - clusterManagerLogErr("*** Assertio failed: Reshard table " + clusterManagerLogErr("*** Assertion failed: Reshard table " "!= number of slots"); result = 0; goto end_move; @@ -4629,23 +4673,148 @@ invalid_args: return 0; } -static int clusterManagerCommandCall(int argc, char **argv) { - int port = 0; - char *ip = NULL; - char *addr = argv[0]; - char *c = strrchr(addr, '@'); - int i; - if (c != NULL) *c = '\0'; - c = strrchr(addr, ':'); - if (c != NULL) { - *c = '\0'; - ip = addr; - port = atoi(++c); - } else { - fprintf(stderr, - "Invalid arguments: first agrumnt must be host:port.\n"); - return 0; +static int clusterManagerCommandImport(int argc, char **argv) { + int success = 1; + int port = 0, src_port = 0; + char *ip = NULL, *src_ip = NULL; + char *invalid_args_msg = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) { + invalid_args_msg = CLUSTER_MANAGER_INVALID_HOST_ARG; + goto invalid_args; } + if (config.cluster_manager_command.from == NULL) { + invalid_args_msg = "[ERR] Option '--cluster-from' is required for " + "subcommand 'import'.\n"; + goto invalid_args; + } + char *src_host[] = {config.cluster_manager_command.from}; + if 
(!getClusterHostFromCmdArgs(1, src_host, &src_ip, &src_port)) { + invalid_args_msg = "[ERR] Invalid --cluster-from host. You need to " + "pass a valid address (ie. 120.0.0.1:7000).\n"; + goto invalid_args; + } + clusterManagerLogInfo(">>> Importing data from %s:%d to cluster %s:%d\n", + src_ip, src_port, ip, port); + + clusterManagerNode *refnode = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + char *reply_err = NULL; + redisReply *src_reply = NULL; + // Connect to the source node. + redisContext *src_ctx = redisConnect(src_ip, src_port); + if (src_ctx == NULL || src_ctx->err) { + success = 0; + fprintf(stderr,"Could not connect to Redis at %s:%d: %s.\n", src_ip, + src_port, src_ctx ? src_ctx->errstr : "can't allocate redis context"); + goto cleanup; + } + src_reply = reconnectingRedisCommand(src_ctx, "INFO"); + if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) { + if (src_reply && src_reply->str) reply_err = src_reply->str; + success = 0; + goto cleanup; + } + if (getLongInfoField(src_reply->str, "cluster_enabled")) { + clusterManagerLogErr("[ERR] The source node should not be a " + "cluster node.\n"); + success = 0; + goto cleanup; + } + freeReplyObject(src_reply); + src_reply = reconnectingRedisCommand(src_ctx, "DBSIZE"); + if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) { + if (src_reply && src_reply->str) reply_err = src_reply->str; + success = 0; + goto cleanup; + } + int size = src_reply->integer, i; + clusterManagerLogWarn("*** Importing %d keys from DB 0\n", size); + + // Build a slot -> node map + clusterManagerNode *slots_map[CLUSTER_MANAGER_SLOTS]; + memset(slots_map, 0, sizeof(slots_map)); /* size in bytes: clear every entry */ + listIter li; + listNode *ln; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->slots_count == 0) continue; + if (n->slots[i]) { + slots_map[i] = n; + 
break; + } + } + } + + char cmdfmt[50] = "MIGRATE %s %d %s %d %d"; + if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COPY) + strcat(cmdfmt, " COPY"); + if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_REPLACE) + strcat(cmdfmt, " REPLACE"); + + /* Use SCAN to iterate over the keys, migrating to the + * right node as needed. */ + int cursor = -999, timeout = config.cluster_manager_command.timeout; + while (cursor != 0) { + if (cursor < 0) cursor = 0; + freeReplyObject(src_reply); + src_reply = reconnectingRedisCommand(src_ctx, "SCAN %d COUNT %d", + cursor, 1000); + if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) { + if (src_reply && src_reply->str) reply_err = src_reply->str; + success = 0; + goto cleanup; + } + assert(src_reply->type == REDIS_REPLY_ARRAY); + assert(src_reply->elements >= 2); + assert(src_reply->element[1]->type == REDIS_REPLY_ARRAY); + if (src_reply->element[0]->type == REDIS_REPLY_STRING) + cursor = atoi(src_reply->element[0]->str); + else if (src_reply->element[0]->type == REDIS_REPLY_INTEGER) + cursor = src_reply->element[0]->integer; + int keycount = src_reply->element[1]->elements; + for (i = 0; i < keycount; i++) { + redisReply *kr = src_reply->element[1]->element[i]; + assert(kr->type == REDIS_REPLY_STRING); + char *key = kr->str; + uint16_t slot = keyHashSlot(key, kr->len); + clusterManagerNode *target = slots_map[slot]; + printf("Migrating %s to %s:%d: ", key, target->ip, target->port); + redisReply *r = reconnectingRedisCommand(src_ctx, cmdfmt, + target->ip, target->port, + key, 0, + timeout); + if (!r || r->type == REDIS_REPLY_ERROR) { + if (r && r->str) { + clusterManagerLogErr("Source %s:%d replied with " + "error:\n%s\n", src_ip, src_port, + r->str); + } + success = 0; + } + freeReplyObject(r); + if (!success) goto cleanup; + clusterManagerLogOk("OK\n"); + } + } +cleanup: + if (reply_err) + clusterManagerLogErr("Source %s:%d replied with error:\n%s\n", + src_ip, src_port, 
reply_err); + if (src_ctx) redisFree(src_ctx); + if (src_reply) freeReplyObject(src_reply); + return success; +invalid_args: + fprintf(stderr, "%s", invalid_args_msg); + return 0; +} + +static int clusterManagerCommandCall(int argc, char **argv) { + int port = 0, i; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *refnode = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; argc--; @@ -4677,6 +4846,9 @@ static int clusterManagerCommandCall(int argc, char **argv) { } zfree(argvlen); return 1; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; } static int clusterManagerCommandHelp(int argc, char **argv) { From 46a75898b39699ec9a9031d4085770480659e619 Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 10 Apr 2018 16:53:24 +0200 Subject: [PATCH 53/66] Cluster Manager: added clusterManagerCheckCluster to import command --- src/redis-cli.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/redis-cli.c b/src/redis-cli.c index 08a356eb1..9d93f29bc 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -4698,6 +4698,7 @@ static int clusterManagerCommandImport(int argc, char **argv) { clusterManagerNode *refnode = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + if (!clusterManagerCheckCluster(0)) return 0; char *reply_err = NULL; redisReply *src_reply = NULL; // Connect to the source node. From 29825d81981d7067fe4b9232fb339687be37d23e Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 11 Apr 2018 17:08:53 +0200 Subject: [PATCH 54/66] Cluster Manager: add-node command. 
--- src/redis-cli.c | 168 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 154 insertions(+), 14 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 9d93f29bc..da2421c72 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -165,6 +165,7 @@ typedef struct clusterManagerCommand { char *from; char *to; char **weight; + char *master_id; int weight_argc; int slots; int timeout; @@ -1299,6 +1300,8 @@ static int parseOptions(int argc, char **argv) { usage(); } else if (!strcmp(argv[i],"--cluster-replicas") && !lastarg) { config.cluster_manager_command.replicas = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-master-id") && !lastarg) { + config.cluster_manager_command.master_id = argv[++i]; } else if (!strcmp(argv[i],"--cluster-from") && !lastarg) { config.cluster_manager_command.from = argv[++i]; } else if (!strcmp(argv[i],"--cluster-to") && !lastarg) { @@ -1335,6 +1338,9 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"--cluster-copy")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_COPY; + } else if (!strcmp(argv[i],"--cluster-slave")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_SLAVE; } else if (!strcmp(argv[i],"--cluster-use-empty-masters")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; @@ -1847,6 +1853,8 @@ static clusterManagerNode *clusterManagerNodeByName(const char *name); static clusterManagerNode *clusterManagerNodeByAbbreviatedName(const char *n); static void clusterManagerNodeResetSlots(clusterManagerNode *node); static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); +static void clusterManagerPrintNotClusterNodeError(clusterManagerNode *node, + char *err); static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err); static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts); @@ -1875,6 +1883,7 @@ static void 
clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, /* Cluster Manager commands. */ static int clusterManagerCommandCreate(int argc, char **argv); +static int clusterManagerCommandAddNode(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandFix(int argc, char **argv); @@ -1895,6 +1904,8 @@ typedef struct clusterManagerCommandDef { clusterManagerCommandDef clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", "replicas "}, + {"add-node", clusterManagerCommandAddNode, 2, + "new_host:new_port existing_host:existing_port", "slave,master-id "}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, @@ -3030,8 +3041,7 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { opts |= CLUSTER_MANAGER_OPT_GETFRIENDS; char *e = NULL; if (!clusterManagerNodeIsCluster(node, &e)) { - char *msg = (e ? e : "is not configured as a cluster node."); - clusterManagerLogErr("[ERR] Node %s:%d %s\n",node->ip,node->port,msg); + clusterManagerPrintNotClusterNodeError(node, e); if (e) zfree(e); freeClusterManagerNode(node); return 0; @@ -3313,6 +3323,27 @@ static clusterManagerNode * clusterManagerGetNodeWithMostKeysInSlot(list *nodes, return node; } +/* This function returns the master that has the least number of replicas + * in the cluster. If there are multiple masters with the same smaller + * number of replicas, one at random is returned. 
*/ + +static clusterManagerNode *clusterManagerNodeWithLeastReplicas() { + clusterManagerNode *node = NULL; + int lowest_count = 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (node->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (node == NULL || n->replicas_count < lowest_count) { + node = n; + lowest_count = n->replicas_count; + } + } + return node; +} + static int clusterManagerFixSlotsCoverage(char *all_slots) { int i, fixed = 0; list *none = NULL, *single = NULL, *multi = NULL; @@ -3966,6 +3997,26 @@ static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, array->nodes[array->count++] = node; } +static void clusterManagerPrintNotEmptyNodeError(clusterManagerNode *node, + char *err) +{ + char *msg; + if (err) msg = err; + else { + msg = "is not empty. Either the node already knows other " + "nodes (check with CLUSTER NODES) or contains some " + "key in database 0."; + } + clusterManagerLogErr("[ERR] Node %s:%d %s\n", node->ip, node->port, msg); +} + +static void clusterManagerPrintNotClusterNodeError(clusterManagerNode *node, + char *err) +{ + char *msg = (err ? err : "is not configured as a cluster node."); + clusterManagerLogErr("[ERR] Node %s:%d %s\n", node->ip, node->port, msg); +} + /* Execute redis-cli in Cluster Manager mode */ static void clusterManagerMode(clusterManagerCommandProc *proc) { int argc = config.cluster_manager_command.argc; @@ -4008,8 +4059,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { } char *err = NULL; if (!clusterManagerNodeIsCluster(node, &err)) { - char *msg = (err ? 
err : "is not configured as a cluster node."); - clusterManagerLogErr("[ERR] Node %s:%d %s\n", ip, port, msg); + clusterManagerPrintNotClusterNodeError(node, err); if (err) zfree(err); freeClusterManagerNode(node); return 0; @@ -4025,14 +4075,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { } err = NULL; if (!clusterManagerNodeIsEmpty(node, &err)) { - char *msg; - if (err) msg = err; - else { - msg = "is not empty. Either the node already knows other " - "nodes (check with CLUSTER NODES) or contains some " - "key in database 0."; - } - clusterManagerLogErr("[ERR] Node %s:%d %s\n", ip, port, msg); + clusterManagerPrintNotEmptyNodeError(node, err); if (err) zfree(err); freeClusterManagerNode(node); return 0; @@ -4263,6 +4306,104 @@ cleanup: return success; } +static int clusterManagerCommandAddNode(int argc, char **argv) { + int success = 1; + redisReply *reply = NULL; + char *ref_ip = NULL, *ip = NULL; + int ref_port = 0, port = 0; + if (!getClusterHostFromCmdArgs(argc - 1, argv + 1, &ref_ip, &ref_port)) + goto invalid_args; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) + goto invalid_args; + clusterManagerLogInfo(">>> Adding node %s:%d to cluster %s:%d\n", ip, port, + ref_ip, ref_port); + // Check the existing cluster + clusterManagerNode *refnode = clusterManagerNewNode(ref_ip, ref_port); + if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + if (!clusterManagerCheckCluster(0)) return 0; + + /* If --cluster-master-id was specified, try to resolve it now so that we + * abort before starting with the node configuration. 
*/ + clusterManagerNode *master_node = NULL; + if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_SLAVE) { + char *master_id = config.cluster_manager_command.master_id; + if (master_id != NULL) { + master_node = clusterManagerNodeByName(master_id); + if (master_node == NULL) { + clusterManagerLogErr("[ERR] No such master ID %s\n", master_id); + return 0; + } + } else { + master_node = clusterManagerNodeWithLeastReplicas(); + assert(master_node != NULL); + printf("Automatically selected master %s:%d\n", master_node->ip, + master_node->port); + } + } + + // Add the new node + clusterManagerNode *new_node = clusterManagerNewNode(ip, port); + int added = 0; + CLUSTER_MANAGER_NODE_CONNECT(new_node); + if (new_node->context->err) { + clusterManagerLogErr("[ERR] Sorry, can't connect to node %s:%d\n", + ip, port); + success = 0; + goto cleanup; + } + char *err = NULL; + if (!(success = clusterManagerNodeIsCluster(new_node, &err))) { + clusterManagerPrintNotClusterNodeError(new_node, err); + if (err) zfree(err); + goto cleanup; + } + if (!clusterManagerNodeLoadInfo(new_node, 0, &err)) { + if (err) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(new_node, err); + zfree(err); + } + success = 0; + goto cleanup; + } + if (!(success = clusterManagerNodeIsEmpty(new_node, &err))) { + clusterManagerPrintNotEmptyNodeError(new_node, err); + if (err) zfree(err); + goto cleanup; + } + clusterManagerNode *first = listFirst(cluster_manager.nodes)->value; + listAddNodeTail(cluster_manager.nodes, new_node); + added = 1; + + // Send CLUSTER MEET command to the new node + clusterManagerLogInfo(">>> Send CLUSTER MEET to node %s:%d to make it " + "join the cluster.\n", ip, port); + reply = CLUSTER_MANAGER_COMMAND(new_node, "CLUSTER MEET %s %d", + first->ip, first->port); + if (!(success = clusterManagerCheckRedisReply(new_node, reply, NULL))) + goto cleanup; + + /* Additional configuration is needed if the node is added as a slave. 
*/ + if (master_node) { + sleep(1); + clusterManagerWaitForClusterJoin(); + clusterManagerLogInfo(">>> Configure node as replica of %s:%d.\n", + master_node->ip, master_node->port); + freeReplyObject(reply); + reply = CLUSTER_MANAGER_COMMAND(new_node, "CLUSTER REPLICATE %s", + master_node->name); + if (!(success = clusterManagerCheckRedisReply(new_node, reply, NULL))) + goto cleanup; + } + clusterManagerLogOk("[OK] New node added correctly.\n"); +cleanup: + if (!added && new_node) freeClusterManagerNode(new_node); + if (reply) freeReplyObject(reply); + return success; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + static int clusterManagerCommandInfo(int argc, char **argv) { int port = 0; char *ip = NULL; @@ -4531,8 +4672,7 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { nodes_involved++; listAddNodeTail(involved, n); } - weightedNodes = zmalloc(nodes_involved * - sizeof(clusterManagerNode *)); + weightedNodes = zmalloc(nodes_involved * sizeof(clusterManagerNode *)); if (weightedNodes == NULL) goto cleanup; /* Check cluster, only proceed if it looks sane. */ clusterManagerCheckCluster(1); From 51b8ab3c97577e2c3477182822c651326283c05a Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 11 Apr 2018 18:22:44 +0200 Subject: [PATCH 55/66] - Cluster Manager: del-node command. 
- Cluster Manager: fixed bug in clusterManagerNodeWithLeastReplicas --- src/redis-cli.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index da2421c72..9a1ab0fdb 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1884,6 +1884,7 @@ static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandAddNode(int argc, char **argv); +static int clusterManagerCommandDeleteNode(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandFix(int argc, char **argv); @@ -1906,6 +1907,7 @@ clusterManagerCommandDef clusterManagerCommands[] = { "replicas "}, {"add-node", clusterManagerCommandAddNode, 2, "new_host:new_port existing_host:existing_port", "slave,master-id "}, + {"del-node", clusterManagerCommandDeleteNode, 2, "host:port node_id",NULL}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, @@ -3335,7 +3337,7 @@ static clusterManagerNode *clusterManagerNodeWithLeastReplicas() { listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; - if (node->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; if (node == NULL || n->replicas_count < lowest_count) { node = n; lowest_count = n->replicas_count; @@ -4404,6 +4406,73 @@ invalid_args: return 0; } +static int clusterManagerCommandDeleteNode(int argc, char **argv) { + UNUSED(argc); + int success = 1; + int port = 0; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args; + char *node_id = argv[1]; + clusterManagerLogInfo(">>> Removing node 
%s from cluster %s:%d\n", + node_id, ip, port); + clusterManagerNode *ref_node = clusterManagerNewNode(ip, port); + clusterManagerNode *node = NULL; + + // Load cluster information + if (!clusterManagerLoadInfoFromNode(ref_node, 0)) return 0; + + // Check if the node exists and is not empty + node = clusterManagerNodeByName(node_id); + if (node == NULL) { + clusterManagerLogErr("[ERR] No such node ID %s\n", node_id); + return 0; + } + if (node->slots_count != 0) { + clusterManagerLogErr("[ERR] Node %s:%d is not empty! Reshard data " + "away and try again.\n", node->ip, node->port); + return 0; + } + + // Send CLUSTER FORGET to all the nodes but the node to remove + clusterManagerLogInfo(">>> Sending CLUSTER FORGET messages to the " + "cluster...\n"); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n == node) continue; + if (n->replicate && !strcasecmp(n->replicate, node_id)) { + // Reconfigure the slave to replicate with some other node + clusterManagerNode *master = clusterManagerNodeWithLeastReplicas(); + //TODO: check whether master could be the same as node + assert(master != NULL); + clusterManagerLogInfo(">>> %s:%d as replica of %s:%d\n", + n->ip, n->port, master->ip, master->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER REPLICATE %s", + master->name); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) return 0; + } + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER FORGET %s", + node_id); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) return 0; + } + + // Finally shutdown the node + clusterManagerLogInfo(">>> SHUTDOWN the node.\n"); + redisReply *r = redisCommand(node->context, "SHUTDOWN"); + success = clusterManagerCheckRedisReply(node, r, NULL); + if (r) freeReplyObject(r); + return success; +invalid_args: + fprintf(stderr, 
CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + static int clusterManagerCommandInfo(int argc, char **argv) { int port = 0; char *ip = NULL; @@ -5026,6 +5095,9 @@ static int clusterManagerCommandHelp(int argc, char **argv) { } } } + fprintf(stderr, "\nFor check, fix, reshard, del-node, set-timeout you " + "can specify the host and port of any working node in " + "the cluster.\n\n"); return 0; } From 53667570a78381002dd03a354703851dae62e8d7 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 13 Apr 2018 16:09:22 +0200 Subject: [PATCH 56/66] Cluster Manager: set-timeout command --- src/redis-cli.c | 70 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 6 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 9a1ab0fdb..dba8781f1 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1890,6 +1890,7 @@ static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandFix(int argc, char **argv); static int clusterManagerCommandReshard(int argc, char **argv); static int clusterManagerCommandRebalance(int argc, char **argv); +static int clusterManagerCommandSetTimeout(int argc, char **argv); static int clusterManagerCommandImport(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); @@ -1905,21 +1906,23 @@ typedef struct clusterManagerCommandDef { clusterManagerCommandDef clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... 
hostN:portN", "replicas "}, - {"add-node", clusterManagerCommandAddNode, 2, - "new_host:new_port existing_host:existing_port", "slave,master-id "}, - {"del-node", clusterManagerCommandDeleteNode, 2, "host:port node_id",NULL}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, - {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, + {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"reshard", clusterManagerCommandReshard, -1, "host:port", "from ,to ,slots ,yes,timeout ,pipeline "}, {"rebalance", clusterManagerCommandRebalance, -1, "host:port", "weight ,use-empty-masters," "timeout ,simulate,pipeline ,threshold "}, - {"import", clusterManagerCommandImport, 1, "host:port", - "from ,copy,replace"}, + {"add-node", clusterManagerCommandAddNode, 2, + "new_host:new_port existing_host:existing_port", "slave,master-id "}, + {"del-node", clusterManagerCommandDeleteNode, 2, "host:port node_id",NULL}, {"call", clusterManagerCommandCall, -2, "host:port command arg arg .. 
arg", NULL}, + {"set-timeout", clusterManagerCommandSetTimeout, 2, + "host:port milliseconds", NULL}, + {"import", clusterManagerCommandImport, 1, "host:port", + "from ,copy,replace"}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} }; @@ -4882,6 +4885,61 @@ invalid_args: return 0; } +static int clusterManagerCommandSetTimeout(int argc, char **argv) { + UNUSED(argc); + int port = 0; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args; + int timeout = atoi(argv[1]); + if (timeout < 100) { + fprintf(stderr, "Setting a node timeout of less than 100 " + "milliseconds is a bad idea.\n"); + return 0; + } + // Load cluster information + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + int ok_count = 0, err_count = 0; + + clusterManagerLogInfo(">>> Reconfiguring node timeout in every " + "cluster node...\n"); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + char *err = NULL; + redisReply *reply = CLUSTER_MANAGER_COMMAND(n, "CONFIG %s %s %d", + "SET", + "cluster-node-timeout", + timeout); + if (reply == NULL) goto reply_err; + int ok = clusterManagerCheckRedisReply(n, reply, &err); + freeReplyObject(reply); + if (!ok) goto reply_err; + reply = CLUSTER_MANAGER_COMMAND(n, "CONFIG %s", "REWRITE"); + if (reply == NULL) goto reply_err; + ok = clusterManagerCheckRedisReply(n, reply, &err); + freeReplyObject(reply); + if (!ok) goto reply_err; + clusterManagerLogWarn("*** New timeout set for %s:%d\n", n->ip, + n->port); + ok_count++; + continue; +reply_err: + if (err == NULL) err = ""; + clusterManagerLogErr("ERR setting node-timeot for %s:%d: %s\n", n->ip, + n->port, err); + err_count++; + } + clusterManagerLogInfo(">>> New node timeout set. 
%d OK, %d ERR.\n", + ok_count, err_count); + return 1; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + static int clusterManagerCommandImport(int argc, char **argv) { int success = 1; int port = 0, src_port = 0; From c941584bffd694427da0131b122917234f8f143b Mon Sep 17 00:00:00 2001 From: artix Date: Thu, 19 Apr 2018 18:52:01 +0200 Subject: [PATCH 57/66] Cluster Manager: code improvements and more comments added. --- src/redis-cli.c | 66 +++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 35 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index dba8781f1..07732367a 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -68,7 +68,7 @@ #define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE" #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" -#define CLUSTER_MANAGER_SLOTS 16384 +#define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MIGRATE_TIMEOUT 60000 #define CLUSTER_MANAGER_MIGRATE_PIPELINE 10 #define CLUSTER_MANAGER_REBALANCE_THRESHOLD 2 @@ -172,6 +172,7 @@ typedef struct clusterManagerCommand { int pipeline; float threshold; } clusterManagerCommand; + static void createClusterManagerCommand(char *cmdname, int argc, char **argv); @@ -1788,7 +1789,7 @@ static int evalMode(int argc, char **argv) { /* The Cluster Manager global structure */ static struct clusterManager { - list *nodes; /* List of nodes int he configuration. */ + list *nodes; /* List of nodes in the configuration. */ list *errors; } cluster_manager; @@ -1821,7 +1822,7 @@ typedef struct clusterManagerNode { int balance; /* Used by rebalance */ } clusterManagerNode; -/* Data structure used to represent a sequence of nodes. */ +/* Data structure used to represent a sequence of cluster nodes. 
*/ typedef struct clusterManagerNodeArray { clusterManagerNode **nodes; /* Actual nodes array */ clusterManagerNode **alloc; /* Pointer to the allocated memory */ @@ -1829,7 +1830,7 @@ typedef struct clusterManagerNodeArray { int count; /* Non-NULL nodes count */ } clusterManagerNodeArray; -/* Used for reshard table. */ +/* Used for the reshard table. */ typedef struct clusterManagerReshardTableItem { clusterManagerNode *source; int slot; @@ -1865,7 +1866,7 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, int ip_count); static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent); static void clusterManagerShowNodes(void); -static void clusterManagerShowInfo(void); +static void clusterManagerShowClusterInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); static void clusterManagerWaitForClusterJoin(void); static int clusterManagerCheckCluster(int quiet); @@ -2067,8 +2068,9 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { clusterManagerNodeResetSlots(node); return node; } + /* Check whether reply is NULL or its type is REDIS_REPLY_ERROR. In the - * latest case, if 'err' arg is not NULL, it gets allocated with a copy + * latest case, if the 'err' arg is not NULL, it gets allocated with a copy * of reply error (it's up to the caller function to free it), elsewhere * the error is directly printed. */ static int clusterManagerCheckRedisReply(clusterManagerNode *n, @@ -2100,7 +2102,7 @@ static void clusterManagerRemoveNodeFromList(list *nodelist, } } -/* Return the node with the specified ID or NULL. */ +/* Return the node with the specified name (ID) or NULL. 
*/ static clusterManagerNode *clusterManagerNodeByName(const char *name) { if (cluster_manager.nodes == NULL) return NULL; clusterManagerNode *found = NULL; @@ -2121,7 +2123,7 @@ static clusterManagerNode *clusterManagerNodeByName(const char *name) { return found; } -/* Like get_node_by_name but the specified name can be just the first +/* Like clusterManagerNodeByName but the specified name can be just the first * part of the node ID as long as the prefix in unique across the * cluster. */ @@ -2152,6 +2154,7 @@ static void clusterManagerNodeResetSlots(clusterManagerNode *node) { node->slots_count = 0; } +/* Call "INFO" redis command on the specified node and return the reply. */ static redisReply *clusterManagerGetNodeRedisInfo(clusterManagerNode *node, char **err) { @@ -2181,7 +2184,7 @@ static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { * some key or if it already knows other nodes */ static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) { redisReply *info = clusterManagerGetNodeRedisInfo(node, err); - int is_err = 0, is_empty = 1; + int is_empty = 1; if (info == NULL) return 0; if (strstr(info->str, "db0:") != NULL) { is_empty = 0; @@ -2190,11 +2193,7 @@ static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) { freeReplyObject(info); info = CLUSTER_MANAGER_COMMAND(node, "CLUSTER INFO"); if (err != NULL) *err = NULL; - if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { - *err = zmalloc((info->len + 1) * sizeof(char)); - strcpy(*err, info->str); - } + if (!clusterManagerCheckRedisReply(node, info, err)) { is_empty = 0; goto result; } @@ -2422,7 +2421,7 @@ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { * However if the key contains the {...} pattern, only the part between * { and } is hashed. This may be useful in the future to force certain * keys to be in the same node (assuming no resharding is in progress). 
*/ -static unsigned int keyHashSlot(char *key, int keylen) { +static unsigned int clusterManagerKeyHashSlot(char *key, int keylen) { int s, e; /* start-end indexes of { and } */ for (s = 0; s < keylen; s++) @@ -2443,6 +2442,7 @@ static unsigned int keyHashSlot(char *key, int keylen) { return crc16(key+s+1,e-s-1) & 0x3FFF; } +/* Return a string representation of the cluster node. */ static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent) { sds info = sdsempty(); sds spaces = sdsempty(); @@ -2484,7 +2484,7 @@ static void clusterManagerShowNodes(void) { } } -static void clusterManagerShowInfo(void) { +static void clusterManagerShowClusterInfo(void) { int masters = 0; int keys = 0; listIter li; @@ -2533,11 +2533,12 @@ static void clusterManagerShowInfo(void) { printf("%.2f keys per slot on average.\n", keys_per_slot); } +/* Flush dirty slots configuration of the node by calling CLUSTER ADDSLOTS */ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) { redisReply *reply = NULL; void *_reply = NULL; - int is_err = 0, success = 1; + int success = 1; /* First two args are used for the command itself. */ int argc = node->slots_count + 2; sds *argv = zmalloc(argc * sizeof(*argv)); @@ -2566,14 +2567,7 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) goto cleanup; } reply = (redisReply*) _reply; - if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { - *err = zmalloc((reply->len + 1) * sizeof(char)); - strcpy(*err, reply->str); - } - success = 0; - goto cleanup; - } + success = clusterManagerCheckRedisReply(node, reply, err); cleanup: zfree(argvlen); if (argv != NULL) { @@ -2821,7 +2815,7 @@ static int clusterManagerMoveSlot(clusterManagerNode *source, } /* Flush the dirty node configuration by calling replicate for slaves or - * adding the slots for masters. */ + * adding the slots defined in the masters. 
*/ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { if (!node->dirty) return 0; redisReply *reply = NULL; @@ -2852,6 +2846,7 @@ cleanup: return success; } +/* Wait until the cluster configuration is consistent. */ static void clusterManagerWaitForClusterJoin(void) { printf("Waiting for the cluster to join\n"); while(!clusterManagerIsConfigConsistent()) { @@ -2871,13 +2866,9 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err) { redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); - int is_err = 0, success = 1; + int success = 1; *err = NULL; - if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { - *err = zmalloc((reply->len + 1) * sizeof(char)); - strcpy(*err, reply->str); - } + if (!clusterManagerCheckRedisReply(node, reply, err)) { success = 0; goto cleanup; } @@ -3114,6 +3105,7 @@ invalid_friend: return 1; } +/* Compare functions used by various sorting operations. */ int clusterManagerSlotCompare(const void *slot1, const void *slot2) { const char **i1 = (const char **)slot1; const char **i2 = (const char **)slot2; @@ -3252,6 +3244,7 @@ static int clusterManagerIsConfigConsistent(void) { return consistent; } +/* Add the error string to cluster_manager.errors and print it. */ static void clusterManagerOnError(sds err) { if (cluster_manager.errors == NULL) cluster_manager.errors = listCreate(); @@ -3259,6 +3252,9 @@ static void clusterManagerOnError(sds err) { clusterManagerLogErr("%s\n", (char *) err); } +/* Check the slots coverage of the cluster. The 'all_slots' argument must be + * and array of 16384 bytes. Every covered slot will be set to 1 in the + * 'all_slots' array. 
The function returns the total number if covered slots.*/ static int clusterManagerGetCoveredSlots(char *all_slots) { if (cluster_manager.nodes == NULL) return 0; listIter li; @@ -4482,7 +4478,7 @@ static int clusterManagerCommandInfo(int argc, char **argv) { if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; - clusterManagerShowInfo(); + clusterManagerShowClusterInfo(); return 1; invalid_args: fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); @@ -4495,7 +4491,7 @@ static int clusterManagerCommandCheck(int argc, char **argv) { if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; - clusterManagerShowInfo(); + clusterManagerShowClusterInfo(); return clusterManagerCheckCluster(0); invalid_args: fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); @@ -5047,7 +5043,7 @@ static int clusterManagerCommandImport(int argc, char **argv) { redisReply *kr = src_reply->element[1]->element[i]; assert(kr->type == REDIS_REPLY_STRING); char *key = kr->str; - uint16_t slot = keyHashSlot(key, kr->len); + uint16_t slot = clusterManagerKeyHashSlot(key, kr->len); clusterManagerNode *target = slots_map[slot]; printf("Migrating %s to %s:%d: ", key, target->ip, target->port); redisReply *r = reconnectingRedisCommand(src_ctx, cmdfmt, From 6a113e8f577b9e64d4f03bcd8aaf1c4c146037dd Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 20 Apr 2018 18:08:30 +0200 Subject: [PATCH 58/66] Cluster Manager: fixed bug when parsing CLUSTER NODES reply (clusterManagerNodeLoadInfo) --- src/redis-cli.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 07732367a..adb2095e1 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2922,6 +2922,7 @@ static int 
clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, line = p + 1; remaining--; } else line = p; + char *dash = NULL; if (slotsdef[0] == '[') { slotsdef++; if ((p = strstr(slotsdef, "->-"))) { // Migrating @@ -2953,7 +2954,8 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, node->importing[node->importing_count - 1] = src; } - } else if ((p = strchr(slotsdef, '-')) != NULL) { + } else if ((dash = strchr(slotsdef, '-')) != NULL) { + p = dash; int start, stop; *p = '\0'; start = atoi(slotsdef); @@ -5078,7 +5080,7 @@ invalid_args: static int clusterManagerCommandCall(int argc, char **argv) { int port = 0, i; char *ip = NULL; - if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args; clusterManagerNode *refnode = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; argc--; From 96865ab61b1455ab4ed490858be2f54620bcc396 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 20 Apr 2018 19:25:08 +0200 Subject: [PATCH 59/66] Cluster Manager: fixed expected slots calculation (rebalance) Cluster Manager: fixed argument parsing after --cluster-weight --- src/redis-cli.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index adb2095e1..bdc4b7b45 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1318,6 +1318,7 @@ static int parseOptions(int argc, char **argv) { if (wargc > 0) { config.cluster_manager_command.weight = weight; config.cluster_manager_command.weight_argc = wargc; + i += wargc; } } else if (!strcmp(argv[i],"--cluster-slots") && !lastarg) { config.cluster_manager_command.slots = atoi(argv[++i]); @@ -4724,7 +4725,6 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { int nodes_involved = 0; int use_empty = config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; - involved = listCreate(); listIter li; listNode 
*ln; @@ -4762,15 +4762,15 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; weightedNodes[i++] = n; - int expected = (((float)CLUSTER_MANAGER_SLOTS / total_weight) * - (int) n->weight); + int expected = (int) (((float)CLUSTER_MANAGER_SLOTS / total_weight) * + n->weight); n->balance = n->slots_count - expected; total_balance += n->balance; /* Compute the percentage of difference between the * expected number of slots and the real one, to see * if it's over the threshold specified by the user. */ int over_threshold = 0; - if (config.cluster_manager_command.threshold > 0) { + if (threshold > 0) { if (n->slots_count > 0) { float err_perc = fabs((100-(100.0*expected/n->slots_count))); if (err_perc > threshold) over_threshold = 1; @@ -4784,7 +4784,6 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { clusterManagerLogWarn("*** No rebalancing needed! " "All nodes are within the %.2f%% threshold.\n", config.cluster_manager_command.threshold); - result = 0; goto cleanup; } /* Because of rounding, it is possible that the balance of all nodes From ff03a6c51dedc138d8ebfca7a350fb3ccab9069f Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 20 Apr 2018 19:29:42 +0200 Subject: [PATCH 60/66] Cluster tests now using redis-cli instead of redis-trib --- tests/cluster/tests/04-resharding.tcl | 10 +++++----- tests/cluster/tests/12-replica-migration-2.tcl | 14 +++++++------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/cluster/tests/04-resharding.tcl b/tests/cluster/tests/04-resharding.tcl index 0ccbf717d..68fba135e 100644 --- a/tests/cluster/tests/04-resharding.tcl +++ b/tests/cluster/tests/04-resharding.tcl @@ -73,12 +73,12 @@ test "Cluster consistency during live resharding" { flush stdout set target [dict get [get_myself [randomInt 5]] id] set tribpid [lindex [exec \ - ../../../src/redis-trib.rb reshard \ - --from all \ - --to $target \ - --slots 100 
\ - --yes \ + ../../../src/redis-cli --cluster reshard \ 127.0.0.1:[get_instance_attrib redis 0 port] \ + --cluster-from all \ + --cluster-to $target \ + --cluster-slots 100 \ + --cluster-yes \ | [info nameofexecutable] \ ../tests/helpers/onlydots.tcl \ &] 0] diff --git a/tests/cluster/tests/12-replica-migration-2.tcl b/tests/cluster/tests/12-replica-migration-2.tcl index 48ecd1d50..3d8b7b04b 100644 --- a/tests/cluster/tests/12-replica-migration-2.tcl +++ b/tests/cluster/tests/12-replica-migration-2.tcl @@ -31,9 +31,9 @@ test "Each master should have at least two replicas attached" { set master0_id [dict get [get_myself 0] id] test "Resharding all the master #0 slots away from it" { set output [exec \ - ../../../src/redis-trib.rb rebalance \ - --weight ${master0_id}=0 \ - 127.0.0.1:[get_instance_attrib redis 0 port] >@ stdout] + ../../../src/redis-cli --cluster rebalance \ + 127.0.0.1:[get_instance_attrib redis 0 port] \ + --cluster-weight ${master0_id}=0 >@ stdout ] } test "Master #0 should lose its replicas" { @@ -49,10 +49,10 @@ test "Resharding back some slot to master #0" { # new resharding. 
after 10000 set output [exec \ - ../../../src/redis-trib.rb rebalance \ - --weight ${master0_id}=.01 \ - --use-empty-masters \ - 127.0.0.1:[get_instance_attrib redis 0 port] >@ stdout] + ../../../src/redis-cli --cluster rebalance \ + 127.0.0.1:[get_instance_attrib redis 0 port] \ + --cluster-weight ${master0_id}=.01 \ + --cluster-use-empty-masters >@ stdout] } test "Master #0 should re-acquire one or more replicas" { From 614127c47840ad1d831259140e9f0a06a8a9c977 Mon Sep 17 00:00:00 2001 From: artix Date: Mon, 7 May 2018 15:56:12 +0200 Subject: [PATCH 61/66] - Updated create-cluster with redis-cli - Updated README --- utils/create-cluster/README | 2 +- utils/create-cluster/create-cluster | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/create-cluster/README b/utils/create-cluster/README index f2a89839b..e682f6dc9 100644 --- a/utils/create-cluster/README +++ b/utils/create-cluster/README @@ -15,7 +15,7 @@ To create a cluster, follow these steps: 1. Edit create-cluster and change the start / end port, depending on the number of instances you want to create. 2. Use "./create-cluster start" in order to run the instances. -3. Use "./create-cluster create" in order to execute redis-trib create, so that +3. Use "./create-cluster create" in order to execute redis-cli --cluster create, so that an actual Redis cluster will be created. 4. Now you are ready to play with the cluster. AOF files and logs for each instances are created in the current directory. 
diff --git a/utils/create-cluster/create-cluster b/utils/create-cluster/create-cluster index d821683f6..468f924a4 100755 --- a/utils/create-cluster/create-cluster +++ b/utils/create-cluster/create-cluster @@ -34,7 +34,7 @@ then PORT=$((PORT+1)) HOSTS="$HOSTS 127.0.0.1:$PORT" done - ../../src/redis-trib.rb create --replicas $REPLICAS $HOSTS + ../../src/redis-cli --cluster create $HOSTS --cluster-replicas $REPLICAS exit 0 fi @@ -94,7 +94,7 @@ fi echo "Usage: $0 [start|create|stop|watch|tail|clean]" echo "start -- Launch Redis Cluster instances." -echo "create -- Create a cluster using redis-trib create." +echo "create -- Create a cluster using redis-cli --cluster create." echo "stop -- Stop Redis Cluster instances." echo "watch -- Show CLUSTER NODES output (first 30 lines) of first node." echo "tail -- Run tail -f of instance at base port + ID." From 0119cd160a95c63c7b02a21bc3870625e0dd4095 Mon Sep 17 00:00:00 2001 From: artix Date: Mon, 7 May 2018 17:31:34 +0200 Subject: [PATCH 62/66] Cluster Manager: --cluster options can now be placed everywhere --- src/redis-cli.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index bdc4b7b45..85588fe42 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1293,8 +1293,8 @@ static int parseOptions(int argc, char **argv) { if (CLUSTER_MANAGER_MODE()) usage(); char *cmd = argv[++i]; int j = i; - for (; j < argc; j++) if (argv[j][0] == '-') break; - j--; + while (j < argc && argv[j][0] != '-') j++; + if (j > i) j--; createClusterManagerCommand(cmd, j - i, argv + i + 1); i = j; } else if (!strcmp(argv[i],"--cluster") && lastarg) { @@ -1351,6 +1351,15 @@ static int parseOptions(int argc, char **argv) { printf("redis-cli %s\n", version); sdsfree(version); exit(0); + } else if (CLUSTER_MANAGER_MODE() && argv[i][0] != '-') { + if (config.cluster_manager_command.argc == 0) { + int j = i + 1; + while (j < argc && argv[j][0] != '-') j++; + int cmd_argc = j - i; + 
config.cluster_manager_command.argc = cmd_argc; + config.cluster_manager_command.argv = argv + i; + if (cmd_argc > 1) i = j - 1; + } } else { if (argv[i][0] == '-') { fprintf(stderr, From 5d8f0ba7eaf168e6c24c12797d470c6b763791c7 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 11 May 2018 18:28:10 +0200 Subject: [PATCH 63/66] - Fixed mistyped redis command (clusterManagerGetNodeWithMostKeysInSlot) - Cluster node structure is now updated after ADDSLOTS --- src/redis-cli.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 85588fe42..d591bcd01 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -3316,7 +3316,7 @@ static clusterManagerNode * clusterManagerGetNodeWithMostKeysInSlot(list *nodes, if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) continue; redisReply *r = - CLUSTER_MANAGER_COMMAND(n, "CLUSTER COUNTKEYSINSLOTi %d", slot); + CLUSTER_MANAGER_COMMAND(n, "CLUSTER COUNTKEYSINSLOT %d", slot); int success = clusterManagerCheckRedisReply(n, r, err); if (success) { if (r->integer > numkeys || node == NULL) { @@ -3446,6 +3446,9 @@ static int clusterManagerFixSlotsCoverage(char *all_slots) { if (!clusterManagerCheckRedisReply(n, r, NULL)) fixed = -1; if (r) freeReplyObject(r); if (fixed < 0) goto cleanup; + /* Since CLUSTER ADDSLOTS succeded, we also update the slot + * info into the node struct, in order to keep it synced */ + n->slots[atoi(slot)] = 1; fixed++; } } @@ -3474,6 +3477,9 @@ static int clusterManagerFixSlotsCoverage(char *all_slots) { if (!clusterManagerCheckRedisReply(n, r, NULL)) fixed = -1; if (r) freeReplyObject(r); if (fixed < 0) goto cleanup; + /* Since CLUSTER ADDSLOTS succeded, we also update the slot + * info into the node struct, in order to keep it synced */ + n->slots[atoi(slot)] = 1; fixed++; } } @@ -3513,6 +3519,9 @@ static int clusterManagerFixSlotsCoverage(char *all_slots) { if (!clusterManagerCheckRedisReply(target, r, NULL)) fixed = -1; if (r) 
freeReplyObject(r); if (fixed < 0) goto cleanup; + /* Since CLUSTER ADDSLOTS succeded, we also update the slot + * info into the node struct, in order to keep it synced */ + target->slots[atoi(slot)] = 1; listIter nli; listNode *nln; listRewind(nodes, &nli); @@ -3633,6 +3642,9 @@ static int clusterManagerFixOpenSlot(int slot) { success = clusterManagerCheckRedisReply(owner, reply, NULL); if (reply) freeReplyObject(reply); if (!success) goto cleanup; + /* Since CLUSTER ADDSLOTS succeded, we also update the slot + * info into the node struct, in order to keep it synced */ + owner->slots[slot] = 1; /* Make sure this information will propagate. Not strictly needed * since there is no past owner, so all the other nodes will accept * whatever epoch this node will claim the slot with. */ From c5458b11c4cff85e0345096f650c2fb4d11accf1 Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 15 May 2018 18:41:46 +0200 Subject: [PATCH 64/66] Cluster Manager: print flags as strings. --- src/redis-cli.c | 90 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 72 insertions(+), 18 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index d591bcd01..c108e6735 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1815,6 +1815,7 @@ typedef struct clusterManagerNode { time_t ping_sent; time_t ping_recv; int flags; + list *flags_str; /* Flags string representations */ sds replicate; /* Master ID if node is a slave */ list replicas; int dirty; /* Node has changes that can be flushed */ @@ -2001,6 +2002,17 @@ static int getClusterHostFromCmdArgs(int argc, char **argv, return 1; } +static void freeClusterManagerNodeFlags(list *flags) { + listIter li; + listNode *ln; + listRewind(flags, &li); + while ((ln = listNext(&li)) != NULL) { + sds flag = ln->value; + sdsfree(flag); + } + listRelease(flags); +} + static void freeClusterManagerNode(clusterManagerNode *node) { if (node->context != NULL) redisFree(node->context); if (node->friends != NULL) { @@ -2027,6 +2039,10 @@ 
static void freeClusterManagerNode(clusterManagerNode *node) { for (i = 0; i < node->importing_count; i++) sdsfree(node->importing[i]); zfree(node->importing); } + if (node->flags_str != NULL) { + freeClusterManagerNodeFlags(node->flags_str); + node->flags_str = NULL; + } zfree(node); } @@ -2065,6 +2081,7 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { node->ping_sent = 0; node->ping_recv = 0; node->flags = 0; + node->flags_str = NULL; node->replicate = NULL; node->dirty = 0; node->friends = NULL; @@ -2391,6 +2408,24 @@ cleanup: zfree(offenders); } +/* Return a representable string of the node's flags */ +static sds clusterManagerNodeFlagString(clusterManagerNode *node) { + sds flags = sdsempty(); + if (!node->flags_str) return flags; + int empty = 1; + listIter li; + listNode *ln; + listRewind(node->flags_str, &li); + while ((ln = listNext(&li)) != NULL) { + sds flag = ln->value; + if (strcmp(flag, "myself") == 0) continue; + if (!empty) flags = sdscat(flags, ","); + flags = sdscatfmt(flags, "%S", flag); + empty = 0; + } + return flags; +} + /* Return a representable string of the node's slots */ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { sds slots = sdsempty(); @@ -2466,12 +2501,14 @@ static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent) { info = sdscatfmt(info, "S: %S %s:%u", node->name, node->ip, node->port); else { slots = clusterManagerNodeSlotsString(node); + sds flags = clusterManagerNodeFlagString(node); info = sdscatfmt(info, "%s: %S %s:%u\n" "%s slots:%S (%u slots) " - "", //TODO: flags string + "%S", role, node->name, node->ip, node->port, spaces, - slots, node->slots_count); + slots, node->slots_count, flags); sdsfree(slots); + sdsfree(flags); } if (node->replicate != NULL) info = sdscatfmt(info, "\n%s replicates %S", spaces, node->replicate); @@ -3008,18 +3045,35 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (currentNode->name) 
sdsfree(currentNode->name); currentNode->name = sdsnew(name); } - if (strstr(flags, "noaddr") != NULL) - currentNode->flags |= CLUSTER_MANAGER_FLAG_NOADDR; - if (strstr(flags, "disconnected") != NULL) - currentNode->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT; - if (strstr(flags, "fail") != NULL) - currentNode->flags |= CLUSTER_MANAGER_FLAG_FAIL; - if (strstr(flags, "slave") != NULL) { - currentNode->flags |= CLUSTER_MANAGER_FLAG_SLAVE; - if (master_id != NULL) { - if (currentNode->replicate) sdsfree(currentNode->replicate); - currentNode->replicate = sdsnew(master_id); + if (currentNode->flags_str != NULL) + freeClusterManagerNodeFlags(currentNode->flags_str); + currentNode->flags_str = listCreate(); + int flag_len; + while ((flag_len = strlen(flags)) > 0) { + sds flag = NULL; + char *fp = strchr(flags, ','); + if (fp) { + *fp = '\0'; + flag = sdsnew(flags); + flags = fp + 1; + } else { + flag = sdsnew(flags); + flags += flag_len; } + if (strcmp(flag, "noaddr") == 0) + currentNode->flags |= CLUSTER_MANAGER_FLAG_NOADDR; + else if (strcmp(flag, "disconnected") == 0) + currentNode->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT; + else if (strcmp(flag, "fail") == 0) + currentNode->flags |= CLUSTER_MANAGER_FLAG_FAIL; + else if (strcmp(flag, "slave") == 0) { + currentNode->flags |= CLUSTER_MANAGER_FLAG_SLAVE; + if (master_id != NULL) { + if (currentNode->replicate) sdsfree(currentNode->replicate); + currentNode->replicate = sdsnew(master_id); + } + } + listAddNodeTail(currentNode->flags_str, flag); } if (config_epoch != NULL) currentNode->current_epoch = atoll(config_epoch); @@ -4283,12 +4337,12 @@ assign_replicas: goto cleanup; } } - // Give one second for the join to start, in order to avoid that - // waiting for cluster join will find all the nodes agree about - // the config as they are still empty with unassigned slots.
+ /* Give one second for the join to start, in order to avoid that + * waiting for cluster join will find all the nodes agree about + * the config as they are still empty with unassigned slots. */ sleep(1); clusterManagerWaitForClusterJoin(); - // Useful for the replicas //TODO: create a function for this? + /* Useful for the replicas */ listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; @@ -4315,7 +4369,7 @@ assign_replicas: listEmpty(cluster_manager.nodes); if (!clusterManagerLoadInfoFromNode(first_node, 0)) { success = 0; - goto cleanup; //TODO: msg? + goto cleanup; } clusterManagerCheckCluster(0); } From 092761db62c92e3d27d5b4d55c5a5fdd921d89c1 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 16 May 2018 17:49:18 +0200 Subject: [PATCH 65/66] Cluster Manager: fixed unprinted reply error --- src/redis-cli.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index c108e6735..9ea47ab07 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2773,7 +2773,8 @@ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, strcpy(*err, migrate_reply->str); } printf("\n"); - CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, + migrate_reply->str); } goto next; } @@ -3021,7 +3022,6 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, else break; } else { if (addr == NULL) { - // TODO: find a better err message fprintf(stderr, "Error: invalid CLUSTER NODES reply\n"); success = 0; goto cleanup; @@ -4602,7 +4602,7 @@ static int clusterManagerCommandReshard(int argc, char **argv) { fflush(stdout); char buf[6]; int nread = read(fileno(stdin),buf,6); - if (!nread) continue; //TODO: nread < 0 + if (nread <= 0) continue; int last_idx = nread - 1; if (buf[last_idx] != '\n') { int ch; @@ -4619,7 +4619,7 @@ static int clusterManagerCommandReshard(int argc, char **argv) { printf("What 
is the receiving node ID? "); fflush(stdout); int nread = read(fileno(stdin),buf,255); - if (!nread) continue; //TODO: nread < 0 + if (nread <= 0) continue; int last_idx = nread - 1; if (buf[last_idx] != '\n') { int ch; @@ -4643,7 +4643,7 @@ static int clusterManagerCommandReshard(int argc, char **argv) { printf("Source node #%lu: ", listLength(sources) + 1); fflush(stdout); int nread = read(fileno(stdin),buf,255); - if (!nread) continue; //TODO: nread < 0 + if (nread <= 0) continue; int last_idx = nread - 1; if (buf[last_idx] != '\n') { int ch; @@ -5176,7 +5176,7 @@ static int clusterManagerCommandCall(int argc, char **argv) { redisAppendCommandArgv(n->context, argc, (const char **) argv, argvlen); int status = redisGetReply(n->context, (void **)(&reply)); if (status != REDIS_OK || reply == NULL ) - printf("%s:%d: Failed!\n", n->ip, n->port); //TODO: better message? + printf("%s:%d: Failed!\n", n->ip, n->port); else { sds formatted_reply = cliFormatReplyTTY(reply, ""); printf("%s:%d: %s\n", n->ip, n->port, (char *) formatted_reply); From 20c6dcb82345f46925540b50474e35a1166c3822 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 16 May 2018 18:04:13 +0200 Subject: [PATCH 66/66] Removed TODO in redis-cli --- src/redis-cli.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 9ea47ab07..850b10241 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -4522,7 +4522,6 @@ static int clusterManagerCommandDeleteNode(int argc, char **argv) { if (n->replicate && !strcasecmp(n->replicate, node_id)) { // Reconfigure the slave to replicate with some other node clusterManagerNode *master = clusterManagerNodeWithLeastReplicas(); - //TODO: check whether master could be the same as node assert(master != NULL); clusterManagerLogInfo(">>> %s:%d as replica of %s:%d\n", n->ip, n->port, master->ip, master->port);