From 02705216e01fa55a389dea50a67a89f216814ca1 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 10 Apr 2020 10:12:26 +0200 Subject: [PATCH 01/71] RESP3: change streams items from maps to arrays. Streams items are similar to dictionaries, however they preserve both the order, and allow for duplicated field names. So a map is not a semantically sounding way to deal with this. https://twitter.com/antirez/status/1248261087553880069 --- src/t_stream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/t_stream.c b/src/t_stream.c index e0af87f97..4ce3a9b25 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -963,7 +963,7 @@ size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end addReplyArrayLen(c,2); addReplyStreamID(c,&id); - addReplyMapLen(c,numfields); + addReplyArrayLen(c,numfields*2); /* Emit the field-value pairs. */ while(numfields--) { From e44a3189392dd948de82dddbdc4c9cc261aa2b42 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 14 Apr 2020 10:52:40 +0200 Subject: [PATCH 02/71] Fix function names in zslDeleteNode() top comment. --- src/t_zset.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/t_zset.c b/src/t_zset.c index ea6f4b848..147313d53 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -186,7 +186,8 @@ zskiplistNode *zslInsert(zskiplist *zsl, double score, sds ele) { return x; } -/* Internal function used by zslDelete, zslDeleteByScore and zslDeleteByRank */ +/* Internal function used by zslDelete, zslDeleteRangeByScore and + * zslDeleteRangeByRank. */ void zslDeleteNode(zskiplist *zsl, zskiplistNode *x, zskiplistNode **update) { int i; for (i = 0; i < zsl->level; i++) { From ebc872725f18e0968ba7a105b3934f0c1491c56d Mon Sep 17 00:00:00 2001 From: hayleeliu Date: Wed, 8 Apr 2020 18:20:32 +0800 Subject: [PATCH 03/71] fix spelling mistake in bitops.c --- src/bitops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bitops.c b/src/bitops.c index f78e4fd34..496d78b66 100644 --- a/src/bitops.c +++ b/src/bitops.c @@ -269,7 +269,7 @@ int64_t getSignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits) { * then zero is returned, otherwise in case of overflow, 1 is returned, * otherwise in case of underflow, -1 is returned. * - * When non-zero is returned (oferflow or underflow), if not NULL, *limit is + * When non-zero is returned (overflow or underflow), if not NULL, *limit is * set to the value the operation should result when an overflow happens, * depending on the specified overflow semantics: * From 37a54d6e96d831da7fe5ed2beaeedf82c0553ed8 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 14 Apr 2020 11:23:44 +0200 Subject: [PATCH 04/71] Fix zsetAdd() top comment spelling. --- src/t_zset.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/t_zset.c b/src/t_zset.c index 147313d53..5c000e76f 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -1301,14 +1301,14 @@ int zsetScore(robj *zobj, sds member, double *score) { * none could be set if we re-added an element using the same score it used * to have, or in the case a zero increment is used). * - * The function returns 0 on erorr, currently only when the increment + * The function returns 0 on error, currently only when the increment * produces a NAN condition, or when the 'score' value is NAN since the * start. * - * The commad as a side effect of adding a new element may convert the sorted + * The command as a side effect of adding a new element may convert the sorted * set internal encoding from ziplist to hashtable+skiplist. * - * Memory managemnet of 'ele': + * Memory management of 'ele': * * The function does not take ownership of the 'ele' SDS string, but copies * it if needed. */ From 868ad19b961785c6d5c524c272d984283bcd0502 Mon Sep 17 00:00:00 2001 From: hwware Date: Tue, 14 Apr 2020 00:16:29 -0400 Subject: [PATCH 05/71] fix spelling in acl.c --- src/acl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/acl.c b/src/acl.c index 733988013..6847130ad 100644 --- a/src/acl.c +++ b/src/acl.c @@ -1783,8 +1783,8 @@ void aclCommand(client *c) { long count = 10; /* Number of entries to emit by default. */ /* Parse the only argument that LOG may have: it could be either - * the number of entires the user wants to display, or alternatively - * the "RESET" command in order to flush the old entires. */ + * the number of entries the user wants to display, or alternatively + * the "RESET" command in order to flush the old entries. */ if (c->argc == 3) { if (!strcasecmp(c->argv[2]->ptr,"reset")) { listSetFreeMethod(ACLLog,ACLFreeLogEntry); From d1a7b6998150c5222f450544ee0e9614f4bd9310 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 15 Apr 2020 15:59:52 +0200 Subject: [PATCH 06/71] Fix HELLO reply in Sentinel mode, see #6160. --- src/networking.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index 654fda517..3dd6bf5da 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2460,7 +2460,7 @@ void helloCommand(client *c) { addReplyBulkCString(c,"mode"); if (server.sentinel_mode) addReplyBulkCString(c,"sentinel"); - if (server.cluster_enabled) addReplyBulkCString(c,"cluster"); + else if (server.cluster_enabled) addReplyBulkCString(c,"cluster"); else addReplyBulkCString(c,"standalone"); if (!server.sentinel_mode) { From 157906ca8d824762fef344e0bc28614e5f557078 Mon Sep 17 00:00:00 2001 From: Guy Benoish Date: Sat, 11 Apr 2020 15:05:01 +0300 Subject: [PATCH 07/71] Typo in getTimeoutFromObjectOrReply's error reply --- src/timeout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/timeout.c b/src/timeout.c index bb5999418..7787a049f 100644 --- a/src/timeout.c +++ b/src/timeout.c @@ -166,7 +166,7 @@ int getTimeoutFromObjectOrReply(client *c, robj *object, mstime_t *timeout, int if (unit == UNIT_SECONDS) { if (getLongDoubleFromObjectOrReply(c,object,&ftval, - "timeout is not an float or out of range") != C_OK) + "timeout is not a float or out of range") != C_OK) return C_ERR; tval = (long long) (ftval * 1000.0); } else { From ec68525829507b0ddcd4c480b908e6b2c9dfe22c Mon Sep 17 00:00:00 2001 From: liumiuyong Date: Thu, 9 Apr 2020 17:48:29 +0800 Subject: [PATCH 08/71] FIX: truncate max/min longitude,latitude related geo_point (ex: {180, 85.05112878} ) --- src/geohash.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/geohash.c b/src/geohash.c index db5ae025a..de9620b7a 100644 --- a/src/geohash.c +++ b/src/geohash.c @@ -206,7 +206,11 @@ int geohashDecodeWGS84(const GeoHashBits hash, GeoHashArea *area) { int geohashDecodeAreaToLongLat(const GeoHashArea *area, double *xy) { if (!xy) return 0; xy[0] = (area->longitude.min + area->longitude.max) / 2; + if (xy[0] > GEO_LONG_MAX) xy[0] = GEO_LONG_MAX; + if (xy[0] < GEO_LONG_MIN) xy[0] = GEO_LONG_MIN; xy[1] = (area->latitude.min + area->latitude.max) / 2; + if (xy[1] > GEO_LAT_MAX) xy[1] = GEO_LAT_MAX; + if (xy[1] < GEO_LAT_MIN) xy[1] = GEO_LAT_MIN; return 1; } From 577b4b6908514afed34d875e703d2edbcce5d405 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 15 Apr 2020 16:12:06 +0200 Subject: [PATCH 09/71] Don't allow empty spaces in ACL key patterns. Fixes issue #6418. --- src/acl.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/acl.c b/src/acl.c index 6847130ad..a5e35c4d1 100644 --- a/src/acl.c +++ b/src/acl.c @@ -30,6 +30,7 @@ #include "server.h" #include "sha256.h" #include +#include /* ============================================================================= * Global state for ACLs @@ -690,7 +691,8 @@ void ACLAddAllowedSubcommand(user *u, unsigned long id, const char *sub) { * * When an error is returned, errno is set to the following values: * - * EINVAL: The specified opcode is not understood. + * EINVAL: The specified opcode is not understood or the key pattern is + * invalid (contains non allowed characters). * ENOENT: The command name or command category provided with + or - is not * known. * EBUSY: The subcommand you want to add is about a command that is currently @@ -789,6 +791,15 @@ int ACLSetUser(user *u, const char *op, ssize_t oplen) { errno = EEXIST; return C_ERR; } + /* Validate the pattern: no spaces nor null characters + * are allowed, for simpler rewriting of the ACLs without + * using quoting. */ + for (int i = 1; i < oplen; i++) { + if (isspace(op[i]) || op[i] == 0) { + errno = EINVAL; + return C_ERR; + } + } sds newpat = sdsnewlen(op+1,oplen-1); listNode *ln = listSearchKey(u->patterns,newpat); /* Avoid re-adding the same pattern multiple times. */ From 9674ad957950f3efbbdb185df158a61e77b65a45 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 15 Apr 2020 16:39:42 +0200 Subject: [PATCH 10/71] Don't allow empty spaces in ACL usernames. Fixes issue #6418. --- src/acl.c | 44 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/src/acl.c b/src/acl.c index a5e35c4d1..75b954c5e 100644 --- a/src/acl.c +++ b/src/acl.c @@ -170,6 +170,18 @@ sds ACLHashPassword(unsigned char *cleartext, size_t len) { * Low level ACL API * ==========================================================================*/ +/* Return 1 if the specified string contains spaces or null characters. + * We do this for usernames and key patterns for simpler rewriting of + * ACL rules, presentation on ACL list, and to avoid subtle security bugs + * that may arise from parsing the rules in presence of escapes. + * The function returns 0 if the string has no spaces. */ +int ACLStringHasSpaces(const char *s, size_t len) { + for (size_t i = 0; i < len; i++) { + if (isspace(s[i]) || s[i] == 0) return 1; + } + return 0; +} + /* Given the category name the command returns the corresponding flag, or * zero if there is no match. */ uint64_t ACLGetCommandCategoryFlagByName(const char *name) { @@ -791,14 +803,9 @@ int ACLSetUser(user *u, const char *op, ssize_t oplen) { errno = EEXIST; return C_ERR; } - /* Validate the pattern: no spaces nor null characters - * are allowed, for simpler rewriting of the ACLs without - * using quoting. */ - for (int i = 1; i < oplen; i++) { - if (isspace(op[i]) || op[i] == 0) { - errno = EINVAL; - return C_ERR; - } + if (ACLStringHasSpaces(op+1,oplen-1)) { + errno = EINVAL; + return C_ERR; } sds newpat = sdsnewlen(op+1,oplen-1); listNode *ln = listSearchKey(u->patterns,newpat); @@ -1175,6 +1182,12 @@ int ACLLoadConfiguredUsers(void) { while ((ln = listNext(&li)) != NULL) { sds *aclrules = listNodeValue(ln); sds username = aclrules[0]; + + if (ACLStringHasSpaces(aclrules[0],sdslen(aclrules[0]))) { + serverLog(LL_WARNING,"Spaces not allowed in ACL usernames"); + return C_ERR; + } + user *u = ACLCreateUser(username,sdslen(username)); if (!u) { u = ACLGetUserByName(username,sdslen(username)); @@ -1300,6 +1313,14 @@ sds ACLLoadFromFile(const char *filename) { continue; } + /* Spaces are not allowed in usernames. */ + if (ACLStringHasSpaces(argv[1],sdslen(argv[1]))) { + errors = sdscatprintf(errors, + "'%s:%d: username '%s' contains invalid characters. ", + server.acl_filename, linenum, argv[1]); + continue; + } + /* Try to process the line using the fake user to validate iif * the rules are able to apply cleanly. */ ACLSetUser(fakeuser,"reset",-1); @@ -1609,6 +1630,13 @@ void aclCommand(client *c) { char *sub = c->argv[1]->ptr; if (!strcasecmp(sub,"setuser") && c->argc >= 3) { sds username = c->argv[2]->ptr; + /* Check username validity. */ + if (ACLStringHasSpaces(username,sdslen(username))) { + addReplyErrorFormat(c, + "Usernames can't contain spaces or null characters"); + return; + } + /* Create a temporary user to validate and stage all changes against * before applying to an existing user or creating a new user. If all * arguments are valid the user parameters will all be applied together. From 2dac74ffb417bd18af65787791a077f3d82d9f0a Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 16 Apr 2020 11:21:52 +0200 Subject: [PATCH 11/71] RESP3: fix HELLO map len in Sentinel mode. See #6160. --- src/networking.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index 3dd6bf5da..8f3d79170 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2444,7 +2444,7 @@ void helloCommand(client *c) { /* Let's switch to the specified RESP mode. */ c->resp = ver; - addReplyMapLen(c,7); + addReplyMapLen(c,6 + !server.sentinel_mode); addReplyBulkCString(c,"server"); addReplyBulkCString(c,"redis"); From 151fae1f234dab8a6ac0816fc13c28214efe893d Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 16 Apr 2020 16:08:37 +0200 Subject: [PATCH 12/71] Update SDS to latest version. --- src/sds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sds.c b/src/sds.c index 98bd2e77f..118971621 100644 --- a/src/sds.c +++ b/src/sds.c @@ -97,11 +97,11 @@ sds sdsnewlen(const void *init, size_t initlen) { unsigned char *fp; /* flags pointer. */ sh = s_malloc(hdrlen+initlen+1); + if (sh == NULL) return NULL; if (init==SDS_NOINIT) init = NULL; else if (!init) memset(sh, 0, hdrlen+initlen+1); - if (sh == NULL) return NULL; s = (char*)sh+hdrlen; fp = ((unsigned char*)s)-1; switch(type) { From b3ff2337b9bd21d8edd70e4b768a6f77c451fb84 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 16 Apr 2020 16:18:02 +0200 Subject: [PATCH 13/71] Redis 6.0-RC4. --- 00-RELEASENOTES | 269 ++++++++++++++++++++++++++++++++++++++++++++++++ src/version.h | 2 +- 2 files changed, 270 insertions(+), 1 deletion(-) diff --git a/00-RELEASENOTES b/00-RELEASENOTES index 290158efb..223e6e66c 100644 --- a/00-RELEASENOTES +++ b/00-RELEASENOTES @@ -11,6 +11,275 @@ CRITICAL: There is a critical bug affecting MOST USERS. Upgrade ASAP. SECURITY: There are security fixes in the release. -------------------------------------------------------------------------------- +================================================================================ +Redis 6.0-rc4 Released Thu Apr 16 16:10:35 CEST 2020 +================================================================================ + +Upgrade urgency LOW: If you are using RC3 without issues, don't rush. + +Hi all, this the latest release candidate of Redis 6. This is likely to +be very similar to what you'll see in Redis 6 GA. Please test it and +report any issue :-) + +Main changes in this release: + + * Big INFO speedup when using a lot of of clients. + * Big speedup on all the blocking commands: now blocking + on the same key is O(1) instead of being O(N). + * Stale replicas now allow MULTI/EXEC. + * New command: LCS (Longest Common Subsequence). + * Add a new configuration to make DEL like UNLINK. + * RDB loading speedup. + * Many bugs fixed (see the commit messages at the end of this node) + +See you in 14 days for Redis 6 GA. + +List of commits: + +antirez in commit 9f594e243: + Update SDS to latest version. + 1 file changed, 1 insertion(+), 1 deletion(-) + +antirez in commit 48781dd95: + RESP3: fix HELLO map len in Sentinel mode. + 1 file changed, 1 insertion(+), 1 deletion(-) + +antirez in commit 371ab0cff: + Don't allow empty spaces in ACL usernames. + 1 file changed, 36 insertions(+), 8 deletions(-) + +antirez in commit b86140ac5: + Don't allow empty spaces in ACL key patterns. + 1 file changed, 12 insertions(+), 1 deletion(-) + +liumiuyong in commit a7ee3c3e7: + FIX: truncate max/min longitude,latitude related geo_point (ex: {180, 85.05112878} ) + 1 file changed, 4 insertions(+) + +Guy Benoish in commit e5b9eb817: + Typo in getTimeoutFromObjectOrReply's error reply + 1 file changed, 1 insertion(+), 1 deletion(-) + +antirez in commit 0f31bb5c1: + Fix HELLO reply in Sentinel mode, see #6160. + 1 file changed, 1 insertion(+), 1 deletion(-) + +hwware in commit b92d9a895: + fix spelling in acl.c + 1 file changed, 2 insertions(+), 2 deletions(-) + +antirez in commit 8f896e57a: + Fix zsetAdd() top comment spelling. + 1 file changed, 3 insertions(+), 3 deletions(-) + +hayleeliu in commit 8f5157058: + fix spelling mistake in bitops.c + 1 file changed, 1 insertion(+), 1 deletion(-) + +antirez in commit ddeda9ceb: + Fix function names in zslDeleteNode() top comment. + 1 file changed, 2 insertions(+), 1 deletion(-) + +antirez in commit bde1f0a8e: + RESP3: change streams items from maps to arrays. + 1 file changed, 1 insertion(+), 1 deletion(-) + +antirez in commit bec68bff2: + Use the special static refcount for stack objects. + 1 file changed, 1 insertion(+), 1 deletion(-) + +antirez in commit 0f239e51b: + RDB: refactor some RDB loading code into dbAddRDBLoad(). + 3 files changed, 22 insertions(+), 4 deletions(-) + +antirez in commit f855db61b: + incrRefCount(): abort on statically allocated object. + 2 files changed, 12 insertions(+), 2 deletions(-) + +antirez in commit 23094ba01: + More powerful DEBUG RELOAD. + 3 files changed, 55 insertions(+), 16 deletions(-) + +antirez in commit 8161a7a3e: + RDB: clarify a condition in rdbLoadRio(). + 2 files changed, 9 insertions(+), 2 deletions(-) + +antirez in commit 61b153073: + RDB: load files faster avoiding useless free+realloc. + 7 files changed, 40 insertions(+), 28 deletions(-) + +antirez in commit 414debfd0: + Speedup: unblock clients on keys in O(1). + 4 files changed, 50 insertions(+), 23 deletions(-) + +antirez in commit cbcd07777: + Fix ACL HELP table missing comma. + 1 file changed, 12 insertions(+), 12 deletions(-) + +mymilkbottles in commit 2437455f2: + Judge the log level in advance + 1 file changed, 1 insertion(+) + +antirez in commit 35c64b898: + Speedup INFO by counting client memory incrementally. + 4 files changed, 52 insertions(+), 26 deletions(-) + +qetu3790 in commit c3ac71748: + fix comments about RESIZE DB opcode in rdb.c + 1 file changed, 1 insertion(+), 4 deletions(-) + +antirez in commit c8dbcff9d: + Clarify redis.conf comment about lazyfree-lazy-user-del. + 1 file changed, 9 insertions(+), 5 deletions(-) + +zhaozhao.zz in commit abd5156f2: + lazyfree: add a new configuration lazyfree-lazy-user-del + 4 files changed, 7 insertions(+), 2 deletions(-) + +antirez in commit 5719b3054: + LCS: more tests. + 1 file changed, 8 insertions(+) + +antirez in commit c89e1f293: + LCS: allow KEYS / STRINGS to be anywhere. + 1 file changed, 6 deletions(-) + +antirez in commit 0b16f8d44: + LCS tests. + 1 file changed, 22 insertions(+) + +antirez in commit 9254a805d: + LCS: get rid of STOREIDX option. Fix get keys helper. + 2 files changed, 20 insertions(+), 21 deletions(-) + +antirez in commit a4c490703: + LCS: fix stale comment. + 1 file changed, 1 insertion(+), 1 deletion(-) + +antirez in commit cb92c23de: + LCS: output LCS len as well in IDX mode. + 1 file changed, 6 insertions(+), 1 deletion(-) + +antirez in commit 56a52e804: + LCS: MINMATCHLEN and WITHMATCHLEN options. + 1 file changed, 24 insertions(+), 11 deletions(-) + +antirez in commit ebb09a5c3: + LCS: 7x speedup by accessing the array with better locality. + 1 file changed, 1 insertion(+), 1 deletion(-) + +antirez in commit a9f8a8cba: + LCS: implement KEYS option. + 1 file changed, 18 insertions(+), 2 deletions(-) + +antirez in commit 4aa24e62a: + LCS: other fixes to range emission. + 1 file changed, 20 insertions(+), 16 deletions(-) + +antirez in commit 2b67b6b87: + LCS: fix emission of last range starting at index 0. + 1 file changed, 1 insertion(+), 1 deletion(-) + +antirez in commit 420aac727: + LCS: implement range indexes option. + 1 file changed, 59 insertions(+), 9 deletions(-) + +antirez in commit a518a9a76: + LCS: initial functionality implemented. + 4 files changed, 156 insertions(+), 1 deletion(-) + +srzhao in commit 026cc11b0: + Check OOM at script start to get stable lua OOM state. + 3 files changed, 11 insertions(+), 4 deletions(-) + +Oran Agra in commit 02b594f6a: + diffrent fix for runtest --host --port + 2 files changed, 13 insertions(+), 13 deletions(-) + +Guy Benoish in commit f695d1830: + Try to fix time-sensitive tests in blockonkey.tcl + 1 file changed, 54 insertions(+), 1 deletion(-) + +Guy Benoish in commit 0e42cfc36: + Use __attribute__ only if __GNUC__ is defined + 1 file changed, 12 insertions(+), 3 deletions(-) + +Guy Benoish in commit 91ed9b3c4: + Modules: Perform printf-like format checks in variadic API + 1 file changed, 3 insertions(+), 3 deletions(-) + +Valentino Geron in commit 3e0d20962: + XREAD and XREADGROUP should not be allowed from scripts when BLOCK option is being used + 3 files changed, 18 insertions(+), 2 deletions(-) + +Guy Benoish in commit 240094c9b: + Stale replica should allow MULTI/EXEC + 1 file changed, 3 insertions(+), 3 deletions(-) + +Xudong Zhang in commit 209f3a1eb: + fix integer overflow + 1 file changed, 2 insertions(+), 2 deletions(-) + +Guy Benoish in commit 024c380b9: + Fix no-negative-zero test + 1 file changed, 1 insertion(+) + +Oran Agra in commit a38ff404b: + modules don't signalModifiedKey in setKey() since that's done (optionally) in RM_CloseKey + 4 files changed, 8 insertions(+), 8 deletions(-) + +Oran Agra in commit 814874d68: + change CI to build and run the module api tests + 1 file changed, 2 insertions(+) + +Oran Agra in commit 061616c1b: + fix possible warning on incomplete struct init + 1 file changed, 1 insertion(+), 1 deletion(-) + +Guy Benoish in commit 7764996be: + Make sure Redis does not reply with negative zero + 2 files changed, 10 insertions(+) + +Guy Benoish in commit eba28e2ce: + DEBUG OBJECT should pass keyname to module when loading + 3 files changed, 4 insertions(+), 4 deletions(-) + +David Carlier in commit 15c9e79a7: + debug, dump registers on arm too. + 1 file changed, 55 insertions(+), 27 deletions(-) + +hwware in commit cd2b5df97: + fix spelling in cluster.c + 1 file changed, 1 insertion(+), 1 deletion(-) + +Valentino Geron in commit 8cdc153f5: + XACK should be executed in a "all or nothing" fashion. + 2 files changed, 23 insertions(+), 1 deletion(-) + +hwware in commit b35407fa7: + add check for not switching between optin optout mode directly + 1 file changed, 12 insertions(+), 1 deletion(-) + +hwware in commit 4395889c9: + add check for not providing both optin optout flag + 1 file changed, 8 insertions(+) + +Guy Benoish in commit 1907e0f18: + PERSIST should notify a keyspace event + 1 file changed, 1 insertion(+) + +Guy Benoish in commit c35a53169: + streamReplyWithRange: Redundant XSETIDs to replica + 1 file changed, 2 insertions(+), 1 deletion(-) + +antirez in commit 6fe66e096: + Simplify comment in moduleTryServeClientBlockedOnKey(). + 1 file changed, 3 insertions(+), 12 deletions(-) + +Guy Benoish in commit 193fc241c: + Fix memory corruption in moduleHandleBlockedClients + 3 files changed, 149 insertions(+), 46 deletions(-) + ================================================================================ Redis 6.0-rc3 Released Tue Mar 31 17:42:39 CEST 2020 ================================================================================ diff --git a/src/version.h b/src/version.h index 45465341c..c240a174e 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define REDIS_VERSION "5.9.103" +#define REDIS_VERSION "5.9.104" From 0b1b56b6fc9e72403fc36c39ebca1400574f938d Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 17 Apr 2020 12:38:12 +0200 Subject: [PATCH 14/71] Fix XCLAIM propagation in AOF/replicas for blocking XREADGROUP. See issue #7105. --- src/server.c | 9 +++++++-- src/t_stream.c | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/server.c b/src/server.c index 0afd67514..fc9b87aae 100644 --- a/src/server.c +++ b/src/server.c @@ -3082,8 +3082,13 @@ struct redisCommand *lookupCommandOrOriginal(sds name) { * + PROPAGATE_AOF (propagate into the AOF file if is enabled) * + PROPAGATE_REPL (propagate into the replication link) * - * This should not be used inside commands implementation. Use instead - * alsoPropagate(), preventCommandPropagation(), forceCommandPropagation(). + * This should not be used inside commands implementation since it will not + * wrap the resulting commands in MULTI/EXEC. Use instead alsoPropagate(), + * preventCommandPropagation(), forceCommandPropagation(). + * + * However for functions that need to (also) propagate out of the context of a + * command execution, for example when serving a blocked client, you + * want to use propagate(). */ void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, int flags) diff --git a/src/t_stream.c b/src/t_stream.c index 4ce3a9b25..155167af9 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -848,7 +848,7 @@ void streamPropagateXCLAIM(client *c, robj *key, streamCG *group, robj *groupnam argv[11] = createStringObject("JUSTID",6); argv[12] = createStringObject("LASTID",6); argv[13] = createObjectFromStreamID(&group->last_id); - alsoPropagate(server.xclaimCommand,c->db->id,argv,14,PROPAGATE_AOF|PROPAGATE_REPL); + propagate(server.xclaimCommand,c->db->id,argv,14,PROPAGATE_AOF|PROPAGATE_REPL); decrRefCount(argv[0]); decrRefCount(argv[3]); decrRefCount(argv[4]); From 63f039f122f3a81729e7a609885124229cbc1473 Mon Sep 17 00:00:00 2001 From: Jamie Scott Date: Sun, 12 Apr 2020 00:10:19 -0700 Subject: [PATCH 15/71] Adding acllog-max-len to Redis.conf While playing with ACLs I noticed that acllog-max-len wasn't in the redis.conf, but was a supported config. This PR documents and adds the directive to the redis.conf file. --- redis.conf | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/redis.conf b/redis.conf index 5baeae65f..fab0a5898 100644 --- a/redis.conf +++ b/redis.conf @@ -737,6 +737,15 @@ replica-priority 100 # For more information about ACL configuration please refer to # the Redis web site at https://redis.io/topics/acl +# ACL LOG +# +# The ACL Log tracks failed commands and authentication events associated +# with ACLs. The ACL Log is useful to troubleshoot failed commands blocked +# by ACLs. The ACL Log is stored in and consumes memory. There is no limit +# to its length.You can reclaim memory with ACL LOG RESET or set a maximum +# length below. +acllog-max-len 128 + # Using an external ACL file # # Instead of configuring users here in this file, it is possible to use From f37a1236878e42a7f1d0c55337c1ea8585c06828 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Thu, 16 Apr 2020 11:05:03 +0300 Subject: [PATCH 16/71] testsuite run the defrag latency test solo this test is time sensitive and it sometimes fail to pass below the latency threshold, even on strong machines. this test was the reson we're running just 2 parallel tests in the github actions CI, revering this. --- .github/workflows/ci.yml | 4 ++-- tests/test_helper.tcl | 38 ++++++++++++++++++++++++++++++++++++ tests/unit/memefficiency.tcl | 2 ++ 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3a81d1a08..551fb2d91 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,9 +12,9 @@ jobs: - name: test run: | sudo apt-get install tcl8.5 - ./runtest --clients 2 --verbose + ./runtest --verbose - name: module api test - run: ./runtest-moduleapi --clients 2 --verbose + run: ./runtest-moduleapi --verbose build-ubuntu-old: runs-on: ubuntu-16.04 diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index d80cb6907..11a804bdc 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -82,6 +82,7 @@ set ::skiptests {} set ::allowtags {} set ::only_tests {} set ::single_tests {} +set ::run_solo_tests {} set ::skip_till "" set ::external 0; # If "1" this means, we are running against external instance set ::file ""; # If set, runs only the tests in this comma separated list @@ -112,6 +113,11 @@ proc execute_tests name { send_data_packet $::test_server_fd done "$name" } +proc execute_code {name code} { + eval $code + send_data_packet $::test_server_fd done "$name" +} + # Setup a list to hold a stack of server configs. When calls to start_server # are nested, use "srv 0 pid" to get the pid of the inner server. To access # outer servers, use "srv -1 pid" etcetera. @@ -188,6 +194,18 @@ proc s {args} { status [srv $level "client"] [lindex $args 0] } +# Test wrapped into run_solo are sent back from the client to the +# test server, so that the test server will send them again to +# clients once the clients are idle. +proc run_solo {name code} { + if {$::numclients == 1 || $::loop || $::external} { + # run_solo is not supported in these scenarios, just run the code. + eval $code + return + } + send_data_packet $::test_server_fd run_solo [list $name $code] +} + proc cleanup {} { if {$::dont_clean} { return @@ -337,6 +355,8 @@ proc read_from_test_client fd { } elseif {$status eq {server-killed}} { set ::active_servers [lsearch -all -inline -not -exact $::active_servers $data] set ::active_clients_task($fd) "(KILLED SERVER) pid:$data" + } elseif {$status eq {run_solo}} { + lappend ::run_solo_tests $data } else { if {!$::quiet} { puts "\[$status\]: $data" @@ -369,6 +389,13 @@ proc force_kill_all_servers {} { } } +proc lpop {listVar {count 1}} { + upvar 1 $listVar l + set ele [lindex $l 0] + set l [lrange $l 1 end] + set ele +} + # A new client is idle. Remove it from the list of active clients and # if there are still test units to run, launch them. proc signal_idle_client fd { @@ -389,6 +416,14 @@ proc signal_idle_client fd { if {$::loop && $::next_test == [llength $::all_tests]} { set ::next_test 0 } + } elseif {[llength $::run_solo_tests] != 0 && [llength $::active_clients] == 0} { + if {!$::quiet} { + puts [colorstr bold-white "Testing solo test"] + set ::active_clients_task($fd) "ASSIGNED: $fd solo test" + } + set ::clients_start_time($fd) [clock seconds] + send_data_packet $fd run_code [lpop ::run_solo_tests] + lappend ::active_clients $fd } else { lappend ::idle_clients $fd set ::active_clients_task($fd) "SLEEPING, no more units to assign" @@ -433,6 +468,9 @@ proc test_client_main server_port { foreach {cmd data} $payload break if {$cmd eq {run}} { execute_tests $data + } elseif {$cmd eq {run_code}} { + foreach {name code} $data break + execute_code $name $code } else { error "Unknown test client command: $cmd" } diff --git a/tests/unit/memefficiency.tcl b/tests/unit/memefficiency.tcl index 06b0e07d7..777693fdf 100644 --- a/tests/unit/memefficiency.tcl +++ b/tests/unit/memefficiency.tcl @@ -36,6 +36,7 @@ start_server {tags {"memefficiency"}} { } } +run_solo {defrag} { start_server {tags {"defrag"}} { if {[string match {*jemalloc*} [s mem_allocator]]} { test "Active defrag" { @@ -328,3 +329,4 @@ start_server {tags {"defrag"}} { } {1} } } +} ;# run_solo From d4d2d24745f4153912e9c308fd492f8d5f0b8181 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 17 Apr 2020 10:51:12 +0200 Subject: [PATCH 17/71] A few comments and name changes for #7103. --- tests/test_helper.tcl | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index 11a804bdc..3eee1aeb7 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -106,14 +106,23 @@ set ::tlsdir "tests/tls" set ::client 0 set ::numclients 16 -proc execute_tests name { +# This function is called by one of the test clients when it receives +# a "run" command from the server, with a filename as data. +# It will run the specified test source file and signal it to the +# test server when finished. +proc execute_test_file name { set path "tests/$name.tcl" set ::curfile $path source $path send_data_packet $::test_server_fd done "$name" } -proc execute_code {name code} { +# This function is called by one of the test clients when it receives +# a "run_code" command from the server, with a verbatim test source code +# as argument, and an associated name. +# It will run the specified code and signal it to the test server when +# finished. +proc execute_test_code {name code} { eval $code send_data_packet $::test_server_fd done "$name" } @@ -467,10 +476,10 @@ proc test_client_main server_port { set payload [read $::test_server_fd $bytes] foreach {cmd data} $payload break if {$cmd eq {run}} { - execute_tests $data + execute_test_file $data } elseif {$cmd eq {run_code}} { foreach {name code} $data break - execute_code $name $code + execute_test_code $name $code } else { error "Unknown test client command: $cmd" } From b949f7c173f972157a8f279fdcab287d0c217fc1 Mon Sep 17 00:00:00 2001 From: omg-by <504094596@qq.com> Date: Sat, 18 Apr 2020 00:49:16 +0800 Subject: [PATCH 18/71] fix(sentinel): sentinel.running_scripts will always increase more times and not reset when trigger a always fail scripts, sentinel.running_scripts will increase ten times, however it only decrease one times onretry the maximum. and it will't reset, when it become SENTINEL_SCRIPT_MAX_RUNNING, sentinel don't trigger scripts. --- src/sentinel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sentinel.c b/src/sentinel.c index d091bf230..204a8b317 100644 --- a/src/sentinel.c +++ b/src/sentinel.c @@ -860,6 +860,7 @@ void sentinelCollectTerminatedScripts(void) { sj->pid = 0; sj->start_time = mstime() + sentinelScriptRetryDelay(sj->retry_num); + sentinel.running_scripts--; } else { /* Otherwise let's remove the script, but log the event if the * execution did not terminated in the best of the ways. */ From ac512f292847175a52cc8320cee4484360bdbc2c Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 20 Apr 2020 11:52:29 +0200 Subject: [PATCH 19/71] Sentinel: small refactoring of sentinelCollectTerminatedScripts(). Related to #7113. --- src/sentinel.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/sentinel.c b/src/sentinel.c index 204a8b317..fb504ae4d 100644 --- a/src/sentinel.c +++ b/src/sentinel.c @@ -860,7 +860,6 @@ void sentinelCollectTerminatedScripts(void) { sj->pid = 0; sj->start_time = mstime() + sentinelScriptRetryDelay(sj->retry_num); - sentinel.running_scripts--; } else { /* Otherwise let's remove the script, but log the event if the * execution did not terminated in the best of the ways. */ @@ -870,8 +869,8 @@ void sentinelCollectTerminatedScripts(void) { } listDelNode(sentinel.scripts_queue,ln); sentinelReleaseScriptJob(sj); - sentinel.running_scripts--; } + sentinel.running_scripts--; } } From ff889d2c8a8d732a0c18c69ab158a1e56c5f3ad1 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Mon, 13 Apr 2020 09:58:35 +0800 Subject: [PATCH 20/71] Threaded IO: set thread name for redis-server Set thread name for each thread of redis-server, this helps us to monitor the utilization and optimise the performance. And suggested-by Salvatore, implement this feature for multi platforms. Currently support linux and bsd, ignore other OS. An exmaple on Linux: # top -d 5 -p `pidof redis-server ` -H PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 3682671 root 20 0 227744 8248 3836 R 99.2 0.0 0:19.53 redis-server 3682677 root 20 0 227744 8248 3836 S 26.4 0.0 0:04.15 io_thd_3 3682675 root 20 0 227744 8248 3836 S 23.6 0.0 0:03.98 io_thd_1 3682676 root 20 0 227744 8248 3836 S 23.6 0.0 0:03.97 io_thd_2 3682672 root 20 0 227744 8248 3836 S 0.2 0.0 0:00.02 bio_close_file 3682673 root 20 0 227744 8248 3836 S 0.2 0.0 0:00.02 bio_aof_fsync 3682674 root 20 0 227744 8248 3836 S 0.0 0.0 0:00.00 bio_lazy_free 3682678 root 20 0 227744 8248 3836 S 0.0 0.0 0:00.00 jemalloc_bg_thd 3682682 root 20 0 227744 8248 3836 S 0.0 0.0 0:00.00 jemalloc_bg_thd 3682683 root 20 0 227744 8248 3836 S 0.0 0.0 0:00.00 jemalloc_bg_thd 3682684 root 20 0 227744 8248 3836 S 0.0 0.0 0:00.00 jemalloc_bg_thd 3682685 root 20 0 227744 8248 3836 S 0.0 0.0 0:00.00 jemalloc_bg_thd 3682687 root 20 0 227744 8248 3836 S 0.0 0.0 0:00.00 jemalloc_bg_thd Another exmaple on FreeBSD-12.1: PID USERNAME PRI NICE SIZE RES STATE C TIME WCPU COMMAND 5212 root 100 0 48M 7280K CPU2 2 0:26 99.52% redis-server{redis-server} 5212 root 38 0 48M 7280K umtxn 4 0:06 26.94% redis-server{io_thd_3} 5212 root 36 0 48M 7280K umtxn 6 0:06 26.84% redis-server{io_thd_1} 5212 root 39 0 48M 7280K umtxn 1 0:06 25.30% redis-server{io_thd_2} 5212 root 20 0 48M 7280K uwait 3 0:00 0.00% redis-server{redis-server} 5212 root 21 0 48M 7280K uwait 2 0:00 0.00% redis-server{bio_close_file} 5212 root 21 0 48M 7280K uwait 3 0:00 0.00% redis-server{bio_aof_fsync} 5212 root 21 0 48M 7280K uwait 0 0:00 0.00% redis-server{bio_lazy_free} Signed-off-by: zhenwei pi --- src/bio.c | 12 ++++++++++++ src/config.h | 12 ++++++++++++ src/networking.c | 4 ++++ 3 files changed, 28 insertions(+) diff --git a/src/bio.c b/src/bio.c index 2af684570..0662c8c4c 100644 --- a/src/bio.c +++ b/src/bio.c @@ -154,6 +154,18 @@ void *bioProcessBackgroundJobs(void *arg) { return NULL; } + switch (type) { + case BIO_CLOSE_FILE: + redis_set_thread_title("bio_close_file"); + break; + case BIO_AOF_FSYNC: + redis_set_thread_title("bio_aof_fsync"); + break; + case BIO_LAZY_FREE: + redis_set_thread_title("bio_lazy_free"); + break; + } + /* Make the thread killable at any time, so that bioKillThreads() * can work reliably. */ pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); diff --git a/src/config.h b/src/config.h index efa9d11f2..82dc201d3 100644 --- a/src/config.h +++ b/src/config.h @@ -226,4 +226,16 @@ void setproctitle(const char *fmt, ...); #define USE_ALIGNED_ACCESS #endif +/* Define for redis_set_thread_title */ +#ifdef __linux__ +#define redis_set_thread_title(name) pthread_setname_np(pthread_self(), name) +#else +#if (defined __NetBSD__ || defined __FreeBSD__ || defined __OpenBSD__) +#include +#define redis_set_thread_title(name) pthread_set_name_np(pthread_self(), name) +#else +#define redis_set_thread_title(name) +#endif +#endif + #endif diff --git a/src/networking.c b/src/networking.c index 8f3d79170..1f5d0bd5d 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2821,6 +2821,10 @@ void *IOThreadMain(void *myid) { /* The ID is the thread number (from 0 to server.iothreads_num-1), and is * used by the thread to just manipulate a single sub-array of clients. */ long id = (unsigned long)myid; + char thdname[16]; + + snprintf(thdname, sizeof(thdname), "io_thd_%ld", id); + redis_set_thread_title(thdname); while(1) { /* Wait for start */ From 14758cdc63450d40b608c4d4bfcc3e0f22b38d7e Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 20 Apr 2020 12:17:11 +0200 Subject: [PATCH 21/71] Implement redis_set_thread_title for MacOS. Strange enough, pthread_setname_np() produces a warning for not defined function even if pthread is included. Moreover the MacOS documentation claims the return value for the function is void, but actually is int. Related to #7089. --- src/config.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/config.h b/src/config.h index 82dc201d3..40dc683ce 100644 --- a/src/config.h +++ b/src/config.h @@ -234,8 +234,14 @@ void setproctitle(const char *fmt, ...); #include #define redis_set_thread_title(name) pthread_set_name_np(pthread_self(), name) #else +#if (defined __APPLE__ && defined(MAC_OS_X_VERSION_10_7)) +int pthread_setname_np(const char *name); +#include +#define redis_set_thread_title(name) pthread_setname_np(name) +#else #define redis_set_thread_title(name) #endif #endif +#endif #endif From f066273907c50e0d809e7972666738246fb24650 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 21 Apr 2020 10:51:46 +0200 Subject: [PATCH 22/71] Tracking: NOLOOP internals implementation. --- src/bitops.c | 8 ++-- src/cluster.c | 4 +- src/db.c | 30 +++++++----- src/debug.c | 2 +- src/expire.c | 6 +-- src/geo.c | 4 +- src/hyperloglog.c | 6 +-- src/module.c | 10 ++-- src/server.h | 12 +++-- src/sort.c | 4 +- src/t_hash.c | 10 ++-- src/t_list.c | 20 ++++---- src/t_set.c | 20 ++++---- src/t_stream.c | 6 +-- src/t_string.c | 14 +++--- src/t_zset.c | 12 ++--- src/tracking.c | 118 ++++++++++++++++++++++++++++++++++------------ 17 files changed, 174 insertions(+), 112 deletions(-) diff --git a/src/bitops.c b/src/bitops.c index 496d78b66..f506a881b 100644 --- a/src/bitops.c +++ b/src/bitops.c @@ -554,7 +554,7 @@ void setbitCommand(client *c) { byteval &= ~(1 << bit); byteval |= ((on & 0x1) << bit); ((uint8_t*)o->ptr)[byte] = byteval; - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_STRING,"setbit",c->argv[1],c->db->id); server.dirty++; addReply(c, bitval ? shared.cone : shared.czero); @@ -754,11 +754,11 @@ void bitopCommand(client *c) { /* Store the computed value into the target key */ if (maxlen) { o = createObject(OBJ_STRING,res); - setKey(c->db,targetkey,o); + setKey(c,c->db,targetkey,o); notifyKeyspaceEvent(NOTIFY_STRING,"set",targetkey,c->db->id); decrRefCount(o); } else if (dbDelete(c->db,targetkey)) { - signalModifiedKey(c->db,targetkey); + signalModifiedKey(c,c->db,targetkey); notifyKeyspaceEvent(NOTIFY_GENERIC,"del",targetkey,c->db->id); } server.dirty++; @@ -1135,7 +1135,7 @@ void bitfieldGeneric(client *c, int flags) { } if (changes) { - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_STRING,"setbit",c->argv[1],c->db->id); server.dirty += changes; } diff --git a/src/cluster.c b/src/cluster.c index 2377b386b..b103e2fe1 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -4982,7 +4982,7 @@ void restoreCommand(client *c) { setExpire(c,c->db,c->argv[1],ttl); } objectSetLRUOrLFU(obj,lfu_freq,lru_idle,lru_clock,1000); - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_GENERIC,"restore",c->argv[1],c->db->id); addReply(c,shared.ok); server.dirty++; @@ -5329,7 +5329,7 @@ try_again: if (!copy) { /* No COPY option: remove the local key, signal the change. */ dbDelete(c->db,kv[j]); - signalModifiedKey(c->db,kv[j]); + signalModifiedKey(c,c->db,kv[j]); notifyKeyspaceEvent(NOTIFY_GENERIC,"del",kv[j],c->db->id); server.dirty++; diff --git a/src/db.c b/src/db.c index 59f0cc7a0..dc4a0b63e 100644 --- a/src/db.c +++ b/src/db.c @@ -238,8 +238,10 @@ void dbOverwrite(redisDb *db, robj *key, robj *val) { * 3) The expire time of the key is reset (the key is made persistent), * unless 'keepttl' is true. * - * All the new keys in the database should be created via this interface. */ -void genericSetKey(redisDb *db, robj *key, robj *val, int keepttl, int signal) { + * All the new keys in the database should be created via this interface. + * The client 'c' argument may be set to NULL if the operation is performed + * in a context where there is no clear client performing the operation. */ +void genericSetKey(client *c, redisDb *db, robj *key, robj *val, int keepttl, int signal) { if (lookupKeyWrite(db,key) == NULL) { dbAdd(db,key,val); } else { @@ -247,12 +249,12 @@ void genericSetKey(redisDb *db, robj *key, robj *val, int keepttl, int signal) { } incrRefCount(val); if (!keepttl) removeExpire(db,key); - if (signal) signalModifiedKey(db,key); + if (signal) signalModifiedKey(c,db,key); } /* Common case for genericSetKey() where the TTL is not retained. */ -void setKey(redisDb *db, robj *key, robj *val) { - genericSetKey(db,key,val,0,1); +void setKey(client *c, redisDb *db, robj *key, robj *val) { + genericSetKey(c,db,key,val,0,1); } /* Return true if the specified key exists in the specified database. @@ -467,9 +469,11 @@ long long dbTotalServerKeyCount() { * Every time a DB is flushed the function signalFlushDb() is called. *----------------------------------------------------------------------------*/ -void signalModifiedKey(redisDb *db, robj *key) { +/* Note that the 'c' argument may be NULL if the key was modified out of + * a context of a client. */ +void signalModifiedKey(client *c, redisDb *db, robj *key) { touchWatchedKey(db,key); - trackingInvalidateKey(key); + trackingInvalidateKey(c,key); } void signalFlushedDb(int dbid) { @@ -563,7 +567,7 @@ void delGenericCommand(client *c, int lazy) { int deleted = lazy ? dbAsyncDelete(c->db,c->argv[j]) : dbSyncDelete(c->db,c->argv[j]); if (deleted) { - signalModifiedKey(c->db,c->argv[j]); + signalModifiedKey(c,c->db,c->argv[j]); notifyKeyspaceEvent(NOTIFY_GENERIC, "del",c->argv[j],c->db->id); server.dirty++; @@ -1003,8 +1007,8 @@ void renameGenericCommand(client *c, int nx) { dbAdd(c->db,c->argv[2],o); if (expire != -1) setExpire(c,c->db,c->argv[2],expire); dbDelete(c->db,c->argv[1]); - signalModifiedKey(c->db,c->argv[1]); - signalModifiedKey(c->db,c->argv[2]); + signalModifiedKey(c,c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[2]); notifyKeyspaceEvent(NOTIFY_GENERIC,"rename_from", c->argv[1],c->db->id); notifyKeyspaceEvent(NOTIFY_GENERIC,"rename_to", @@ -1072,8 +1076,8 @@ void moveCommand(client *c) { /* OK! key moved, free the entry in the source DB */ dbDelete(src,c->argv[1]); - signalModifiedKey(src,c->argv[1]); - signalModifiedKey(dst,c->argv[1]); + signalModifiedKey(c,src,c->argv[1]); + signalModifiedKey(c,dst,c->argv[1]); notifyKeyspaceEvent(NOTIFY_GENERIC, "move_from",c->argv[1],src->id); notifyKeyspaceEvent(NOTIFY_GENERIC, @@ -1317,7 +1321,7 @@ int expireIfNeeded(redisDb *db, robj *key) { "expired",key,db->id); int retval = server.lazyfree_lazy_expire ? dbAsyncDelete(db,key) : dbSyncDelete(db,key); - if (retval) signalModifiedKey(db,key); + if (retval) signalModifiedKey(NULL,db,key); return retval; } diff --git a/src/debug.c b/src/debug.c index 1351b2536..cbb56cb71 100644 --- a/src/debug.c +++ b/src/debug.c @@ -588,7 +588,7 @@ NULL memcpy(val->ptr, buf, valsize<=buflen? valsize: buflen); } dbAdd(c->db,key,val); - signalModifiedKey(c->db,key); + signalModifiedKey(c,c->db,key); decrRefCount(key); } addReply(c,shared.ok); diff --git a/src/expire.c b/src/expire.c index c102a01ff..30a27193d 100644 --- a/src/expire.c +++ b/src/expire.c @@ -64,7 +64,7 @@ int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) { dbSyncDelete(db,keyobj); notifyKeyspaceEvent(NOTIFY_EXPIRED, "expired",keyobj,db->id); - trackingInvalidateKey(keyobj); + trackingInvalidateKey(NULL,keyobj); decrRefCount(keyobj); server.stat_expiredkeys++; return 1; @@ -519,14 +519,14 @@ void expireGenericCommand(client *c, long long basetime, int unit) { /* Replicate/AOF this as an explicit DEL or UNLINK. */ aux = server.lazyfree_lazy_expire ? shared.unlink : shared.del; rewriteClientCommandVector(c,2,aux,key); - signalModifiedKey(c->db,key); + signalModifiedKey(c,c->db,key); notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id); addReply(c, shared.cone); return; } else { setExpire(c,c->db,key,when); addReply(c,shared.cone); - signalModifiedKey(c->db,key); + signalModifiedKey(c,c->db,key); notifyKeyspaceEvent(NOTIFY_GENERIC,"expire",key,c->db->id); server.dirty++; return; diff --git a/src/geo.c b/src/geo.c index f7920a2e2..3e5d5f606 100644 --- a/src/geo.c +++ b/src/geo.c @@ -657,13 +657,13 @@ void georadiusGeneric(client *c, int flags) { if (returned_items) { zsetConvertToZiplistIfNeeded(zobj,maxelelen); - setKey(c->db,storekey,zobj); + setKey(c,c->db,storekey,zobj); decrRefCount(zobj); notifyKeyspaceEvent(NOTIFY_ZSET,"georadiusstore",storekey, c->db->id); server.dirty += returned_items; } else if (dbDelete(c->db,storekey)) { - signalModifiedKey(c->db,storekey); + signalModifiedKey(c,c->db,storekey); notifyKeyspaceEvent(NOTIFY_GENERIC,"del",storekey,c->db->id); server.dirty++; } diff --git a/src/hyperloglog.c b/src/hyperloglog.c index facd99743..721f492a1 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -1209,7 +1209,7 @@ void pfaddCommand(client *c) { } hdr = o->ptr; if (updated) { - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_STRING,"pfadd",c->argv[1],c->db->id); server.dirty++; HLL_INVALIDATE_CACHE(hdr); @@ -1300,7 +1300,7 @@ void pfcountCommand(client *c) { * data structure is not modified, since the cached value * may be modified and given that the HLL is a Redis string * we need to propagate the change. */ - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); server.dirty++; } addReplyLongLong(c,card); @@ -1373,7 +1373,7 @@ void pfmergeCommand(client *c) { last hllSparseSet() call. */ HLL_INVALIDATE_CACHE(hdr); - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); /* We generate a PFADD event for PFMERGE for semantical simplicity * since in theory this is a mass-add of elements. */ notifyKeyspaceEvent(NOTIFY_STRING,"pfadd",c->argv[1],c->db->id); diff --git a/src/module.c b/src/module.c index 4d3d9e1af..e3a338dad 100644 --- a/src/module.c +++ b/src/module.c @@ -896,7 +896,7 @@ void RM_SetModuleOptions(RedisModuleCtx *ctx, int options) { /* Signals that the key is modified from user's perspective (i.e. invalidate WATCH * and client side caching). */ int RM_SignalModifiedKey(RedisModuleCtx *ctx, RedisModuleString *keyname) { - signalModifiedKey(ctx->client->db,keyname); + signalModifiedKey(ctx->client,ctx->client->db,keyname); return REDISMODULE_OK; } @@ -2016,7 +2016,7 @@ void *RM_OpenKey(RedisModuleCtx *ctx, robj *keyname, int mode) { static void moduleCloseKey(RedisModuleKey *key) { int signal = SHOULD_SIGNAL_MODIFIED_KEYS(key->ctx); if ((key->mode & REDISMODULE_WRITE) && signal) - signalModifiedKey(key->db,key->key); + signalModifiedKey(key->ctx->client,key->db,key->key); /* TODO: if (key->iter) RM_KeyIteratorStop(kp); */ RM_ZsetRangeStop(key); decrRefCount(key->key); @@ -2157,7 +2157,7 @@ RedisModuleString *RM_RandomKey(RedisModuleCtx *ctx) { int RM_StringSet(RedisModuleKey *key, RedisModuleString *str) { if (!(key->mode & REDISMODULE_WRITE) || key->iter) return REDISMODULE_ERR; RM_DeleteKey(key); - genericSetKey(key->db,key->key,str,0,0); + genericSetKey(key->ctx->client,key->db,key->key,str,0,0); key->value = str; return REDISMODULE_OK; } @@ -2237,7 +2237,7 @@ int RM_StringTruncate(RedisModuleKey *key, size_t newlen) { if (key->value == NULL) { /* Empty key: create it with the new size. */ robj *o = createObject(OBJ_STRING,sdsnewlen(NULL, newlen)); - genericSetKey(key->db,key->key,o,0,0); + genericSetKey(key->ctx->client,key->db,key->key,o,0,0); key->value = o; decrRefCount(o); } else { @@ -3625,7 +3625,7 @@ int RM_ModuleTypeSetValue(RedisModuleKey *key, moduleType *mt, void *value) { if (!(key->mode & REDISMODULE_WRITE) || key->iter) return REDISMODULE_ERR; RM_DeleteKey(key); robj *o = createModuleObject(mt,value); - genericSetKey(key->db,key->key,o,0,0); + genericSetKey(key->ctx->client,key->db,key->key,o,0,0); decrRefCount(o); key->value = o; return REDISMODULE_OK; diff --git a/src/server.h b/src/server.h index d0d5ff154..d39359dce 100644 --- a/src/server.h +++ b/src/server.h @@ -252,7 +252,9 @@ typedef long long ustime_t; /* microsecond time type. */ #define CLIENT_TRACKING_OPTOUT (1ULL<<35) /* Tracking in opt-out mode. */ #define CLIENT_TRACKING_CACHING (1ULL<<36) /* CACHING yes/no was given, depending on optin/optout mode. */ -#define CLIENT_IN_TO_TABLE (1ULL<<37) /* This client is in the timeout table. */ +#define CLIENT_TRACKING_NOLOOP (1ULL<<37) /* Don't send invalidation messages + about writes performed by myself.*/ +#define CLIENT_IN_TO_TABLE (1ULL<<38) /* This client is in the timeout table. */ /* Client block type (btype field in client structure) * if CLIENT_BLOCKED flag is set. */ @@ -1683,7 +1685,7 @@ void addReplyStatusFormat(client *c, const char *fmt, ...); void enableTracking(client *c, uint64_t redirect_to, uint64_t options, robj **prefix, size_t numprefix); void disableTracking(client *c); void trackingRememberKeys(client *c); -void trackingInvalidateKey(robj *keyobj); +void trackingInvalidateKey(client *c, robj *keyobj); void trackingInvalidateKeysOnFlush(int dbid); void trackingLimitUsedSlots(void); uint64_t trackingGetTotalItems(void); @@ -2071,8 +2073,8 @@ int objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle, void dbAdd(redisDb *db, robj *key, robj *val); int dbAddRDBLoad(redisDb *db, sds key, robj *val); void dbOverwrite(redisDb *db, robj *key, robj *val); -void genericSetKey(redisDb *db, robj *key, robj *val, int keepttl, int signal); -void setKey(redisDb *db, robj *key, robj *val); +void genericSetKey(client *c, redisDb *db, robj *key, robj *val, int keepttl, int signal); +void setKey(client *c, redisDb *db, robj *key, robj *val); int dbExists(redisDb *db, robj *key); robj *dbRandomKey(redisDb *db); int dbSyncDelete(redisDb *db, robj *key); @@ -2088,7 +2090,7 @@ void flushAllDataAndResetRDB(int flags); long long dbTotalServerKeyCount(); int selectDb(client *c, int id); -void signalModifiedKey(redisDb *db, robj *key); +void signalModifiedKey(client *c, redisDb *db, robj *key); void signalFlushedDb(int dbid); unsigned int getKeysInSlot(unsigned int hashslot, robj **keys, unsigned int count); unsigned int countKeysInSlot(unsigned int hashslot); diff --git a/src/sort.c b/src/sort.c index db26da158..f269a7731 100644 --- a/src/sort.c +++ b/src/sort.c @@ -570,12 +570,12 @@ void sortCommand(client *c) { } } if (outputlen) { - setKey(c->db,storekey,sobj); + setKey(c,c->db,storekey,sobj); notifyKeyspaceEvent(NOTIFY_LIST,"sortstore",storekey, c->db->id); server.dirty += outputlen; } else if (dbDelete(c->db,storekey)) { - signalModifiedKey(c->db,storekey); + signalModifiedKey(c,c->db,storekey); notifyKeyspaceEvent(NOTIFY_GENERIC,"del",storekey,c->db->id); server.dirty++; } diff --git a/src/t_hash.c b/src/t_hash.c index b9f0db7fc..866bcd25b 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -521,7 +521,7 @@ void hsetnxCommand(client *c) { } else { hashTypeSet(o,c->argv[2]->ptr,c->argv[3]->ptr,HASH_SET_COPY); addReply(c, shared.cone); - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_HASH,"hset",c->argv[1],c->db->id); server.dirty++; } @@ -551,7 +551,7 @@ void hsetCommand(client *c) { /* HMSET */ addReply(c, shared.ok); } - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_HASH,"hset",c->argv[1],c->db->id); server.dirty++; } @@ -586,7 +586,7 @@ void hincrbyCommand(client *c) { new = sdsfromlonglong(value); hashTypeSet(o,c->argv[2]->ptr,new,HASH_SET_TAKE_VALUE); addReplyLongLong(c,value); - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_HASH,"hincrby",c->argv[1],c->db->id); server.dirty++; } @@ -625,7 +625,7 @@ void hincrbyfloatCommand(client *c) { new = sdsnewlen(buf,len); hashTypeSet(o,c->argv[2]->ptr,new,HASH_SET_TAKE_VALUE); addReplyBulkCBuffer(c,buf,len); - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_HASH,"hincrbyfloat",c->argv[1],c->db->id); server.dirty++; @@ -721,7 +721,7 @@ void hdelCommand(client *c) { } } if (deleted) { - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_HASH,"hdel",c->argv[1],c->db->id); if (keyremoved) notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1], diff --git a/src/t_list.c b/src/t_list.c index eaeaa8e48..4770a2272 100644 --- a/src/t_list.c +++ b/src/t_list.c @@ -217,7 +217,7 @@ void pushGenericCommand(client *c, int where) { if (pushed) { char *event = (where == LIST_HEAD) ? "lpush" : "rpush"; - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_LIST,event,c->argv[1],c->db->id); } server.dirty += pushed; @@ -247,7 +247,7 @@ void pushxGenericCommand(client *c, int where) { if (pushed) { char *event = (where == LIST_HEAD) ? "lpush" : "rpush"; - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_LIST,event,c->argv[1],c->db->id); } server.dirty += pushed; @@ -292,7 +292,7 @@ void linsertCommand(client *c) { listTypeReleaseIterator(iter); if (inserted) { - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_LIST,"linsert", c->argv[1],c->db->id); server.dirty++; @@ -355,7 +355,7 @@ void lsetCommand(client *c) { addReply(c,shared.outofrangeerr); } else { addReply(c,shared.ok); - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_LIST,"lset",c->argv[1],c->db->id); server.dirty++; } @@ -382,7 +382,7 @@ void popGenericCommand(client *c, int where) { c->argv[1],c->db->id); dbDelete(c->db,c->argv[1]); } - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); server.dirty++; } } @@ -482,7 +482,7 @@ void ltrimCommand(client *c) { dbDelete(c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id); } - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); server.dirty++; addReply(c,shared.ok); } @@ -519,7 +519,7 @@ void lremCommand(client *c) { listTypeReleaseIterator(li); if (removed) { - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_LIST,"lrem",c->argv[1],c->db->id); } @@ -555,7 +555,7 @@ void rpoplpushHandlePush(client *c, robj *dstkey, robj *dstobj, robj *value) { server.list_compress_depth); dbAdd(c->db,dstkey,dstobj); } - signalModifiedKey(c->db,dstkey); + signalModifiedKey(c,c->db,dstkey); listTypePush(dstobj,value,LIST_HEAD); notifyKeyspaceEvent(NOTIFY_LIST,"lpush",dstkey,c->db->id); /* Always send the pushed value to the client. */ @@ -593,7 +593,7 @@ void rpoplpushCommand(client *c) { notifyKeyspaceEvent(NOTIFY_GENERIC,"del", touchedkey,c->db->id); } - signalModifiedKey(c->db,touchedkey); + signalModifiedKey(c,c->db,touchedkey); decrRefCount(touchedkey); server.dirty++; if (c->cmd->proc == brpoplpushCommand) { @@ -708,7 +708,7 @@ void blockingPopGenericCommand(client *c, int where) { notifyKeyspaceEvent(NOTIFY_GENERIC,"del", c->argv[j],c->db->id); } - signalModifiedKey(c->db,c->argv[j]); + signalModifiedKey(c,c->db,c->argv[j]); server.dirty++; /* Replicate it as an [LR]POP instead of B[LR]POP. */ diff --git a/src/t_set.c b/src/t_set.c index 60cf22d8c..c2e73a6e6 100644 --- a/src/t_set.c +++ b/src/t_set.c @@ -280,7 +280,7 @@ void saddCommand(client *c) { if (setTypeAdd(set,c->argv[j]->ptr)) added++; } if (added) { - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_SET,"sadd",c->argv[1],c->db->id); } server.dirty += added; @@ -305,7 +305,7 @@ void sremCommand(client *c) { } } if (deleted) { - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_SET,"srem",c->argv[1],c->db->id); if (keyremoved) notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1], @@ -358,8 +358,8 @@ void smoveCommand(client *c) { dbAdd(c->db,c->argv[2],dstset); } - signalModifiedKey(c->db,c->argv[1]); - signalModifiedKey(c->db,c->argv[2]); + signalModifiedKey(c,c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[2]); server.dirty++; /* An extra key has changed when ele was successfully added to dstset */ @@ -444,7 +444,7 @@ void spopWithCountCommand(client *c) { /* Propagate this command as an DEL operation */ rewriteClientCommandVector(c,2,shared.del,c->argv[1]); - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); server.dirty++; return; } @@ -546,7 +546,7 @@ void spopWithCountCommand(client *c) { * the alsoPropagate() API. */ decrRefCount(propargv[0]); preventCommandPropagation(c); - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); server.dirty++; } @@ -599,7 +599,7 @@ void spopCommand(client *c) { } /* Set has been modified */ - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); server.dirty++; } @@ -808,7 +808,7 @@ void sinterGenericCommand(client *c, robj **setkeys, zfree(sets); if (dstkey) { if (dbDelete(c->db,dstkey)) { - signalModifiedKey(c->db,dstkey); + signalModifiedKey(c,c->db,dstkey); server.dirty++; } addReply(c,shared.czero); @@ -908,7 +908,7 @@ void sinterGenericCommand(client *c, robj **setkeys, notifyKeyspaceEvent(NOTIFY_GENERIC,"del", dstkey,c->db->id); } - signalModifiedKey(c->db,dstkey); + signalModifiedKey(c,c->db,dstkey); server.dirty++; } else { setDeferredSetLen(c,replylen,cardinality); @@ -1083,7 +1083,7 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum, notifyKeyspaceEvent(NOTIFY_GENERIC,"del", dstkey,c->db->id); } - signalModifiedKey(c->db,dstkey); + signalModifiedKey(c,c->db,dstkey); server.dirty++; } zfree(sets); diff --git a/src/t_stream.c b/src/t_stream.c index 155167af9..3efaa4509 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -1262,7 +1262,7 @@ void xaddCommand(client *c) { } addReplyStreamID(c,&id); - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_STREAM,"xadd",c->argv[1],c->db->id); server.dirty++; @@ -2390,7 +2390,7 @@ void xdelCommand(client *c) { /* Propagate the write if needed. */ if (deleted) { - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_STREAM,"xdel",c->argv[1],c->db->id); server.dirty += deleted; } @@ -2467,7 +2467,7 @@ void xtrimCommand(client *c) { /* Propagate the write if needed. */ if (deleted) { - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_STREAM,"xtrim",c->argv[1],c->db->id); server.dirty += deleted; if (approx_maxlen) streamRewriteApproxMaxlen(c,s,maxlen_arg_idx); diff --git a/src/t_string.c b/src/t_string.c index 335bda404..ef382bb0c 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -84,7 +84,7 @@ void setGenericCommand(client *c, int flags, robj *key, robj *val, robj *expire, addReply(c, abort_reply ? abort_reply : shared.null[c->resp]); return; } - genericSetKey(c->db,key,val,flags & OBJ_SET_KEEPTTL,1); + genericSetKey(c,c->db,key,val,flags & OBJ_SET_KEEPTTL,1); server.dirty++; if (expire) setExpire(c,c->db,key,mstime()+milliseconds); notifyKeyspaceEvent(NOTIFY_STRING,"set",key,c->db->id); @@ -183,7 +183,7 @@ void getCommand(client *c) { void getsetCommand(client *c) { if (getGenericCommand(c) == C_ERR) return; c->argv[2] = tryObjectEncoding(c->argv[2]); - setKey(c->db,c->argv[1],c->argv[2]); + setKey(c,c->db,c->argv[1],c->argv[2]); notifyKeyspaceEvent(NOTIFY_STRING,"set",c->argv[1],c->db->id); server.dirty++; } @@ -240,7 +240,7 @@ void setrangeCommand(client *c) { if (sdslen(value) > 0) { o->ptr = sdsgrowzero(o->ptr,offset+sdslen(value)); memcpy((char*)o->ptr+offset,value,sdslen(value)); - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_STRING, "setrange",c->argv[1],c->db->id); server.dirty++; @@ -328,7 +328,7 @@ void msetGenericCommand(client *c, int nx) { for (j = 1; j < c->argc; j += 2) { c->argv[j+1] = tryObjectEncoding(c->argv[j+1]); - setKey(c->db,c->argv[j],c->argv[j+1]); + setKey(c,c->db,c->argv[j],c->argv[j+1]); notifyKeyspaceEvent(NOTIFY_STRING,"set",c->argv[j],c->db->id); } server.dirty += (c->argc-1)/2; @@ -373,7 +373,7 @@ void incrDecrCommand(client *c, long long incr) { dbAdd(c->db,c->argv[1],new); } } - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_STRING,"incrby",c->argv[1],c->db->id); server.dirty++; addReply(c,shared.colon); @@ -423,7 +423,7 @@ void incrbyfloatCommand(client *c) { dbOverwrite(c->db,c->argv[1],new); else dbAdd(c->db,c->argv[1],new); - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_STRING,"incrbyfloat",c->argv[1],c->db->id); server.dirty++; addReplyBulk(c,new); @@ -467,7 +467,7 @@ void appendCommand(client *c) { o->ptr = sdscatlen(o->ptr,append->ptr,sdslen(append->ptr)); totlen = sdslen(o->ptr); } - signalModifiedKey(c->db,c->argv[1]); + signalModifiedKey(c,c->db,c->argv[1]); notifyKeyspaceEvent(NOTIFY_STRING,"append",c->argv[1],c->db->id); server.dirty++; addReplyLongLong(c,totlen); diff --git a/src/t_zset.c b/src/t_zset.c index 5c000e76f..9c409cd96 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -1646,7 +1646,7 @@ reply_to_client: cleanup: zfree(scores); if (added || updated) { - signalModifiedKey(c->db,key); + signalModifiedKey(c,c->db,key); notifyKeyspaceEvent(NOTIFY_ZSET, incr ? "zincr" : "zadd", key, c->db->id); } @@ -1681,7 +1681,7 @@ void zremCommand(client *c) { notifyKeyspaceEvent(NOTIFY_ZSET,"zrem",key,c->db->id); if (keyremoved) notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id); - signalModifiedKey(c->db,key); + signalModifiedKey(c,c->db,key); server.dirty += deleted; } addReplyLongLong(c,deleted); @@ -1779,7 +1779,7 @@ void zremrangeGenericCommand(client *c, int rangetype) { /* Step 4: Notifications and reply. */ if (deleted) { char *event[3] = {"zremrangebyrank","zremrangebyscore","zremrangebylex"}; - signalModifiedKey(c->db,key); + signalModifiedKey(c,c->db,key); notifyKeyspaceEvent(NOTIFY_ZSET,event[rangetype],key,c->db->id); if (keyremoved) notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id); @@ -2383,7 +2383,7 @@ void zunionInterGenericCommand(client *c, robj *dstkey, int op) { zsetConvertToZiplistIfNeeded(dstobj,maxelelen); dbAdd(c->db,dstkey,dstobj); addReplyLongLong(c,zsetLength(dstobj)); - signalModifiedKey(c->db,dstkey); + signalModifiedKey(c,c->db,dstkey); notifyKeyspaceEvent(NOTIFY_ZSET, (op == SET_OP_UNION) ? "zunionstore" : "zinterstore", dstkey,c->db->id); @@ -2392,7 +2392,7 @@ void zunionInterGenericCommand(client *c, robj *dstkey, int op) { decrRefCount(dstobj); addReply(c,shared.czero); if (touched) { - signalModifiedKey(c->db,dstkey); + signalModifiedKey(c,c->db,dstkey); notifyKeyspaceEvent(NOTIFY_GENERIC,"del",dstkey,c->db->id); server.dirty++; } @@ -3216,7 +3216,7 @@ void genericZpopCommand(client *c, robj **keyv, int keyc, int where, int emitkey if (arraylen == 0) { /* Do this only for the first iteration. */ char *events[2] = {"zpopmin","zpopmax"}; notifyKeyspaceEvent(NOTIFY_ZSET,events[where],key,c->db->id); - signalModifiedKey(c->db,key); + signalModifiedKey(c,c->db,key); } addReplyBulkCBuffer(c,ele,sdslen(ele)); diff --git a/src/tracking.c b/src/tracking.c index 6f7929430..434e086b5 100644 --- a/src/tracking.c +++ b/src/tracking.c @@ -245,7 +245,7 @@ void sendTrackingMessage(client *c, char *keyname, size_t keylen, int proto) { * matches one or more prefixes in the prefix table. Later when we * return to the event loop, we'll send invalidation messages to the * clients subscribed to each prefix. */ -void trackingRememberKeyToBroadcast(char *keyname, size_t keylen) { +void trackingRememberKeyToBroadcast(client *c, char *keyname, size_t keylen) { raxIterator ri; raxStart(&ri,PrefixTable); raxSeek(&ri,"^",NULL,0); @@ -254,7 +254,11 @@ void trackingRememberKeyToBroadcast(char *keyname, size_t keylen) { if (ri.key_len != 0 && memcmp(ri.key,keyname,ri.key_len) != 0) continue; bcastState *bs = ri.data; - raxTryInsert(bs->keys,(unsigned char*)keyname,keylen,NULL,NULL); + /* We insert the client pointer as associated value in the radix + * tree. This way we know who was the client that did the last + * change to the key, and can avoid sending the notification in the + * case the client is in NOLOOP mode. */ + raxTryInsert(bs->keys,(unsigned char*)keyname,keylen,c,NULL); } raxStop(&ri); } @@ -262,13 +266,17 @@ void trackingRememberKeyToBroadcast(char *keyname, size_t keylen) { /* This function is called from signalModifiedKey() or other places in Redis * when a key changes value. In the context of keys tracking, our task here is * to send a notification to every client that may have keys about such caching - * slot. */ -void trackingInvalidateKey(robj *keyobj) { + * slot. + * + * Note that 'c' may be NULL in case the operation was performed outside the + * context of a client modifying the database (for instance when we delete a + * key because of expire). */ +void trackingInvalidateKey(client *c, robj *keyobj) { if (TrackingTable == NULL) return; sds sdskey = keyobj->ptr; if (raxSize(PrefixTable) > 0) - trackingRememberKeyToBroadcast(sdskey,sdslen(sdskey)); + trackingRememberKeyToBroadcast(c,sdskey,sdslen(sdskey)); rax *ids = raxFind(TrackingTable,(unsigned char*)sdskey,sdslen(sdskey)); if (ids == raxNotFound) return; @@ -279,19 +287,28 @@ void trackingInvalidateKey(robj *keyobj) { while(raxNext(&ri)) { uint64_t id; memcpy(&id,ri.key,sizeof(id)); - client *c = lookupClientByID(id); + client *target = lookupClientByID(id); /* Note that if the client is in BCAST mode, we don't want to * send invalidation messages that were pending in the case * previously the client was not in BCAST mode. This can happen if * TRACKING is enabled normally, and then the client switches to * BCAST mode. */ - if (c == NULL || - !(c->flags & CLIENT_TRACKING)|| - c->flags & CLIENT_TRACKING_BCAST) + if (target == NULL || + !(target->flags & CLIENT_TRACKING)|| + target->flags & CLIENT_TRACKING_BCAST) { continue; } - sendTrackingMessage(c,sdskey,sdslen(sdskey),0); + + /* If the client enabled the NOLOOP mode, don't send notifications + * about keys changed by the client itself. */ + if (target->flags & CLIENT_TRACKING_NOLOOP && + target == c) + { + continue; + } + + sendTrackingMessage(target,sdskey,sdslen(sdskey),0); } raxStop(&ri); @@ -383,6 +400,54 @@ void trackingLimitUsedSlots(void) { timeout_counter++; } +/* Generate Redis protocol for an array containing all the key names + * in the 'keys' radix tree. If the client is not NULL, the list will not + * include keys that were modified the last time by this client, in order + * to implement the NOLOOP option. + * + * If the resultin array would be empty, NULL is returned instead. */ +sds trackingBuildBroadcastReply(client *c, rax *keys) { + raxIterator ri; + uint64_t count; + + if (c == NULL) { + count = raxSize(keys); + } else { + count = 0; + raxStart(&ri,keys); + raxSeek(&ri,"^",NULL,0); + while(raxNext(&ri)) { + if (ri.data != c) count++; + } + raxStop(&ri); + + if (count == 0) return NULL; + } + + /* Create the array reply with the list of keys once, then send + * it to all the clients subscribed to this prefix. */ + char buf[32]; + size_t len = ll2string(buf,sizeof(buf),count); + sds proto = sdsempty(); + proto = sdsMakeRoomFor(proto,count*15); + proto = sdscatlen(proto,"*",1); + proto = sdscatlen(proto,buf,len); + proto = sdscatlen(proto,"\r\n",2); + raxStart(&ri,keys); + raxSeek(&ri,"^",NULL,0); + while(raxNext(&ri)) { + if (c && ri.data == c) continue; + len = ll2string(buf,sizeof(buf),ri.key_len); + proto = sdscatlen(proto,"$",1); + proto = sdscatlen(proto,buf,len); + proto = sdscatlen(proto,"\r\n",2); + proto = sdscatlen(proto,ri.key,ri.key_len); + proto = sdscatlen(proto,"\r\n",2); + } + raxStop(&ri); + return proto; +} + /* This function will run the prefixes of clients in BCAST mode and * keys that were modified about each prefix, and will send the * notifications to each client in each prefix. */ @@ -397,26 +462,10 @@ void trackingBroadcastInvalidationMessages(void) { while(raxNext(&ri)) { bcastState *bs = ri.data; if (raxSize(bs->keys)) { - /* Create the array reply with the list of keys once, then send - * it to all the clients subscribed to this prefix. */ - char buf[32]; - size_t len = ll2string(buf,sizeof(buf),raxSize(bs->keys)); - sds proto = sdsempty(); - proto = sdsMakeRoomFor(proto,raxSize(bs->keys)*15); - proto = sdscatlen(proto,"*",1); - proto = sdscatlen(proto,buf,len); - proto = sdscatlen(proto,"\r\n",2); - raxStart(&ri2,bs->keys); - raxSeek(&ri2,"^",NULL,0); - while(raxNext(&ri2)) { - len = ll2string(buf,sizeof(buf),ri2.key_len); - proto = sdscatlen(proto,"$",1); - proto = sdscatlen(proto,buf,len); - proto = sdscatlen(proto,"\r\n",2); - proto = sdscatlen(proto,ri2.key,ri2.key_len); - proto = sdscatlen(proto,"\r\n",2); - } - raxStop(&ri2); + + /* Generate the common protocol for all the clients that are + * not using the NOLOOP option. */ + sds proto = trackingBuildBroadcastReply(NULL,bs->keys); /* Send this array of keys to every client in the list. */ raxStart(&ri2,bs->clients); @@ -424,7 +473,14 @@ void trackingBroadcastInvalidationMessages(void) { while(raxNext(&ri2)) { client *c; memcpy(&c,ri2.key,sizeof(c)); - sendTrackingMessage(c,proto,sdslen(proto),1); + if (c->flags & CLIENT_TRACKING_NOLOOP) { + /* This client may have certain keys excluded. */ + sds adhoc = trackingBuildBroadcastReply(c,bs->keys); + sendTrackingMessage(c,adhoc,sdslen(adhoc),1); + sdsfree(adhoc); + } else { + sendTrackingMessage(c,proto,sdslen(proto),1); + } } raxStop(&ri2); From 249d9332151f32e8f9234e3296ce343050bebaf2 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 21 Apr 2020 17:29:18 +0200 Subject: [PATCH 23/71] Tracking: NOLOOP further implementation and fixes. --- src/networking.c | 2 ++ src/tracking.c | 25 +++++++++++++++++++------ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/networking.c b/src/networking.c index 1f5d0bd5d..744979d16 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2289,6 +2289,8 @@ NULL options |= CLIENT_TRACKING_OPTIN; } else if (!strcasecmp(c->argv[j]->ptr,"optout")) { options |= CLIENT_TRACKING_OPTOUT; + } else if (!strcasecmp(c->argv[j]->ptr,"noloop")) { + options |= CLIENT_TRACKING_NOLOOP; } else if (!strcasecmp(c->argv[j]->ptr,"prefix") && moreargs) { j++; prefix = zrealloc(prefix,sizeof(robj*)*(numprefix+1)); diff --git a/src/tracking.c b/src/tracking.c index 434e086b5..48d231627 100644 --- a/src/tracking.c +++ b/src/tracking.c @@ -94,7 +94,8 @@ void disableTracking(client *c) { server.tracking_clients--; c->flags &= ~(CLIENT_TRACKING|CLIENT_TRACKING_BROKEN_REDIR| CLIENT_TRACKING_BCAST|CLIENT_TRACKING_OPTIN| - CLIENT_TRACKING_OPTOUT|CLIENT_TRACKING_CACHING); + CLIENT_TRACKING_OPTOUT|CLIENT_TRACKING_CACHING| + CLIENT_TRACKING_NOLOOP); } } @@ -129,14 +130,19 @@ void enableTracking(client *c, uint64_t redirect_to, uint64_t options, robj **pr if (!(c->flags & CLIENT_TRACKING)) server.tracking_clients++; c->flags |= CLIENT_TRACKING; c->flags &= ~(CLIENT_TRACKING_BROKEN_REDIR|CLIENT_TRACKING_BCAST| - CLIENT_TRACKING_OPTIN|CLIENT_TRACKING_OPTOUT); + CLIENT_TRACKING_OPTIN|CLIENT_TRACKING_OPTOUT| + CLIENT_TRACKING_NOLOOP); c->client_tracking_redirection = redirect_to; + + /* This may be the first client we ever enable. Crete the tracking + * table if it does not exist. */ if (TrackingTable == NULL) { TrackingTable = raxNew(); PrefixTable = raxNew(); TrackingChannelName = createStringObject("__redis__:invalidate",20); } + /* For broadcasting, set the list of prefixes in the client. */ if (options & CLIENT_TRACKING_BCAST) { c->flags |= CLIENT_TRACKING_BCAST; if (numprefix == 0) enableBcastTrackingForPrefix(c,"",0); @@ -145,7 +151,10 @@ void enableTracking(client *c, uint64_t redirect_to, uint64_t options, robj **pr enableBcastTrackingForPrefix(c,sdsprefix,sdslen(sdsprefix)); } } - c->flags |= options & (CLIENT_TRACKING_OPTIN|CLIENT_TRACKING_OPTOUT); + + /* Set the remaining flags that don't need any special handling. */ + c->flags |= options & (CLIENT_TRACKING_OPTIN|CLIENT_TRACKING_OPTOUT| + CLIENT_TRACKING_NOLOOP); } /* This function is called after the execution of a readonly command in the @@ -459,10 +468,12 @@ void trackingBroadcastInvalidationMessages(void) { raxStart(&ri,PrefixTable); raxSeek(&ri,"^",NULL,0); + + /* For each prefix... */ while(raxNext(&ri)) { bcastState *bs = ri.data; - if (raxSize(bs->keys)) { + if (raxSize(bs->keys)) { /* Generate the common protocol for all the clients that are * not using the NOLOOP option. */ sds proto = trackingBuildBroadcastReply(NULL,bs->keys); @@ -476,8 +487,10 @@ void trackingBroadcastInvalidationMessages(void) { if (c->flags & CLIENT_TRACKING_NOLOOP) { /* This client may have certain keys excluded. */ sds adhoc = trackingBuildBroadcastReply(c,bs->keys); - sendTrackingMessage(c,adhoc,sdslen(adhoc),1); - sdsfree(adhoc); + if (adhoc) { + sendTrackingMessage(c,adhoc,sdslen(adhoc),1); + sdsfree(adhoc); + } } else { sendTrackingMessage(c,proto,sdslen(proto),1); } From a05f6b064bfb780ba2e092a0cbd8b7c9f696dc7b Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 22 Apr 2020 10:49:17 +0200 Subject: [PATCH 24/71] Tracking: signal key as modified when evicting. --- src/evict.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/evict.c b/src/evict.c index 71260c040..a829f1f99 100644 --- a/src/evict.c +++ b/src/evict.c @@ -569,6 +569,7 @@ int freeMemoryIfNeeded(void) { dbAsyncDelete(db,keyobj); else dbSyncDelete(db,keyobj); + signalModifiedKey(NULL,db,keyobj); latencyEndMonitor(eviction_latency); latencyAddSampleIfNeeded("eviction-del",eviction_latency); latencyRemoveNestedEvent(latency,eviction_latency); From 1b28c0cbabe83c297b92ad9768a3066c4e746046 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 22 Apr 2020 11:24:19 +0200 Subject: [PATCH 25/71] Tracking: NOLOOP tests. --- tests/unit/tracking.tcl | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/unit/tracking.tcl b/tests/unit/tracking.tcl index 2058319f7..dc6cd671a 100644 --- a/tests/unit/tracking.tcl +++ b/tests/unit/tracking.tcl @@ -7,6 +7,9 @@ start_server {tags {"tracking"}} { $rd1 subscribe __redis__:invalidate $rd1 read ; # Consume the SUBSCRIBE reply. + # Create another client as well in order to test NOLOOP + set rd2 [redis_deferring_client] + test {Clients are able to enable tracking and redirect it} { r CLIENT TRACKING on REDIRECT $redir } {*OK} @@ -62,5 +65,34 @@ start_server {tags {"tracking"}} { assert {$keys eq {c:1234}} } + test {Tracking NOLOOP mode in standard mode works} { + r CLIENT TRACKING off + r CLIENT TRACKING on REDIRECT $redir NOLOOP + r MGET otherkey1 loopkey otherkey2 + $rd2 SET otherkey1 1; # We should get this + r SET loopkey 1 ; # We should not get this + $rd2 SET otherkey2 1; # We should get this + # Because of the internals, we know we are going to receive + # two separated notifications for the two different prefixes. + set keys1 [lsort [lindex [$rd1 read] 2]] + set keys2 [lsort [lindex [$rd1 read] 2]] + set keys [lsort [list {*}$keys1 {*}$keys2]] + assert {$keys eq {otherkey1 otherkey2}} + } + + test {Tracking NOLOOP mode in BCAST mode works} { + r CLIENT TRACKING off + r CLIENT TRACKING on BCAST REDIRECT $redir NOLOOP + $rd2 SET otherkey1 1; # We should get this + r SET loopkey 1 ; # We should not get this + $rd2 SET otherkey2 1; # We should get this + # Because of the internals, we know we are going to receive + # two separated notifications for the two different prefixes. + set keys1 [lsort [lindex [$rd1 read] 2]] + set keys2 [lsort [lindex [$rd1 read] 2]] + set keys [lsort [list {*}$keys1 {*}$keys2]] + assert {$keys eq {otherkey1 otherkey2}} + } + $rd1 close } From 6666d30f6a159ce985dc3b36ccfc5f4047adaebc Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 22 Apr 2020 11:45:34 +0200 Subject: [PATCH 26/71] Tracking: test expired keys notifications. --- tests/unit/tracking.tcl | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/unit/tracking.tcl b/tests/unit/tracking.tcl index dc6cd671a..43bb5f864 100644 --- a/tests/unit/tracking.tcl +++ b/tests/unit/tracking.tcl @@ -94,5 +94,18 @@ start_server {tags {"tracking"}} { assert {$keys eq {otherkey1 otherkey2}} } + test {Tracking gets notification of expired keys} { + r CLIENT TRACKING off + r CLIENT TRACKING on BCAST REDIRECT $redir NOLOOP + r SET mykey myval px 1 + r SET mykeyotherkey myval ; # We should not get it + after 1000 + # Because of the internals, we know we are going to receive + # two separated notifications for the two different prefixes. + set keys1 [lsort [lindex [$rd1 read] 2]] + set keys [lsort [list {*}$keys1]] + assert {$keys eq {mykey}} + } + $rd1 close } From 48ff17e1de2a39822186b76cadd331c7fbe02fc6 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Mon, 20 Apr 2020 13:34:37 +0300 Subject: [PATCH 27/71] TLS: Fix build on older verisons of OpenSSL. --- src/tls.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/tls.c b/src/tls.c index 5fac6902b..ea0c34469 100644 --- a/src/tls.c +++ b/src/tls.c @@ -168,7 +168,9 @@ int tlsConfigure(redisTLSContextConfig *ctx_config) { SSL_CTX_set_mode(ctx, SSL_MODE_ENABLE_PARTIAL_WRITE|SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER); SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER|SSL_VERIFY_FAIL_IF_NO_PEER_CERT, NULL); +#if defined(SSL_CTX_set_ecdh_auto) SSL_CTX_set_ecdh_auto(ctx, 1); +#endif if (SSL_CTX_use_certificate_file(ctx, ctx_config->cert_file, SSL_FILETYPE_PEM) <= 0) { ERR_error_string_n(ERR_get_error(), errbuf, sizeof(errbuf)); From 6a8f6ac1a35a26a39797680b9d5709215fb17de0 Mon Sep 17 00:00:00 2001 From: Theo Buehler Date: Wed, 22 Apr 2020 09:43:01 +0200 Subject: [PATCH 28/71] TLS: Fix build with SSL_OP_NO_CLIENT_RENEGOTIATION There is no ssl in this scope, so the build breaks. All the other options are set directly on the ctx. --- src/tls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tls.c b/src/tls.c index ea0c34469..c18aafebe 100644 --- a/src/tls.c +++ b/src/tls.c @@ -160,7 +160,7 @@ int tlsConfigure(redisTLSContextConfig *ctx_config) { #endif #ifdef SSL_OP_NO_CLIENT_RENEGOTIATION - SSL_CTX_set_options(ssl->ctx, SSL_OP_NO_CLIENT_RENEGOTIATION); + SSL_CTX_set_options(ctx, SSL_OP_NO_CLIENT_RENEGOTIATION); #endif if (ctx_config->prefer_server_ciphers) From de38fa2b65084429e522ccbe113d9360efc7ef70 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 22 Apr 2020 17:14:15 +0200 Subject: [PATCH 29/71] ACL: deny commands execution of disabled users. --- src/acl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/acl.c b/src/acl.c index 75b954c5e..9e2ed6af7 100644 --- a/src/acl.c +++ b/src/acl.c @@ -1055,6 +1055,10 @@ int ACLCheckCommandPerm(client *c, int *keyidxptr) { /* If there is no associated user, the connection can run anything. */ if (u == NULL) return ACL_OK; + /* If the user is disabled we don't allow the execution of any + * command. */ + if (!(u->flags & USER_FLAG_ENABLED)) return ACL_DENIED_CMD; + /* Check if the user can execute this command. */ if (!(u->flags & USER_FLAG_ALLCOMMANDS) && c->cmd->proc != authCommand) From 9651156a7b5c751031f1745e242448acc69a4f18 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 23 Apr 2020 10:39:53 +0200 Subject: [PATCH 30/71] ACL GENPASS: emit 256 bits instead of 128. --- src/acl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/acl.c b/src/acl.c index 9e2ed6af7..228811cba 100644 --- a/src/acl.c +++ b/src/acl.c @@ -1819,7 +1819,7 @@ void aclCommand(client *c) { dictReleaseIterator(di); setDeferredArrayLen(c,dl,arraylen); } else if (!strcasecmp(sub,"genpass") && c->argc == 2) { - char pass[32]; /* 128 bits of actual pseudo random data. */ + char pass[64]; /* 256 bits of actual pseudo random data. */ getRandomHexChars(pass,sizeof(pass)); addReplyBulkCBuffer(c,pass,sizeof(pass)); } else if (!strcasecmp(sub,"log") && (c->argc == 2 || c->argc ==3)) { From 32c66998473e1a6cd848ed3457ddc2c12ca3844e Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 23 Apr 2020 10:53:21 +0200 Subject: [PATCH 31/71] ACL GENPASS: take number of bits as argument. --- src/acl.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/src/acl.c b/src/acl.c index 228811cba..bf5dd18f1 100644 --- a/src/acl.c +++ b/src/acl.c @@ -1626,7 +1626,7 @@ void addACLLogEntry(client *c, int reason, int keypos, sds username) { * ACL SETUSER ... acl rules ... * ACL DELUSER [...] * ACL GETUSER - * ACL GENPASS + * ACL GENPASS [] * ACL WHOAMI * ACL LOG [ | RESET] */ @@ -1818,10 +1818,25 @@ void aclCommand(client *c) { } dictReleaseIterator(di); setDeferredArrayLen(c,dl,arraylen); - } else if (!strcasecmp(sub,"genpass") && c->argc == 2) { - char pass[64]; /* 256 bits of actual pseudo random data. */ - getRandomHexChars(pass,sizeof(pass)); - addReplyBulkCBuffer(c,pass,sizeof(pass)); + } else if (!strcasecmp(sub,"genpass") && (c->argc == 2 || c->argc == 3)) { + #define GENPASS_MAX_BITS 4096 + char pass[GENPASS_MAX_BITS/8*2]; /* Hex representation. */ + long bits = 256; /* By default generate 256 bits passwords. */ + + if (c->argc == 3 && getLongFromObjectOrReply(c,c->argv[2],&bits,NULL) + != C_OK) return; + + if (bits <= 0 || bits > GENPASS_MAX_BITS) { + addReplyErrorFormat(c, + "ACL GENPASS argument must be the number of " + "bits for the output password, a positive number " + "up to %d",GENPASS_MAX_BITS); + return; + } + + long chars = (bits+3)/4; /* Round to number of characters to emit. */ + getRandomHexChars(pass,chars); + addReplyBulkCBuffer(c,pass,chars); } else if (!strcasecmp(sub,"log") && (c->argc == 2 || c->argc ==3)) { long count = 10; /* Number of entries to emit by default. */ @@ -1899,7 +1914,7 @@ void aclCommand(client *c) { "DELUSER [...] -- Delete a list of users.", "CAT -- List available categories.", "CAT -- List commands inside category.", -"GENPASS -- Generate a secure user password.", +"GENPASS [] -- Generate a secure user password.", "WHOAMI -- Return the current connection username.", "LOG [ | RESET] -- Show the ACL log entries.", NULL From 76aa8a43abba2939e914b5cb5035e93c32e00b76 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 23 Apr 2020 11:17:42 +0200 Subject: [PATCH 32/71] getRandomBytes(): use HMAC-SHA256. Now that we have an interface to use this API directly, via ACL GENPASS, we are no longer sure what people could do with it. So why don't make it a strong primitive exported by Redis in order to create unique IDs and so forth? The implementation was tested against the test vectors that can be found in RFC4231. --- src/util.c | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/src/util.c b/src/util.c index bd8f0fb98..d173f776f 100644 --- a/src/util.c +++ b/src/util.c @@ -42,7 +42,7 @@ #include #include "util.h" -#include "sha1.h" +#include "sha256.h" /* Glob-style pattern matching. */ int stringmatchlen(const char *pattern, int patternLen, @@ -622,7 +622,7 @@ int ld2string(char *buf, size_t len, long double value, ld2string_mode mode) { void getRandomBytes(unsigned char *p, size_t len) { /* Global state. */ static int seed_initialized = 0; - static unsigned char seed[20]; /* The SHA1 seed, from /dev/urandom. */ + static unsigned char seed[64]; /* 512 bit internal block size. */ static uint64_t counter = 0; /* The counter we hash with the seed. */ if (!seed_initialized) { @@ -647,14 +647,34 @@ void getRandomBytes(unsigned char *p, size_t len) { } while(len) { - unsigned char digest[20]; - SHA1_CTX ctx; - unsigned int copylen = len > 20 ? 20 : len; + /* This implements SHA256-HMAC. */ + unsigned char digest[SHA256_BLOCK_SIZE]; + unsigned char kxor[64]; + unsigned int copylen = + len > SHA256_BLOCK_SIZE ? SHA256_BLOCK_SIZE : len; - SHA1Init(&ctx); - SHA1Update(&ctx, seed, sizeof(seed)); - SHA1Update(&ctx, (unsigned char*)&counter,sizeof(counter)); - SHA1Final(digest, &ctx); + /* IKEY: key xored with 0x36. */ + memcpy(kxor,seed,sizeof(kxor)); + for (unsigned int i = 0; i < sizeof(kxor); i++) kxor[i] ^= 0x36; + + /* Obtain HASH(IKEY||MESSAGE). */ + SHA256_CTX ctx; + sha256_init(&ctx); + sha256_update(&ctx,kxor,sizeof(kxor)); + sha256_update(&ctx,(unsigned char*)&counter,sizeof(counter)); + sha256_final(&ctx,digest); + + /* OKEY: key xored with 0x5c. */ + memcpy(kxor,seed,sizeof(kxor)); + for (unsigned int i = 0; i < sizeof(kxor); i++) kxor[i] ^= 0x5C; + + /* Obtain HASH(OKEY || HASH(IKEY||MESSAGE)). */ + sha256_init(&ctx); + sha256_update(&ctx,kxor,sizeof(kxor)); + sha256_update(&ctx,digest,SHA256_BLOCK_SIZE); + sha256_final(&ctx,digest); + + /* Increment the counter for the next iteration. */ counter++; memcpy(p,digest,copylen); From 408d4fb35d3740ccffa06ec78d704dcc97eaee1c Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 23 Apr 2020 11:56:36 +0200 Subject: [PATCH 33/71] ACL: re-enable command execution of disabled users. After all I changed idea again: enabled/disabled should have a more clear meaning, and it only means: you can't authenticate with such user with new connections, however old connections continue to work as expected. --- src/acl.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/acl.c b/src/acl.c index bf5dd18f1..3194feb5b 100644 --- a/src/acl.c +++ b/src/acl.c @@ -1055,10 +1055,6 @@ int ACLCheckCommandPerm(client *c, int *keyidxptr) { /* If there is no associated user, the connection can run anything. */ if (u == NULL) return ACL_OK; - /* If the user is disabled we don't allow the execution of any - * command. */ - if (!(u->flags & USER_FLAG_ENABLED)) return ACL_DENIED_CMD; - /* Check if the user can execute this command. */ if (!(u->flags & USER_FLAG_ALLCOMMANDS) && c->cmd->proc != authCommand) From a2a5b1d6aeef0b11bc3bffb5745487dc6b57baa2 Mon Sep 17 00:00:00 2001 From: Valentino Geron Date: Tue, 21 Apr 2020 20:55:43 +0300 Subject: [PATCH 34/71] XREADGROUP with NOACK should propagate only one XGROUP SETID command --- src/t_stream.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/t_stream.c b/src/t_stream.c index 3efaa4509..8abbee02f 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -935,6 +935,8 @@ size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end streamIterator si; int64_t numfields; streamID id; + int propagate_last_id = 0; + int noack = flags & STREAM_RWR_NOACK; /* If the client is asking for some history, we serve it using a * different function, so that we return entries *solely* from its @@ -950,12 +952,14 @@ size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end arraylen_ptr = addReplyDeferredLen(c); streamIteratorStart(&si,s,start,end,rev); while(streamIteratorGetID(&si,&id,&numfields)) { - int propagate_last_id = 0; - /* Update the group last_id if needed. */ if (group && streamCompareID(&id,&group->last_id) > 0) { group->last_id = id; - propagate_last_id = 1; + /* Group last id should be propagated only if NOACK was + * specified, otherwise the last id would be included + * in XCLAIM. */ + if (noack) + propagate_last_id = 1; } /* Emit a two elements array for each item. The first is @@ -983,7 +987,7 @@ size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end * XGROUP SETID command. So if we find that there is already * a NACK for the entry, we need to associate it to the new * consumer. */ - if (group && !(flags & STREAM_RWR_NOACK)) { + if (group && !noack) { unsigned char buf[sizeof(streamID)]; streamEncodeID(buf,&id); @@ -1020,14 +1024,16 @@ size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end streamPropagateXCLAIM(c,spi->keyname,group,spi->groupname,idarg,nack); decrRefCount(idarg); } - } else { - if (propagate_last_id) - streamPropagateGroupID(c,spi->keyname,group,spi->groupname); } arraylen++; if (count && count == arraylen) break; } + + if (spi && propagate_last_id) { + streamPropagateGroupID(c,spi->keyname,group,spi->groupname); + } + streamIteratorStop(&si); if (arraylen_ptr) setDeferredArrayLen(c,arraylen_ptr,arraylen); return arraylen; From 6d3bd2ed5a9a4daa28321f7954e78182adebf312 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 23 Apr 2020 16:13:45 +0200 Subject: [PATCH 35/71] Minor aesthetic changes to #7135. --- src/t_stream.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/t_stream.c b/src/t_stream.c index 8abbee02f..596d8ad0d 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -955,11 +955,10 @@ size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end /* Update the group last_id if needed. */ if (group && streamCompareID(&id,&group->last_id) > 0) { group->last_id = id; - /* Group last id should be propagated only if NOACK was - * specified, otherwise the last id would be included - * in XCLAIM. */ - if (noack) - propagate_last_id = 1; + /* Group last ID should be propagated only if NOACK was + * specified, otherwise the last id will be included + * in the propagation of XCLAIM itself. */ + if (noack) propagate_last_id = 1; } /* Emit a two elements array for each item. The first is @@ -1030,9 +1029,8 @@ size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end if (count && count == arraylen) break; } - if (spi && propagate_last_id) { + if (spi && propagate_last_id) streamPropagateGroupID(c,spi->keyname,group,spi->groupname); - } streamIteratorStop(&si); if (arraylen_ptr) setDeferredArrayLen(c,arraylen_ptr,arraylen); From 782d9f2ff98da897e9307d930ec2e20642b2bdcc Mon Sep 17 00:00:00 2001 From: yanhui13 Date: Tue, 21 Apr 2020 16:55:05 +0800 Subject: [PATCH 36/71] optimize the output of cluster slots --- src/cluster.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index b103e2fe1..e3adaf83a 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -4191,11 +4191,17 @@ void clusterReplyMultiBulkSlots(client *c) { while((de = dictNext(di)) != NULL) { clusterNode *node = dictGetVal(de); int j = 0, start = -1; + int i, nested_elements = 0; /* Skip slaves (that are iterated when producing the output of their * master) and masters not serving any slot. */ if (!nodeIsMaster(node) || node->numslots == 0) continue; + for(i = 0; i < node->numslaves; i++) { + if (nodeFailed(node->slaves[i])) continue; + nested_elements++; + } + for (j = 0; j < CLUSTER_SLOTS; j++) { int bit, i; @@ -4203,8 +4209,7 @@ void clusterReplyMultiBulkSlots(client *c) { if (start == -1) start = j; } if (start != -1 && (!bit || j == CLUSTER_SLOTS-1)) { - int nested_elements = 3; /* slots (2) + master addr (1). */ - void *nested_replylen = addReplyDeferredLen(c); + addReplyArrayLen(c, nested_elements + 3); /* slots (2) + master addr (1). */ if (bit && j == CLUSTER_SLOTS-1) j++; @@ -4234,9 +4239,7 @@ void clusterReplyMultiBulkSlots(client *c) { addReplyBulkCString(c, node->slaves[i]->ip); addReplyLongLong(c, node->slaves[i]->port); addReplyBulkCBuffer(c, node->slaves[i]->name, CLUSTER_NAMELEN); - nested_elements++; } - setDeferredArrayLen(c, nested_replylen, nested_elements); num_masters++; } } From e0add7e0f101e8ad6a6c91f9ca4454cd318cccf3 Mon Sep 17 00:00:00 2001 From: yanhui13 Date: Tue, 21 Apr 2020 16:56:10 +0800 Subject: [PATCH 37/71] add tcl test for cluster slots --- tests/cluster/tests/15-cluster-slots.tcl | 44 ++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 tests/cluster/tests/15-cluster-slots.tcl diff --git a/tests/cluster/tests/15-cluster-slots.tcl b/tests/cluster/tests/15-cluster-slots.tcl new file mode 100644 index 000000000..dc9938ef6 --- /dev/null +++ b/tests/cluster/tests/15-cluster-slots.tcl @@ -0,0 +1,44 @@ +source "../tests/includes/init-tests.tcl" + +proc cluster_allocate_mixedSlots {n} { + set slot 16383 + while {$slot >= 0} { + set node [expr {$slot % $n}] + lappend slots_$node $slot + incr slot -1 + } + for {set j 0} {$j < $n} {incr j} { + R $j cluster addslots {*}[set slots_${j}] + } +} + +proc create_cluster_with_mixedSlot {masters slaves} { + cluster_allocate_mixedSlots $masters + if {$slaves} { + cluster_allocate_slaves $masters $slaves + } + assert_cluster_state ok +} + +test "Create a 5 nodes cluster" { + create_cluster_with_mixedSlot 5 15 +} + +test "Cluster is up" { + assert_cluster_state ok +} + +test "Cluster is writable" { + cluster_write_test 0 +} + +test "Instance #5 is a slave" { + assert {[RI 5 role] eq {slave}} +} + +test "client do not break when cluster slot" { + R 0 config set client-output-buffer-limit "normal 33554432 16777216 60" + if { [catch {R 0 cluster slots}] } { + fail "output overflow when cluster slots" + } +} \ No newline at end of file From e772b55a9ce70445a89e80cf9eb03d99b44e9679 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 24 Apr 2020 10:13:20 +0200 Subject: [PATCH 38/71] Also use propagate() in streamPropagateGroupID(). --- src/t_stream.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/t_stream.c b/src/t_stream.c index 596d8ad0d..758db637e 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -848,6 +848,11 @@ void streamPropagateXCLAIM(client *c, robj *key, streamCG *group, robj *groupnam argv[11] = createStringObject("JUSTID",6); argv[12] = createStringObject("LASTID",6); argv[13] = createObjectFromStreamID(&group->last_id); + + /* We use progagate() because this code path is not always called from + * the command execution context. Moreover this will just alter the + * consumer group state, and we don't need MULTI/EXEC wrapping because + * there is no message state cross-message atomicity required. */ propagate(server.xclaimCommand,c->db->id,argv,14,PROPAGATE_AOF|PROPAGATE_REPL); decrRefCount(argv[0]); decrRefCount(argv[3]); @@ -875,7 +880,12 @@ void streamPropagateGroupID(client *c, robj *key, streamCG *group, robj *groupna argv[2] = key; argv[3] = groupname; argv[4] = createObjectFromStreamID(&group->last_id); - alsoPropagate(server.xgroupCommand,c->db->id,argv,5,PROPAGATE_AOF|PROPAGATE_REPL); + + /* We use progagate() because this code path is not always called from + * the command execution context. Moreover this will just alter the + * consumer group state, and we don't need MULTI/EXEC wrapping because + * there is no message state cross-message atomicity required. */ + propagate(server.xgroupCommand,c->db->id,argv,5,PROPAGATE_AOF|PROPAGATE_REPL); decrRefCount(argv[0]); decrRefCount(argv[1]); decrRefCount(argv[4]); From 262da0ba78828b79344f51ab1539c587af6dc052 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 24 Apr 2020 16:49:27 +0200 Subject: [PATCH 39/71] LCS -> STRALGO LCS. STRALGO should be a container for mostly read-only string algorithms in Redis. The algorithms should have two main characteristics: 1. They should be non trivial to compute, and often not part of programming language standard libraries. 2. They should be fast enough that it is a good idea to have optimized C implementations. Next thing I would love to see? A small strings compression algorithm. --- src/server.c | 2 +- src/server.h | 2 +- src/t_string.c | 23 ++++++++++++++++++----- tests/unit/type/string.tcl | 16 ++++++++-------- 4 files changed, 28 insertions(+), 15 deletions(-) diff --git a/src/server.c b/src/server.c index fc9b87aae..f7af79c3f 100644 --- a/src/server.c +++ b/src/server.c @@ -1006,7 +1006,7 @@ struct redisCommand redisCommandTable[] = { "admin no-script no-slowlog ok-loading ok-stale", 0,NULL,0,0,0,0,0,0}, - {"lcs",lcsCommand,-4, + {"stralgo",stralgoCommand,-2, "write use-memory @string", 0,lcsGetKeys,0,0,0,0,0,0} }; diff --git a/src/server.h b/src/server.h index d39359dce..9e1e506af 100644 --- a/src/server.h +++ b/src/server.h @@ -2389,7 +2389,7 @@ void xdelCommand(client *c); void xtrimCommand(client *c); void lolwutCommand(client *c); void aclCommand(client *c); -void lcsCommand(client *c); +void stralgoCommand(client *c); #if defined(__GNUC__) void *calloc(size_t count, size_t size) __attribute__ ((deprecated)); diff --git a/src/t_string.c b/src/t_string.c index ef382bb0c..d4eb04769 100644 --- a/src/t_string.c +++ b/src/t_string.c @@ -480,18 +480,31 @@ void strlenCommand(client *c) { addReplyLongLong(c,stringObjectLen(o)); } -/* LCS -- Longest common subsequence. + +/* STRALGO -- Implement complex algorithms on strings. * - * LCS [IDX] [MINMATCHLEN ] - * STRINGS | KEYS */ -void lcsCommand(client *c) { + * STRALGO ... arguments ... */ +void stralgoLCS(client *c); /* This implements the LCS algorithm. */ +void stralgoCommand(client *c) { + /* Select the algorithm. */ + if (!strcasecmp(c->argv[1]->ptr,"lcs")) { + stralgoLCS(c); + } else { + addReply(c,shared.syntaxerr); + } +} + +/* STRALGO [IDX] [MINMATCHLEN ] [WITHMATCHLEN] + * STRINGS | KEYS + */ +void stralgoLCS(client *c) { uint32_t i, j; long long minmatchlen = 0; sds a = NULL, b = NULL; int getlen = 0, getidx = 0, withmatchlen = 0; robj *obja = NULL, *objb = NULL; - for (j = 1; j < (uint32_t)c->argc; j++) { + for (j = 2; j < (uint32_t)c->argc; j++) { char *opt = c->argv[j]->ptr; int moreargs = (c->argc-1) - j; diff --git a/tests/unit/type/string.tcl b/tests/unit/type/string.tcl index b9ef9de7a..8126cdee8 100644 --- a/tests/unit/type/string.tcl +++ b/tests/unit/type/string.tcl @@ -424,29 +424,29 @@ start_server {tags {"string"}} { set rna2 {ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTT} set rnalcs {ACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTT} - test {LCS string output with STRINGS option} { - r LCS STRINGS $rna1 $rna2 + test {STRALGO LCS string output with STRINGS option} { + r STRALGO LCS STRINGS $rna1 $rna2 } $rnalcs - test {LCS len} { - r LCS LEN STRINGS $rna1 $rna2 + test {STRALGO LCS len} { + r STRALGO LCS LEN STRINGS $rna1 $rna2 } [string length $rnalcs] test {LCS with KEYS option} { r set virus1 $rna1 r set virus2 $rna2 - r LCS KEYS virus1 virus2 + r STRALGO LCS KEYS virus1 virus2 } $rnalcs test {LCS indexes} { - dict get [r LCS IDX KEYS virus1 virus2] matches + dict get [r STRALGO LCS IDX KEYS virus1 virus2] matches } {{{238 238} {239 239}} {{236 236} {238 238}} {{229 230} {236 237}} {{224 224} {235 235}} {{1 222} {13 234}}} test {LCS indexes with match len} { - dict get [r LCS IDX KEYS virus1 virus2 WITHMATCHLEN] matches + dict get [r STRALGO LCS IDX KEYS virus1 virus2 WITHMATCHLEN] matches } {{{238 238} {239 239} 1} {{236 236} {238 238} 1} {{229 230} {236 237} 2} {{224 224} {235 235} 1} {{1 222} {13 234} 222}} test {LCS indexes with match len and minimum match len} { - dict get [r LCS IDX KEYS virus1 virus2 WITHMATCHLEN MINMATCHLEN 5] matches + dict get [r STRALGO LCS IDX KEYS virus1 virus2 WITHMATCHLEN MINMATCHLEN 5] matches } {{{1 222} {13 234} 222}} } From 4c30d6d732e026ad686b8679e6803542930b4efe Mon Sep 17 00:00:00 2001 From: Dave-in-lafayette <64047859+Dave-in-lafayette@users.noreply.github.com> Date: Mon, 20 Apr 2020 16:38:06 -0700 Subject: [PATCH 40/71] fix for crash during panic before all threads are up If there's a panic before all threads have been started (say, if file descriptor 0 is closed at exec), the panic response will crash here again. --- src/bio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bio.c b/src/bio.c index 0662c8c4c..85f681185 100644 --- a/src/bio.c +++ b/src/bio.c @@ -266,7 +266,7 @@ void bioKillThreads(void) { int err, j; for (j = 0; j < BIO_NUM_OPS; j++) { - if (pthread_cancel(bio_threads[j]) == 0) { + if (bio_threads[j] && pthread_cancel(bio_threads[j]) == 0) { if ((err = pthread_join(bio_threads[j],NULL)) != 0) { serverLog(LL_WARNING, "Bio thread for job type #%d can be joined: %s", From aafc91fcc9f6a73b07a07120700180e57e94f917 Mon Sep 17 00:00:00 2001 From: Guy Benoish Date: Sun, 19 Apr 2020 15:59:58 +0300 Subject: [PATCH 41/71] Add the stream tag to XSETID tests --- tests/unit/type/stream.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/type/stream.tcl b/tests/unit/type/stream.tcl index 5de9f0571..c2b524d7f 100644 --- a/tests/unit/type/stream.tcl +++ b/tests/unit/type/stream.tcl @@ -414,7 +414,7 @@ start_server {tags {"stream"} overrides {appendonly yes stream-node-max-entries } } -start_server {tags {"xsetid"}} { +start_server {tags {"stream xsetid"}} { test {XADD can CREATE an empty stream} { r XADD mystream MAXLEN 0 * a b assert {[dict get [r xinfo stream mystream] length] == 0} From 17133287237d8f5a1aef1e2ceb18d4a4dbfed7d1 Mon Sep 17 00:00:00 2001 From: Dave-in-lafayette <64047859+Dave-in-lafayette@users.noreply.github.com> Date: Mon, 20 Apr 2020 16:34:36 -0700 Subject: [PATCH 42/71] fix for unintended crash during panic response If redis crashes early, before lua is set up (like, if File Descriptor 0 is closed before exec), it will crash again trying to print memory statistics. --- src/server.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server.c b/src/server.c index f7af79c3f..f2fe54e1c 100644 --- a/src/server.c +++ b/src/server.c @@ -4037,7 +4037,7 @@ sds genRedisInfoString(const char *section) { size_t zmalloc_used = zmalloc_used_memory(); size_t total_system_mem = server.system_memory_size; const char *evict_policy = evictPolicyToString(); - long long memory_lua = (long long)lua_gc(server.lua,LUA_GCCOUNT,0)*1024; + long long memory_lua = server.lua ? (long long)lua_gc(server.lua,LUA_GCCOUNT,0)*1024 : 0; struct redisMemOverhead *mh = getMemoryOverheadData(); /* Peak memory is updated from time to time by serverCron() so it From a0c54d562244c123772deccbe5f1d26bdba35b81 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 27 Apr 2020 13:35:17 +0200 Subject: [PATCH 43/71] Fix STRALGO command flags. --- src/server.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server.c b/src/server.c index f2fe54e1c..db1d3780c 100644 --- a/src/server.c +++ b/src/server.c @@ -1007,7 +1007,7 @@ struct redisCommand redisCommandTable[] = { 0,NULL,0,0,0,0,0,0}, {"stralgo",stralgoCommand,-2, - "write use-memory @string", + "read-only @string", 0,lcsGetKeys,0,0,0,0,0,0} }; From 58619c12868a1b2a4b49855ff43391c121ca0b2d Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Thu, 23 Apr 2020 15:04:42 +0300 Subject: [PATCH 44/71] Keep track of meaningful replication offset in replicas too Now both master and replicas keep track of the last replication offset that contains meaningful data (ignoring the tailing pings), and both trim that tail from the replication backlog, and the offset with which they try to use for psync. the implication is that if someone missed some pings, or even have excessive pings that the promoted replica has, it'll still be able to psync (avoid full sync). the downside (which was already committed) is that replicas running old code may fail to psync, since the promoted replica trims pings form it's backlog. This commit adds a test that reproduces several cases of promotions and demotions with stale and non-stale pings Background: The mearningful offset on the master was added recently to solve a problem were the master is left all alone, injecting PINGs into it's backlog when no one is listening and then gets demoted and tries to replicate from a replica that didn't have any of the PINGs (or at least not the last ones). however, consider this case: master A has two replicas (B and C) replicating directly from it. there's no traffic at all, and also no network issues, just many pings in the tail of the backlog. now B gets promoted, A becomes a replica of B, and C remains a replica of A. when A gets demoted, it trims the pings from its backlog, and successfully replicate from B. however, C is still aware of these PINGs, when it'll disconnect and re-connect to A, it'll ask for something that's not in the backlog anymore (since A trimmed the tail of it's backlog), and be forced to do a full sync (something it didn't have to do before the meaningful offset fix). Besides that, the psync2 test was always failing randomly here and there, it turns out the reason were PINGs. Investigating it shows the following scenario: cycle 1: redis #1 is master, and all the rest are direct replicas of #1 cycle 2: redis #2 is promoted to master, #1 is a replica of #2 and #3 is replica of #1 now we see that when #1 is demoted it prints: 17339:S 21 Apr 2020 11:16:38.523 * Using the meaningful offset 3929963 instead of 3929977 to exclude the final PINGs (14 bytes difference) 17339:S 21 Apr 2020 11:16:39.391 * Trying a partial resynchronization (request e2b3f8817735fdfe5fa4626766daa938b61419e5:3929964). 17339:S 21 Apr 2020 11:16:39.392 * Successful partial resynchronization with master. and when #3 connects to the demoted #2, #2 says: 17339:S 21 Apr 2020 11:16:40.084 * Partial resynchronization not accepted: Requested offset for secondary ID was 3929978, but I can reply up to 3929964 so the issue here is that the meaningful offset feature saved the day for the demoted master (since it needs to sync from a replica that didn't get the last ping), but it didn't help one of the other replicas which did get the last ping. --- src/blocked.c | 2 +- src/networking.c | 93 +++++++++++----------- src/replication.c | 66 +++++++++------- src/server.h | 1 - tests/integration/psync2.tcl | 144 ++++++++++++++++++++++++++++++----- 5 files changed, 213 insertions(+), 93 deletions(-) diff --git a/src/blocked.c b/src/blocked.c index 00cc798d5..045369e93 100644 --- a/src/blocked.c +++ b/src/blocked.c @@ -110,7 +110,7 @@ void processUnblockedClients(void) { * the code is conceptually more correct this way. */ if (!(c->flags & CLIENT_BLOCKED)) { if (c->querybuf && sdslen(c->querybuf) > 0) { - processInputBufferAndReplicate(c); + processInputBuffer(c); } } } diff --git a/src/networking.c b/src/networking.c index 744979d16..e4d40fdf0 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1671,12 +1671,55 @@ int processMultibulkBuffer(client *c) { return C_ERR; } +/* Perform necessary tasks after a command was executed: + * + * 1. The client is reset unless there are reasons to avoid doing it. + * 2. In the case of master clients, the replication offset is updated. + * 3. Propagate commands we got from our master to replicas down the line. */ +void commandProcessed(client *c) { + int cmd_is_ping = c->cmd && c->cmd->proc == pingCommand; + long long prev_offset = c->reploff; + if (c->flags & CLIENT_MASTER && !(c->flags & CLIENT_MULTI)) { + /* Update the applied replication offset of our master. */ + c->reploff = c->read_reploff - sdslen(c->querybuf) + c->qb_pos; + } + + /* Don't reset the client structure for clients blocked in a + * module blocking command, so that the reply callback will + * still be able to access the client argv and argc field. + * The client will be reset in unblockClientFromModule(). */ + if (!(c->flags & CLIENT_BLOCKED) || + c->btype != BLOCKED_MODULE) + { + resetClient(c); + } + + /* If the client is a master we need to compute the difference + * between the applied offset before and after processing the buffer, + * to understand how much of the replication stream was actually + * applied to the master state: this quantity, and its corresponding + * part of the replication stream, will be propagated to the + * sub-replicas and to the replication backlog. */ + if (c->flags & CLIENT_MASTER) { + long long applied = c->reploff - prev_offset; + long long prev_master_repl_meaningful_offset = server.master_repl_meaningful_offset; + if (applied) { + replicationFeedSlavesFromMasterStream(server.slaves, + c->pending_querybuf, applied); + sdsrange(c->pending_querybuf,applied,-1); + } + /* The server.master_repl_meaningful_offset variable represents + * the offset of the replication stream without the pending PINGs. */ + if (cmd_is_ping) + server.master_repl_meaningful_offset = prev_master_repl_meaningful_offset; + } +} + /* This function calls processCommand(), but also performs a few sub tasks - * that are useful in that context: + * for the client that are useful in that context: * * 1. It sets the current client to the client 'c'. - * 2. In the case of master clients, the replication offset is updated. - * 3. The client is reset unless there are reasons to avoid doing it. + * 2. calls commandProcessed() if the command was handled. * * The function returns C_ERR in case the client was freed as a side effect * of processing the command, otherwise C_OK is returned. */ @@ -1684,20 +1727,7 @@ int processCommandAndResetClient(client *c) { int deadclient = 0; server.current_client = c; if (processCommand(c) == C_OK) { - if (c->flags & CLIENT_MASTER && !(c->flags & CLIENT_MULTI)) { - /* Update the applied replication offset of our master. */ - c->reploff = c->read_reploff - sdslen(c->querybuf) + c->qb_pos; - } - - /* Don't reset the client structure for clients blocked in a - * module blocking command, so that the reply callback will - * still be able to access the client argv and argc field. - * The client will be reset in unblockClientFromModule(). */ - if (!(c->flags & CLIENT_BLOCKED) || - c->btype != BLOCKED_MODULE) - { - resetClient(c); - } + commandProcessed(c); } if (server.current_client == NULL) deadclient = 1; server.current_client = NULL; @@ -1794,31 +1824,6 @@ void processInputBuffer(client *c) { } } -/* This is a wrapper for processInputBuffer that also cares about handling - * the replication forwarding to the sub-replicas, in case the client 'c' - * is flagged as master. Usually you want to call this instead of the - * raw processInputBuffer(). */ -void processInputBufferAndReplicate(client *c) { - if (!(c->flags & CLIENT_MASTER)) { - processInputBuffer(c); - } else { - /* If the client is a master we need to compute the difference - * between the applied offset before and after processing the buffer, - * to understand how much of the replication stream was actually - * applied to the master state: this quantity, and its corresponding - * part of the replication stream, will be propagated to the - * sub-replicas and to the replication backlog. */ - size_t prev_offset = c->reploff; - processInputBuffer(c); - size_t applied = c->reploff - prev_offset; - if (applied) { - replicationFeedSlavesFromMasterStream(server.slaves, - c->pending_querybuf, applied); - sdsrange(c->pending_querybuf,applied,-1); - } - } -} - void readQueryFromClient(connection *conn) { client *c = connGetPrivateData(conn); int nread, readlen; @@ -1886,7 +1891,7 @@ void readQueryFromClient(connection *conn) { /* There is more data in the client input buffer, continue parsing it * in case to check if there is a full command to execute. */ - processInputBufferAndReplicate(c); + processInputBuffer(c); } void getClientsMaxBuffers(unsigned long *longest_output_list, @@ -3101,7 +3106,7 @@ int handleClientsWithPendingReadsUsingThreads(void) { continue; } } - processInputBufferAndReplicate(c); + processInputBuffer(c); } return processed; } diff --git a/src/replication.c b/src/replication.c index 3e9910374..c59639cd1 100644 --- a/src/replication.c +++ b/src/replication.c @@ -39,6 +39,7 @@ #include #include +long long adjustMeaningfulReplOffset(); void replicationDiscardCachedMaster(void); void replicationResurrectCachedMaster(connection *conn); void replicationSendAck(void); @@ -2693,6 +2694,9 @@ void replicationCacheMaster(client *c) { * pending outputs to the master. */ sdsclear(server.master->querybuf); sdsclear(server.master->pending_querybuf); + /* Adjust reploff and read_reploff to the last meaningful offset we executed. + * this is the offset the replica will use for future PSYNC. */ + server.master->reploff = adjustMeaningfulReplOffset(); server.master->read_reploff = server.master->reploff; if (c->flags & CLIENT_MULTI) discardTransaction(c); listEmpty(c->reply); @@ -2717,6 +2721,38 @@ void replicationCacheMaster(client *c) { replicationHandleMasterDisconnection(); } +/* If the "meaningful" offset, that is the offset without the final PINGs + * in the stream, is different than the last offset, use it instead: + * often when the master is no longer reachable, replicas will never + * receive the PINGs, however the master will end with an incremented + * offset because of the PINGs and will not be able to incrementally + * PSYNC with the new master. + * This function trims the replication backlog when needed, and returns + * the offset to be used for future partial sync. */ +long long adjustMeaningfulReplOffset() { + if (server.master_repl_offset > server.master_repl_meaningful_offset) { + long long delta = server.master_repl_offset - + server.master_repl_meaningful_offset; + serverLog(LL_NOTICE, + "Using the meaningful offset %lld instead of %lld to exclude " + "the final PINGs (%lld bytes difference)", + server.master_repl_meaningful_offset, + server.master_repl_offset, + delta); + server.master_repl_offset = server.master_repl_meaningful_offset; + if (server.repl_backlog_histlen <= delta) { + server.repl_backlog_histlen = 0; + server.repl_backlog_idx = 0; + } else { + server.repl_backlog_histlen -= delta; + server.repl_backlog_idx = + (server.repl_backlog_idx + (server.repl_backlog_size - delta)) % + server.repl_backlog_size; + } + } + return server.master_repl_offset; +} + /* This function is called when a master is turend into a slave, in order to * create from scratch a cached master for the new client, that will allow * to PSYNC with the slave that was promoted as the new master after a @@ -2736,35 +2772,7 @@ void replicationCacheMasterUsingMyself(void) { * by replicationCreateMasterClient(). We'll later set the created * master as server.cached_master, so the replica will use such * offset for PSYNC. */ - server.master_initial_offset = server.master_repl_offset; - - /* However if the "meaningful" offset, that is the offset without - * the final PINGs in the stream, is different, use this instead: - * often when the master is no longer reachable, replicas will never - * receive the PINGs, however the master will end with an incremented - * offset because of the PINGs and will not be able to incrementally - * PSYNC with the new master. */ - if (server.master_repl_offset > server.master_repl_meaningful_offset) { - long long delta = server.master_repl_offset - - server.master_repl_meaningful_offset; - serverLog(LL_NOTICE, - "Using the meaningful offset %lld instead of %lld to exclude " - "the final PINGs (%lld bytes difference)", - server.master_repl_meaningful_offset, - server.master_repl_offset, - delta); - server.master_initial_offset = server.master_repl_meaningful_offset; - server.master_repl_offset = server.master_repl_meaningful_offset; - if (server.repl_backlog_histlen <= delta) { - server.repl_backlog_histlen = 0; - server.repl_backlog_idx = 0; - } else { - server.repl_backlog_histlen -= delta; - server.repl_backlog_idx = - (server.repl_backlog_idx + (server.repl_backlog_size - delta)) % - server.repl_backlog_size; - } - } + server.master_initial_offset = adjustMeaningfulReplOffset(); /* The master client we create can be set to any DBID, because * the new master will start its replication stream with SELECT. */ diff --git a/src/server.h b/src/server.h index 9e1e506af..41d767e13 100644 --- a/src/server.h +++ b/src/server.h @@ -1600,7 +1600,6 @@ void setDeferredSetLen(client *c, void *node, long length); void setDeferredAttributeLen(client *c, void *node, long length); void setDeferredPushLen(client *c, void *node, long length); void processInputBuffer(client *c); -void processInputBufferAndReplicate(client *c); void processGopherRequest(client *c); void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask); void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask); diff --git a/tests/integration/psync2.tcl b/tests/integration/psync2.tcl index 333736ffa..4e1189e0b 100644 --- a/tests/integration/psync2.tcl +++ b/tests/integration/psync2.tcl @@ -44,6 +44,7 @@ start_server {} { set used [list $master_id] test "PSYNC2: \[NEW LAYOUT\] Set #$master_id as master" { $R($master_id) slaveof no one + $R($master_id) config set repl-ping-replica-period 1 ;# increse the chance that random ping will cause issues if {$counter_value == 0} { $R($master_id) set x $counter_value } @@ -114,23 +115,20 @@ start_server {} { } } - # wait for all the slaves to be in sync with the master - set master_ofs [status $R($master_id) master_repl_offset] + # wait for all the slaves to be in sync with the master, due to pings, we have to re-sample the master constantly too wait_for_condition 500 100 { - $master_ofs == [status $R(0) master_repl_offset] && - $master_ofs == [status $R(1) master_repl_offset] && - $master_ofs == [status $R(2) master_repl_offset] && - $master_ofs == [status $R(3) master_repl_offset] && - $master_ofs == [status $R(4) master_repl_offset] + [status $R($master_id) master_repl_offset] == [status $R(0) master_repl_offset] && + [status $R($master_id) master_repl_offset] == [status $R(1) master_repl_offset] && + [status $R($master_id) master_repl_offset] == [status $R(2) master_repl_offset] && + [status $R($master_id) master_repl_offset] == [status $R(3) master_repl_offset] && + [status $R($master_id) master_repl_offset] == [status $R(4) master_repl_offset] } else { - if {$debug_msg} { - for {set j 0} {$j < 5} {incr j} { - puts "$j: sync_full: [status $R($j) sync_full]" - puts "$j: id1 : [status $R($j) master_replid]:[status $R($j) master_repl_offset]" - puts "$j: id2 : [status $R($j) master_replid2]:[status $R($j) second_repl_offset]" - puts "$j: backlog : firstbyte=[status $R($j) repl_backlog_first_byte_offset] len=[status $R($j) repl_backlog_histlen]" - puts "---" - } + for {set j 0} {$j < 5} {incr j} { + puts "$j: sync_full: [status $R($j) sync_full]" + puts "$j: id1 : [status $R($j) master_replid]:[status $R($j) master_repl_offset]" + puts "$j: id2 : [status $R($j) master_replid2]:[status $R($j) second_repl_offset]" + puts "$j: backlog : firstbyte=[status $R($j) repl_backlog_first_byte_offset] len=[status $R($j) repl_backlog_histlen]" + puts "---" } fail "Slaves are not in sync with the master after too long time." } @@ -175,9 +173,14 @@ start_server {} { $R($j) slaveof $master_host $master_port } - # Wait for slaves to sync + # Wait for replicas to sync. it is not enough to just wait for connected_slaves==4 + # since we might do the check before the master realized that they're disconnected wait_for_condition 50 1000 { - [status $R($master_id) connected_slaves] == 4 + [status $R($master_id) connected_slaves] == 4 && + [status $R([expr {($master_id+1)%5}]) master_link_status] == "up" && + [status $R([expr {($master_id+2)%5}]) master_link_status] == "up" && + [status $R([expr {($master_id+3)%5}]) master_link_status] == "up" && + [status $R([expr {($master_id+4)%5}]) master_link_status] == "up" } else { fail "Replica not reconnecting" } @@ -188,6 +191,7 @@ start_server {} { set slave_id [expr {($master_id+1)%5}] set sync_count [status $R($master_id) sync_full] set sync_partial [status $R($master_id) sync_partial_ok] + set sync_partial_err [status $R($master_id) sync_partial_err] catch { $R($slave_id) config rewrite $R($slave_id) debug restart @@ -197,7 +201,11 @@ start_server {} { wait_for_condition 50 1000 { [status $R($master_id) sync_partial_ok] == $sync_partial + 1 } else { - fail "Replica not reconnecting" + puts "prev sync_full: $sync_count" + puts "prev sync_partial_ok: $sync_partial" + puts "prev sync_partial_err: $sync_partial_err" + puts [$R($master_id) info stats] + fail "Replica didn't partial sync" } set new_sync_count [status $R($master_id) sync_full] assert {$sync_count == $new_sync_count} @@ -271,3 +279,103 @@ start_server {} { } }}}}} + +start_server {tags {"psync2"}} { +start_server {} { +start_server {} { +start_server {} { +start_server {} { + test {pings at the end of replication stream are ignored for psync} { + set master [srv -4 client] + set master_host [srv -4 host] + set master_port [srv -4 port] + set replica1 [srv -3 client] + set replica2 [srv -2 client] + set replica3 [srv -1 client] + set replica4 [srv -0 client] + + $replica1 replicaof $master_host $master_port + $replica2 replicaof $master_host $master_port + $replica3 replicaof $master_host $master_port + $replica4 replicaof $master_host $master_port + wait_for_condition 50 1000 { + [status $master connected_slaves] == 4 + } else { + fail "replicas didn't connect" + } + + $master incr x + wait_for_condition 50 1000 { + [$replica1 get x] == 1 && [$replica2 get x] == 1 && + [$replica3 get x] == 1 && [$replica4 get x] == 1 + } else { + fail "replicas didn't get incr" + } + + # disconnect replica1 and replica2 + # and wait for the master to send a ping to replica3 and replica4 + $replica1 replicaof no one + $replica2 replicaof 127.0.0.1 1 ;# we can't promote it to master since that will cycle the replication id + $master config set repl-ping-replica-period 1 + after 1500 + + # make everyone sync from the replica1 that didn't get the last ping from the old master + # replica4 will keep syncing from the old master which now syncs from replica1 + # and replica2 will re-connect to the old master (which went back in time) + set new_master_host [srv -3 host] + set new_master_port [srv -3 port] + $replica3 replicaof $new_master_host $new_master_port + $master replicaof $new_master_host $new_master_port + $replica2 replicaof $master_host $master_port + wait_for_condition 50 1000 { + [status $replica2 master_link_status] == "up" && + [status $replica3 master_link_status] == "up" && + [status $replica4 master_link_status] == "up" && + [status $master master_link_status] == "up" + } else { + fail "replicas didn't connect" + } + + # make sure replication is still alive and kicking + $replica1 incr x + wait_for_condition 50 1000 { + [$replica2 get x] == 2 && + [$replica3 get x] == 2 && + [$replica4 get x] == 2 && + [$master get x] == 2 + } else { + fail "replicas didn't get incr" + } + + # make sure there are full syncs other than the initial ones + assert_equal [status $master sync_full] 4 + assert_equal [status $replica1 sync_full] 0 + assert_equal [status $replica2 sync_full] 0 + assert_equal [status $replica3 sync_full] 0 + assert_equal [status $replica4 sync_full] 0 + + # force psync + $master client kill type master + $replica2 client kill type master + $replica3 client kill type master + $replica4 client kill type master + + # make sure replication is still alive and kicking + $replica1 incr x + wait_for_condition 50 1000 { + [$replica2 get x] == 3 && + [$replica3 get x] == 3 && + [$replica4 get x] == 3 && + [$master get x] == 3 + } else { + fail "replicas didn't get incr" + } + + # make sure there are full syncs other than the initial ones + assert_equal [status $master sync_full] 4 + assert_equal [status $replica1 sync_full] 0 + assert_equal [status $replica2 sync_full] 0 + assert_equal [status $replica3 sync_full] 0 + assert_equal [status $replica4 sync_full] 0 +} +}}}}} From f1cd7f588010348c0631498c0d97dfa2c6ca9a7d Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Fri, 24 Apr 2020 17:20:28 +0300 Subject: [PATCH 45/71] optimize memory usage of deferred replies When deffered reply is added the previous reply node cannot be used so all the extra space we allocated in it is wasted. in case someone uses deffered replies in a loop, each time adding a small reply, each of these reply nodes (the small string reply) would have consumed a 16k block. now when we add anther diferred reply node, we trim the unused portion of the previous reply block. see #7123 --- src/networking.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/networking.c b/src/networking.c index e4d40fdf0..7ffa99eb1 100644 --- a/src/networking.c +++ b/src/networking.c @@ -436,6 +436,36 @@ void addReplyStatusFormat(client *c, const char *fmt, ...) { sdsfree(s); } +/* Sometimes we are forced to create a new reply node, and we can't append to + * the previous one, when that happens, we wanna try to trim the unused space + * at the end of the last reply node which we won't use anymore. */ +void trimReplyUnusedTailSpace(client *c) { + listNode *ln = listLast(c->reply); + clientReplyBlock *tail = ln? listNodeValue(ln): NULL; + + /* Note that 'tail' may be NULL even if we have a tail node, becuase when + * addDeferredMultiBulkLength() is used */ + if (!tail) return; + + /* We only try to trim the space is relatively high (more than a 1/4 of the + * allocation), otherwise there's a high chance realloc will NOP. + * Also, to avoid large memmove which happens as part of realloc, we only do + * that if the used part is small. */ + if (tail->size - tail->used > tail->size / 4 && + tail->used < PROTO_REPLY_CHUNK_BYTES) + { + size_t old_size = tail->size; + tail = zrealloc(tail, tail->used + sizeof(clientReplyBlock)); + /* If realloc was a NOP, we got the same value which has internal frag */ + if (tail == listNodeValue(ln)) return; + /* take over the allocation's internal fragmentation (at least for + * memory usage tracking) */ + tail->size = zmalloc_usable(tail) - sizeof(clientReplyBlock); + c->reply_bytes += tail->size - old_size; + listNodeValue(ln) = tail; + } +} + /* Adds an empty object to the reply list that will contain the multi bulk * length, which is not known when this function is called. */ void *addReplyDeferredLen(client *c) { @@ -443,6 +473,7 @@ void *addReplyDeferredLen(client *c) { * ready to be sent, since we are sure that before returning to the * event loop setDeferredAggregateLen() will be called. */ if (prepareClientToWrite(c) != C_OK) return NULL; + trimReplyUnusedTailSpace(c); listAddNodeTail(c->reply,NULL); /* NULL is our placeholder. */ return listLast(c->reply); } From 1652f7b897073e0d4d133e0665060e690e231a22 Mon Sep 17 00:00:00 2001 From: Madelyn Olson Date: Fri, 24 Apr 2020 16:59:24 -0700 Subject: [PATCH 46/71] Implemented CRC64 based on slice by 4 --- src/Makefile | 4 +- src/crc64.c | 270 ++++++++++++++++++++++----------------------------- src/crc64.h | 1 + src/rdb.c | 3 +- src/server.c | 1 + 5 files changed, 123 insertions(+), 156 deletions(-) diff --git a/src/Makefile b/src/Makefile index 3f982cc8e..f9922afce 100644 --- a/src/Makefile +++ b/src/Makefile @@ -206,9 +206,9 @@ endif REDIS_SERVER_NAME=redis-server REDIS_SENTINEL_NAME=redis-sentinel -REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o gopher.o tracking.o connection.o tls.o sha256.o timeout.o +REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crcspeed.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o gopher.o tracking.o connection.o tls.o sha256.o timeout.o REDIS_CLI_NAME=redis-cli -REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o ae.o crc64.o siphash.o crc16.o +REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o ae.o crcspeed.o crc64.o siphash.o crc16.o REDIS_BENCHMARK_NAME=redis-benchmark REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o dict.o zmalloc.o siphash.o REDIS_CHECK_RDB_NAME=redis-check-rdb diff --git a/src/crc64.c b/src/crc64.c index f1f764922..165941dea 100644 --- a/src/crc64.c +++ b/src/crc64.c @@ -1,16 +1,5 @@ -/* Redis uses the CRC64 variant with "Jones" coefficients and init value of 0. - * - * Specification of this CRC64 variant follows: - * Name: crc-64-jones - * Width: 64 bites - * Poly: 0xad93d23594c935a9 - * Reflected In: True - * Xor_In: 0xffffffffffffffff - * Reflected_Out: True - * Xor_Out: 0x0 - * Check("123456789"): 0xe9c6d914c4b8d9ca - * - * Copyright (c) 2012, Salvatore Sanfilippo +/* Copyright (c) 2014, Matt Stancliff + * Copyright (c) 2020, Amazon Web Services * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -37,159 +26,134 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ -#include +#include "crc64.h" +#include "crcspeed.h" +static uint64_t crc64_table[8][256] = {{0}}; -static const uint64_t crc64_tab[256] = { - UINT64_C(0x0000000000000000), UINT64_C(0x7ad870c830358979), - UINT64_C(0xf5b0e190606b12f2), UINT64_C(0x8f689158505e9b8b), - UINT64_C(0xc038e5739841b68f), UINT64_C(0xbae095bba8743ff6), - UINT64_C(0x358804e3f82aa47d), UINT64_C(0x4f50742bc81f2d04), - UINT64_C(0xab28ecb46814fe75), UINT64_C(0xd1f09c7c5821770c), - UINT64_C(0x5e980d24087fec87), UINT64_C(0x24407dec384a65fe), - UINT64_C(0x6b1009c7f05548fa), UINT64_C(0x11c8790fc060c183), - UINT64_C(0x9ea0e857903e5a08), UINT64_C(0xe478989fa00bd371), - UINT64_C(0x7d08ff3b88be6f81), UINT64_C(0x07d08ff3b88be6f8), - UINT64_C(0x88b81eabe8d57d73), UINT64_C(0xf2606e63d8e0f40a), - UINT64_C(0xbd301a4810ffd90e), UINT64_C(0xc7e86a8020ca5077), - UINT64_C(0x4880fbd87094cbfc), UINT64_C(0x32588b1040a14285), - UINT64_C(0xd620138fe0aa91f4), UINT64_C(0xacf86347d09f188d), - UINT64_C(0x2390f21f80c18306), UINT64_C(0x594882d7b0f40a7f), - UINT64_C(0x1618f6fc78eb277b), UINT64_C(0x6cc0863448deae02), - UINT64_C(0xe3a8176c18803589), UINT64_C(0x997067a428b5bcf0), - UINT64_C(0xfa11fe77117cdf02), UINT64_C(0x80c98ebf2149567b), - UINT64_C(0x0fa11fe77117cdf0), UINT64_C(0x75796f2f41224489), - UINT64_C(0x3a291b04893d698d), UINT64_C(0x40f16bccb908e0f4), - UINT64_C(0xcf99fa94e9567b7f), UINT64_C(0xb5418a5cd963f206), - UINT64_C(0x513912c379682177), UINT64_C(0x2be1620b495da80e), - UINT64_C(0xa489f35319033385), UINT64_C(0xde51839b2936bafc), - UINT64_C(0x9101f7b0e12997f8), UINT64_C(0xebd98778d11c1e81), - UINT64_C(0x64b116208142850a), UINT64_C(0x1e6966e8b1770c73), - UINT64_C(0x8719014c99c2b083), UINT64_C(0xfdc17184a9f739fa), - UINT64_C(0x72a9e0dcf9a9a271), UINT64_C(0x08719014c99c2b08), - UINT64_C(0x4721e43f0183060c), UINT64_C(0x3df994f731b68f75), - UINT64_C(0xb29105af61e814fe), UINT64_C(0xc849756751dd9d87), - UINT64_C(0x2c31edf8f1d64ef6), UINT64_C(0x56e99d30c1e3c78f), - UINT64_C(0xd9810c6891bd5c04), UINT64_C(0xa3597ca0a188d57d), - UINT64_C(0xec09088b6997f879), UINT64_C(0x96d1784359a27100), - UINT64_C(0x19b9e91b09fcea8b), UINT64_C(0x636199d339c963f2), - UINT64_C(0xdf7adabd7a6e2d6f), UINT64_C(0xa5a2aa754a5ba416), - UINT64_C(0x2aca3b2d1a053f9d), UINT64_C(0x50124be52a30b6e4), - UINT64_C(0x1f423fcee22f9be0), UINT64_C(0x659a4f06d21a1299), - UINT64_C(0xeaf2de5e82448912), UINT64_C(0x902aae96b271006b), - UINT64_C(0x74523609127ad31a), UINT64_C(0x0e8a46c1224f5a63), - UINT64_C(0x81e2d7997211c1e8), UINT64_C(0xfb3aa75142244891), - UINT64_C(0xb46ad37a8a3b6595), UINT64_C(0xceb2a3b2ba0eecec), - UINT64_C(0x41da32eaea507767), UINT64_C(0x3b024222da65fe1e), - UINT64_C(0xa2722586f2d042ee), UINT64_C(0xd8aa554ec2e5cb97), - UINT64_C(0x57c2c41692bb501c), UINT64_C(0x2d1ab4dea28ed965), - UINT64_C(0x624ac0f56a91f461), UINT64_C(0x1892b03d5aa47d18), - UINT64_C(0x97fa21650afae693), UINT64_C(0xed2251ad3acf6fea), - UINT64_C(0x095ac9329ac4bc9b), UINT64_C(0x7382b9faaaf135e2), - UINT64_C(0xfcea28a2faafae69), UINT64_C(0x8632586aca9a2710), - UINT64_C(0xc9622c4102850a14), UINT64_C(0xb3ba5c8932b0836d), - UINT64_C(0x3cd2cdd162ee18e6), UINT64_C(0x460abd1952db919f), - UINT64_C(0x256b24ca6b12f26d), UINT64_C(0x5fb354025b277b14), - UINT64_C(0xd0dbc55a0b79e09f), UINT64_C(0xaa03b5923b4c69e6), - UINT64_C(0xe553c1b9f35344e2), UINT64_C(0x9f8bb171c366cd9b), - UINT64_C(0x10e3202993385610), UINT64_C(0x6a3b50e1a30ddf69), - UINT64_C(0x8e43c87e03060c18), UINT64_C(0xf49bb8b633338561), - UINT64_C(0x7bf329ee636d1eea), UINT64_C(0x012b592653589793), - UINT64_C(0x4e7b2d0d9b47ba97), UINT64_C(0x34a35dc5ab7233ee), - UINT64_C(0xbbcbcc9dfb2ca865), UINT64_C(0xc113bc55cb19211c), - UINT64_C(0x5863dbf1e3ac9dec), UINT64_C(0x22bbab39d3991495), - UINT64_C(0xadd33a6183c78f1e), UINT64_C(0xd70b4aa9b3f20667), - UINT64_C(0x985b3e827bed2b63), UINT64_C(0xe2834e4a4bd8a21a), - UINT64_C(0x6debdf121b863991), UINT64_C(0x1733afda2bb3b0e8), - UINT64_C(0xf34b37458bb86399), UINT64_C(0x8993478dbb8deae0), - UINT64_C(0x06fbd6d5ebd3716b), UINT64_C(0x7c23a61ddbe6f812), - UINT64_C(0x3373d23613f9d516), UINT64_C(0x49aba2fe23cc5c6f), - UINT64_C(0xc6c333a67392c7e4), UINT64_C(0xbc1b436e43a74e9d), - UINT64_C(0x95ac9329ac4bc9b5), UINT64_C(0xef74e3e19c7e40cc), - UINT64_C(0x601c72b9cc20db47), UINT64_C(0x1ac40271fc15523e), - UINT64_C(0x5594765a340a7f3a), UINT64_C(0x2f4c0692043ff643), - UINT64_C(0xa02497ca54616dc8), UINT64_C(0xdafce7026454e4b1), - UINT64_C(0x3e847f9dc45f37c0), UINT64_C(0x445c0f55f46abeb9), - UINT64_C(0xcb349e0da4342532), UINT64_C(0xb1eceec59401ac4b), - UINT64_C(0xfebc9aee5c1e814f), UINT64_C(0x8464ea266c2b0836), - UINT64_C(0x0b0c7b7e3c7593bd), UINT64_C(0x71d40bb60c401ac4), - UINT64_C(0xe8a46c1224f5a634), UINT64_C(0x927c1cda14c02f4d), - UINT64_C(0x1d148d82449eb4c6), UINT64_C(0x67ccfd4a74ab3dbf), - UINT64_C(0x289c8961bcb410bb), UINT64_C(0x5244f9a98c8199c2), - UINT64_C(0xdd2c68f1dcdf0249), UINT64_C(0xa7f41839ecea8b30), - UINT64_C(0x438c80a64ce15841), UINT64_C(0x3954f06e7cd4d138), - UINT64_C(0xb63c61362c8a4ab3), UINT64_C(0xcce411fe1cbfc3ca), - UINT64_C(0x83b465d5d4a0eece), UINT64_C(0xf96c151de49567b7), - UINT64_C(0x76048445b4cbfc3c), UINT64_C(0x0cdcf48d84fe7545), - UINT64_C(0x6fbd6d5ebd3716b7), UINT64_C(0x15651d968d029fce), - UINT64_C(0x9a0d8ccedd5c0445), UINT64_C(0xe0d5fc06ed698d3c), - UINT64_C(0xaf85882d2576a038), UINT64_C(0xd55df8e515432941), - UINT64_C(0x5a3569bd451db2ca), UINT64_C(0x20ed197575283bb3), - UINT64_C(0xc49581ead523e8c2), UINT64_C(0xbe4df122e51661bb), - UINT64_C(0x3125607ab548fa30), UINT64_C(0x4bfd10b2857d7349), - UINT64_C(0x04ad64994d625e4d), UINT64_C(0x7e7514517d57d734), - UINT64_C(0xf11d85092d094cbf), UINT64_C(0x8bc5f5c11d3cc5c6), - UINT64_C(0x12b5926535897936), UINT64_C(0x686de2ad05bcf04f), - UINT64_C(0xe70573f555e26bc4), UINT64_C(0x9ddd033d65d7e2bd), - UINT64_C(0xd28d7716adc8cfb9), UINT64_C(0xa85507de9dfd46c0), - UINT64_C(0x273d9686cda3dd4b), UINT64_C(0x5de5e64efd965432), - UINT64_C(0xb99d7ed15d9d8743), UINT64_C(0xc3450e196da80e3a), - UINT64_C(0x4c2d9f413df695b1), UINT64_C(0x36f5ef890dc31cc8), - UINT64_C(0x79a59ba2c5dc31cc), UINT64_C(0x037deb6af5e9b8b5), - UINT64_C(0x8c157a32a5b7233e), UINT64_C(0xf6cd0afa9582aa47), - UINT64_C(0x4ad64994d625e4da), UINT64_C(0x300e395ce6106da3), - UINT64_C(0xbf66a804b64ef628), UINT64_C(0xc5bed8cc867b7f51), - UINT64_C(0x8aeeace74e645255), UINT64_C(0xf036dc2f7e51db2c), - UINT64_C(0x7f5e4d772e0f40a7), UINT64_C(0x05863dbf1e3ac9de), - UINT64_C(0xe1fea520be311aaf), UINT64_C(0x9b26d5e88e0493d6), - UINT64_C(0x144e44b0de5a085d), UINT64_C(0x6e963478ee6f8124), - UINT64_C(0x21c640532670ac20), UINT64_C(0x5b1e309b16452559), - UINT64_C(0xd476a1c3461bbed2), UINT64_C(0xaeaed10b762e37ab), - UINT64_C(0x37deb6af5e9b8b5b), UINT64_C(0x4d06c6676eae0222), - UINT64_C(0xc26e573f3ef099a9), UINT64_C(0xb8b627f70ec510d0), - UINT64_C(0xf7e653dcc6da3dd4), UINT64_C(0x8d3e2314f6efb4ad), - UINT64_C(0x0256b24ca6b12f26), UINT64_C(0x788ec2849684a65f), - UINT64_C(0x9cf65a1b368f752e), UINT64_C(0xe62e2ad306bafc57), - UINT64_C(0x6946bb8b56e467dc), UINT64_C(0x139ecb4366d1eea5), - UINT64_C(0x5ccebf68aecec3a1), UINT64_C(0x2616cfa09efb4ad8), - UINT64_C(0xa97e5ef8cea5d153), UINT64_C(0xd3a62e30fe90582a), - UINT64_C(0xb0c7b7e3c7593bd8), UINT64_C(0xca1fc72bf76cb2a1), - UINT64_C(0x45775673a732292a), UINT64_C(0x3faf26bb9707a053), - UINT64_C(0x70ff52905f188d57), UINT64_C(0x0a2722586f2d042e), - UINT64_C(0x854fb3003f739fa5), UINT64_C(0xff97c3c80f4616dc), - UINT64_C(0x1bef5b57af4dc5ad), UINT64_C(0x61372b9f9f784cd4), - UINT64_C(0xee5fbac7cf26d75f), UINT64_C(0x9487ca0fff135e26), - UINT64_C(0xdbd7be24370c7322), UINT64_C(0xa10fceec0739fa5b), - UINT64_C(0x2e675fb4576761d0), UINT64_C(0x54bf2f7c6752e8a9), - UINT64_C(0xcdcf48d84fe75459), UINT64_C(0xb71738107fd2dd20), - UINT64_C(0x387fa9482f8c46ab), UINT64_C(0x42a7d9801fb9cfd2), - UINT64_C(0x0df7adabd7a6e2d6), UINT64_C(0x772fdd63e7936baf), - UINT64_C(0xf8474c3bb7cdf024), UINT64_C(0x829f3cf387f8795d), - UINT64_C(0x66e7a46c27f3aa2c), UINT64_C(0x1c3fd4a417c62355), - UINT64_C(0x935745fc4798b8de), UINT64_C(0xe98f353477ad31a7), - UINT64_C(0xa6df411fbfb21ca3), UINT64_C(0xdc0731d78f8795da), - UINT64_C(0x536fa08fdfd90e51), UINT64_C(0x29b7d047efec8728), -}; +/******************** BEGIN GENERATED PYCRC FUNCTIONS ********************/ +/** + * Generated on Sun Dec 21 14:14:07 2014, + * by pycrc v0.8.2, https://www.tty1.net/pycrc/ + * + * LICENSE ON GENERATED CODE: + * ========================== + * As of version 0.6, pycrc is released under the terms of the MIT licence. + * The code generated by pycrc is not considered a substantial portion of the + * software, therefore the author of pycrc will not claim any copyright on + * the generated code. + * ========================== + * + * CRC configuration: + * Width = 64 + * Poly = 0xad93d23594c935a9 + * XorIn = 0xffffffffffffffff + * ReflectIn = True + * XorOut = 0x0000000000000000 + * ReflectOut = True + * Algorithm = bit-by-bit-fast + * + * Modifications after generation (by matt): + * - included finalize step in-line with update for single-call generation + * - re-worked some inner variable architectures + * - adjusted function parameters to match expected prototypes. + *****************************************************************************/ -uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l) { - uint64_t j; +/** + * Reflect all bits of a \a data word of \a data_len bytes. + * + * \param data The data word to be reflected. + * \param data_len The width of \a data expressed in number of bits. + * \return The reflected data. + *****************************************************************************/ +static inline uint_fast64_t crc_reflect(uint_fast64_t data, size_t data_len) { + uint_fast64_t ret = data & 0x01; - for (j = 0; j < l; j++) { - uint8_t byte = s[j]; - crc = crc64_tab[(uint8_t)crc ^ byte] ^ (crc >> 8); + for (size_t i = 1; i < data_len; i++) { + data >>= 1; + ret = (ret << 1) | (data & 0x01); } - return crc; + + return ret; +} + +/** + * Update the crc value with new data. + * + * \param crc The current crc value. + * \param data Pointer to a buffer of \a data_len bytes. + * \param data_len Number of bytes in the \a data buffer. + * \return The updated crc value. + ******************************************************************************/ +uint64_t _crc64(uint_fast64_t crc, const void *in_data, const uint64_t len) { + const uint8_t *data = in_data; + unsigned long long bit; + + for (uint64_t offset = 0; offset < len; offset++) { + uint8_t c = data[offset]; + for (uint_fast8_t i = 0x01; i & 0xff; i <<= 1) { + bit = crc & 0x8000000000000000; + if (c & i) { + bit = !bit; + } + + crc <<= 1; + if (bit) { + crc ^= POLY; + } + } + + crc &= 0xffffffffffffffff; + } + + crc = crc & 0xffffffffffffffff; + return crc_reflect(crc, 64) ^ 0x0000000000000000; +} + +/******************** END GENERATED PYCRC FUNCTIONS ********************/ + +/* Initializes the 16KB lookup tables. */ +void crc64_init(void) { + crcspeed64native_init(_crc64, crc64_table); +} + +/* Compute crc64 */ +uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l) { + return crcspeed64native(crc64_table, crc, (void *) s, l); } /* Test main */ -#ifdef REDIS_TEST +#if defined(REDIS_TEST) || defined(REDIS_TEST_MAIN) #include #define UNUSED(x) (void)(x) int crc64Test(int argc, char *argv[]) { UNUSED(argc); UNUSED(argv); - printf("e9c6d914c4b8d9ca == %016llx\n", - (unsigned long long) crc64(0,(unsigned char*)"123456789",9)); + crc64_init(); + printf("[calcula]: e9c6d914c4b8d9ca == %016" PRIx64 "\n", + (uint64_t)_crc64(0, "123456789", 9)); + printf("[64speed]: e9c6d914c4b8d9ca == %016" PRIx64 "\n", + (uint64_t)crc64speed(0, "123456789", 9)); + char li[] = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed " + "do eiusmod tempor incididunt ut labore et dolore magna " + "aliqua. Ut enim ad minim veniam, quis nostrud exercitation " + "ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis " + "aute irure dolor in reprehenderit in voluptate velit esse " + "cillum dolore eu fugiat nulla pariatur. Excepteur sint " + "occaecat cupidatat non proident, sunt in culpa qui officia " + "deserunt mollit anim id est laborum."; + printf("[calcula]: c7794709e69683b3 == %016" PRIx64 "\n", + (uint64_t)_crc64(0, li, sizeof(li))); + printf("[64speed]: c7794709e69683b3 == %016" PRIx64 "\n", + (uint64_t)crc64(0, li, sizeof(li))); return 0; } + +#endif + +#ifdef REDIS_TEST_MAIN +int main(int argc, char *argv[]) { + return crc64Test(argc, argv); +} + #endif diff --git a/src/crc64.h b/src/crc64.h index c9fca519d..60c42345f 100644 --- a/src/crc64.h +++ b/src/crc64.h @@ -3,6 +3,7 @@ #include +void crc64_init(void); uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l); #ifdef REDIS_TEST diff --git a/src/rdb.c b/src/rdb.c index 143b6c325..9f6bf13f1 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -2291,7 +2291,8 @@ int rdbLoadRio(rio *rdb, int rdbflags, rdbSaveInfo *rsi) { if (cksum == 0) { serverLog(LL_WARNING,"RDB file was saved with checksum disabled: no check performed."); } else if (cksum != expected) { - serverLog(LL_WARNING,"Wrong RDB checksum. Aborting now."); + serverLog(LL_WARNING,"Wrong RDB checksum expected: (%llx) but " + "got (%llx). Aborting now.",expected,cksum); rdbExitReportCorruptRDB("RDB CRC error"); } } diff --git a/src/server.c b/src/server.c index db1d3780c..e265fe9ab 100644 --- a/src/server.c +++ b/src/server.c @@ -2892,6 +2892,7 @@ void initServer(void) { scriptingInit(1); slowlogInit(); latencyMonitorInit(); + crc64_init(); } /* Some steps in server initialization need to be done last (after modules From e49a60d9df158f27719d39b8465284d59847f485 Mon Sep 17 00:00:00 2001 From: Madelyn Olson Date: Fri, 24 Apr 2020 17:05:37 -0700 Subject: [PATCH 47/71] Made crc64 test consistent --- src/crc64.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/crc64.c b/src/crc64.c index 165941dea..4cbc019f6 100644 --- a/src/crc64.c +++ b/src/crc64.c @@ -30,6 +30,7 @@ #include "crcspeed.h" static uint64_t crc64_table[8][256] = {{0}}; +#define POLY UINT64_C(0xad93d23594c935a9) /******************** BEGIN GENERATED PYCRC FUNCTIONS ********************/ /** * Generated on Sun Dec 21 14:14:07 2014, @@ -122,7 +123,7 @@ uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l) { } /* Test main */ -#if defined(REDIS_TEST) || defined(REDIS_TEST_MAIN) +#ifdef REDIS_TEST #include #define UNUSED(x) (void)(x) @@ -133,7 +134,7 @@ int crc64Test(int argc, char *argv[]) { printf("[calcula]: e9c6d914c4b8d9ca == %016" PRIx64 "\n", (uint64_t)_crc64(0, "123456789", 9)); printf("[64speed]: e9c6d914c4b8d9ca == %016" PRIx64 "\n", - (uint64_t)crc64speed(0, "123456789", 9)); + (uint64_t)crc64(0, "123456789", 9)); char li[] = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed " "do eiusmod tempor incididunt ut labore et dolore magna " "aliqua. Ut enim ad minim veniam, quis nostrud exercitation " From e853b8f137f555b8c47def4d804f6cb4b69771a8 Mon Sep 17 00:00:00 2001 From: Madelyn Olson Date: Fri, 24 Apr 2020 17:11:21 -0700 Subject: [PATCH 48/71] Added crcspeed library --- src/crcspeed.c | 281 +++++++++++++++++++++++++++++++++++++++++++++++++ src/crcspeed.h | 60 +++++++++++ 2 files changed, 341 insertions(+) create mode 100644 src/crcspeed.c create mode 100644 src/crcspeed.h diff --git a/src/crcspeed.c b/src/crcspeed.c new file mode 100644 index 000000000..d2d97a8c7 --- /dev/null +++ b/src/crcspeed.c @@ -0,0 +1,281 @@ +/* + * Copyright (C) 2013 Mark Adler + * Originally by: crc64.c Version 1.4 16 Dec 2013 Mark Adler + * Modifications by Matt Stancliff : + * - removed CRC64-specific behavior + * - added generation of lookup tables by parameters + * - removed inversion of CRC input/result + * - removed automatic initialization in favor of explicit initialization + + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Mark Adler + madler@alumni.caltech.edu + */ + +#include "crcspeed.h" + +/* Fill in a CRC constants table. */ +void crcspeed64little_init(crcfn64 crcfn, uint64_t table[8][256]) { + uint64_t crc; + + /* generate CRCs for all single byte sequences */ + for (int n = 0; n < 256; n++) { + table[0][n] = crcfn(0, &n, 1); + } + + /* generate nested CRC table for future slice-by-8 lookup */ + for (int n = 0; n < 256; n++) { + crc = table[0][n]; + for (int k = 1; k < 8; k++) { + crc = table[0][crc & 0xff] ^ (crc >> 8); + table[k][n] = crc; + } + } +} + +void crcspeed16little_init(crcfn16 crcfn, uint16_t table[8][256]) { + uint16_t crc; + + /* generate CRCs for all single byte sequences */ + for (int n = 0; n < 256; n++) { + table[0][n] = crcfn(0, &n, 1); + } + + /* generate nested CRC table for future slice-by-8 lookup */ + for (int n = 0; n < 256; n++) { + crc = table[0][n]; + for (int k = 1; k < 8; k++) { + crc = table[0][(crc >> 8) & 0xff] ^ (crc << 8); + table[k][n] = crc; + } + } +} + +/* Reverse the bytes in a 64-bit word. */ +static inline uint64_t rev8(uint64_t a) { +#if defined(__GNUC__) || defined(__clang__) + return __builtin_bswap64(a); +#else + uint64_t m; + + m = UINT64_C(0xff00ff00ff00ff); + a = ((a >> 8) & m) | (a & m) << 8; + m = UINT64_C(0xffff0000ffff); + a = ((a >> 16) & m) | (a & m) << 16; + return a >> 32 | a << 32; +#endif +} + +/* This function is called once to initialize the CRC table for use on a + big-endian architecture. */ +void crcspeed64big_init(crcfn64 fn, uint64_t big_table[8][256]) { + /* Create the little endian table then reverse all the entires. */ + crcspeed64little_init(fn, big_table); + for (int k = 0; k < 8; k++) { + for (int n = 0; n < 256; n++) { + big_table[k][n] = rev8(big_table[k][n]); + } + } +} + +void crcspeed16big_init(crcfn16 fn, uint16_t big_table[8][256]) { + /* Create the little endian table then reverse all the entires. */ + crcspeed16little_init(fn, big_table); + for (int k = 0; k < 8; k++) { + for (int n = 0; n < 256; n++) { + big_table[k][n] = rev8(big_table[k][n]); + } + } +} + +/* Calculate a non-inverted CRC multiple bytes at a time on a little-endian + * architecture. If you need inverted CRC, invert *before* calling and invert + * *after* calling. + * 64 bit crc = process 8 bytes at once; + */ +uint64_t crcspeed64little(uint64_t little_table[8][256], uint64_t crc, + void *buf, size_t len) { + unsigned char *next = buf; + + /* process individual bytes until we reach an 8-byte aligned pointer */ + while (len && ((uintptr_t)next & 7) != 0) { + crc = little_table[0][(crc ^ *next++) & 0xff] ^ (crc >> 8); + len--; + } + + /* fast middle processing, 8 bytes (aligned!) per loop */ + while (len >= 8) { + crc ^= *(uint64_t *)next; + crc = little_table[7][crc & 0xff] ^ + little_table[6][(crc >> 8) & 0xff] ^ + little_table[5][(crc >> 16) & 0xff] ^ + little_table[4][(crc >> 24) & 0xff] ^ + little_table[3][(crc >> 32) & 0xff] ^ + little_table[2][(crc >> 40) & 0xff] ^ + little_table[1][(crc >> 48) & 0xff] ^ + little_table[0][crc >> 56]; + next += 8; + len -= 8; + } + + /* process remaining bytes (can't be larger than 8) */ + while (len) { + crc = little_table[0][(crc ^ *next++) & 0xff] ^ (crc >> 8); + len--; + } + + return crc; +} + +uint16_t crcspeed16little(uint16_t little_table[8][256], uint16_t crc, + void *buf, size_t len) { + unsigned char *next = buf; + + /* process individual bytes until we reach an 8-byte aligned pointer */ + while (len && ((uintptr_t)next & 7) != 0) { + crc = little_table[0][((crc >> 8) ^ *next++) & 0xff] ^ (crc << 8); + len--; + } + + /* fast middle processing, 8 bytes (aligned!) per loop */ + while (len >= 8) { + uint64_t n = *(uint64_t *)next; + crc = little_table[7][(n & 0xff) ^ ((crc >> 8) & 0xff)] ^ + little_table[6][((n >> 8) & 0xff) ^ (crc & 0xff)] ^ + little_table[5][(n >> 16) & 0xff] ^ + little_table[4][(n >> 24) & 0xff] ^ + little_table[3][(n >> 32) & 0xff] ^ + little_table[2][(n >> 40) & 0xff] ^ + little_table[1][(n >> 48) & 0xff] ^ + little_table[0][n >> 56]; + next += 8; + len -= 8; + } + + /* process remaining bytes (can't be larger than 8) */ + while (len) { + crc = little_table[0][((crc >> 8) ^ *next++) & 0xff] ^ (crc << 8); + len--; + } + + return crc; +} + +/* Calculate a non-inverted CRC eight bytes at a time on a big-endian + * architecture. + */ +uint64_t crcspeed64big(uint64_t big_table[8][256], uint64_t crc, void *buf, + size_t len) { + unsigned char *next = buf; + + crc = rev8(crc); + while (len && ((uintptr_t)next & 7) != 0) { + crc = big_table[0][(crc >> 56) ^ *next++] ^ (crc << 8); + len--; + } + + while (len >= 8) { + crc ^= *(uint64_t *)next; + crc = big_table[0][crc & 0xff] ^ + big_table[1][(crc >> 8) & 0xff] ^ + big_table[2][(crc >> 16) & 0xff] ^ + big_table[3][(crc >> 24) & 0xff] ^ + big_table[4][(crc >> 32) & 0xff] ^ + big_table[5][(crc >> 40) & 0xff] ^ + big_table[6][(crc >> 48) & 0xff] ^ + big_table[7][crc >> 56]; + next += 8; + len -= 8; + } + + while (len) { + crc = big_table[0][(crc >> 56) ^ *next++] ^ (crc << 8); + len--; + } + + return rev8(crc); +} + +/* WARNING: Completely untested on big endian architecture. Possibly broken. */ +uint16_t crcspeed16big(uint16_t big_table[8][256], uint16_t crc_in, void *buf, + size_t len) { + unsigned char *next = buf; + uint64_t crc = crc_in; + + crc = rev8(crc); + while (len && ((uintptr_t)next & 7) != 0) { + crc = big_table[0][((crc >> (56 - 8)) ^ *next++) & 0xff] ^ (crc >> 8); + len--; + } + + while (len >= 8) { + uint64_t n = *(uint64_t *)next; + crc = big_table[0][(n & 0xff) ^ ((crc >> (56 - 8)) & 0xff)] ^ + big_table[1][((n >> 8) & 0xff) ^ (crc & 0xff)] ^ + big_table[2][(n >> 16) & 0xff] ^ + big_table[3][(n >> 24) & 0xff] ^ + big_table[4][(n >> 32) & 0xff] ^ + big_table[5][(n >> 40) & 0xff] ^ + big_table[6][(n >> 48) & 0xff] ^ + big_table[7][n >> 56]; + next += 8; + len -= 8; + } + + while (len) { + crc = big_table[0][((crc >> (56 - 8)) ^ *next++) & 0xff] ^ (crc >> 8); + len--; + } + + return rev8(crc); +} + +/* Return the CRC of buf[0..len-1] with initial crc, processing eight bytes + at a time using passed-in lookup table. + This selects one of two routines depending on the endianess of + the architecture. */ +uint64_t crcspeed64native(uint64_t table[8][256], uint64_t crc, void *buf, + size_t len) { + uint64_t n = 1; + + return *(char *)&n ? crcspeed64little(table, crc, buf, len) + : crcspeed64big(table, crc, buf, len); +} + +uint16_t crcspeed16native(uint16_t table[8][256], uint16_t crc, void *buf, + size_t len) { + uint64_t n = 1; + + return *(char *)&n ? crcspeed16little(table, crc, buf, len) + : crcspeed16big(table, crc, buf, len); +} + +/* Initialize CRC lookup table in architecture-dependent manner. */ +void crcspeed64native_init(crcfn64 fn, uint64_t table[8][256]) { + uint64_t n = 1; + + *(char *)&n ? crcspeed64little_init(fn, table) + : crcspeed64big_init(fn, table); +} + +void crcspeed16native_init(crcfn16 fn, uint16_t table[8][256]) { + uint64_t n = 1; + + *(char *)&n ? crcspeed16little_init(fn, table) + : crcspeed16big_init(fn, table); +} diff --git a/src/crcspeed.h b/src/crcspeed.h new file mode 100644 index 000000000..d7ee95ebb --- /dev/null +++ b/src/crcspeed.h @@ -0,0 +1,60 @@ +/* Copyright (c) 2014, Matt Stancliff + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. */ + +#ifndef CRCSPEED_H +#define CRCSPEED_H + +#include +#include + +typedef uint64_t (*crcfn64)(uint64_t, const void *, const uint64_t); +typedef uint16_t (*crcfn16)(uint16_t, const void *, const uint64_t); + +/* CRC-64 */ +void crcspeed64little_init(crcfn64 fn, uint64_t table[8][256]); +void crcspeed64big_init(crcfn64 fn, uint64_t table[8][256]); +void crcspeed64native_init(crcfn64 fn, uint64_t table[8][256]); + +uint64_t crcspeed64little(uint64_t table[8][256], uint64_t crc, void *buf, + size_t len); +uint64_t crcspeed64big(uint64_t table[8][256], uint64_t crc, void *buf, + size_t len); +uint64_t crcspeed64native(uint64_t table[8][256], uint64_t crc, void *buf, + size_t len); + +/* CRC-16 */ +void crcspeed16little_init(crcfn16 fn, uint16_t table[8][256]); +void crcspeed16big_init(crcfn16 fn, uint16_t table[8][256]); +void crcspeed16native_init(crcfn16 fn, uint16_t table[8][256]); + +uint16_t crcspeed16little(uint16_t table[8][256], uint16_t crc, void *buf, + size_t len); +uint16_t crcspeed16big(uint16_t table[8][256], uint16_t crc, void *buf, + size_t len); +uint16_t crcspeed16native(uint16_t table[8][256], uint16_t crc, void *buf, + size_t len); +#endif From d92f14e825c2cb391ab3523e629f68c30a9593b9 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Mon, 27 Apr 2020 23:17:19 +0300 Subject: [PATCH 49/71] allow dictFind using static robj since the recent addition of OBJ_STATIC_REFCOUNT and the assertion in incrRefCount it is now impossible to use dictFind using a static robj, because dictEncObjKeyCompare will call getDecodedObject which tries to increment the refcount just in order to decrement it later. --- src/server.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/server.c b/src/server.c index e265fe9ab..1bfca1a0b 100644 --- a/src/server.c +++ b/src/server.c @@ -1219,11 +1219,16 @@ int dictEncObjKeyCompare(void *privdata, const void *key1, o2->encoding == OBJ_ENCODING_INT) return o1->ptr == o2->ptr; - o1 = getDecodedObject(o1); - o2 = getDecodedObject(o2); + /* due to OBJ_STATIC_REFCOUNT, we rather not call sdsEncodedObject unnecessarily */ + if (!sdsEncodedObject(o1)) + o1 = getDecodedObject(o1); + if (!sdsEncodedObject(o2)) + o2 = getDecodedObject(o2); cmp = dictSdsKeyCompare(privdata,o1->ptr,o2->ptr); - decrRefCount(o1); - decrRefCount(o2); + if (o1!=key1) + decrRefCount(o1); + if (o2!=key2) + decrRefCount(o2); return cmp; } From ffbe6543abe266215339f2911104ba62e53907b7 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 27 Apr 2020 22:40:15 +0200 Subject: [PATCH 50/71] Rework comment in dictEncObjKeyCompare(). --- src/server.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/server.c b/src/server.c index 1bfca1a0b..1086dab48 100644 --- a/src/server.c +++ b/src/server.c @@ -1219,16 +1219,15 @@ int dictEncObjKeyCompare(void *privdata, const void *key1, o2->encoding == OBJ_ENCODING_INT) return o1->ptr == o2->ptr; - /* due to OBJ_STATIC_REFCOUNT, we rather not call sdsEncodedObject unnecessarily */ - if (!sdsEncodedObject(o1)) - o1 = getDecodedObject(o1); - if (!sdsEncodedObject(o2)) - o2 = getDecodedObject(o2); + /* Due to OBJ_STATIC_REFCOUNT, we avoid calling getDecodedObject() without + * good reasons, because it would incrRefCount() the object, which + * is invalid. So we check to make sure dictFind() works with static + * objects as well. */ + if (!sdsEncodedObject(o1)) o1 = getDecodedObject(o1); + if (!sdsEncodedObject(o2)) o2 = getDecodedObject(o2); cmp = dictSdsKeyCompare(privdata,o1->ptr,o2->ptr); - if (o1!=key1) - decrRefCount(o1); - if (o2!=key2) - decrRefCount(o2); + if (o1!=key1) decrRefCount(o1); + if (o2!=key2) decrRefCount(o2); return cmp; } From a8995ce3c96ec19bd41b40986854e33141088ee8 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Tue, 28 Apr 2020 09:18:01 +0300 Subject: [PATCH 51/71] fix loading race in psync2 tests --- tests/integration/psync2-pingoff.tcl | 1 + tests/integration/psync2-reg.tcl | 5 ++++- tests/integration/psync2.tcl | 10 ++++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/integration/psync2-pingoff.tcl b/tests/integration/psync2-pingoff.tcl index 1cea290e7..420747d21 100644 --- a/tests/integration/psync2-pingoff.tcl +++ b/tests/integration/psync2-pingoff.tcl @@ -20,6 +20,7 @@ start_server {} { $R(1) replicaof $R_host(0) $R_port(0) $R(0) set foo bar wait_for_condition 50 1000 { + [status $R(1) master_link_status] == "up" && [$R(0) dbsize] == 1 && [$R(1) dbsize] == 1 } else { fail "Replicas not replicating from master" diff --git a/tests/integration/psync2-reg.tcl b/tests/integration/psync2-reg.tcl index b5ad021e2..71a1c0eb2 100644 --- a/tests/integration/psync2-reg.tcl +++ b/tests/integration/psync2-reg.tcl @@ -28,7 +28,10 @@ start_server {} { $R(2) slaveof $R_host(0) $R_port(0) $R(0) set foo bar wait_for_condition 50 1000 { - [$R(1) dbsize] == 1 && [$R(2) dbsize] == 1 + [status $R(1) master_link_status] == "up" && + [status $R(2) master_link_status] == "up" && + [$R(1) dbsize] == 1 && + [$R(2) dbsize] == 1 } else { fail "Replicas not replicating from master" } diff --git a/tests/integration/psync2.tcl b/tests/integration/psync2.tcl index 4e1189e0b..5fe29caba 100644 --- a/tests/integration/psync2.tcl +++ b/tests/integration/psync2.tcl @@ -67,6 +67,16 @@ start_server {} { lappend used $slave_id } + # Wait for replicas to sync. so next loop won't get -LOADING error + wait_for_condition 50 1000 { + [status $R([expr {($master_id+1)%5}]) master_link_status] == "up" && + [status $R([expr {($master_id+2)%5}]) master_link_status] == "up" && + [status $R([expr {($master_id+3)%5}]) master_link_status] == "up" && + [status $R([expr {($master_id+4)%5}]) master_link_status] == "up" + } else { + fail "Replica not reconnecting" + } + # 3) Increment the counter and wait for all the instances # to converge. test "PSYNC2: cluster is consistent after failover" { From fb0a0c6451fb5cb26993bf0473a72b5a9671c5fb Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Tue, 28 Apr 2020 12:14:46 +0300 Subject: [PATCH 52/71] hickup, re-fix dictEncObjKeyCompare come to think of it, in theory (not in practice), getDecodedObject can return the same original object with refcount incremented, so the pointer comparision in the previous commit was invalid. so now instead of checking the encoding, we explicitly check the refcount. --- src/server.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/server.c b/src/server.c index 1086dab48..ab6280121 100644 --- a/src/server.c +++ b/src/server.c @@ -1223,11 +1223,11 @@ int dictEncObjKeyCompare(void *privdata, const void *key1, * good reasons, because it would incrRefCount() the object, which * is invalid. So we check to make sure dictFind() works with static * objects as well. */ - if (!sdsEncodedObject(o1)) o1 = getDecodedObject(o1); - if (!sdsEncodedObject(o2)) o2 = getDecodedObject(o2); + if (o1->refcount != OBJ_STATIC_REFCOUNT) o1 = getDecodedObject(o1); + if (o2->refcount != OBJ_STATIC_REFCOUNT) o2 = getDecodedObject(o2); cmp = dictSdsKeyCompare(privdata,o1->ptr,o2->ptr); - if (o1!=key1) decrRefCount(o1); - if (o2!=key2) decrRefCount(o2); + if (o1->refcount != OBJ_STATIC_REFCOUNT) decrRefCount(o1); + if (o2->refcount != OBJ_STATIC_REFCOUNT) decrRefCount(o2); return cmp; } From c028751ef0620de46ee1c1fdaf1666e1379de6ea Mon Sep 17 00:00:00 2001 From: Itamar Haber Date: Fri, 28 Feb 2020 13:35:10 +0200 Subject: [PATCH 53/71] Adds `BIN_PATH` to create-cluster Allows for setting the binaries path if used outside the upstream repo. Also documents `call` in usage clause (TODO: port to `redis-cli --cluster call` or just deprecate it). --- utils/create-cluster/create-cluster | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/utils/create-cluster/create-cluster b/utils/create-cluster/create-cluster index 4aef0816a..5b8bfe250 100755 --- a/utils/create-cluster/create-cluster +++ b/utils/create-cluster/create-cluster @@ -1,6 +1,7 @@ #!/bin/bash # Settings +BIN_PATH="../../" CLUSTER_HOST=127.0.0.1 PORT=30000 TIMEOUT=2000 @@ -25,7 +26,7 @@ then while [ $((PORT < ENDPORT)) != "0" ]; do PORT=$((PORT+1)) echo "Starting $PORT" - ../../src/redis-server --port $PORT --protected-mode $PROTECTED_MODE --cluster-enabled yes --cluster-config-file nodes-${PORT}.conf --cluster-node-timeout $TIMEOUT --appendonly yes --appendfilename appendonly-${PORT}.aof --dbfilename dump-${PORT}.rdb --logfile ${PORT}.log --daemonize yes ${ADDITIONAL_OPTIONS} + $BIN_PATH/redis-server --port $PORT --protected-mode $PROTECTED_MODE --cluster-enabled yes --cluster-config-file nodes-${PORT}.conf --cluster-node-timeout $TIMEOUT --appendonly yes --appendfilename appendonly-${PORT}.aof --dbfilename dump-${PORT}.rdb --logfile ${PORT}.log --daemonize yes ${ADDITIONAL_OPTIONS} done exit 0 fi @@ -37,7 +38,7 @@ then PORT=$((PORT+1)) HOSTS="$HOSTS $CLUSTER_HOST:$PORT" done - ../../src/redis-cli --cluster create $HOSTS --cluster-replicas $REPLICAS + $BIN_PATH/redis-cli --cluster create $HOSTS --cluster-replicas $REPLICAS exit 0 fi @@ -46,7 +47,7 @@ then while [ $((PORT < ENDPORT)) != "0" ]; do PORT=$((PORT+1)) echo "Stopping $PORT" - ../../src/redis-cli -p $PORT shutdown nosave + $BIN_PATH/redis-cli -p $PORT shutdown nosave done exit 0 fi @@ -57,7 +58,7 @@ then while [ 1 ]; do clear date - ../../src/redis-cli -p $PORT cluster nodes | head -30 + $BIN_PATH/redis-cli -p $PORT cluster nodes | head -30 sleep 1 done exit 0 @@ -81,7 +82,7 @@ if [ "$1" == "call" ] then while [ $((PORT < ENDPORT)) != "0" ]; do PORT=$((PORT+1)) - ../../src/redis-cli -p $PORT $2 $3 $4 $5 $6 $7 $8 $9 + $BIN_PATH/redis-cli -p $PORT $2 $3 $4 $5 $6 $7 $8 $9 done exit 0 fi @@ -101,7 +102,7 @@ then exit 0 fi -echo "Usage: $0 [start|create|stop|watch|tail|clean]" +echo "Usage: $0 [start|create|stop|watch|tail|clean|call]" echo "start -- Launch Redis Cluster instances." echo "create -- Create a cluster using redis-cli --cluster create." echo "stop -- Stop Redis Cluster instances." @@ -110,3 +111,4 @@ echo "tail -- Run tail -f of instance at base port + ID." echo "tailall -- Run tail -f for all the log files at once." echo "clean -- Remove all instances data, logs, configs." echo "clean-logs -- Remove just instances logs." +echo "call -- Call a command (up to 7 arguments) on all nodes. " From 1541e3e522ad690230fd31be1f8349e45c5fc17b Mon Sep 17 00:00:00 2001 From: Itamar Haber Date: Fri, 28 Feb 2020 13:36:50 +0200 Subject: [PATCH 54/71] Update create-cluster --- utils/create-cluster/create-cluster | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/create-cluster/create-cluster b/utils/create-cluster/create-cluster index 5b8bfe250..4db0a6619 100755 --- a/utils/create-cluster/create-cluster +++ b/utils/create-cluster/create-cluster @@ -111,4 +111,4 @@ echo "tail -- Run tail -f of instance at base port + ID." echo "tailall -- Run tail -f for all the log files at once." echo "clean -- Remove all instances data, logs, configs." echo "clean-logs -- Remove just instances logs." -echo "call -- Call a command (up to 7 arguments) on all nodes. " +echo "call -- Call a command (up to 7 arguments) on all nodes." From 12bb6b0f088dd584af8160c5cf03e05a1ee92fa2 Mon Sep 17 00:00:00 2001 From: hwware Date: Wed, 15 Apr 2020 22:00:36 -0400 Subject: [PATCH 55/71] Fix not used marco in cluster.c --- src/cluster.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cluster.c b/src/cluster.c index e3adaf83a..49a410d54 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -2106,7 +2106,7 @@ int clusterProcessPacket(clusterLink *link) { resetManualFailover(); server.cluster->mf_end = mstime() + CLUSTER_MF_TIMEOUT; server.cluster->mf_slave = sender; - pauseClients(mstime()+(CLUSTER_MF_TIMEOUT*2)); + pauseClients(mstime()+(CLUSTER_MF_TIMEOUT*CLUSTER_MF_PAUSE_MULT)); serverLog(LL_WARNING,"Manual failover requested by replica %.40s.", sender->name); } else if (type == CLUSTERMSG_TYPE_UPDATE) { From dc3d865edc9352aa7d6315cbd00f458ae53eceec Mon Sep 17 00:00:00 2001 From: Guy Benoish Date: Wed, 22 Apr 2020 16:05:57 +0300 Subject: [PATCH 56/71] Extend XINFO STREAM output Introducing XINFO STREAM FULL --- src/t_stream.c | 226 ++++++++++++++++++++++++----- tests/unit/type/stream-cgroups.tcl | 34 +++++ 2 files changed, 226 insertions(+), 34 deletions(-) diff --git a/src/t_stream.c b/src/t_stream.c index 758db637e..d6a5e0009 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -2489,16 +2489,200 @@ void xtrimCommand(client *c) { addReplyLongLong(c,deleted); } +/* Helper function for xinfoCommand. + * Handles the variants of XINFO STREAM */ +void xinfoReplyWithStreamInfo(client *c, stream *s) { + int full = 1; + long long count = 0; + robj **optv = c->argv + 3; /* Options start after XINFO STREAM */ + int optc = c->argc - 3; + + /* Parse options. */ + if (optc == 0) { + full = 0; + } else { + /* Valid options are [FULL] or [FULL COUNT ] */ + if (optc != 1 && optc != 3) { + addReplySubcommandSyntaxError(c); + return; + } + + /* First option must be "FULL" */ + if (strcasecmp(optv[0]->ptr,"full")) { + addReplySubcommandSyntaxError(c); + return; + } + + if (optc == 3) { + /* First option must be "FULL" */ + if (strcasecmp(optv[1]->ptr,"count")) { + addReplySubcommandSyntaxError(c); + return; + } + if (getLongLongFromObjectOrReply(c,optv[2],&count,NULL) == C_ERR) + return; + if (count < 0) count = 0; + } + } + + addReplyMapLen(c,full ? 6 : 7); + addReplyBulkCString(c,"length"); + addReplyLongLong(c,s->length); + addReplyBulkCString(c,"radix-tree-keys"); + addReplyLongLong(c,raxSize(s->rax)); + addReplyBulkCString(c,"radix-tree-nodes"); + addReplyLongLong(c,s->rax->numnodes); + addReplyBulkCString(c,"last-generated-id"); + addReplyStreamID(c,&s->last_id); + + if (!full) { + /* XINFO STREAM */ + + addReplyBulkCString(c,"groups"); + addReplyLongLong(c,s->cgroups ? raxSize(s->cgroups) : 0); + + /* To emit the first/last entry we use streamReplyWithRange(). */ + int emitted; + streamID start, end; + start.ms = start.seq = 0; + end.ms = end.seq = UINT64_MAX; + addReplyBulkCString(c,"first-entry"); + emitted = streamReplyWithRange(c,s,&start,&end,1,0,NULL,NULL, + STREAM_RWR_RAWENTRIES,NULL); + if (!emitted) addReplyNull(c); + addReplyBulkCString(c,"last-entry"); + emitted = streamReplyWithRange(c,s,&start,&end,1,1,NULL,NULL, + STREAM_RWR_RAWENTRIES,NULL); + if (!emitted) addReplyNull(c); + } else { + /* XINFO STREAM FULL [COUNT ] */ + + /* Stream entries */ + addReplyBulkCString(c,"entries"); + streamReplyWithRange(c,s,NULL,NULL,count,0,NULL,NULL,0,NULL); + + /* Consumer groups */ + addReplyBulkCString(c,"groups"); + if (s->cgroups == NULL) { + addReplyArrayLen(c,0); + } else { + addReplyArrayLen(c,raxSize(s->cgroups)); + raxIterator ri_cgroups; + raxStart(&ri_cgroups,s->cgroups); + raxSeek(&ri_cgroups,"^",NULL,0); + while(raxNext(&ri_cgroups)) { + streamCG *cg = ri_cgroups.data; + addReplyMapLen(c,5); + + /* Name */ + addReplyBulkCString(c,"name"); + addReplyBulkCBuffer(c,ri_cgroups.key,ri_cgroups.key_len); + + /* Last delivered ID */ + addReplyBulkCString(c,"last-delivered-id"); + addReplyStreamID(c,&cg->last_id); + + /* Group PEL count */ + addReplyBulkCString(c,"pel-count"); + addReplyLongLong(c,raxSize(cg->pel)); + + /* Group PEL */ + addReplyBulkCString(c,"pending"); + long long arraylen_cg_pel = 0; + void *arrayptr_cg_pel = addReplyDeferredLen(c); + raxIterator ri_cg_pel; + raxStart(&ri_cg_pel,cg->pel); + raxSeek(&ri_cg_pel,"^",NULL,0); + while(raxNext(&ri_cg_pel) && (!count || arraylen_cg_pel < count)) { + streamNACK *nack = ri_cg_pel.data; + addReplyArrayLen(c,4); + + /* Entry ID. */ + streamID id; + streamDecodeID(ri_cg_pel.key,&id); + addReplyStreamID(c,&id); + + /* Consumer name. */ + addReplyBulkCBuffer(c,nack->consumer->name, + sdslen(nack->consumer->name)); + + /* Last delivery. */ + addReplyLongLong(c,nack->delivery_time); + + /* Number of deliveries. */ + addReplyLongLong(c,nack->delivery_count); + + arraylen_cg_pel++; + } + setDeferredArrayLen(c,arrayptr_cg_pel,arraylen_cg_pel); + raxStop(&ri_cg_pel); + + /* Consumers */ + addReplyBulkCString(c,"consumers"); + addReplyArrayLen(c,raxSize(cg->consumers)); + raxIterator ri_consumers; + raxStart(&ri_consumers,cg->consumers); + raxSeek(&ri_consumers,"^",NULL,0); + while(raxNext(&ri_consumers)) { + streamConsumer *consumer = ri_consumers.data; + addReplyMapLen(c,4); + + /* Consumer name */ + addReplyBulkCString(c,"name"); + addReplyBulkCBuffer(c,consumer->name,sdslen(consumer->name)); + + /* Seen-time */ + addReplyBulkCString(c,"seen-time"); + addReplyLongLong(c,consumer->seen_time); + + /* Consumer PEL count */ + addReplyBulkCString(c,"pel-count"); + addReplyLongLong(c,raxSize(consumer->pel)); + + /* Consumer PEL */ + addReplyBulkCString(c,"pending"); + long long arraylen_cpel = 0; + void *arrayptr_cpel = addReplyDeferredLen(c); + raxIterator ri_cpel; + raxStart(&ri_cpel,consumer->pel); + raxSeek(&ri_cpel,"^",NULL,0); + while(raxNext(&ri_cpel) && (!count || arraylen_cpel < count)) { + streamNACK *nack = ri_cpel.data; + addReplyArrayLen(c,3); + + /* Entry ID. */ + streamID id; + streamDecodeID(ri_cpel.key,&id); + addReplyStreamID(c,&id); + + /* Last delivery. */ + addReplyLongLong(c,nack->delivery_time); + + /* Number of deliveries. */ + addReplyLongLong(c,nack->delivery_count); + + arraylen_cpel++; + } + setDeferredArrayLen(c,arrayptr_cpel,arraylen_cpel); + raxStop(&ri_cpel); + } + raxStop(&ri_consumers); + } + raxStop(&ri_cgroups); + } + } +} + /* XINFO CONSUMERS * XINFO GROUPS - * XINFO STREAM + * XINFO STREAM [FULL [COUNT ]] * XINFO HELP. */ void xinfoCommand(client *c) { const char *help[] = { -"CONSUMERS -- Show consumer groups of group .", -"GROUPS -- Show the stream consumer groups.", -"STREAM -- Show information about the stream.", -"HELP -- Print this help.", +"CONSUMERS -- Show consumer groups of group .", +"GROUPS -- Show the stream consumer groups.", +"STREAM [FULL [COUNT ]] -- Show information about the stream.", +"HELP -- Print this help.", NULL }; stream *s = NULL; @@ -2578,36 +2762,10 @@ NULL addReplyStreamID(c,&cg->last_id); } raxStop(&ri); - } else if (!strcasecmp(opt,"STREAM") && c->argc == 3) { - /* XINFO STREAM (or the alias XINFO ). */ - addReplyMapLen(c,7); - addReplyBulkCString(c,"length"); - addReplyLongLong(c,s->length); - addReplyBulkCString(c,"radix-tree-keys"); - addReplyLongLong(c,raxSize(s->rax)); - addReplyBulkCString(c,"radix-tree-nodes"); - addReplyLongLong(c,s->rax->numnodes); - addReplyBulkCString(c,"groups"); - addReplyLongLong(c,s->cgroups ? raxSize(s->cgroups) : 0); - addReplyBulkCString(c,"last-generated-id"); - addReplyStreamID(c,&s->last_id); - - /* To emit the first/last entry we us the streamReplyWithRange() - * API. */ - int count; - streamID start, end; - start.ms = start.seq = 0; - end.ms = end.seq = UINT64_MAX; - addReplyBulkCString(c,"first-entry"); - count = streamReplyWithRange(c,s,&start,&end,1,0,NULL,NULL, - STREAM_RWR_RAWENTRIES,NULL); - if (!count) addReplyNull(c); - addReplyBulkCString(c,"last-entry"); - count = streamReplyWithRange(c,s,&start,&end,1,1,NULL,NULL, - STREAM_RWR_RAWENTRIES,NULL); - if (!count) addReplyNull(c); + } else if (!strcasecmp(opt,"STREAM")) { + /* XINFO STREAM [FULL [COUNT ]]. */ + xinfoReplyWithStreamInfo(c,s); } else { addReplySubcommandSyntaxError(c); } } - diff --git a/tests/unit/type/stream-cgroups.tcl b/tests/unit/type/stream-cgroups.tcl index 04661707b..dfcd735f6 100644 --- a/tests/unit/type/stream-cgroups.tcl +++ b/tests/unit/type/stream-cgroups.tcl @@ -294,6 +294,40 @@ start_server { assert {[lindex $reply 0 3] == 2} } + test {XINFO FULL output} { + r del x + r XADD x 100 a 1 + r XADD x 101 b 1 + r XADD x 102 c 1 + r XADD x 103 e 1 + r XADD x 104 f 1 + r XGROUP CREATE x g1 0 + r XGROUP CREATE x g2 0 + r XREADGROUP GROUP g1 Alice COUNT 1 STREAMS x > + r XREADGROUP GROUP g1 Bob COUNT 1 STREAMS x > + r XREADGROUP GROUP g1 Bob NOACK COUNT 1 STREAMS x > + r XREADGROUP GROUP g2 Charlie COUNT 4 STREAMS x > + r XDEL x 103 + + set reply [r XINFO STREAM x FULL] + assert_equal [llength $reply] 12 + assert_equal [lindex $reply 1] 4 ;# stream length + assert_equal [lindex $reply 9] "{100-0 {a 1}} {101-0 {b 1}} {102-0 {c 1}} {104-0 {f 1}}" ;# entries + assert_equal [lindex $reply 11 0 1] "g1" ;# first group name + assert_equal [lindex $reply 11 0 7 0 0] "100-0" ;# first entry in group's PEL + assert_equal [lindex $reply 11 0 9 0 1] "Alice" ;# first consumer + assert_equal [lindex $reply 11 0 9 0 7 0 0] "100-0" ;# first entry in first consumer's PEL + assert_equal [lindex $reply 11 1 1] "g2" ;# second group name + assert_equal [lindex $reply 11 1 9 0 1] "Charlie" ;# first consumer + assert_equal [lindex $reply 11 1 9 0 7 0 0] "100-0" ;# first entry in first consumer's PEL + assert_equal [lindex $reply 11 1 9 0 7 1 0] "101-0" ;# second entry in first consumer's PEL + + set reply [r XINFO STREAM x FULL COUNT 1] + assert_equal [llength $reply] 12 + assert_equal [lindex $reply 1] 4 + assert_equal [lindex $reply 9] "{100-0 {a 1}}" + } + start_server {} { set master [srv -1 client] set master_host [srv -1 host] From d66ac30fd4e6083523dace4c74307a037fb68592 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 28 Apr 2020 16:40:15 +0200 Subject: [PATCH 57/71] Fix create-cluster BIN_PATH. --- utils/create-cluster/create-cluster | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/create-cluster/create-cluster b/utils/create-cluster/create-cluster index 4db0a6619..931f6f521 100755 --- a/utils/create-cluster/create-cluster +++ b/utils/create-cluster/create-cluster @@ -1,7 +1,7 @@ #!/bin/bash # Settings -BIN_PATH="../../" +BIN_PATH="../../src/" CLUSTER_HOST=127.0.0.1 PORT=30000 TIMEOUT=2000 From e9811c3b12542569ad9f50478bb3899da522fe46 Mon Sep 17 00:00:00 2001 From: srzhao Date: Tue, 26 Nov 2019 10:43:57 +0800 Subject: [PATCH 58/71] fix pipelined WAIT performance issue. If client gets blocked again in `processUnblockedClients`, redis will not send `REPLCONF GETACK *` to slaves untill next eventloop, so the client will be blocked for 100ms by default(10hz) if no other file event fired. move server.get_ack_from_slaves sinppet after `processUnblockedClients`, so that both the first WAIT command that puts client in blocked context and the following WAIT command processed in processUnblockedClients would trigger redis-sever to send `REPLCONF GETACK *`, so that the eventloop would get `REPLCONG ACK ` from slaves and unblocked ASAP. --- src/server.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/server.c b/src/server.c index ab6280121..150476276 100644 --- a/src/server.c +++ b/src/server.c @@ -2110,6 +2110,19 @@ void beforeSleep(struct aeEventLoop *eventLoop) { if (server.active_expire_enabled && server.masterhost == NULL) activeExpireCycle(ACTIVE_EXPIRE_CYCLE_FAST); + /* Unblock all the clients blocked for synchronous replication + * in WAIT. */ + if (listLength(server.clients_waiting_acks)) + processClientsWaitingReplicas(); + + /* Check if there are clients unblocked by modules that implement + * blocking commands. */ + if (moduleCount()) moduleHandleBlockedClients(); + + /* Try to process pending commands for clients that were just unblocked. */ + if (listLength(server.unblocked_clients)) + processUnblockedClients(); + /* Send all the slaves an ACK request if at least one client blocked * during the previous event loop iteration. */ if (server.get_ack_from_slaves) { @@ -2125,19 +2138,6 @@ void beforeSleep(struct aeEventLoop *eventLoop) { server.get_ack_from_slaves = 0; } - /* Unblock all the clients blocked for synchronous replication - * in WAIT. */ - if (listLength(server.clients_waiting_acks)) - processClientsWaitingReplicas(); - - /* Check if there are clients unblocked by modules that implement - * blocking commands. */ - if (moduleCount()) moduleHandleBlockedClients(); - - /* Try to process pending commands for clients that were just unblocked. */ - if (listLength(server.unblocked_clients)) - processUnblockedClients(); - /* Send the invalidation messages to clients participating to the * client side caching protocol in broadcasting (BCAST) mode. */ trackingBroadcastInvalidationMessages(); From 78b9c097c94bcad980d1a04512d66a59d0e78632 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 29 Apr 2020 11:16:30 +0200 Subject: [PATCH 59/71] Comment clearly why we moved some code in #6623. --- src/server.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/server.c b/src/server.c index 150476276..659604ef3 100644 --- a/src/server.c +++ b/src/server.c @@ -2124,7 +2124,10 @@ void beforeSleep(struct aeEventLoop *eventLoop) { processUnblockedClients(); /* Send all the slaves an ACK request if at least one client blocked - * during the previous event loop iteration. */ + * during the previous event loop iteration. Note that we do this after + * processUnblockedClients(), so if there are multiple pipelined WAITs + * and the just unblocked WAIT gets blocked again, we don't have to wait + * a server cron cycle in absence of other event loop events. See #6623. */ if (server.get_ack_from_slaves) { robj *argv[3]; From 20a9fe531ca8a520400a92e43268bc113c7e2c32 Mon Sep 17 00:00:00 2001 From: Guy Benoish Date: Tue, 28 Apr 2020 17:58:25 +0300 Subject: [PATCH 60/71] XINFO STREAM FULL should have a default COUNT of 10 --- src/t_stream.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/t_stream.c b/src/t_stream.c index d6a5e0009..5c1b9a523 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -2493,7 +2493,7 @@ void xtrimCommand(client *c) { * Handles the variants of XINFO STREAM */ void xinfoReplyWithStreamInfo(client *c, stream *s) { int full = 1; - long long count = 0; + long long count = 10; /* Default COUNT is 10 so we don't block the server */ robj **optv = c->argv + 3; /* Options start after XINFO STREAM */ int optc = c->argc - 3; @@ -2521,7 +2521,7 @@ void xinfoReplyWithStreamInfo(client *c, stream *s) { } if (getLongLongFromObjectOrReply(c,optv[2],&count,NULL) == C_ERR) return; - if (count < 0) count = 0; + if (count < 0) count = 10; } } @@ -2548,11 +2548,11 @@ void xinfoReplyWithStreamInfo(client *c, stream *s) { end.ms = end.seq = UINT64_MAX; addReplyBulkCString(c,"first-entry"); emitted = streamReplyWithRange(c,s,&start,&end,1,0,NULL,NULL, - STREAM_RWR_RAWENTRIES,NULL); + STREAM_RWR_RAWENTRIES,NULL); if (!emitted) addReplyNull(c); addReplyBulkCString(c,"last-entry"); emitted = streamReplyWithRange(c,s,&start,&end,1,1,NULL,NULL, - STREAM_RWR_RAWENTRIES,NULL); + STREAM_RWR_RAWENTRIES,NULL); if (!emitted) addReplyNull(c); } else { /* XINFO STREAM FULL [COUNT ] */ @@ -2682,6 +2682,10 @@ void xinfoCommand(client *c) { "CONSUMERS -- Show consumer groups of group .", "GROUPS -- Show the stream consumer groups.", "STREAM [FULL [COUNT ]] -- Show information about the stream.", +" FULL will return the full state of the stream,", +" including all entries, groups, consumers and PELs.", +" It's possible to show only the first stream/PEL entries", +" by using the COUNT modifier (Default is 10)", "HELP -- Print this help.", NULL }; From dbf803bf9c542172fd783ba30e6c32b09ab24077 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 29 Apr 2020 12:37:12 +0200 Subject: [PATCH 61/71] redis-cli: try to make clusterManagerFixOpenSlot() more readable. Also improve the message to make clear that there is no *clear* owner, not that there is no owner at all. --- src/redis-cli.c | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 72480d08c..469dbb0ff 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -4596,20 +4596,26 @@ static int clusterManagerFixOpenSlot(int slot) { /* Try to obtain the current slot owner, according to the current * nodes configuration. */ int success = 1; - list *owners = listCreate(); + list *owners = listCreate(); /* List of nodes claiming some ownership. + it could be stating in the configuration + to have the node ownership, or just + holding keys for such slot. */ list *migrating = listCreate(); list *importing = listCreate(); sds migrating_str = sdsempty(); sds importing_str = sdsempty(); - clusterManagerNode *owner = NULL; + clusterManagerNode *owner = NULL; /* The obvious slot owner if any. */ + + /* Iterate all the nodes, looking for potential owners of this slot. */ listIter li; listNode *ln; listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; - if (n->slots[slot]) listAddNodeTail(owners, n); - else { + if (n->slots[slot]) { + listAddNodeTail(owners, n); + } else { redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER COUNTKEYSINSLOT %d", slot); success = clusterManagerCheckRedisReply(n, r, NULL); @@ -4623,7 +4629,14 @@ static int clusterManagerFixOpenSlot(int slot) { if (!success) goto cleanup; } } + + /* If we have only a single potential owner for this slot, + * set it as "owner". */ if (listLength(owners) == 1) owner = listFirst(owners)->value; + + /* Scan the list of nodes again, in order to populate the + * list of nodes in importing or migrating state for + * this slot. */ listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; @@ -4655,6 +4668,7 @@ static int clusterManagerFixOpenSlot(int slot) { } } } + /* If the node is neither migrating nor importing and it's not * the owner, then is added to the importing list in case * it has keys in the slot. */ @@ -4679,11 +4693,12 @@ static int clusterManagerFixOpenSlot(int slot) { printf("Set as migrating in: %s\n", migrating_str); if (sdslen(importing_str) > 0) printf("Set as importing in: %s\n", importing_str); + /* If there is no slot owner, set as owner the node with the biggest * number of keys, among the set of migrating / importing nodes. */ if (owner == NULL) { - clusterManagerLogInfo(">>> Nobody claims ownership, " - "selecting an owner...\n"); + clusterManagerLogInfo(">>> No single clear owner for the slot, " + "selecting an owner by # of keys...\n"); owner = clusterManagerGetNodeWithMostKeysInSlot(cluster_manager.nodes, slot, NULL); // If we still don't have an owner, we can't fix it. @@ -4714,6 +4729,7 @@ static int clusterManagerFixOpenSlot(int slot) { clusterManagerRemoveNodeFromList(migrating, owner); clusterManagerRemoveNodeFromList(importing, owner); } + /* If there are multiple owners of the slot, we need to fix it * so that a single node is the owner and all the other nodes * are in importing state. Later the fix can be handled by one @@ -4746,6 +4762,7 @@ static int clusterManagerFixOpenSlot(int slot) { } } int move_opts = CLUSTER_MANAGER_OPT_VERBOSE; + /* Case 1: The slot is in migrating state in one node, and in * importing state in 1 node. That's trivial to address. */ if (listLength(migrating) == 1 && listLength(importing) == 1) { @@ -4757,6 +4774,7 @@ static int clusterManagerFixOpenSlot(int slot) { move_opts |= CLUSTER_MANAGER_OPT_UPDATE; success = clusterManagerMoveSlot(src, dst, slot, move_opts, NULL); } + /* Case 2: There are multiple nodes that claim the slot as importing, * they probably got keys about the slot after a restart so opened * the slot. In this case we just move all the keys to the owner @@ -4787,6 +4805,7 @@ static int clusterManagerFixOpenSlot(int slot) { if (!success) goto cleanup; } } + /* Case 3: The slot is in migrating state in one node but multiple * other nodes claim to be in importing state and don't have any key in * the slot. We search for the importing node having the same ID as From 9a9953d331f481fbc3435e4e4efff010d7ddfa4d Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 29 Apr 2020 16:28:16 +0200 Subject: [PATCH 62/71] redis-cli: simplify cluster nodes coverage display. --- src/redis-cli.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 469dbb0ff..82a46216c 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -4295,17 +4295,18 @@ static int clusterManagerGetCoveredSlots(char *all_slots) { } static void clusterManagerPrintSlotsList(list *slots) { + clusterManagerNode n = {0}; listIter li; listNode *ln; listRewind(slots, &li); - sds first = NULL; while ((ln = listNext(&li)) != NULL) { - sds slot = ln->value; - if (!first) first = slot; - else printf(", "); - printf("%s", slot); + int slot = atoi(ln->value); + if (slot >= 0 && slot < CLUSTER_MANAGER_SLOTS) + n.slots[slot] = 1; } - printf("\n"); + sds nodeslist = clusterManagerNodeSlotsString(&n); + printf("%s\n", nodeslist); + sdsfree(nodeslist); } /* Return the node, among 'nodes' with the greatest number of keys @@ -4398,15 +4399,10 @@ static int clusterManagerFixSlotsCoverage(char *all_slots) { int i, fixed = 0; list *none = NULL, *single = NULL, *multi = NULL; clusterManagerLogInfo(">>> Fixing slots coverage...\n"); - printf("List of not covered slots: \n"); - int uncovered_count = 0; - sds log = sdsempty(); for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { int covered = all_slots[i]; if (!covered) { - sds key = sdsfromlonglong((long long) i); - if (uncovered_count++ > 0) printf(","); - printf("%s", (char *) key); + sds slot = sdsfromlonglong((long long) i); list *slot_nodes = listCreate(); sds slot_nodes_str = sdsempty(); listIter li; @@ -4433,13 +4429,11 @@ static int clusterManagerFixSlotsCoverage(char *all_slots) { } freeReplyObject(reply); } - log = sdscatfmt(log, "\nSlot %S has keys in %u nodes: %S", - key, listLength(slot_nodes), slot_nodes_str); sdsfree(slot_nodes_str); - dictAdd(clusterManagerUncoveredSlots, key, slot_nodes); + dictAdd(clusterManagerUncoveredSlots, slot, slot_nodes); } } - printf("\n%s\n", log); + /* For every slot, take action depending on the actual condition: * 1) No node has keys for this slot. * 2) A single node has keys for this slot. @@ -4581,7 +4575,6 @@ static int clusterManagerFixSlotsCoverage(char *all_slots) { } } cleanup: - sdsfree(log); if (none) listRelease(none); if (single) listRelease(single); if (multi) listRelease(multi); From 99569af4aa83a626d7166ee09799ba7c3a09a0c8 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 29 Apr 2020 16:57:06 +0200 Subject: [PATCH 63/71] redis-cli: safer cluster fix with unreachalbe masters. --- src/redis-cli.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 82a46216c..ec0f58f5f 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -123,6 +123,7 @@ #define CLUSTER_MANAGER_CMD_FLAG_COPY 1 << 7 #define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 8 #define CLUSTER_MANAGER_CMD_FLAG_CHECK_OWNERS 1 << 9 +#define CLUSTER_MANAGER_CMD_FLAG_FIX_WITH_UNREACHABLE_MASTERS 1 << 10 #define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 #define CLUSTER_MANAGER_OPT_COLD 1 << 1 @@ -1599,6 +1600,9 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"--cluster-search-multiple-owners")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_CHECK_OWNERS; + } else if (!strcmp(argv[i],"--cluster-fix-with-unreachable-masters")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_FIX_WITH_UNREACHABLE_MASTERS; #ifdef USE_OPENSSL } else if (!strcmp(argv[i],"--tls")) { config.tls = 1; @@ -2146,6 +2150,7 @@ static int evalMode(int argc, char **argv) { static struct clusterManager { list *nodes; /* List of nodes in the configuration. */ list *errors; + int unreachable_masters; /* Masters we are not able to reach. */ } cluster_manager; /* Used by clusterManagerFixSlotsCoverage */ @@ -2288,7 +2293,7 @@ clusterManagerCommandDef clusterManagerCommands[] = { "search-multiple-owners"}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, {"fix", clusterManagerCommandFix, -1, "host:port", - "search-multiple-owners"}, + "search-multiple-owners,fix-with-unreachable-masters"}, {"reshard", clusterManagerCommandReshard, -1, "host:port", "from ,to ,slots ,yes,timeout ,pipeline ," "replace"}, @@ -4013,7 +4018,9 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { if (friend->flags & (CLUSTER_MANAGER_FLAG_NOADDR | CLUSTER_MANAGER_FLAG_DISCONNECT | CLUSTER_MANAGER_FLAG_FAIL)) + { goto invalid_friend; + } listAddNodeTail(cluster_manager.nodes, friend); } else { clusterManagerLogErr("[ERR] Unable to load info for " @@ -4023,6 +4030,8 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { } continue; invalid_friend: + if (!(friend->flags & CLUSTER_MANAGER_FLAG_SLAVE)) + cluster_manager.unreachable_masters++; freeClusterManagerNode(friend); } listRelease(node->friends); @@ -4396,6 +4405,14 @@ static clusterManagerNode *clusterManagerNodeMasterRandom() { } static int clusterManagerFixSlotsCoverage(char *all_slots) { + int force_fix = config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_FIX_WITH_UNREACHABLE_MASTERS; + + if (cluster_manager.unreachable_masters > 0 && !force_fix) { + clusterManagerLogWarn("*** Fixing slots coverage with %d unreachable masters is dangerous: redis-cli will assume that slots about masters that are not reachable are not covered, and will try to reassign them to the reachable nodes. This can cause data loss and is rarely what you want to do. If you really want to proceed use the --cluster-fix-with-unreachable-masters option.\n", cluster_manager.unreachable_masters); + exit(1); + } + int i, fixed = 0; list *none = NULL, *single = NULL, *multi = NULL; clusterManagerLogInfo(">>> Fixing slots coverage...\n"); @@ -4585,6 +4602,14 @@ cleanup: * more nodes. This function fixes this condition by migrating keys where * it seems more sensible. */ static int clusterManagerFixOpenSlot(int slot) { + int force_fix = config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_FIX_WITH_UNREACHABLE_MASTERS; + + if (cluster_manager.unreachable_masters > 0 && !force_fix) { + clusterManagerLogWarn("*** Fixing open slots with %d unreachable masters is dangerous: redis-cli will assume that slots about masters that are not reachable are not covered, and will try to reassign them to the reachable nodes. This can cause data loss and is rarely what you want to do. If you really want to proceed use the --cluster-fix-with-unreachable-masters option.\n", cluster_manager.unreachable_masters); + exit(1); + } + clusterManagerLogInfo(">>> Fixing open slot %d\n", slot); /* Try to obtain the current slot owner, according to the current * nodes configuration. */ From aec7e4a8362ed4d06f853f8fe7a371b6e49d2149 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 29 Apr 2020 18:49:42 +0200 Subject: [PATCH 64/71] Fix tracking table max keys option in redis.conf. --- redis.conf | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/redis.conf b/redis.conf index fab0a5898..6cec636bc 100644 --- a/redis.conf +++ b/redis.conf @@ -626,20 +626,23 @@ replica-priority 100 # to track the keys fetched by many clients. # # For this reason it is possible to configure a maximum fill value for the -# invalidation table. By default it is set to 10%, and once this limit is -# reached, Redis will start to evict caching slots in the invalidation table -# even if keys are not modified, just to reclaim memory: this will in turn +# invalidation table. By default it is set to 1M of keys, and once this limit +# is reached, Redis will start to evict keys in the invalidation table +# even if they were not modified, just to reclaim memory: this will in turn # force the clients to invalidate the cached values. Basically the table -# maximum fill rate is a trade off between the memory you want to spend server +# maximum size is a trade off between the memory you want to spend server # side to track information about who cached what, and the ability of clients # to retain cached objects in memory. # -# If you set the value to 0, it means there are no limits, and all the 16 -# millions of caching slots can be used at the same time. In the "stats" -# INFO section, you can find information about the amount of caching slots -# used at every given moment. +# If you set the value to 0, it means there are no limits, and Redis will +# retain as many keys as needed in the invalidation table. +# In the "stats" INFO section, you can find information about the number of +# keys in the invalidation table at every given moment. # -# tracking-table-max-fill 10 +# Note: when key tracking is used in broadcasting mode, no memory is used +# in the server side so this setting is useless. +# +# tracking-table-max-keys 1000000 ################################## SECURITY ################################### From 4f61650c3c9faa6e9eb693852c7306280e26b5ee Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 30 Apr 2020 09:58:06 +0200 Subject: [PATCH 65/71] CLIENT KILL USER . --- src/networking.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/networking.c b/src/networking.c index 7ffa99eb1..767206ab9 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2101,6 +2101,7 @@ void clientCommand(client *c) { "KILL