From 63eb1114892070a82da04d2659f8985f4763c085 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Wed, 21 Feb 2018 11:04:13 +0200 Subject: [PATCH 001/304] Fix zrealloc to behave similarly to je_realloc when size is 0 According to C11, the behavior of realloc with size 0 is now deprecated. it can either behave as free(ptr) and return NULL, or return a valid pointer. but in zmalloc it can lead to zmalloc_oom_handler and panic. and that can affect modules that use it. It looks like both glibc allocator and jemalloc behave like so: realloc(malloc(32),0) returns NULL realloc(NULL,0) returns a valid pointer This commit changes zmalloc to behave the same --- src/zmalloc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/zmalloc.c b/src/zmalloc.c index 094dd80fa..01ac8c797 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -147,6 +147,10 @@ void *zrealloc(void *ptr, size_t size) { size_t oldsize; void *newptr; + if (size == 0 && ptr!=NULL) { + zfree(ptr); + return NULL; + } if (ptr == NULL) return zmalloc(size); #ifdef HAVE_MALLOC_SIZE oldsize = zmalloc_size(ptr); From 327594b0307421dcfde4e6bc96029d43c5579b26 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Mon, 13 Aug 2018 17:43:29 +0300 Subject: [PATCH 002/304] Add log when server dies of SIGTERM during loading this is very confusing to see the server disappears as if it got SIGKILL when it was not the case. --- src/server.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/server.c b/src/server.c index b537ee04a..0c665033a 100644 --- a/src/server.c +++ b/src/server.c @@ -3780,6 +3780,7 @@ static void sigShutdownHandler(int sig) { rdbRemoveTempFile(getpid()); exit(1); /* Exit with an error since this was not a clean shutdown. */ } else if (server.loading) { + serverLogFromHandler(LL_WARNING, "Received shutdown signal during loading, exiting now."); exit(0); } From 26b109ae23b4ad50b0952c6239558877697eb1ce Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Mon, 31 Dec 2018 11:51:03 +0200 Subject: [PATCH 003/304] Make dbSwapDatabases take args as long This prevents an integer overflow bug. Closes #5737. --- src/db.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/db.c b/src/db.c index 62c8aa131..bac15a6bb 100644 --- a/src/db.c +++ b/src/db.c @@ -997,7 +997,7 @@ void scanDatabaseForReadyLists(redisDb *db) { * * Returns C_ERR if at least one of the DB ids are out of range, otherwise * C_OK is returned. 
*/ -int dbSwapDatabases(int id1, int id2) { +int dbSwapDatabases(long id1, long id2) { if (id1 < 0 || id1 >= server.dbnum || id2 < 0 || id2 >= server.dbnum) return C_ERR; if (id1 == id2) return C_OK; From d8f237a761967ea742babc812f2ebb5b049f871e Mon Sep 17 00:00:00 2001 From: Madelyn Olson Date: Tue, 15 Jan 2019 07:26:19 +0000 Subject: [PATCH 004/304] Fixed a rounding bug in geo.tcl --- tests/unit/geo.tcl | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/unit/geo.tcl b/tests/unit/geo.tcl index 604697be4..49e421ee9 100644 --- a/tests/unit/geo.tcl +++ b/tests/unit/geo.tcl @@ -61,6 +61,7 @@ set regression_vectors { {939895 151 59.149620271823181 65.204186651485145} {1412 156 149.29737817929004 15.95807862745508} {564862 149 84.062063109158544 -65.685403922426232} + {1546032440391 16751 -1.8175081637769495 20.665668878082954} } set rv_idx 0 @@ -274,8 +275,19 @@ start_server {tags {"geo"}} { foreach place $diff { set mydist [geo_distance $lon $lat $search_lon $search_lat] set mydist [expr $mydist/1000] - if {($mydist / $radius_km) > 0.999} {incr rounding_errors} + if {($mydist / $radius_km) > 0.999} { + incr rounding_errors + continue + } + if {$mydist < $radius_m} { + # This is a false positive for redis since given the + # same points the higher precision calculation provided + # by TCL shows the point within range + incr rounding_errors + continue + } } + # Make sure this is a real error and not a rounidng issue. if {[llength $diff] == $rounding_errors} { set res $res2; # Error silenced From efd66faa9c7d88da02549f435e56050152cba3b9 Mon Sep 17 00:00:00 2001 From: vattezhang Date: Mon, 18 Feb 2019 22:48:55 +0800 Subject: [PATCH 005/304] benchmark: add auth check in benchmark When we run benchmark but forget to set the right requirepass, benchmark should return error. 
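The hunk below flags the failure by peeking at hiredis' internal reader buffer for the literal "-NOAUTH Authentication required." prefix. For comparison, a minimal standalone sketch that reaches the same decision through the public hiredis reply API; the server address, the PING probe and the system include path are assumptions of the example, not part of the patch:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <hiredis/hiredis.h>

    int main(void) {
        /* Placeholder target; redis-benchmark would use its configured host/port. */
        redisContext *c = redisConnect("127.0.0.1", 6379);
        if (c == NULL || c->err) {
            fprintf(stderr, "Connection error: %s\n", c ? c->errstr : "out of memory");
            exit(1);
        }

        /* When requirepass is set and no password was sent, the server refuses
         * ordinary commands with -NOAUTH, so a cheap PING is enough to probe. */
        redisReply *reply = redisCommand(c, "PING");
        if (reply && reply->type == REDIS_REPLY_ERROR && strstr(reply->str, "NOAUTH")) {
            fprintf(stderr, "Error: %s\n", reply->str);
            exit(1);
        }

        if (reply) freeReplyObject(reply);
        redisFree(c);
        return 0;
    }
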
--- src/redis-benchmark.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 31f91eb0f..4f0f3404a 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -204,6 +204,12 @@ static void readHandler(aeEventLoop *el, int fd, void *privdata, int mask) { if (redisBufferRead(c->context) != REDIS_OK) { fprintf(stderr,"Error: %s\n",c->context->errstr); exit(1); + } + else if (strlen(c->context->reader->buf)>=32 + && !strncmp(c->context->reader->buf,"-NOAUTH Authentication required.", 32)) + { + fprintf(stderr,"Error: %s\n",c->context->reader->buf); + exit(1); } else { while(c->pending) { if (redisGetReply(c->context,&reply) != REDIS_OK) { From f66aed65984f731d7183a78cfa12d7fc4d7d06f0 Mon Sep 17 00:00:00 2001 From: vattezhang Date: Wed, 27 Feb 2019 21:20:00 +0800 Subject: [PATCH 006/304] fix: fix sentinel command table and new flags format --- src/sentinel.c | 13 +++++++++---- src/server.h | 1 + 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/sentinel.c b/src/sentinel.c index 4d03c9c12..92ea75436 100644 --- a/src/sentinel.c +++ b/src/sentinel.c @@ -450,11 +450,11 @@ struct redisCommand sentinelcmds[] = { {"punsubscribe",punsubscribeCommand,-1,"",0,NULL,0,0,0,0,0}, {"publish",sentinelPublishCommand,3,"",0,NULL,0,0,0,0,0}, {"info",sentinelInfoCommand,-1,"",0,NULL,0,0,0,0,0}, - {"role",sentinelRoleCommand,1,"l",0,NULL,0,0,0,0,0}, - {"client",clientCommand,-2,"rs",0,NULL,0,0,0,0,0}, + {"role",sentinelRoleCommand,1,"ok-loading",0,NULL,0,0,0,0,0}, + {"client",clientCommand,-2,"read-only no-script",0,NULL,0,0,0,0,0}, {"shutdown",shutdownCommand,-1,"",0,NULL,0,0,0,0,0}, - {"auth",authCommand,2,"sltF",0,NULL,0,0,0,0,0}, - {"hello",helloCommand,-2,"sF",0,NULL,0,0,0,0,0} + {"auth",authCommand,2,"no-script ok-loading ok-stale fast",0,NULL,0,0,0,0,0}, + {"hello",helloCommand,-2,"no-script fast",0,NULL,0,0,0,0,0} }; /* This function overwrites a few normal Redis config default with Sentinel @@ -477,6 +477,11 @@ void initSentinel(void) { retval = dictAdd(server.commands, sdsnew(cmd->name), cmd); serverAssert(retval == DICT_OK); + + /* Translate the command string flags description into an actual + * set of flags. */ + if (populateCommandTableParseFlags(cmd,cmd->sflags) == C_ERR) + serverPanic("Unsupported command flag"); } /* Initialize various data structures. */ diff --git a/src/server.h b/src/server.h index 994952654..c29a40b6b 100644 --- a/src/server.h +++ b/src/server.h @@ -2264,6 +2264,7 @@ void serverLogHexDump(int level, char *descr, void *value, size_t len); int memtest_preserving_test(unsigned long *m, size_t bytes, int passes); void mixDigest(unsigned char *digest, void *ptr, size_t len); void xorDigest(unsigned char *digest, void *ptr, size_t len); +int populateCommandTableParseFlags(struct redisCommand *c, char *strflags); #define redisDebug(fmt, ...) 
\ printf("DEBUG %s:%d > " fmt "\n", __FILE__, __LINE__, __VA_ARGS__) From 4bfc1763c0d283204cfbaaacefe87a4883f819c8 Mon Sep 17 00:00:00 2001 From: vattezhang Date: Wed, 13 Mar 2019 20:46:33 +0800 Subject: [PATCH 007/304] fix: fix benchmark cannot exit when NOAUTH err happens --- src/redis-benchmark.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 2c53bc936..edeaf3a25 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -419,11 +419,10 @@ static void readHandler(aeEventLoop *el, int fd, void *privdata, int mask) { fprintf(stderr,"Error: %s\n",c->context->errstr); exit(1); } - else if (strlen(c->context->reader->buf)>=32 - && !strncmp(c->context->reader->buf,"-NOAUTH Authentication required.", 32)) + else if (NULL != strstr(c->context->reader->buf,"NOAUTH")) { fprintf(stderr,"Error: %s\n",c->context->reader->buf); - exit(1); + exit(1); } else { while(c->pending) { if (redisGetReply(c->context,&reply) != REDIS_OK) { From b4f77cc43aefb0710fc11d3103c2fa094d10e029 Mon Sep 17 00:00:00 2001 From: Jim Brunner Date: Wed, 13 Mar 2019 16:31:24 +0000 Subject: [PATCH 008/304] Addition of OnUnload function --- src/module.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/module.c b/src/module.c index 5ad999751..ba38ed8d4 100644 --- a/src/module.c +++ b/src/module.c @@ -4799,6 +4799,23 @@ int moduleUnload(sds name) { errno = EBUSY; return REDISMODULE_ERR; } + + /* Give module a chance to clean up. */ + int (*onunload)(void *); + onunload = (int (*)(void *))(unsigned long) dlsym(module->handle, "RedisModule_OnUnload"); + if (onunload) { + RedisModuleCtx ctx = REDISMODULE_CTX_INIT; + ctx.module = module; + ctx.client = moduleFreeContextReusedClient; + int unload_status = onunload((void*)&ctx); + moduleFreeContext(&ctx); + + if (unload_status == REDISMODULE_ERR) { + serverLog(LL_WARNING, "Module %s OnUnload failed. Unload canceled.", name); + errno = ECANCELED; + return REDISMODULE_ERR; + } + } moduleUnregisterCommands(module); From 3137f26d4c6fe0ca6e9ea4dc9e31eac89ec548e9 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Sun, 3 Jun 2018 15:37:48 +0300 Subject: [PATCH 009/304] Add RedisModule_Assert() API call. --- src/module.c | 10 ++++++++++ src/redismodule.h | 4 ++++ 2 files changed, 14 insertions(+) diff --git a/src/module.c b/src/module.c index 8954fcdf0..c6e6e5989 100644 --- a/src/module.c +++ b/src/module.c @@ -3461,6 +3461,15 @@ void RM_LogIOError(RedisModuleIO *io, const char *levelstr, const char *fmt, ... va_end(ap); } +/* Redis-like assert function. + * + * A failed assertion will shut down the server and produce logging information + * that looks identical to information generated by Redis itself. 
+ */ +void RM__Assert(const char *estr, const char *file, int line) { + _serverAssert(estr, file, line); +} + /* -------------------------------------------------------------------------- * Blocking clients from modules * -------------------------------------------------------------------------- */ @@ -4993,6 +5002,7 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(EmitAOF); REGISTER_API(Log); REGISTER_API(LogIOError); + REGISTER_API(_Assert); REGISTER_API(StringAppendBuffer); REGISTER_API(RetainString); REGISTER_API(StringCompare); diff --git a/src/redismodule.h b/src/redismodule.h index d18c38881..db32df049 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -271,6 +271,7 @@ void REDISMODULE_API_FUNC(RedisModule_SaveFloat)(RedisModuleIO *io, float value) float REDISMODULE_API_FUNC(RedisModule_LoadFloat)(RedisModuleIO *io); void REDISMODULE_API_FUNC(RedisModule_Log)(RedisModuleCtx *ctx, const char *level, const char *fmt, ...); void REDISMODULE_API_FUNC(RedisModule_LogIOError)(RedisModuleIO *io, const char *levelstr, const char *fmt, ...); +void REDISMODULE_API_FUNC(RedisModule__Assert)(const char *estr, const char *file, int line); int REDISMODULE_API_FUNC(RedisModule_StringAppendBuffer)(RedisModuleCtx *ctx, RedisModuleString *str, const char *buf, size_t len); void REDISMODULE_API_FUNC(RedisModule_RetainString)(RedisModuleCtx *ctx, RedisModuleString *str); int REDISMODULE_API_FUNC(RedisModule_StringCompare)(RedisModuleString *a, RedisModuleString *b); @@ -433,6 +434,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(EmitAOF); REDISMODULE_GET_API(Log); REDISMODULE_GET_API(LogIOError); + REDISMODULE_GET_API(_Assert); REDISMODULE_GET_API(StringAppendBuffer); REDISMODULE_GET_API(RetainString); REDISMODULE_GET_API(StringCompare); @@ -499,6 +501,8 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int return REDISMODULE_OK; } +#define RedisModule_Assert(_e) ((_e)?(void)0 : (RedisModule__Assert(#_e,__FILE__,__LINE__),exit(1))) + #else /* Things only defined for the modules core, not exported to modules From dd405d4026274d139216deef695f99df91ee69bd Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Wed, 30 Nov 2016 21:47:02 +0200 Subject: [PATCH 010/304] Add RedisModule_GetKeyNameFromIO(). --- src/aof.c | 2 +- src/cluster.c | 10 +++++----- src/module.c | 9 +++++++++ src/rdb.c | 14 +++++++------- src/rdb.h | 4 ++-- src/redis-check-rdb.c | 2 +- src/redismodule.h | 2 ++ src/server.h | 4 +++- 8 files changed, 30 insertions(+), 17 deletions(-) diff --git a/src/aof.c b/src/aof.c index cafcf961c..615eebd01 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1239,7 +1239,7 @@ int rewriteModuleObject(rio *r, robj *key, robj *o) { RedisModuleIO io; moduleValue *mv = o->ptr; moduleType *mt = mv->type; - moduleInitIOContext(io,mt,r); + moduleInitIOContext(io,mt,r,key); mt->aof_rewrite(&io,key,mv->value); if (io.ctx) { moduleFreeContext(io.ctx); diff --git a/src/cluster.c b/src/cluster.c index 50a9ae687..c85e3791d 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -4776,7 +4776,7 @@ NULL /* Generates a DUMP-format representation of the object 'o', adding it to the * io stream pointed by 'rio'. This function can't fail. */ -void createDumpPayload(rio *payload, robj *o) { +void createDumpPayload(rio *payload, robj *o, robj *key) { unsigned char buf[2]; uint64_t crc; @@ -4784,7 +4784,7 @@ void createDumpPayload(rio *payload, robj *o) { * byte followed by the serialized object. This is understood by RESTORE. 
*/ rioInitWithBuffer(payload,sdsempty()); serverAssert(rdbSaveObjectType(payload,o)); - serverAssert(rdbSaveObject(payload,o)); + serverAssert(rdbSaveObject(payload,o,key)); /* Write the footer, this is how it looks like: * ----------------+---------------------+---------------+ @@ -4842,7 +4842,7 @@ void dumpCommand(client *c) { } /* Create the DUMP encoded representation. */ - createDumpPayload(&payload,o); + createDumpPayload(&payload,o,c->argv[1]); /* Transfer to the client */ dumpobj = createObject(OBJ_STRING,payload.io.buffer.ptr); @@ -4915,7 +4915,7 @@ void restoreCommand(client *c) { rioInitWithBuffer(&payload,c->argv[3]->ptr); if (((type = rdbLoadObjectType(&payload)) == -1) || - ((obj = rdbLoadObject(type,&payload)) == NULL)) + ((obj = rdbLoadObject(type,&payload,c->argv[1])) == NULL)) { addReplyError(c,"Bad data format"); return; @@ -5203,7 +5203,7 @@ try_again: /* Emit the payload argument, that is the serialized object using * the DUMP format. */ - createDumpPayload(&payload,ov[j]); + createDumpPayload(&payload,ov[j],kv[j]); serverAssertWithInfo(c,NULL, rioWriteBulkString(&cmd,payload.io.buffer.ptr, sdslen(payload.io.buffer.ptr))); diff --git a/src/module.c b/src/module.c index e69d3dc61..e1ffd7313 100644 --- a/src/module.c +++ b/src/module.c @@ -3438,6 +3438,14 @@ RedisModuleCtx *RM_GetContextFromIO(RedisModuleIO *io) { return io->ctx; } +/* Returns a RedisModuleString with the name of the key currently saving or + * loading, when an IO data type callback is called. There is no guarantee + * that the key name is always available, so this may return NULL. + */ +const RedisModuleString *RM_GetKeyNameFromIO(RedisModuleIO *io) { + return io->key; +} + /* -------------------------------------------------------------------------- * Logging * -------------------------------------------------------------------------- */ @@ -5164,6 +5172,7 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(RetainString); REGISTER_API(StringCompare); REGISTER_API(GetContextFromIO); + REGISTER_API(GetKeyNameFromIO); REGISTER_API(BlockClient); REGISTER_API(UnblockClient); REGISTER_API(IsBlockedReplyRequest); diff --git a/src/rdb.c b/src/rdb.c index 52dddf210..95e4766ea 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -751,7 +751,7 @@ size_t rdbSaveStreamConsumers(rio *rdb, streamCG *cg) { /* Save a Redis object. * Returns -1 on error, number of bytes written on success. */ -ssize_t rdbSaveObject(rio *rdb, robj *o) { +ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key) { ssize_t n = 0, nwritten = 0; if (o->type == OBJ_STRING) { @@ -966,7 +966,7 @@ ssize_t rdbSaveObject(rio *rdb, robj *o) { RedisModuleIO io; moduleValue *mv = o->ptr; moduleType *mt = mv->type; - moduleInitIOContext(io,mt,rdb); + moduleInitIOContext(io,mt,rdb,key); /* Write the "module" identifier as prefix, so that we'll be able * to call the right module during loading. */ @@ -996,7 +996,7 @@ ssize_t rdbSaveObject(rio *rdb, robj *o) { * this length with very little changes to the code. In the future * we could switch to a faster solution. 
*/ size_t rdbSavedObjectLen(robj *o) { - ssize_t len = rdbSaveObject(NULL,o); + ssize_t len = rdbSaveObject(NULL,o,NULL); serverAssertWithInfo(NULL,o,len != -1); return len; } @@ -1038,7 +1038,7 @@ int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val, long long expiretime) { /* Save type, key, value */ if (rdbSaveObjectType(rdb,val) == -1) return -1; if (rdbSaveStringObject(rdb,key) == -1) return -1; - if (rdbSaveObject(rdb,val) == -1) return -1; + if (rdbSaveObject(rdb,val,key) == -1) return -1; return 1; } @@ -1380,7 +1380,7 @@ robj *rdbLoadCheckModuleValue(rio *rdb, char *modulename) { /* Load a Redis object of the specified type from the specified file. * On success a newly allocated object is returned, otherwise NULL. */ -robj *rdbLoadObject(int rdbtype, rio *rdb) { +robj *rdbLoadObject(int rdbtype, rio *rdb, robj *key) { robj *o = NULL, *ele, *dec; uint64_t len; unsigned int i; @@ -1767,7 +1767,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb) { exit(1); } RedisModuleIO io; - moduleInitIOContext(io,mt,rdb); + moduleInitIOContext(io,mt,rdb,key); io.ver = (rdbtype == RDB_TYPE_MODULE) ? 1 : 2; /* Call the rdb_load method of the module providing the 10 bit * encoding version in the lower 10 bits of the module ID. */ @@ -2023,7 +2023,7 @@ int rdbLoadRio(rio *rdb, rdbSaveInfo *rsi, int loading_aof) { /* Read key */ if ((key = rdbLoadStringObject(rdb)) == NULL) goto eoferr; /* Read value */ - if ((val = rdbLoadObject(type,rdb)) == NULL) goto eoferr; + if ((val = rdbLoadObject(type,rdb,key)) == NULL) goto eoferr; /* Check if the key already expired. This function is used when loading * an RDB file from disk, either at startup, or when an RDB was * received from the master. In the latter case, the master is diff --git a/src/rdb.h b/src/rdb.h index 7b9486169..0acddf9ab 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -140,9 +140,9 @@ int rdbSaveBackground(char *filename, rdbSaveInfo *rsi); int rdbSaveToSlavesSockets(rdbSaveInfo *rsi); void rdbRemoveTempFile(pid_t childpid); int rdbSave(char *filename, rdbSaveInfo *rsi); -ssize_t rdbSaveObject(rio *rdb, robj *o); +ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key); size_t rdbSavedObjectLen(robj *o); -robj *rdbLoadObject(int type, rio *rdb); +robj *rdbLoadObject(int type, rio *rdb, robj *key); void backgroundSaveDoneHandler(int exitcode, int bysignal); int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val, long long expiretime); robj *rdbLoadStringObject(rio *rdb); diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c index 8de1d8f48..ec00ee71c 100644 --- a/src/redis-check-rdb.c +++ b/src/redis-check-rdb.c @@ -285,7 +285,7 @@ int redis_check_rdb(char *rdbfilename, FILE *fp) { rdbstate.keys++; /* Read value */ rdbstate.doing = RDB_CHECK_DOING_READ_OBJECT_VALUE; - if ((val = rdbLoadObject(type,&rdb)) == NULL) goto eoferr; + if ((val = rdbLoadObject(type,&rdb,key)) == NULL) goto eoferr; /* Check if the key already expired. 
*/ if (expiretime != -1 && expiretime < now) rdbstate.already_expired++; diff --git a/src/redismodule.h b/src/redismodule.h index 272da08df..02941aa96 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -278,6 +278,7 @@ int REDISMODULE_API_FUNC(RedisModule_StringAppendBuffer)(RedisModuleCtx *ctx, Re void REDISMODULE_API_FUNC(RedisModule_RetainString)(RedisModuleCtx *ctx, RedisModuleString *str); int REDISMODULE_API_FUNC(RedisModule_StringCompare)(RedisModuleString *a, RedisModuleString *b); RedisModuleCtx *REDISMODULE_API_FUNC(RedisModule_GetContextFromIO)(RedisModuleIO *io); +const RedisModuleString *REDISMODULE_API_FUNC(RedisModule_GetKeyNameFromIO)(RedisModuleIO *io); long long REDISMODULE_API_FUNC(RedisModule_Milliseconds)(void); void REDISMODULE_API_FUNC(RedisModule_DigestAddStringBuffer)(RedisModuleDigest *md, unsigned char *ele, size_t len); void REDISMODULE_API_FUNC(RedisModule_DigestAddLongLong)(RedisModuleDigest *md, long long ele); @@ -442,6 +443,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(RetainString); REDISMODULE_GET_API(StringCompare); REDISMODULE_GET_API(GetContextFromIO); + REDISMODULE_GET_API(GetKeyNameFromIO); REDISMODULE_GET_API(Milliseconds); REDISMODULE_GET_API(DigestAddStringBuffer); REDISMODULE_GET_API(DigestAddLongLong); diff --git a/src/server.h b/src/server.h index 56c3b67d3..b888266a4 100644 --- a/src/server.h +++ b/src/server.h @@ -578,16 +578,18 @@ typedef struct RedisModuleIO { int ver; /* Module serialization version: 1 (old), * 2 (current version with opcodes annotation). */ struct RedisModuleCtx *ctx; /* Optional context, see RM_GetContextFromIO()*/ + struct redisObject *key; /* Optional name of key processed */ } RedisModuleIO; /* Macro to initialize an IO context. Note that the 'ver' field is populated * inside rdb.c according to the version of the value to load. */ -#define moduleInitIOContext(iovar,mtype,rioptr) do { \ +#define moduleInitIOContext(iovar,mtype,rioptr,keyptr) do { \ iovar.rio = rioptr; \ iovar.type = mtype; \ iovar.bytes = 0; \ iovar.error = 0; \ iovar.ver = 0; \ + iovar.key = keyptr; \ iovar.ctx = NULL; \ } while(0); From 218091d2e4eb8704d2caccc8f30d12dee9435e2f Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 16 Mar 2019 09:15:12 +0100 Subject: [PATCH 011/304] HyperLogLog: fix comment in hllCount(). --- src/hyperloglog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 1e7ce3dce..e01ea6042 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -1014,8 +1014,8 @@ uint64_t hllCount(struct hllhdr *hdr, int *invalid) { double m = HLL_REGISTERS; double E; int j; - /* Note that reghisto could be just HLL_Q+1, becuase this is the - * maximum frequency of the "000...1" sequence the hash function is + /* Note that reghisto size could be just HLL_Q+2, becuase HLL_Q+1 is + * the maximum frequency of the "000...1" sequence the hash function is * able to return. However it is slow to check for sanity of the * input: instead we history array at a safe size: overflows will * just write data to wrong, but correctly allocated, places. */ From 71d9f4e59606270fb8862803ded3f0bfdd7bb03b Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 18 Mar 2019 11:15:39 +0100 Subject: [PATCH 012/304] redis-check-aof: fix potential overflow. Bug signaled by @vattezhang in PR #5940 but fixed differently. 
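The change below bounds both formatting steps of the ERROR macro with snprintf(), so an oversized message gets truncated instead of being written past the fixed 1024-byte buffers. A small self-contained illustration of the difference; the 64-byte buffer and the message text are invented for the example:

    #include <stdio.h>

    int main(void) {
        char error[64];   /* stand-in for the fixed-size buffers used by the macro */
        const char *msg = "opcode not valid, followed by a deliberately long "
                          "description that cannot fit in the destination buffer";

        /* sprintf(error, "0x%16llx: %s", 0ULL, msg);  unbounded: writes past 'error' */

        /* snprintf() writes at most sizeof(error) bytes including the terminating
         * NUL, so the long message is truncated rather than overflowing. */
        snprintf(error, sizeof(error), "0x%16llx: %s", 0ULL, msg);
        puts(error);
        return 0;
    }
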
--- src/redis-check-aof.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/redis-check-aof.c b/src/redis-check-aof.c index c4d5a225e..54ed85f0d 100644 --- a/src/redis-check-aof.c +++ b/src/redis-check-aof.c @@ -33,8 +33,8 @@ #define ERROR(...) { \ char __buf[1024]; \ - sprintf(__buf, __VA_ARGS__); \ - sprintf(error, "0x%16llx: %s", (long long)epos, __buf); \ + snprintf(__buf, sizeof(__buf), __VA_ARGS__); \ + snprintf(error, sizeof(error), "0x%16llx: %s", (long long)epos, __buf); \ } static char error[1024]; From 340d03b64bf59a6c0a79a86a51e823397d5a9b57 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 18 Mar 2019 11:34:40 +0100 Subject: [PATCH 013/304] replicaofCommand() refactoring: stay into 80 cols. --- src/replication.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/replication.c b/src/replication.c index 3c30999af..f2adc7995 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2053,8 +2053,11 @@ void replicaofCommand(client *c) { /* Check if we are already attached to the specified slave */ if (server.masterhost && !strcasecmp(server.masterhost,c->argv[1]->ptr) && server.masterport == port) { - serverLog(LL_NOTICE,"REPLICAOF would result into synchronization with the master we are already connected with. No operation performed."); - addReplySds(c,sdsnew("+OK Already connected to specified master\r\n")); + serverLog(LL_NOTICE,"REPLICAOF would result into synchronization " + "with the master we are already connected " + "with. No operation performed."); + addReplySds(c,sdsnew("+OK Already connected to specified " + "master\r\n")); return; } /* There was no previous master or the user specified a different one, From bb6e8ba6829eab24444d6fe6950160d05be7d96c Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Fri, 23 Feb 2018 16:19:37 +0200 Subject: [PATCH 014/304] Initial command filter experiment. --- src/module.c | 76 +++++++++++++++++++++++++++++++++++++++ src/modules/Makefile | 6 +++- src/modules/hellofilter.c | 69 +++++++++++++++++++++++++++++++++++ src/redismodule.h | 8 +++++ src/server.c | 2 ++ src/server.h | 2 +- 6 files changed, 161 insertions(+), 2 deletions(-) create mode 100644 src/modules/hellofilter.c diff --git a/src/module.c b/src/module.c index e69d3dc61..1780342ed 100644 --- a/src/module.c +++ b/src/module.c @@ -270,6 +270,28 @@ typedef struct RedisModuleDictIter { raxIterator ri; } RedisModuleDictIter; +/* Information about the command to be executed, as passed to and from a + * filter. 
*/ +typedef struct RedisModuleFilteredCommand { + RedisModuleString **argv; + int argc; +} RedisModuleFilteredCommand; + +typedef void (*RedisModuleCommandFilterFunc) (RedisModuleCtx *ctx, RedisModuleFilteredCommand *cmd); + +typedef struct RedisModuleCommandFilter { + /* The module that registered the filter */ + RedisModule *module; + /* Filter callback function */ + RedisModuleCommandFilterFunc callback; + /* Indicates a filter is active, avoid reentrancy */ + int active; +} RedisModuleCommandFilter; + +/* Registered filters */ +static list *moduleCommandFilters; + + /* -------------------------------------------------------------------------- * Prototypes * -------------------------------------------------------------------------- */ @@ -4770,6 +4792,56 @@ int moduleUnregisterUsedAPI(RedisModule *module) { return count; } +/* -------------------------------------------------------------------------- + * Module Command Filter API + * -------------------------------------------------------------------------- */ + +/* Register a new command filter function. Filters get executed by Redis + * before processing an inbound command and can be used to manipulate the + * behavior of standard Redis commands. Filters must not attempt to + * perform Redis commands or operate on the dataset, and must restrict + * themselves to manipulation of the arguments. + */ + +int RM_RegisterCommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc callback) { + RedisModuleCommandFilter *filter = zmalloc(sizeof(*filter)); + filter->module = ctx->module; + filter->callback = callback; + filter->active = 0; + + listAddNodeTail(moduleCommandFilters, filter); + return REDISMODULE_OK; +} + +void moduleCallCommandFilters(client *c) { + if (listLength(moduleCommandFilters) == 0) return; + + listIter li; + listNode *ln; + listRewind(moduleCommandFilters,&li); + + RedisModuleFilteredCommand cmd = { + .argv = c->argv, + .argc = c->argc + }; + + while((ln = listNext(&li))) { + RedisModuleCommandFilter *filter = ln->value; + if (filter->active) continue; + + RedisModuleCtx ctx = REDISMODULE_CTX_INIT; + ctx.module = filter->module; + + filter->active = 1; + filter->callback(&ctx, &cmd); + filter->active = 0; + moduleFreeContext(&ctx); + } + + c->argv = cmd.argv; + c->argc = cmd.argc; +} + /* -------------------------------------------------------------------------- * Modules API internals * -------------------------------------------------------------------------- */ @@ -4816,6 +4888,9 @@ void moduleInitModulesSystem(void) { moduleFreeContextReusedClient->flags |= CLIENT_MODULE; moduleFreeContextReusedClient->user = NULL; /* root user. */ + /* Set up filter list */ + moduleCommandFilters = listCreate(); + moduleRegisterCoreAPI(); if (pipe(server.module_blocked_pipe) == -1) { serverLog(LL_WARNING, @@ -5219,4 +5294,5 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(DictCompare); REGISTER_API(ExportSharedAPI); REGISTER_API(GetSharedAPI); + REGISTER_API(RegisterCommandFilter); } diff --git a/src/modules/Makefile b/src/modules/Makefile index 51ffac17d..537aa0daf 100644 --- a/src/modules/Makefile +++ b/src/modules/Makefile @@ -13,7 +13,7 @@ endif .SUFFIXES: .c .so .xo .o -all: helloworld.so hellotype.so helloblock.so testmodule.so hellocluster.so hellotimer.so hellodict.so +all: helloworld.so hellotype.so helloblock.so testmodule.so hellocluster.so hellotimer.so hellodict.so hellofilter.so .c.xo: $(CC) -I. 
$(CFLAGS) $(SHOBJ_CFLAGS) -fPIC -c $< -o $@ @@ -46,6 +46,10 @@ hellotimer.so: hellotimer.xo hellodict.xo: ../redismodule.h hellodict.so: hellodict.xo + +hellofilter.xo: ../redismodule.h + +hellofilter.so: hellofilter.xo $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lc testmodule.xo: ../redismodule.h diff --git a/src/modules/hellofilter.c b/src/modules/hellofilter.c new file mode 100644 index 000000000..c9e33158f --- /dev/null +++ b/src/modules/hellofilter.c @@ -0,0 +1,69 @@ +#define REDISMODULE_EXPERIMENTAL_API +#include "../redismodule.h" + +static RedisModuleString *log_key_name; + +static const char log_command_name[] = "hellofilter.log"; + +int HelloFilter_LogCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) +{ + RedisModuleString *s = RedisModule_CreateStringFromString(ctx, argv[0]); + + int i; + for (i = 1; i < argc; i++) { + size_t arglen; + const char *arg = RedisModule_StringPtrLen(argv[i], &arglen); + + RedisModule_StringAppendBuffer(ctx, s, " ", 1); + RedisModule_StringAppendBuffer(ctx, s, arg, arglen); + } + + RedisModuleKey *log = RedisModule_OpenKey(ctx, log_key_name, REDISMODULE_WRITE|REDISMODULE_READ); + RedisModule_ListPush(log, REDISMODULE_LIST_HEAD, s); + RedisModule_CloseKey(log); + RedisModule_FreeString(ctx, s); + + size_t cmdlen; + const char *cmdname = RedisModule_StringPtrLen(argv[1], &cmdlen); + RedisModuleCallReply *reply = RedisModule_Call(ctx, cmdname, "v", &argv[2], argc - 2); + if (reply) { + RedisModule_ReplyWithCallReply(ctx, reply); + RedisModule_FreeCallReply(reply); + } else { + RedisModule_ReplyWithSimpleString(ctx, "Unknown command or invalid arguments"); + } + return REDISMODULE_OK; +} + +void HelloFilter_CommandFilter(RedisModuleCtx *ctx, RedisModuleFilteredCommand *cmd) +{ + cmd->argv = RedisModule_Realloc(cmd->argv, (cmd->argc+1)*sizeof(RedisModuleString *)); + int i; + + for (i = cmd->argc; i > 0; i--) { + cmd->argv[i] = cmd->argv[i-1]; + } + cmd->argv[0] = RedisModule_CreateString(ctx, log_command_name, sizeof(log_command_name)-1); + cmd->argc++; +} + +int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + if (RedisModule_Init(ctx,"hellofilter",1,REDISMODULE_APIVER_1) + == REDISMODULE_ERR) return REDISMODULE_ERR; + + if (argc != 1) { + RedisModule_Log(ctx, "warning", "Log key name not specified"); + return REDISMODULE_ERR; + } + + log_key_name = RedisModule_CreateStringFromString(ctx, argv[0]); + + if (RedisModule_CreateCommand(ctx,log_command_name, + HelloFilter_LogCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + + if (RedisModule_RegisterCommandFilter(ctx, HelloFilter_CommandFilter) + == REDISMODULE_ERR) return REDISMODULE_ERR; + + return REDISMODULE_OK; +} diff --git a/src/redismodule.h b/src/redismodule.h index 272da08df..54ce99d96 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -163,6 +163,12 @@ typedef void (*RedisModuleTypeFreeFunc)(void *value); typedef void (*RedisModuleClusterMessageReceiver)(RedisModuleCtx *ctx, const char *sender_id, uint8_t type, const unsigned char *payload, uint32_t len); typedef void (*RedisModuleTimerProc)(RedisModuleCtx *ctx, void *data); +typedef struct RedisModuleFilteredCommand { + RedisModuleString **argv; + int argc; +} RedisModuleFilteredCommand; +typedef void (*RedisModuleCommandFilterFunc) (RedisModuleCtx *ctx, RedisModuleFilteredCommand *cmd); + #define REDISMODULE_TYPE_METHOD_VERSION 1 typedef struct RedisModuleTypeMethods { uint64_t version; @@ -337,6 +343,7 @@ void 
REDISMODULE_API_FUNC(RedisModule_SetDisconnectCallback)(RedisModuleBlockedC void REDISMODULE_API_FUNC(RedisModule_SetClusterFlags)(RedisModuleCtx *ctx, uint64_t flags); int REDISMODULE_API_FUNC(RedisModule_ExportSharedAPI)(RedisModuleCtx *ctx, const char *apiname, void *func); void *REDISMODULE_API_FUNC(RedisModule_GetSharedAPI)(RedisModuleCtx *ctx, const char *apiname); +int REDISMODULE_API_FUNC(RedisModule_RegisterCommandFilter)(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc cb); #endif /* This is included inline inside each Redis module. */ @@ -499,6 +506,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(SetClusterFlags); REDISMODULE_GET_API(ExportSharedAPI); REDISMODULE_GET_API(GetSharedAPI); + REDISMODULE_GET_API(RegisterCommandFilter); #endif if (RedisModule_IsModuleNameBusy && RedisModule_IsModuleNameBusy(name)) return REDISMODULE_ERR; diff --git a/src/server.c b/src/server.c index 712cda1bd..66e79dea3 100644 --- a/src/server.c +++ b/src/server.c @@ -3268,6 +3268,8 @@ void call(client *c, int flags) { * other operations can be performed by the caller. Otherwise * if C_ERR is returned the client was destroyed (i.e. after QUIT). */ int processCommand(client *c) { + moduleCallCommandFilters(c); + /* The QUIT command is handled separately. Normal command procs will * go through checking for replication and QUIT will cause trouble * when FORCE_REPLICATION is enabled and would be implemented in diff --git a/src/server.h b/src/server.h index 56c3b67d3..f55213bfc 100644 --- a/src/server.h +++ b/src/server.h @@ -1489,7 +1489,7 @@ size_t moduleCount(void); void moduleAcquireGIL(void); void moduleReleaseGIL(void); void moduleNotifyKeyspaceEvent(int type, const char *event, robj *key, int dbid); - +void moduleCallCommandFilters(client *c); /* Utils */ long long ustime(void); From a3b442eb5daece99f5ac52a167ab455d302eda43 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 18 Mar 2019 15:38:43 +0100 Subject: [PATCH 015/304] MANIFESTO v2. --- MANIFESTO | 47 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/MANIFESTO b/MANIFESTO index 2b719057e..d43a58893 100644 --- a/MANIFESTO +++ b/MANIFESTO @@ -34,7 +34,21 @@ Redis Manifesto so that the complexity is obvious and more complex operations can be performed as the sum of the basic operations. -4 - Code is like a poem; it's not just something we write to reach some +4 - We believe in code efficiency. Computers get faster and faster, yet we + believe that abusing computing capabilities is not wise: the amount of + operations you can do for a given amount of energy remains anyway a + significant parameter: it allows to do more with less computers and, at + the same time, having a smaller environmental impact. Similarly Redis is + able to "scale down" to smaller devices. It is perfectly usable in a + Raspberry Pi and other small ARM based computers. Faster code having + just the layers of abstractions that are really needed will also result, + often, in more predictable performances. We think likewise about memory + usage, one of the fundamental goals of the Redis project is to + incrementally build more and more memory efficient data structures, so that + problems that were not approachable in RAM in the past will be perfectly + fine to handle in the future. + +5 - Code is like a poem; it's not just something we write to reach some practical result. 
Sometimes people that are far from the Redis philosophy suggest using other code written by other authors (frequently in other languages) in order to implement something Redis currently lacks. But to us @@ -45,23 +59,44 @@ Redis Manifesto when needed. At the same time, when writing the Redis story we're trying to write smaller stories that will fit in to other code. -5 - We're against complexity. We believe designing systems is a fight against +6 - We're against complexity. We believe designing systems is a fight against complexity. We'll accept to fight the complexity when it's worthwhile but we'll try hard to recognize when a small feature is not worth 1000s of lines of code. Most of the time the best way to fight complexity is by not creating it at all. -6 - Two levels of API. The Redis API has two levels: 1) a subset of the API fits +7 - Threading is not a silver bullet. Instead of making Redis threaded we + believe on the idea of an efficient (mostly) single threaded Redis core. + Multiple of such cores, that may run in the same computer or may run + in multiple computers, are abstracted away as a single big system by + higher order protocols and features: Redis Cluster and the upcoming + Redis Proxy are our main goals. A shared nothing approach is not just + much simpler (see the previous point in this document), is also optimal + in NUMA systems. In the specific case of Redis it allows for each instance + to have a more limited amount of data, making the Redis persist-by-fork + approach more sounding. In the future we may explore parallelism only for + I/O, which is the low hanging fruit: minimal complexity could provide an + improved single process experience. + +8 - Two levels of API. The Redis API has two levels: 1) a subset of the API fits naturally into a distributed version of Redis and 2) a more complex API that supports multi-key operations. Both are useful if used judiciously but there's no way to make the more complex multi-keys API distributed in an opaque way without violating our other principles. We don't want to provide the illusion of something that will work magically when actually it can't in all cases. Instead we'll provide commands to quickly migrate keys from one - instance to another to perform multi-key operations and expose the tradeoffs - to the user. + instance to another to perform multi-key operations and expose the + trade-offs to the user. -7 - We optimize for joy. We believe writing code is a lot of hard work, and the +9 - We optimize for joy. We believe writing code is a lot of hard work, and the only way it can be worth is by enjoying it. When there is no longer joy in writing code, the best thing to do is stop. To prevent this, we'll avoid taking paths that will make Redis less of a joy to develop. + +10 - All the above points are put together in what we call opportunistic + programming: trying to get the most for the user with minimal increases + in complexity (hanging fruits). Solve 95% of the problem with 5% of the + code when it is acceptable. Avoid a fixed schedule but follow the flow of + user requests, inspiration, Redis internal readiness for certain features + (sometimes many past changes reach a critical point making a previously + complex feature very easy to obtain). From a40a075ada7246b858bda75c8dca4a05f0bcf8b8 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 18 Mar 2019 15:49:52 +0100 Subject: [PATCH 016/304] MANIFESTO: simplicity and lock-in. 
--- MANIFESTO | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/MANIFESTO b/MANIFESTO index d43a58893..372789462 100644 --- a/MANIFESTO +++ b/MANIFESTO @@ -63,7 +63,11 @@ Redis Manifesto complexity. We'll accept to fight the complexity when it's worthwhile but we'll try hard to recognize when a small feature is not worth 1000s of lines of code. Most of the time the best way to fight complexity is by not - creating it at all. + creating it at all. Complexity is also a form of lock-in: code that is + very hard to understand cannot be modified by users in an independent way + regardless of the license. One of the main Redis goals is to remain + understandable, enough for a single programmer to have a clear idea of how + it works in detail just reading the source code for a couple of weeks. 7 - Threading is not a silver bullet. Instead of making Redis threaded we believe on the idea of an efficient (mostly) single threaded Redis core. From bc47c987d68ade4f481122cfd6cec639dbba91ad Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Mon, 18 Mar 2019 18:36:46 +0200 Subject: [PATCH 017/304] Add command filtering argument handling API. --- src/module.c | 81 +++++++++++++++++++++++++++++++++++++++ src/modules/hellofilter.c | 46 ++++++++++++++++++---- src/redismodule.h | 18 ++++++--- 3 files changed, 132 insertions(+), 13 deletions(-) diff --git a/src/module.c b/src/module.c index 1780342ed..741c546b7 100644 --- a/src/module.c +++ b/src/module.c @@ -291,6 +291,10 @@ typedef struct RedisModuleCommandFilter { /* Registered filters */ static list *moduleCommandFilters; +typedef struct RedisModuleCommandFilterCtx { + RedisModuleString **argv; + int argc; +} RedisModuleCommandFilterCtx; /* -------------------------------------------------------------------------- * Prototypes @@ -4842,6 +4846,78 @@ void moduleCallCommandFilters(client *c) { c->argc = cmd.argc; } +/* Return the number of arguments a filtered command has. The number of + * arguments include the command itself. + */ +int RM_CommandFilterArgsCount(RedisModuleCommandFilterCtx *filter) +{ + return filter->argc; +} + +/* Return the specified command argument. The first argument (position 0) is + * the command itself, and the rest are user-provided args. + */ +const RedisModuleString *RM_CommandFilterArgGet(RedisModuleCommandFilterCtx *filter, int pos) +{ + if (pos < 0 || pos >= filter->argc) return NULL; + return filter->argv[pos]; +} + +/* Modify the filtered command by inserting a new argument at the specified + * position. The specified RedisModuleString argument may be used by Redis + * after the filter context is destroyed, so it must not be auto-memory + * allocated, freed or used elsewhere. + */ + +int RM_CommandFilterArgInsert(RedisModuleCommandFilterCtx *filter, int pos, RedisModuleString *arg) +{ + int i; + + if (pos < 0 || pos > filter->argc) return REDISMODULE_ERR; + + filter->argv = zrealloc(filter->argv, (filter->argc+1)*sizeof(RedisModuleString *)); + for (i = filter->argc; i > pos; i--) { + filter->argv[i] = filter->argv[i-1]; + } + filter->argv[pos] = arg; + filter->argc++; + + return REDISMODULE_OK; +} + +/* Modify the filtered command by replacing an existing argument with a new one. + * The specified RedisModuleString argument may be used by Redis after the + * filter context is destroyed, so it must not be auto-memory allocated, freed + * or used elsewhere. 
+ */ + +int RM_CommandFilterArgReplace(RedisModuleCommandFilterCtx *filter, int pos, RedisModuleString *arg) +{ + if (pos < 0 || pos >= filter->argc) return REDISMODULE_ERR; + + decrRefCount(filter->argv[pos]); + filter->argv[pos] = arg; + + return REDISMODULE_OK; +} + +/* Modify the filtered command by deleting an argument at the specified + * position. + */ +int RM_CommandFilterArgDelete(RedisModuleCommandFilterCtx *filter, int pos) +{ + int i; + if (pos < 0 || pos >= filter->argc) return REDISMODULE_ERR; + + decrRefCount(filter->argv[pos]); + for (i = pos; i < filter->argc-1; i++) { + filter->argv[i] = filter->argv[i+1]; + } + filter->argc--; + + return REDISMODULE_OK; +} + /* -------------------------------------------------------------------------- * Modules API internals * -------------------------------------------------------------------------- */ @@ -5295,4 +5371,9 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(ExportSharedAPI); REGISTER_API(GetSharedAPI); REGISTER_API(RegisterCommandFilter); + REGISTER_API(CommandFilterArgsCount); + REGISTER_API(CommandFilterArgGet); + REGISTER_API(CommandFilterArgInsert); + REGISTER_API(CommandFilterArgReplace); + REGISTER_API(CommandFilterArgDelete); } diff --git a/src/modules/hellofilter.c b/src/modules/hellofilter.c index c9e33158f..84eb02c30 100644 --- a/src/modules/hellofilter.c +++ b/src/modules/hellofilter.c @@ -1,6 +1,8 @@ #define REDISMODULE_EXPERIMENTAL_API #include "../redismodule.h" +#include + static RedisModuleString *log_key_name; static const char log_command_name[] = "hellofilter.log"; @@ -35,16 +37,46 @@ int HelloFilter_LogCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int ar return REDISMODULE_OK; } -void HelloFilter_CommandFilter(RedisModuleCtx *ctx, RedisModuleFilteredCommand *cmd) +void HelloFilter_CommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilterCtx *filter) { - cmd->argv = RedisModule_Realloc(cmd->argv, (cmd->argc+1)*sizeof(RedisModuleString *)); - int i; + (void) ctx; - for (i = cmd->argc; i > 0; i--) { - cmd->argv[i] = cmd->argv[i-1]; + /* Fun manipulations: + * - Remove @delme + * - Replace @replaceme + * - Append @insertbefore or @insertafter + * - Prefix with Log command if @log encounterd + */ + int log = 0; + int pos = 0; + while (pos < RedisModule_CommandFilterArgsCount(filter)) { + const RedisModuleString *arg = RedisModule_CommandFilterArgGet(filter, pos); + size_t arg_len; + const char *arg_str = RedisModule_StringPtrLen(arg, &arg_len); + + if (arg_len == 6 && !memcmp(arg_str, "@delme", 6)) { + RedisModule_CommandFilterArgDelete(filter, pos); + continue; + } + if (arg_len == 10 && !memcmp(arg_str, "@replaceme", 10)) { + RedisModule_CommandFilterArgReplace(filter, pos, + RedisModule_CreateString(NULL, "--replaced--", 12)); + } else if (arg_len == 13 && !memcmp(arg_str, "@insertbefore", 13)) { + RedisModule_CommandFilterArgInsert(filter, pos, + RedisModule_CreateString(NULL, "--inserted-before--", 19)); + pos++; + } else if (arg_len == 12 && !memcmp(arg_str, "@insertafter", 12)) { + RedisModule_CommandFilterArgInsert(filter, pos + 1, + RedisModule_CreateString(NULL, "--inserted-after--", 18)); + pos++; + } else if (arg_len == 4 && !memcmp(arg_str, "@log", 4)) { + log = 1; + } + pos++; } - cmd->argv[0] = RedisModule_CreateString(ctx, log_command_name, sizeof(log_command_name)-1); - cmd->argc++; + + if (log) RedisModule_CommandFilterArgInsert(filter, 0, + RedisModule_CreateString(NULL, log_command_name, sizeof(log_command_name)-1)); } int RedisModule_OnLoad(RedisModuleCtx *ctx, 
RedisModuleString **argv, int argc) { diff --git a/src/redismodule.h b/src/redismodule.h index 54ce99d96..426a6df69 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -150,6 +150,7 @@ typedef struct RedisModuleBlockedClient RedisModuleBlockedClient; typedef struct RedisModuleClusterInfo RedisModuleClusterInfo; typedef struct RedisModuleDict RedisModuleDict; typedef struct RedisModuleDictIter RedisModuleDictIter; +typedef struct RedisModuleCommandFilterCtx RedisModuleCommandFilterCtx; typedef int (*RedisModuleCmdFunc)(RedisModuleCtx *ctx, RedisModuleString **argv, int argc); typedef void (*RedisModuleDisconnectFunc)(RedisModuleCtx *ctx, RedisModuleBlockedClient *bc); @@ -162,12 +163,7 @@ typedef void (*RedisModuleTypeDigestFunc)(RedisModuleDigest *digest, void *value typedef void (*RedisModuleTypeFreeFunc)(void *value); typedef void (*RedisModuleClusterMessageReceiver)(RedisModuleCtx *ctx, const char *sender_id, uint8_t type, const unsigned char *payload, uint32_t len); typedef void (*RedisModuleTimerProc)(RedisModuleCtx *ctx, void *data); - -typedef struct RedisModuleFilteredCommand { - RedisModuleString **argv; - int argc; -} RedisModuleFilteredCommand; -typedef void (*RedisModuleCommandFilterFunc) (RedisModuleCtx *ctx, RedisModuleFilteredCommand *cmd); +typedef void (*RedisModuleCommandFilterFunc) (RedisModuleCtx *ctx, RedisModuleCommandFilterCtx *filter); #define REDISMODULE_TYPE_METHOD_VERSION 1 typedef struct RedisModuleTypeMethods { @@ -344,6 +340,11 @@ void REDISMODULE_API_FUNC(RedisModule_SetClusterFlags)(RedisModuleCtx *ctx, uint int REDISMODULE_API_FUNC(RedisModule_ExportSharedAPI)(RedisModuleCtx *ctx, const char *apiname, void *func); void *REDISMODULE_API_FUNC(RedisModule_GetSharedAPI)(RedisModuleCtx *ctx, const char *apiname); int REDISMODULE_API_FUNC(RedisModule_RegisterCommandFilter)(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc cb); +int REDISMODULE_API_FUNC(RedisModule_CommandFilterArgsCount)(RedisModuleCommandFilterCtx *filter); +const RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CommandFilterArgGet)(RedisModuleCommandFilterCtx *filter, int pos); +int REDISMODULE_API_FUNC(RedisModule_CommandFilterArgInsert)(RedisModuleCommandFilterCtx *filter, int pos, RedisModuleString *arg); +int REDISMODULE_API_FUNC(RedisModule_CommandFilterArgReplace)(RedisModuleCommandFilterCtx *filter, int pos, RedisModuleString *arg); +int REDISMODULE_API_FUNC(RedisModule_CommandFilterArgDelete)(RedisModuleCommandFilterCtx *filter, int pos); #endif /* This is included inline inside each Redis module. */ @@ -507,6 +508,11 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(ExportSharedAPI); REDISMODULE_GET_API(GetSharedAPI); REDISMODULE_GET_API(RegisterCommandFilter); + REDISMODULE_GET_API(CommandFilterArgsCount); + REDISMODULE_GET_API(CommandFilterArgGet); + REDISMODULE_GET_API(CommandFilterArgInsert); + REDISMODULE_GET_API(CommandFilterArgReplace); + REDISMODULE_GET_API(CommandFilterArgDelete); #endif if (RedisModule_IsModuleNameBusy && RedisModule_IsModuleNameBusy(name)) return REDISMODULE_ERR; From 95881cec60b54ab00b68db306a9b0078aefd6036 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Mon, 18 Mar 2019 19:34:52 +0200 Subject: [PATCH 018/304] Add command filter Module API tests. 
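With the registration hook from the earlier filter patch and the argument accessors added above, a module can rewrite a command's argument vector before Redis resolves it against the command table. A minimal hypothetical module, sketched against the API as declared at this point in the series (the callback still receives a context argument here); the module name, the DELETE alias and the placement next to hellofilter.c are illustrative assumptions:

    #define REDISMODULE_EXPERIMENTAL_API
    #include "../redismodule.h"

    #include <strings.h>

    /* Rewrite the made-up alias DELETE to the real DEL command before the
     * server looks the command up. */
    void Alias_CommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilterCtx *filter) {
        (void) ctx;

        if (RedisModule_CommandFilterArgsCount(filter) < 1) return;

        size_t len;
        const RedisModuleString *arg0 = RedisModule_CommandFilterArgGet(filter, 0);
        const char *name = RedisModule_StringPtrLen(arg0, &len);

        if (len == 6 && !strncasecmp(name, "delete", 6)) {
            /* The replacement string is created with a NULL context on purpose:
             * it must not be auto-memory managed, because Redis keeps using it
             * after the filter returns. */
            RedisModule_CommandFilterArgReplace(filter, 0,
                RedisModule_CreateString(NULL, "del", 3));
        }
    }

    int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
        REDISMODULE_NOT_USED(argv);
        REDISMODULE_NOT_USED(argc);

        if (RedisModule_Init(ctx, "cmdalias", 1, REDISMODULE_APIVER_1) == REDISMODULE_ERR)
            return REDISMODULE_ERR;

        if (RedisModule_RegisterCommandFilter(ctx, Alias_CommandFilter) == REDISMODULE_ERR)
            return REDISMODULE_ERR;

        return REDISMODULE_OK;
    }

Because the filter runs at the top of processCommand(), before arity checks and command lookup, a client's "DELETE mykey" should then execute as "DEL mykey".
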
--- tests/modules/commandfilter.tcl | 27 +++++++++++++++++++++++++++ tests/test_helper.tcl | 1 + 2 files changed, 28 insertions(+) create mode 100644 tests/modules/commandfilter.tcl diff --git a/tests/modules/commandfilter.tcl b/tests/modules/commandfilter.tcl new file mode 100644 index 000000000..f0d96b259 --- /dev/null +++ b/tests/modules/commandfilter.tcl @@ -0,0 +1,27 @@ +set testmodule [file normalize src/modules/hellofilter.so] + +start_server {tags {"modules"}} { + r module load $testmodule log-key + + test {Command Filter handles redirected commands} { + r set mykey @log + r lrange log-key 0 -1 + } "{hellofilter.log set mykey @log}" + + test {Command Filter can call RedisModule_CommandFilterArgDelete} { + r rpush mylist elem1 @delme elem2 + r lrange mylist 0 -1 + } {elem1 elem2} + + test {Command Filter can call RedisModule_CommandFilterArgInsert} { + r del mylist + r rpush mylist elem1 @insertbefore elem2 @insertafter elem3 + r lrange mylist 0 -1 + } {elem1 --inserted-before-- @insertbefore elem2 @insertafter --inserted-after-- elem3} + + test {Command Filter can call RedisModule_CommandFilterArgReplace} { + r del mylist + r rpush mylist elem1 @replaceme elem2 + r lrange mylist 0 -1 + } {elem1 --replaced-- elem2} +} diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index 568eacdee..d2f281526 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -63,6 +63,7 @@ set ::all_tests { unit/lazyfree unit/wait unit/pendingquerybuf + modules/commandfilter } # Index to the next test to run in the ::all_tests list. set ::next_test 0 From fdacd1b0b5a3a9ad1bd3ba5c1e93c51fa31b74a9 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Mon, 18 Mar 2019 23:05:52 +0200 Subject: [PATCH 019/304] CommandFilter API: More cleanup. --- src/module.c | 37 +++++++++---------------------------- src/redismodule.h | 2 +- 2 files changed, 10 insertions(+), 29 deletions(-) diff --git a/src/module.c b/src/module.c index 741c546b7..c6cb8a0ca 100644 --- a/src/module.c +++ b/src/module.c @@ -270,32 +270,23 @@ typedef struct RedisModuleDictIter { raxIterator ri; } RedisModuleDictIter; -/* Information about the command to be executed, as passed to and from a - * filter. */ -typedef struct RedisModuleFilteredCommand { +typedef struct RedisModuleCommandFilterCtx { RedisModuleString **argv; int argc; -} RedisModuleFilteredCommand; +} RedisModuleCommandFilterCtx; -typedef void (*RedisModuleCommandFilterFunc) (RedisModuleCtx *ctx, RedisModuleFilteredCommand *cmd); +typedef void (*RedisModuleCommandFilterFunc) (RedisModuleCommandFilterCtx *filter); typedef struct RedisModuleCommandFilter { /* The module that registered the filter */ RedisModule *module; /* Filter callback function */ RedisModuleCommandFilterFunc callback; - /* Indicates a filter is active, avoid reentrancy */ - int active; } RedisModuleCommandFilter; /* Registered filters */ static list *moduleCommandFilters; -typedef struct RedisModuleCommandFilterCtx { - RedisModuleString **argv; - int argc; -} RedisModuleCommandFilterCtx; - /* -------------------------------------------------------------------------- * Prototypes * -------------------------------------------------------------------------- */ @@ -4802,16 +4793,13 @@ int moduleUnregisterUsedAPI(RedisModule *module) { /* Register a new command filter function. Filters get executed by Redis * before processing an inbound command and can be used to manipulate the - * behavior of standard Redis commands. 
Filters must not attempt to - * perform Redis commands or operate on the dataset, and must restrict - * themselves to manipulation of the arguments. + * behavior of standard Redis commands. */ int RM_RegisterCommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc callback) { RedisModuleCommandFilter *filter = zmalloc(sizeof(*filter)); filter->module = ctx->module; filter->callback = callback; - filter->active = 0; listAddNodeTail(moduleCommandFilters, filter); return REDISMODULE_OK; @@ -4824,26 +4812,19 @@ void moduleCallCommandFilters(client *c) { listNode *ln; listRewind(moduleCommandFilters,&li); - RedisModuleFilteredCommand cmd = { + RedisModuleCommandFilterCtx filter = { .argv = c->argv, .argc = c->argc }; while((ln = listNext(&li))) { - RedisModuleCommandFilter *filter = ln->value; - if (filter->active) continue; + RedisModuleCommandFilter *f = ln->value; - RedisModuleCtx ctx = REDISMODULE_CTX_INIT; - ctx.module = filter->module; - - filter->active = 1; - filter->callback(&ctx, &cmd); - filter->active = 0; - moduleFreeContext(&ctx); + f->callback(&filter); } - c->argv = cmd.argv; - c->argc = cmd.argc; + c->argv = filter.argv; + c->argc = filter.argc; } /* Return the number of arguments a filtered command has. The number of diff --git a/src/redismodule.h b/src/redismodule.h index 426a6df69..5df83ae6a 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -163,7 +163,7 @@ typedef void (*RedisModuleTypeDigestFunc)(RedisModuleDigest *digest, void *value typedef void (*RedisModuleTypeFreeFunc)(void *value); typedef void (*RedisModuleClusterMessageReceiver)(RedisModuleCtx *ctx, const char *sender_id, uint8_t type, const unsigned char *payload, uint32_t len); typedef void (*RedisModuleTimerProc)(RedisModuleCtx *ctx, void *data); -typedef void (*RedisModuleCommandFilterFunc) (RedisModuleCtx *ctx, RedisModuleCommandFilterCtx *filter); +typedef void (*RedisModuleCommandFilterFunc) (RedisModuleCommandFilterCtx *filter); #define REDISMODULE_TYPE_METHOD_VERSION 1 typedef struct RedisModuleTypeMethods { From 5bd8aae664437cace61a422434978d8bb3740ed1 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Mon, 18 Mar 2019 23:06:38 +0200 Subject: [PATCH 020/304] CommandFilter API: Support Lua and RM_call() flows. --- src/module.c | 20 +++++++++++++------- src/scripting.c | 5 +++++ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/module.c b/src/module.c index c6cb8a0ca..17accfb70 100644 --- a/src/module.c +++ b/src/module.c @@ -2741,12 +2741,6 @@ RedisModuleCallReply *RM_Call(RedisModuleCtx *ctx, const char *cmdname, const ch RedisModuleCallReply *reply = NULL; int replicate = 0; /* Replicate this command? */ - cmd = lookupCommandByCString((char*)cmdname); - if (!cmd) { - errno = EINVAL; - return NULL; - } - /* Create the client and dispatch the command. */ va_start(ap, fmt); c = createClient(-1); @@ -2760,11 +2754,23 @@ RedisModuleCallReply *RM_Call(RedisModuleCtx *ctx, const char *cmdname, const ch c->db = ctx->client->db; c->argv = argv; c->argc = argc; - c->cmd = c->lastcmd = cmd; /* We handle the above format error only when the client is setup so that * we can free it normally. */ if (argv == NULL) goto cleanup; + /* Call command filters */ + moduleCallCommandFilters(c); + + /* Lookup command now, after filters had a chance to make modifications + * if necessary. + */ + cmd = lookupCommand(c->argv[0]->ptr); + if (!cmd) { + errno = EINVAL; + goto cleanup; + } + c->cmd = c->lastcmd = cmd; + /* Basic arity checks. 
*/ if ((cmd->arity > 0 && cmd->arity != argc) || (argc < -cmd->arity)) { errno = EINVAL; diff --git a/src/scripting.c b/src/scripting.c index cbbf43fb1..032bfdf10 100644 --- a/src/scripting.c +++ b/src/scripting.c @@ -462,6 +462,11 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) { c->argc = argc; c->user = server.lua_caller->user; + /* Process module hooks */ + moduleCallCommandFilters(c); + argv = c->argv; + argc = c->argc; + /* Log the command if debugging is active. */ if (ldb.active && ldb.step) { sds cmdlog = sdsnew(""); From 06a6d70ab59af885c4cbfffdc023a1ad05bfa5df Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Mon, 18 Mar 2019 23:07:28 +0200 Subject: [PATCH 021/304] CommandFilter API: hellofilter and tests. --- src/modules/hellofilter.c | 32 ++++++++++++++++++++++++++++---- tests/modules/commandfilter.tcl | 20 +++++++++++++++++++- 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/src/modules/hellofilter.c b/src/modules/hellofilter.c index 84eb02c30..d5dd405aa 100644 --- a/src/modules/hellofilter.c +++ b/src/modules/hellofilter.c @@ -6,17 +6,32 @@ static RedisModuleString *log_key_name; static const char log_command_name[] = "hellofilter.log"; +static const char ping_command_name[] = "hellofilter.ping"; +static int in_module = 0; + +int HelloFilter_PingCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) +{ + RedisModuleCallReply *reply = RedisModule_Call(ctx, "ping", "c", "@log"); + if (reply) { + RedisModule_ReplyWithCallReply(ctx, reply); + RedisModule_FreeCallReply(reply); + } else { + RedisModule_ReplyWithSimpleString(ctx, "Unknown command or invalid arguments"); + } + + return REDISMODULE_OK; +} int HelloFilter_LogCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { - RedisModuleString *s = RedisModule_CreateStringFromString(ctx, argv[0]); + RedisModuleString *s = RedisModule_CreateString(ctx, "", 0); int i; for (i = 1; i < argc; i++) { size_t arglen; const char *arg = RedisModule_StringPtrLen(argv[i], &arglen); - RedisModule_StringAppendBuffer(ctx, s, " ", 1); + if (i > 1) RedisModule_StringAppendBuffer(ctx, s, " ", 1); RedisModule_StringAppendBuffer(ctx, s, arg, arglen); } @@ -25,6 +40,8 @@ int HelloFilter_LogCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int ar RedisModule_CloseKey(log); RedisModule_FreeString(ctx, s); + in_module = 1; + size_t cmdlen; const char *cmdname = RedisModule_StringPtrLen(argv[1], &cmdlen); RedisModuleCallReply *reply = RedisModule_Call(ctx, cmdname, "v", &argv[2], argc - 2); @@ -34,12 +51,15 @@ int HelloFilter_LogCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int ar } else { RedisModule_ReplyWithSimpleString(ctx, "Unknown command or invalid arguments"); } + + in_module = 0; + return REDISMODULE_OK; } -void HelloFilter_CommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilterCtx *filter) +void HelloFilter_CommandFilter(RedisModuleCommandFilterCtx *filter) { - (void) ctx; + if (in_module) return; /* don't process our own RM_Call() */ /* Fun manipulations: * - Remove @delme @@ -94,6 +114,10 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) HelloFilter_LogCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) return REDISMODULE_ERR; + if (RedisModule_CreateCommand(ctx,ping_command_name, + HelloFilter_PingCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + if (RedisModule_RegisterCommandFilter(ctx, HelloFilter_CommandFilter) == REDISMODULE_ERR) return REDISMODULE_ERR; diff --git a/tests/modules/commandfilter.tcl 
b/tests/modules/commandfilter.tcl index f0d96b259..47d9c302c 100644 --- a/tests/modules/commandfilter.tcl +++ b/tests/modules/commandfilter.tcl @@ -6,7 +6,7 @@ start_server {tags {"modules"}} { test {Command Filter handles redirected commands} { r set mykey @log r lrange log-key 0 -1 - } "{hellofilter.log set mykey @log}" + } "{set mykey @log}" test {Command Filter can call RedisModule_CommandFilterArgDelete} { r rpush mylist elem1 @delme elem2 @@ -24,4 +24,22 @@ start_server {tags {"modules"}} { r rpush mylist elem1 @replaceme elem2 r lrange mylist 0 -1 } {elem1 --replaced-- elem2} + + test {Command Filter applies on RM_Call() commands} { + r del log-key + r hellofilter.ping + r lrange log-key 0 -1 + } "{ping @log}" + + test {Command Filter applies on Lua redis.call()} { + r del log-key + r eval "redis.call('ping', '@log')" 0 + r lrange log-key 0 -1 + } "{ping @log}" + + test {Command Filter applies on Lua redis.call() that calls a module} { + r del log-key + r eval "redis.call('hellofilter.ping')" 0 + r lrange log-key 0 -1 + } "{ping @log}" } From 278c7a6b6d679dfda65a35c098e1fd763a974b86 Mon Sep 17 00:00:00 2001 From: Dvir Volk Date: Tue, 19 Mar 2019 13:11:37 +0200 Subject: [PATCH 022/304] Added keyspace miss notifications support --- src/db.c | 7 ++++++- src/modules/testmodule.c | 29 ++++++++++++++++++++++------- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/src/db.c b/src/db.c index 7950d5074..8c9047084 100644 --- a/src/db.c +++ b/src/db.c @@ -83,6 +83,7 @@ robj *lookupKey(redisDb *db, robj *key, int flags) { * 1. A key gets expired if it reached it's TTL. * 2. The key last access time is updated. * 3. The global keys hits/misses stats are updated (reported in INFO). + * 4. If keyspace notifications are enabled, a "miss" notification is fired. * * This API should not be used when we write to the key after obtaining * the object linked to the key, but only for read only operations. @@ -106,6 +107,7 @@ robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) { * to return NULL ASAP. */ if (server.masterhost == NULL) { server.stat_keyspace_misses++; + notifyKeyspaceEvent(NOTIFY_GENERIC, "miss", key, db->id); return NULL; } @@ -127,12 +129,15 @@ robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) { server.current_client->cmd->flags & CMD_READONLY) { server.stat_keyspace_misses++; + notifyKeyspaceEvent(NOTIFY_GENERIC, "miss", key, db->id); return NULL; } } val = lookupKey(db,key,flags); - if (val == NULL) + if (val == NULL) { server.stat_keyspace_misses++; + notifyKeyspaceEvent(NOTIFY_GENERIC, "miss", key, db->id); + } else server.stat_keyspace_hits++; return val; diff --git a/src/modules/testmodule.c b/src/modules/testmodule.c index 67a861704..826dd9a7e 100644 --- a/src/modules/testmodule.c +++ b/src/modules/testmodule.c @@ -109,9 +109,9 @@ int TestStringPrintf(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { if (argc < 3) { return RedisModule_WrongArity(ctx); } - RedisModuleString *s = RedisModule_CreateStringPrintf(ctx, - "Got %d args. argv[1]: %s, argv[2]: %s", - argc, + RedisModuleString *s = RedisModule_CreateStringPrintf(ctx, + "Got %d args. 
argv[1]: %s, argv[2]: %s", + argc, RedisModule_StringPtrLen(argv[1], NULL), RedisModule_StringPtrLen(argv[2], NULL) ); @@ -133,7 +133,7 @@ int TestUnlink(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { RedisModuleKey *k = RedisModule_OpenKey(ctx, RedisModule_CreateStringPrintf(ctx, "unlinked"), REDISMODULE_WRITE | REDISMODULE_READ); if (!k) return failTest(ctx, "Could not create key"); - + if (REDISMODULE_ERR == RedisModule_StringSet(k, RedisModule_CreateStringPrintf(ctx, "Foobar"))) { return failTest(ctx, "Could not set string value"); } @@ -152,7 +152,7 @@ int TestUnlink(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { return failTest(ctx, "Could not verify key to be unlinked"); } return RedisModule_ReplyWithSimpleString(ctx, "OK"); - + } int NotifyCallback(RedisModuleCtx *ctx, int type, const char *event, @@ -188,6 +188,10 @@ int TestNotifications(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { RedisModule_Call(ctx, "LPUSH", "cc", "l", "y"); RedisModule_Call(ctx, "LPUSH", "cc", "l", "y"); + /* Miss some keys intentionally so we will get a "miss" notification. */ + RedisModule_Call(ctx, "GET", "c", "nosuchkey"); + RedisModule_Call(ctx, "SMEMBERS", "c", "nosuchkey"); + size_t sz; const char *rep; RedisModuleCallReply *r = RedisModule_Call(ctx, "HGET", "cc", "notifications", "foo"); @@ -225,6 +229,16 @@ int TestNotifications(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { FAIL("Wrong reply for l"); } + r = RedisModule_Call(ctx, "HGET", "cc", "notifications", "nosuchkey"); + if (r == NULL || RedisModule_CallReplyType(r) != REDISMODULE_REPLY_STRING) { + FAIL("Wrong or no reply for nosuchkey"); + } else { + rep = RedisModule_CallReplyStringPtr(r, &sz); + if (sz != 1 || *rep != '2') { + FAIL("Got reply '%.*s'. expected '2'", sz, rep); + } + } + RedisModule_Call(ctx, "FLUSHDB", ""); return RedisModule_ReplyWithSimpleString(ctx, "OK"); @@ -423,7 +437,7 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) if (RedisModule_CreateCommand(ctx,"test.ctxflags", TestCtxFlags,"readonly",1,1,1) == REDISMODULE_ERR) return REDISMODULE_ERR; - + if (RedisModule_CreateCommand(ctx,"test.unlink", TestUnlink,"write deny-oom",1,1,1) == REDISMODULE_ERR) return REDISMODULE_ERR; @@ -435,7 +449,8 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) RedisModule_SubscribeToKeyspaceEvents(ctx, REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_SET | - REDISMODULE_NOTIFY_STRING, + REDISMODULE_NOTIFY_STRING | + REDISMODULE_NOTIFY_GENERIC, NotifyCallback); if (RedisModule_CreateCommand(ctx,"test.notify", TestNotifications,"write deny-oom",1,1,1) == REDISMODULE_ERR) From 1da0d9b04cded3f65ca7c6aecbee07c02d118696 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Tue, 19 Mar 2019 19:48:47 +0200 Subject: [PATCH 023/304] CommandFilter API: Extend documentation. --- src/module.c | 48 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/src/module.c b/src/module.c index 17accfb70..ee2840225 100644 --- a/src/module.c +++ b/src/module.c @@ -4797,9 +4797,47 @@ int moduleUnregisterUsedAPI(RedisModule *module) { * Module Command Filter API * -------------------------------------------------------------------------- */ -/* Register a new command filter function. Filters get executed by Redis - * before processing an inbound command and can be used to manipulate the - * behavior of standard Redis commands. +/* Register a new command filter function. 
+ * + * Command filtering makes it possible for modules to extend Redis by plugging + * into the execution flow of all commands. + * + * A registered filter gets called before Redis executes *any* command. This + * includes both core Redis commands and commands registered by any module. The + * filter applies in all execution paths including: + * + * 1. Invocation by a client. + * 2. Invocation through `RedisModule_Call()` by any module. + * 3. Invocation through Lua `redis.call()`. + * 4. Replication of a command from a master. + * + * The filter executes in a special filter context, which is different and more + * limited than a RedisModuleCtx. Because the filter affects any command, it + * must be implemented in a very efficient way to reduce the performance impact + * on Redis. All Redis Module API calls that require a valid context (such as + * `RedisModule_Call()`, `RedisModule_OpenKey()`, etc.) are not supported in a + * filter context. + * + * The `RedisModuleCommandFilterCtx` can be used to inspect or modify the + * executed command and its arguments. As the filter executes before Redis + * begins processing the command, any change will affect the way the command is + * processed. For example, a module can override Redis commands this way: + * + * 1. Register a `MODULE.SET` command which implements an extended version of + * the Redis `SET` command. + * 2. Register a command filter which detects invocation of `SET` on a specific + * pattern of keys. Once detected, the filter will replace the first + * argument from `SET` to `MODULE.SET`. + * 3. When filter execution is complete, Redis considers the new command name + * and therefore executes the module's own command. + * + * Note that in the above use case, if `MODULE.SET` itself uses + * `RedisModule_Call()` the filter will be applied on that call as well. If + * that is not desired, the module itself is responsible for maintaining a flag + * to identify and avoid this form of re-entrancy. + * + * If multiple filters are registered (by the same or different modules), they + * are executed in the order of registration. */ int RM_RegisterCommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc callback) { @@ -4881,7 +4919,7 @@ int RM_CommandFilterArgInsert(RedisModuleCommandFilterCtx *filter, int pos, Redi int RM_CommandFilterArgReplace(RedisModuleCommandFilterCtx *filter, int pos, RedisModuleString *arg) { if (pos < 0 || pos >= filter->argc) return REDISMODULE_ERR; - + decrRefCount(filter->argv[pos]); filter->argv[pos] = arg; @@ -4901,7 +4939,7 @@ int RM_CommandFilterArgDelete(RedisModuleCommandFilterCtx *filter, int pos) filter->argv[i] = filter->argv[i+1]; } filter->argc--; - + return REDISMODULE_OK; } From 61501773c977fc4e486669304705ebd4e73ac716 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Thu, 27 Sep 2018 18:12:31 +0300 Subject: [PATCH 024/304] getKeysFromCommand for TOUCH only extracted the first key. also, arity for COMMAND command was wrong.
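For context: in the command table, the firstkey/lastkey/keystep triple tells the generic key extraction code where keys sit in argv, and a negative lastkey is counted back from the end of the arguments, so the corrected spec (1,-1,1) covers every key of the variadic TOUCH key [key ...] instead of only the first one. Likewise an arity of -1 means "one or more arguments", which matches COMMAND being legal with or without a subcommand. Below is a minimal stand-alone sketch of how such a spec is interpreted; the print_keys() helper and test program are illustrative only, not the actual getKeysFromCommand() path:

    #include <stdio.h>

    /* Interpret firstkey/lastkey/keystep the way the generic key
     * extractor does: lastkey < 0 counts back from the end of argv. */
    static void print_keys(int argc, char **argv,
                           int firstkey, int lastkey, int keystep) {
        if (firstkey == 0) return;              /* command declares no keys */
        int last = (lastkey < 0) ? argc + lastkey : lastkey;
        for (int j = firstkey; j <= last && j < argc; j += keystep)
            printf("key: %s\n", argv[j]);
    }

    int main(void) {
        char *cmd[] = {"TOUCH", "k1", "k2", "k3"};
        print_keys(4, cmd, 1, 1, 1);   /* old spec: reports only k1 */
        print_keys(4, cmd, 1, -1, 1);  /* fixed spec: reports k1, k2, k3 */
        return 0;
    }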
--- src/server.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/server.c b/src/server.c index 712cda1bd..1341ab405 100644 --- a/src/server.c +++ b/src/server.c @@ -715,7 +715,7 @@ struct redisCommand redisCommandTable[] = { {"touch",touchCommand,-2, "read-only fast @keyspace", - 0,NULL,1,1,1,0,0,0}, + 0,NULL,1,-1,1,0,0,0}, {"pttl",pttlCommand,2, "read-only fast random @keyspace", @@ -863,7 +863,7 @@ struct redisCommand redisCommandTable[] = { "no-script @keyspace", 0,NULL,0,0,0,0,0,0}, - {"command",commandCommand,0, + {"command",commandCommand,-1, "ok-loading ok-stale random @connection", 0,NULL,0,0,0,0,0,0}, From 9b2a42636f49b484f7ec62fccb8e6ed820f86e42 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Thu, 27 Sep 2018 18:03:47 +0300 Subject: [PATCH 025/304] change SORT and SPOP to use lookupKeyWrite rather than lookupKeyRead like in SUNIONSTORE etc, commands that perform writes are expected to open all keys, even input keys, with lookupKeyWrite --- src/sort.c | 55 ++++++++++++++++++++++++++++++----------------------- src/t_set.c | 2 +- 2 files changed, 32 insertions(+), 25 deletions(-) diff --git a/src/sort.c b/src/sort.c index 8608cd8b3..db26da158 100644 --- a/src/sort.c +++ b/src/sort.c @@ -58,7 +58,7 @@ redisSortOperation *createSortOperation(int type, robj *pattern) { * * The returned object will always have its refcount increased by 1 * when it is non-NULL. */ -robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst) { +robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst, int writeflag) { char *p, *f, *k; sds spat, ssub; robj *keyobj, *fieldobj = NULL, *o; @@ -106,7 +106,10 @@ robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst) { decrRefCount(subst); /* Incremented by decodeObject() */ /* Lookup substituted key */ - o = lookupKeyRead(db,keyobj); + if (!writeflag) + o = lookupKeyRead(db,keyobj); + else + o = lookupKeyWrite(db,keyobj); if (o == NULL) goto noobj; if (fieldobj) { @@ -198,30 +201,12 @@ void sortCommand(client *c) { robj *sortval, *sortby = NULL, *storekey = NULL; redisSortObject *vector; /* Resulting vector to sort */ - /* Lookup the key to sort. It must be of the right types */ - sortval = lookupKeyRead(c->db,c->argv[1]); - if (sortval && sortval->type != OBJ_SET && - sortval->type != OBJ_LIST && - sortval->type != OBJ_ZSET) - { - addReply(c,shared.wrongtypeerr); - return; - } - /* Create a list of operations to perform for every sorted element. * Operations can be GET */ operations = listCreate(); listSetFreeMethod(operations,zfree); j = 2; /* options start at argv[2] */ - /* Now we need to protect sortval incrementing its count, in the future - * SORT may have options able to overwrite/delete keys during the sorting - * and the sorted key itself may get destroyed */ - if (sortval) - incrRefCount(sortval); - else - sortval = createQuicklistObject(); - /* The SORT command has an SQL-alike syntax, parse it */ while(j < c->argc) { int leftargs = c->argc-j-1; @@ -280,11 +265,33 @@ void sortCommand(client *c) { /* Handle syntax errors set during options parsing. */ if (syntax_error) { - decrRefCount(sortval); listRelease(operations); return; } + /* Lookup the key to sort. 
It must be of the right types */ + if (storekey) + sortval = lookupKeyRead(c->db,c->argv[1]); + else + sortval = lookupKeyWrite(c->db,c->argv[1]); + if (sortval && sortval->type != OBJ_SET && + sortval->type != OBJ_LIST && + sortval->type != OBJ_ZSET) + { + listRelease(operations); + addReply(c,shared.wrongtypeerr); + return; + } + + /* Now we need to protect sortval incrementing its count, in the future + * SORT may have options able to overwrite/delete keys during the sorting + * and the sorted key itself may get destroyed */ + if (sortval) + incrRefCount(sortval); + else + sortval = createQuicklistObject(); + + /* When sorting a set with no sort specified, we must sort the output * so the result is consistent across scripting and replication. * @@ -452,7 +459,7 @@ void sortCommand(client *c) { robj *byval; if (sortby) { /* lookup value to sort by */ - byval = lookupKeyByPattern(c->db,sortby,vector[j].obj); + byval = lookupKeyByPattern(c->db,sortby,vector[j].obj,storekey!=NULL); if (!byval) continue; } else { /* use object itself to sort by */ @@ -515,7 +522,7 @@ void sortCommand(client *c) { while((ln = listNext(&li))) { redisSortOperation *sop = ln->value; robj *val = lookupKeyByPattern(c->db,sop->pattern, - vector[j].obj); + vector[j].obj,storekey!=NULL); if (sop->type == SORT_OP_GET) { if (!val) { @@ -545,7 +552,7 @@ void sortCommand(client *c) { while((ln = listNext(&li))) { redisSortOperation *sop = ln->value; robj *val = lookupKeyByPattern(c->db,sop->pattern, - vector[j].obj); + vector[j].obj,storekey!=NULL); if (sop->type == SORT_OP_GET) { if (!val) val = createStringObject("",0); diff --git a/src/t_set.c b/src/t_set.c index cbe55aaa4..05d9ee243 100644 --- a/src/t_set.c +++ b/src/t_set.c @@ -415,7 +415,7 @@ void spopWithCountCommand(client *c) { /* Make sure a key with the name inputted exists, and that it's type is * indeed a set. Otherwise, return nil */ - if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp])) + if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.null[c->resp])) == NULL || checkType(c,set,OBJ_SET)) return; /* If count is zero, serve an empty multibulk ASAP to avoid special From 4355e2974955c5d39b47df9c9f2c1e81b09fc9b9 Mon Sep 17 00:00:00 2001 From: oranagra Date: Thu, 23 Feb 2017 03:13:44 -0800 Subject: [PATCH 026/304] bugfix to restartAOF, exit will never happen since retry will get negative. also reduce an excess sleep --- src/replication.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/replication.c b/src/replication.c index f2adc7995..59e42e561 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1091,12 +1091,13 @@ void replicationCreateMasterClient(int fd, int dbid) { } void restartAOF() { - int retry = 10; - while (retry-- && startAppendOnly() == C_ERR) { + unsigned int tries, max_tries = 10; + for (tries = 0; tries < max_tries; ++tries) { + if (tries) sleep(1); + if (startAppendOnly() == C_OK) break; serverLog(LL_WARNING,"Failed enabling the AOF after successful master synchronization! Trying it again in one second."); - sleep(1); } - if (!retry) { + if (tries == max_tries) { serverLog(LL_WARNING,"FATAL: this replica instance finished the synchronization with its master, but the AOF can't be turned on. 
Exiting now."); exit(1); } From 544b9b08265fbb1696ec4a27a4c09035cd099a94 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Wed, 20 Mar 2019 17:46:19 +0200 Subject: [PATCH 027/304] diskless replication - notify slave when rdb transfer failed in diskless replication - master was not notifing the slave that rdb transfer terminated on error, and lets slave wait for replication timeout --- src/replication.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/replication.c b/src/replication.c index f2adc7995..8f0d67914 100644 --- a/src/replication.c +++ b/src/replication.c @@ -593,6 +593,7 @@ int startBgsaveForReplication(int mincapa) { client *slave = ln->value; if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) { + slave->replstate = REPL_STATE_NONE; slave->flags &= ~CLIENT_SLAVE; listDelNode(server.slaves,ln); addReplyError(slave, From 50befc42ad1523942b82af81e6722b66744825f9 Mon Sep 17 00:00:00 2001 From: Dvir Volk Date: Thu, 21 Mar 2019 11:47:14 +0200 Subject: [PATCH 028/304] added special flag for keyspace miss notifications --- src/db.c | 6 +++--- src/modules/testmodule.c | 2 +- src/notify.c | 6 ++++-- src/redismodule.h | 1 + src/server.h | 4 +++- 5 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/db.c b/src/db.c index 8c9047084..afe181281 100644 --- a/src/db.c +++ b/src/db.c @@ -107,7 +107,7 @@ robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) { * to return NULL ASAP. */ if (server.masterhost == NULL) { server.stat_keyspace_misses++; - notifyKeyspaceEvent(NOTIFY_GENERIC, "miss", key, db->id); + notifyKeyspaceEvent(NOTIFY_KEY_MISS, "miss", key, db->id); return NULL; } @@ -129,14 +129,14 @@ robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) { server.current_client->cmd->flags & CMD_READONLY) { server.stat_keyspace_misses++; - notifyKeyspaceEvent(NOTIFY_GENERIC, "miss", key, db->id); + notifyKeyspaceEvent(NOTIFY_KEY_MISS, "miss", key, db->id); return NULL; } } val = lookupKey(db,key,flags); if (val == NULL) { server.stat_keyspace_misses++; - notifyKeyspaceEvent(NOTIFY_GENERIC, "miss", key, db->id); + notifyKeyspaceEvent(NOTIFY_KEY_MISS, "miss", key, db->id); } else server.stat_keyspace_hits++; diff --git a/src/modules/testmodule.c b/src/modules/testmodule.c index 826dd9a7e..af78d21d7 100644 --- a/src/modules/testmodule.c +++ b/src/modules/testmodule.c @@ -450,7 +450,7 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_SET | REDISMODULE_NOTIFY_STRING | - REDISMODULE_NOTIFY_GENERIC, + REDISMODULE_NOTIFY_KEY_MISS, NotifyCallback); if (RedisModule_CreateCommand(ctx,"test.notify", TestNotifications,"write deny-oom",1,1,1) == REDISMODULE_ERR) diff --git a/src/notify.c b/src/notify.c index 1afb36fc0..d6c3ad403 100644 --- a/src/notify.c +++ b/src/notify.c @@ -55,6 +55,7 @@ int keyspaceEventsStringToFlags(char *classes) { case 'K': flags |= NOTIFY_KEYSPACE; break; case 'E': flags |= NOTIFY_KEYEVENT; break; case 't': flags |= NOTIFY_STREAM; break; + case 'm': flags |= NOTIFY_KEY_MISS; break; default: return -1; } } @@ -81,6 +82,7 @@ sds keyspaceEventsFlagsToString(int flags) { if (flags & NOTIFY_EXPIRED) res = sdscatlen(res,"x",1); if (flags & NOTIFY_EVICTED) res = sdscatlen(res,"e",1); if (flags & NOTIFY_STREAM) res = sdscatlen(res,"t",1); + if (flags & NOTIFY_KEY_MISS) res = sdscatlen(res,"m",1); } if (flags & NOTIFY_KEYSPACE) res = sdscatlen(res,"K",1); if (flags & NOTIFY_KEYEVENT) res = sdscatlen(res,"E",1); @@ -100,12 +102,12 @@ void notifyKeyspaceEvent(int type, char 
*event, robj *key, int dbid) { int len = -1; char buf[24]; - /* If any modules are interested in events, notify the module system now. + /* If any modules are interested in events, notify the module system now. * This bypasses the notifications configuration, but the module engine * will only call event subscribers if the event type matches the types * they are interested in. */ moduleNotifyKeyspaceEvent(type, event, key, dbid); - + /* If notifications for this class of events are off, return ASAP. */ if (!(server.notify_keyspace_events & type)) return; diff --git a/src/redismodule.h b/src/redismodule.h index 272da08df..681bd600b 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -98,6 +98,7 @@ #define REDISMODULE_NOTIFY_EXPIRED (1<<8) /* x */ #define REDISMODULE_NOTIFY_EVICTED (1<<9) /* e */ #define REDISMODULE_NOTIFY_STREAM (1<<10) /* t */ +#define REDISMODULE_NOTIFY_KEY_MISS (1<<11) /* m */ #define REDISMODULE_NOTIFY_ALL (REDISMODULE_NOTIFY_GENERIC | REDISMODULE_NOTIFY_STRING | REDISMODULE_NOTIFY_LIST | REDISMODULE_NOTIFY_SET | REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_ZSET | REDISMODULE_NOTIFY_EXPIRED | REDISMODULE_NOTIFY_EVICTED | REDISMODULE_NOTIFY_STREAM) /* A */ diff --git a/src/server.h b/src/server.h index 56c3b67d3..0b433039d 100644 --- a/src/server.h +++ b/src/server.h @@ -468,7 +468,9 @@ typedef long long mstime_t; /* millisecond time type. */ #define NOTIFY_EXPIRED (1<<8) /* x */ #define NOTIFY_EVICTED (1<<9) /* e */ #define NOTIFY_STREAM (1<<10) /* t */ -#define NOTIFY_ALL (NOTIFY_GENERIC | NOTIFY_STRING | NOTIFY_LIST | NOTIFY_SET | NOTIFY_HASH | NOTIFY_ZSET | NOTIFY_EXPIRED | NOTIFY_EVICTED | NOTIFY_STREAM) /* A flag */ +#define NOTIFY_KEY_MISS (1<<11) /* m */ + +#define NOTIFY_ALL (NOTIFY_GENERIC | NOTIFY_STRING | NOTIFY_LIST | NOTIFY_SET | NOTIFY_HASH | NOTIFY_ZSET | NOTIFY_EXPIRED | NOTIFY_EVICTED | NOTIFY_STREAM | NOTIFY_KEY_MISS) /* A flag */ /* Get the first bind addr or NULL */ #define NET_FIRST_BIND_ADDR (server.bindaddr_count ? 
server.bindaddr[0] : NULL) From ca2eadaaac824d329c877820cd5d159607e0227b Mon Sep 17 00:00:00 2001 From: Dvir Volk Date: Thu, 21 Mar 2019 12:47:51 +0200 Subject: [PATCH 029/304] Added missing REDISMODULE_NOTIFY_KEY_MISS flag to REDISMODULE_NOTIFY_ALL --- src/redismodule.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redismodule.h b/src/redismodule.h index 681bd600b..70011f932 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -99,7 +99,7 @@ #define REDISMODULE_NOTIFY_EVICTED (1<<9) /* e */ #define REDISMODULE_NOTIFY_STREAM (1<<10) /* t */ #define REDISMODULE_NOTIFY_KEY_MISS (1<<11) /* m */ -#define REDISMODULE_NOTIFY_ALL (REDISMODULE_NOTIFY_GENERIC | REDISMODULE_NOTIFY_STRING | REDISMODULE_NOTIFY_LIST | REDISMODULE_NOTIFY_SET | REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_ZSET | REDISMODULE_NOTIFY_EXPIRED | REDISMODULE_NOTIFY_EVICTED | REDISMODULE_NOTIFY_STREAM) /* A */ +#define REDISMODULE_NOTIFY_ALL (REDISMODULE_NOTIFY_GENERIC | REDISMODULE_NOTIFY_STRING | REDISMODULE_NOTIFY_LIST | REDISMODULE_NOTIFY_SET | REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_ZSET | REDISMODULE_NOTIFY_EXPIRED | REDISMODULE_NOTIFY_EVICTED | REDISMODULE_NOTIFY_STREAM | REDISMODULE_NOTIFY_KEY_MISS) /* A */ /* A special pointer that we can use between the core and the module to signal From 51a54dfde3c27e162dfedcddb663350575cd7733 Mon Sep 17 00:00:00 2001 From: Dvir Volk Date: Thu, 21 Mar 2019 12:48:37 +0200 Subject: [PATCH 030/304] remove extra linebreak --- src/server.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/server.h b/src/server.h index 0b433039d..b090a6374 100644 --- a/src/server.h +++ b/src/server.h @@ -469,7 +469,6 @@ typedef long long mstime_t; /* millisecond time type. */ #define NOTIFY_EVICTED (1<<9) /* e */ #define NOTIFY_STREAM (1<<10) /* t */ #define NOTIFY_KEY_MISS (1<<11) /* m */ - #define NOTIFY_ALL (NOTIFY_GENERIC | NOTIFY_STRING | NOTIFY_LIST | NOTIFY_SET | NOTIFY_HASH | NOTIFY_ZSET | NOTIFY_EXPIRED | NOTIFY_EVICTED | NOTIFY_STREAM | NOTIFY_KEY_MISS) /* A flag */ /* Get the first bind addr or NULL */ From 8b58fbafae448f9ff106429bc5d53e1cd20bcfea Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 21 Mar 2019 12:18:55 +0100 Subject: [PATCH 031/304] Alter coding style in #4696 to conform to Redis code base. --- src/zmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zmalloc.c b/src/zmalloc.c index 4c40a7782..5e6010278 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -148,7 +148,7 @@ void *zrealloc(void *ptr, size_t size) { size_t oldsize; void *newptr; - if (size == 0 && ptr!=NULL) { + if (size == 0 && ptr != NULL) { zfree(ptr); return NULL; } From c675d44488094bc6e360f8a330375d3b9fb3b335 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Thu, 21 Mar 2019 14:44:49 +0200 Subject: [PATCH 032/304] CommandFilter API: Add unregister option. A filter handle is returned and can be used to unregister a filter. In the future it can also be used to further configure or manipulate the filter. Filters are now automatically unregistered when a module unloads. --- src/module.c | 94 +++++++++++++++++++++++++-------- src/modules/hellofilter.c | 27 ++++++++-- src/redismodule.h | 15 +++--- tests/modules/commandfilter.tcl | 22 ++++++++ 4 files changed, 126 insertions(+), 32 deletions(-) diff --git a/src/module.c b/src/module.c index ee2840225..ad7bba2eb 100644 --- a/src/module.c +++ b/src/module.c @@ -49,6 +49,7 @@ struct RedisModule { list *types; /* Module data types. */ list *usedby; /* List of modules using APIs from this one. 
*/ list *using; /* List of modules we use some APIs of. */ + list *filters; /* List of filters the module has registered. */ }; typedef struct RedisModule RedisModule; @@ -748,6 +749,7 @@ void RM_SetModuleAttribs(RedisModuleCtx *ctx, const char *name, int ver, int api module->types = listCreate(); module->usedby = listCreate(); module->using = listCreate(); + module->filters = listCreate(); ctx->module = module; } @@ -4793,6 +4795,28 @@ int moduleUnregisterUsedAPI(RedisModule *module) { return count; } +/* Unregister all filters registered by a module. + * This is called when a module is being unloaded. + * + * Returns the number of filters unregistered. */ +int moduleUnregisterFilters(RedisModule *module) { + listIter li; + listNode *ln; + int count = 0; + + listRewind(module->filters,&li); + while((ln = listNext(&li))) { + RedisModuleCommandFilter *filter = ln->value; + listNode *ln = listSearchKey(moduleCommandFilters,filter); + if (ln) { + listDelNode(moduleCommandFilters,ln); + count++; + } + zfree(filter); + } + return count; +} + /* -------------------------------------------------------------------------- * Module Command Filter API * -------------------------------------------------------------------------- */ @@ -4840,12 +4864,33 @@ int moduleUnregisterUsedAPI(RedisModule *module) { * are executed in the order of registration. */ -int RM_RegisterCommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc callback) { +RedisModuleCommandFilter *RM_RegisterCommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc callback) { RedisModuleCommandFilter *filter = zmalloc(sizeof(*filter)); filter->module = ctx->module; filter->callback = callback; listAddNodeTail(moduleCommandFilters, filter); + listAddNodeTail(ctx->module->filters, filter); + return filter; +} + +/* Unregister a command filter. + */ +int RM_UnregisterCommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilter *filter) { + listNode *ln; + + /* A module can only remove its own filters */ + if (filter->module != ctx->module) return REDISMODULE_ERR; + + ln = listSearchKey(moduleCommandFilters,filter); + if (!ln) return REDISMODULE_ERR; + listDelNode(moduleCommandFilters,ln); + + ln = listSearchKey(ctx->module->filters,filter); + if (ln) { + listDelNode(moduleCommandFilters,ln); + } + return REDISMODULE_OK; } @@ -4874,18 +4919,18 @@ void moduleCallCommandFilters(client *c) { /* Return the number of arguments a filtered command has. The number of * arguments include the command itself. */ -int RM_CommandFilterArgsCount(RedisModuleCommandFilterCtx *filter) +int RM_CommandFilterArgsCount(RedisModuleCommandFilterCtx *fctx) { - return filter->argc; + return fctx->argc; } /* Return the specified command argument. The first argument (position 0) is * the command itself, and the rest are user-provided args. */ -const RedisModuleString *RM_CommandFilterArgGet(RedisModuleCommandFilterCtx *filter, int pos) +const RedisModuleString *RM_CommandFilterArgGet(RedisModuleCommandFilterCtx *fctx, int pos) { - if (pos < 0 || pos >= filter->argc) return NULL; - return filter->argv[pos]; + if (pos < 0 || pos >= fctx->argc) return NULL; + return fctx->argv[pos]; } /* Modify the filtered command by inserting a new argument at the specified @@ -4894,18 +4939,18 @@ const RedisModuleString *RM_CommandFilterArgGet(RedisModuleCommandFilterCtx *fil * allocated, freed or used elsewhere. 
*/ -int RM_CommandFilterArgInsert(RedisModuleCommandFilterCtx *filter, int pos, RedisModuleString *arg) +int RM_CommandFilterArgInsert(RedisModuleCommandFilterCtx *fctx, int pos, RedisModuleString *arg) { int i; - if (pos < 0 || pos > filter->argc) return REDISMODULE_ERR; + if (pos < 0 || pos > fctx->argc) return REDISMODULE_ERR; - filter->argv = zrealloc(filter->argv, (filter->argc+1)*sizeof(RedisModuleString *)); - for (i = filter->argc; i > pos; i--) { - filter->argv[i] = filter->argv[i-1]; + fctx->argv = zrealloc(fctx->argv, (fctx->argc+1)*sizeof(RedisModuleString *)); + for (i = fctx->argc; i > pos; i--) { + fctx->argv[i] = fctx->argv[i-1]; } - filter->argv[pos] = arg; - filter->argc++; + fctx->argv[pos] = arg; + fctx->argc++; return REDISMODULE_OK; } @@ -4916,12 +4961,12 @@ int RM_CommandFilterArgInsert(RedisModuleCommandFilterCtx *filter, int pos, Redi * or used elsewhere. */ -int RM_CommandFilterArgReplace(RedisModuleCommandFilterCtx *filter, int pos, RedisModuleString *arg) +int RM_CommandFilterArgReplace(RedisModuleCommandFilterCtx *fctx, int pos, RedisModuleString *arg) { - if (pos < 0 || pos >= filter->argc) return REDISMODULE_ERR; + if (pos < 0 || pos >= fctx->argc) return REDISMODULE_ERR; - decrRefCount(filter->argv[pos]); - filter->argv[pos] = arg; + decrRefCount(fctx->argv[pos]); + fctx->argv[pos] = arg; return REDISMODULE_OK; } @@ -4929,16 +4974,16 @@ int RM_CommandFilterArgReplace(RedisModuleCommandFilterCtx *filter, int pos, Red /* Modify the filtered command by deleting an argument at the specified * position. */ -int RM_CommandFilterArgDelete(RedisModuleCommandFilterCtx *filter, int pos) +int RM_CommandFilterArgDelete(RedisModuleCommandFilterCtx *fctx, int pos) { int i; - if (pos < 0 || pos >= filter->argc) return REDISMODULE_ERR; + if (pos < 0 || pos >= fctx->argc) return REDISMODULE_ERR; - decrRefCount(filter->argv[pos]); - for (i = pos; i < filter->argc-1; i++) { - filter->argv[i] = filter->argv[i+1]; + decrRefCount(fctx->argv[pos]); + for (i = pos; i < fctx->argc-1; i++) { + fctx->argv[i] = fctx->argv[i+1]; } - filter->argc--; + fctx->argc--; return REDISMODULE_OK; } @@ -5041,6 +5086,7 @@ void moduleLoadFromQueue(void) { void moduleFreeModuleStructure(struct RedisModule *module) { listRelease(module->types); + listRelease(module->filters); sdsfree(module->name); zfree(module); } @@ -5132,6 +5178,7 @@ int moduleUnload(sds name) { moduleUnregisterCommands(module); moduleUnregisterSharedAPI(module); moduleUnregisterUsedAPI(module); + moduleUnregisterFilters(module); /* Remove any notification subscribers this module might have */ moduleUnsubscribeNotifications(module); @@ -5396,6 +5443,7 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(ExportSharedAPI); REGISTER_API(GetSharedAPI); REGISTER_API(RegisterCommandFilter); + REGISTER_API(UnregisterCommandFilter); REGISTER_API(CommandFilterArgsCount); REGISTER_API(CommandFilterArgGet); REGISTER_API(CommandFilterArgInsert); diff --git a/src/modules/hellofilter.c b/src/modules/hellofilter.c index d5dd405aa..9cd440df2 100644 --- a/src/modules/hellofilter.c +++ b/src/modules/hellofilter.c @@ -7,10 +7,27 @@ static RedisModuleString *log_key_name; static const char log_command_name[] = "hellofilter.log"; static const char ping_command_name[] = "hellofilter.ping"; +static const char unregister_command_name[] = "hellofilter.unregister"; static int in_module = 0; +static RedisModuleCommandFilter *filter = NULL; + +int HelloFilter_UnregisterCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) +{ + (void) argc; + (void) 
argv; + + RedisModule_ReplyWithLongLong(ctx, + RedisModule_UnregisterCommandFilter(ctx, filter)); + + return REDISMODULE_OK; +} + int HelloFilter_PingCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + (void) argc; + (void) argv; + RedisModuleCallReply *reply = RedisModule_Call(ctx, "ping", "c", "@log"); if (reply) { RedisModule_ReplyWithCallReply(ctx, reply); @@ -115,11 +132,15 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) return REDISMODULE_ERR; if (RedisModule_CreateCommand(ctx,ping_command_name, - HelloFilter_PingCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) + HelloFilter_PingCommand,"deny-oom",1,1,1) == REDISMODULE_ERR) return REDISMODULE_ERR; - if (RedisModule_RegisterCommandFilter(ctx, HelloFilter_CommandFilter) - == REDISMODULE_ERR) return REDISMODULE_ERR; + if (RedisModule_CreateCommand(ctx,unregister_command_name, + HelloFilter_UnregisterCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + + if ((filter = RedisModule_RegisterCommandFilter(ctx, HelloFilter_CommandFilter)) + == NULL) return REDISMODULE_ERR; return REDISMODULE_OK; } diff --git a/src/redismodule.h b/src/redismodule.h index 5df83ae6a..37b7d0d59 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -151,6 +151,7 @@ typedef struct RedisModuleClusterInfo RedisModuleClusterInfo; typedef struct RedisModuleDict RedisModuleDict; typedef struct RedisModuleDictIter RedisModuleDictIter; typedef struct RedisModuleCommandFilterCtx RedisModuleCommandFilterCtx; +typedef struct RedisModuleCommandFilter RedisModuleCommandFilter; typedef int (*RedisModuleCmdFunc)(RedisModuleCtx *ctx, RedisModuleString **argv, int argc); typedef void (*RedisModuleDisconnectFunc)(RedisModuleCtx *ctx, RedisModuleBlockedClient *bc); @@ -339,12 +340,13 @@ void REDISMODULE_API_FUNC(RedisModule_SetDisconnectCallback)(RedisModuleBlockedC void REDISMODULE_API_FUNC(RedisModule_SetClusterFlags)(RedisModuleCtx *ctx, uint64_t flags); int REDISMODULE_API_FUNC(RedisModule_ExportSharedAPI)(RedisModuleCtx *ctx, const char *apiname, void *func); void *REDISMODULE_API_FUNC(RedisModule_GetSharedAPI)(RedisModuleCtx *ctx, const char *apiname); -int REDISMODULE_API_FUNC(RedisModule_RegisterCommandFilter)(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc cb); -int REDISMODULE_API_FUNC(RedisModule_CommandFilterArgsCount)(RedisModuleCommandFilterCtx *filter); -const RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CommandFilterArgGet)(RedisModuleCommandFilterCtx *filter, int pos); -int REDISMODULE_API_FUNC(RedisModule_CommandFilterArgInsert)(RedisModuleCommandFilterCtx *filter, int pos, RedisModuleString *arg); -int REDISMODULE_API_FUNC(RedisModule_CommandFilterArgReplace)(RedisModuleCommandFilterCtx *filter, int pos, RedisModuleString *arg); -int REDISMODULE_API_FUNC(RedisModule_CommandFilterArgDelete)(RedisModuleCommandFilterCtx *filter, int pos); +RedisModuleCommandFilter *REDISMODULE_API_FUNC(RedisModule_RegisterCommandFilter)(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc cb); +int REDISMODULE_API_FUNC(RedisModule_UnregisterCommandFilter)(RedisModuleCtx *ctx, RedisModuleCommandFilter *filter); +int REDISMODULE_API_FUNC(RedisModule_CommandFilterArgsCount)(RedisModuleCommandFilterCtx *fctx); +const RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CommandFilterArgGet)(RedisModuleCommandFilterCtx *fctx, int pos); +int REDISMODULE_API_FUNC(RedisModule_CommandFilterArgInsert)(RedisModuleCommandFilterCtx *fctx, int pos, RedisModuleString *arg); +int 
REDISMODULE_API_FUNC(RedisModule_CommandFilterArgReplace)(RedisModuleCommandFilterCtx *fctx, int pos, RedisModuleString *arg); +int REDISMODULE_API_FUNC(RedisModule_CommandFilterArgDelete)(RedisModuleCommandFilterCtx *fctx, int pos); #endif /* This is included inline inside each Redis module. */ @@ -508,6 +510,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(ExportSharedAPI); REDISMODULE_GET_API(GetSharedAPI); REDISMODULE_GET_API(RegisterCommandFilter); + REDISMODULE_GET_API(UnregisterCommandFilter); REDISMODULE_GET_API(CommandFilterArgsCount); REDISMODULE_GET_API(CommandFilterArgGet); REDISMODULE_GET_API(CommandFilterArgInsert); diff --git a/tests/modules/commandfilter.tcl b/tests/modules/commandfilter.tcl index 47d9c302c..8645d8279 100644 --- a/tests/modules/commandfilter.tcl +++ b/tests/modules/commandfilter.tcl @@ -42,4 +42,26 @@ start_server {tags {"modules"}} { r eval "redis.call('hellofilter.ping')" 0 r lrange log-key 0 -1 } "{ping @log}" + + test {Command Filter is unregistered implicitly on module unload} { + r del log-key + r module unload hellofilter + r set mykey @log + r lrange log-key 0 -1 + } {} + + r module load $testmodule log-key-2 + + test {Command Filter unregister works as expected} { + # Validate reloading succeeded + r set mykey @log + assert_equal "{set mykey @log}" [r lrange log-key-2 0 -1] + + # Unregister + r hellofilter.unregister + r del log-key-2 + + r set mykey @log + r lrange log-key-2 0 -1 + } {} } From 4c49e7ad6fbfc7bd75538a30fcdf7ce74bbab050 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 21 Mar 2019 17:18:24 +0100 Subject: [PATCH 033/304] Mostly aesthetic changes to restartAOF(). See #3829. --- src/replication.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/replication.c b/src/replication.c index 59e42e561..c25e7fa6f 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1093,12 +1093,16 @@ void replicationCreateMasterClient(int fd, int dbid) { void restartAOF() { unsigned int tries, max_tries = 10; for (tries = 0; tries < max_tries; ++tries) { - if (tries) sleep(1); if (startAppendOnly() == C_OK) break; - serverLog(LL_WARNING,"Failed enabling the AOF after successful master synchronization! Trying it again in one second."); + serverLog(LL_WARNING, + "Failed enabling the AOF after successful master synchronization! " + "Trying it again in one second."); + sleep(1); } if (tries == max_tries) { - serverLog(LL_WARNING,"FATAL: this replica instance finished the synchronization with its master, but the AOF can't be turned on. Exiting now."); + serverLog(LL_WARNING, + "FATAL: this replica instance finished the synchronization with " + "its master, but the AOF can't be turned on. Exiting now."); exit(1); } } From 7e191d3ea353c34d8f02b44b5afc14fd8b8c6fd1 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 21 Mar 2019 17:21:25 +0100 Subject: [PATCH 034/304] More sensible name for function: restartAOFAfterSYNC(). Related to #3829. 
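The renamed function keeps the counting-up retry loop from the earlier restartAOF fix: the original form, while (retry-- && startAppendOnly() == C_ERR), could never take its fatal-exit branch, because after the last failed attempt retry has already gone negative and if (!retry) is false. A small stand-alone sketch of the two shapes; the always-failing start_append_only_failed() stub stands in for startAppendOnly() and is illustrative only:

    #include <stdio.h>

    /* Stub that always fails, standing in for startAppendOnly() == C_ERR. */
    static int start_append_only_failed(void) { return 1; }

    int main(void) {
        /* Old shape: when every attempt fails, retry ends up at -1,
         * so the "if (!retry)" fatal-exit branch is never taken. */
        int retry = 10;
        while (retry-- && start_append_only_failed()) {
            /* log + sleep(1) in the real code */
        }
        printf("old loop: retry=%d, failure detected: %s\n",
               retry, !retry ? "yes" : "no");

        /* New shape (restartAOFAfterSYNC): count attempts upward and
         * compare against the limit, so exhaustion is detected reliably. */
        unsigned int tries, max_tries = 10;
        for (tries = 0; tries < max_tries; ++tries) {
            if (!start_append_only_failed()) break;
            /* log + sleep(1) in the real code */
        }
        printf("new loop: failure detected: %s\n",
               tries == max_tries ? "yes" : "no");
        return 0;
    }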
--- src/replication.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/replication.c b/src/replication.c index c25e7fa6f..a27c29a3b 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1090,7 +1090,11 @@ void replicationCreateMasterClient(int fd, int dbid) { if (dbid != -1) selectDb(server.master,dbid); } -void restartAOF() { +/* This function will try to re-enable the AOF file after the + * master-replica synchronization: if it fails after multiple attempts + * the replica cannot be considered reliable and exists with an + * error. */ +void restartAOFAfterSYNC() { unsigned int tries, max_tries = 10; for (tries = 0; tries < max_tries; ++tries) { if (startAppendOnly() == C_OK) break; @@ -1289,7 +1293,7 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { cancelReplicationHandshake(); /* Re-enable the AOF if we disabled it earlier, in order to restore * the original configuration. */ - if (aof_is_enabled) restartAOF(); + if (aof_is_enabled) restartAOFAfterSYNC(); return; } /* Final setup of the connected slave <- master link */ @@ -1314,7 +1318,7 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { /* Restart the AOF subsystem now that we finished the sync. This * will trigger an AOF rewrite, and when done will start appending * to the new file. */ - if (aof_is_enabled) restartAOF(); + if (aof_is_enabled) restartAOFAfterSYNC(); } return; From b8568a98fd39ecd9e540d74b79ee97a68b0ce8be Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Thu, 21 Mar 2019 19:45:41 +0200 Subject: [PATCH 035/304] CommandFilter API: fix UnregisterCommandFilter. --- src/module.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/module.c b/src/module.c index ad7bba2eb..a54bd1ad8 100644 --- a/src/module.c +++ b/src/module.c @@ -4887,9 +4887,8 @@ int RM_UnregisterCommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilter *fi listDelNode(moduleCommandFilters,ln); ln = listSearchKey(ctx->module->filters,filter); - if (ln) { - listDelNode(moduleCommandFilters,ln); - } + if (!ln) return REDISMODULE_ERR; /* Shouldn't happen */ + listDelNode(ctx->module->filters,ln); return REDISMODULE_OK; } From 898677d59e6b77a11aced5d68b32da7e94c075fe Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Thu, 21 Mar 2019 19:47:43 +0200 Subject: [PATCH 036/304] CommandFilter API: REDISMODULE_CMDFILTER_NOSELF. Add a flag to automatically protect filters from being called recursively by their own module. --- src/module.c | 28 +++++++++++++++++++++++++--- src/modules/hellofilter.c | 15 +++++++++------ src/redismodule.h | 7 ++++++- tests/modules/commandfilter.tcl | 27 ++++++++++++++++++++++----- 4 files changed, 62 insertions(+), 15 deletions(-) diff --git a/src/module.c b/src/module.c index a54bd1ad8..4ff865d2c 100644 --- a/src/module.c +++ b/src/module.c @@ -50,6 +50,7 @@ struct RedisModule { list *usedby; /* List of modules using APIs from this one. */ list *using; /* List of modules we use some APIs of. */ list *filters; /* List of filters the module has registered. 
*/ + int in_call; /* RM_Call() nesting level */ }; typedef struct RedisModule RedisModule; @@ -283,6 +284,8 @@ typedef struct RedisModuleCommandFilter { RedisModule *module; /* Filter callback function */ RedisModuleCommandFilterFunc callback; + /* REDISMODULE_CMDFILTER_* flags */ + int flags; } RedisModuleCommandFilter; /* Registered filters */ @@ -2756,6 +2759,8 @@ RedisModuleCallReply *RM_Call(RedisModuleCtx *ctx, const char *cmdname, const ch c->db = ctx->client->db; c->argv = argv; c->argc = argc; + if (ctx->module) ctx->module->in_call++; + /* We handle the above format error only when the client is setup so that * we can free it normally. */ if (argv == NULL) goto cleanup; @@ -2822,6 +2827,7 @@ RedisModuleCallReply *RM_Call(RedisModuleCtx *ctx, const char *cmdname, const ch autoMemoryAdd(ctx,REDISMODULE_AM_REPLY,reply); cleanup: + if (ctx->module) ctx->module->in_call--; freeClient(c); return reply; } @@ -4857,17 +4863,27 @@ int moduleUnregisterFilters(RedisModule *module) { * * Note that in the above use case, if `MODULE.SET` itself uses * `RedisModule_Call()` the filter will be applied on that call as well. If - * that is not desired, the module itself is responsible for maintaining a flag - * to identify and avoid this form of re-entrancy. + * that is not desired, the `REDISMODULE_CMDFILTER_NOSELF` flag can be set when + * registering the filter. + * + * The `REDISMODULE_CMDFILTER_NOSELF` flag prevents execution flows that + * originate from the module's own `RM_Call()` from reaching the filter. This + * flag is effective for all execution flows, including nested ones, as long as + * the execution begins from the module's command context or a thread-safe + * context that is associated with a blocking command. + * + * Detached thread-safe contexts are *not* associated with the module and cannot + * be protected by this flag. * * If multiple filters are registered (by the same or different modules), they * are executed in the order of registration. */ -RedisModuleCommandFilter *RM_RegisterCommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc callback) { +RedisModuleCommandFilter *RM_RegisterCommandFilter(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc callback, int flags) { RedisModuleCommandFilter *filter = zmalloc(sizeof(*filter)); filter->module = ctx->module; filter->callback = callback; + filter->flags = flags; listAddNodeTail(moduleCommandFilters, filter); listAddNodeTail(ctx->module->filters, filter); @@ -4908,6 +4924,12 @@ void moduleCallCommandFilters(client *c) { while((ln = listNext(&li))) { RedisModuleCommandFilter *f = ln->value; + /* Skip filter if REDISMODULE_CMDFILTER_NOSELF is set and module is + * currently processing a command. 
+ */ + if ((f->flags & REDISMODULE_CMDFILTER_NOSELF) && f->module->in_call) continue; + + /* Call filter */ f->callback(&filter); } diff --git a/src/modules/hellofilter.c b/src/modules/hellofilter.c index 9cd440df2..448e12983 100644 --- a/src/modules/hellofilter.c +++ b/src/modules/hellofilter.c @@ -8,7 +8,7 @@ static RedisModuleString *log_key_name; static const char log_command_name[] = "hellofilter.log"; static const char ping_command_name[] = "hellofilter.ping"; static const char unregister_command_name[] = "hellofilter.unregister"; -static int in_module = 0; +static int in_log_command = 0; static RedisModuleCommandFilter *filter = NULL; @@ -57,7 +57,7 @@ int HelloFilter_LogCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int ar RedisModule_CloseKey(log); RedisModule_FreeString(ctx, s); - in_module = 1; + in_log_command = 1; size_t cmdlen; const char *cmdname = RedisModule_StringPtrLen(argv[1], &cmdlen); @@ -69,14 +69,14 @@ int HelloFilter_LogCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int ar RedisModule_ReplyWithSimpleString(ctx, "Unknown command or invalid arguments"); } - in_module = 0; + in_log_command = 0; return REDISMODULE_OK; } void HelloFilter_CommandFilter(RedisModuleCommandFilterCtx *filter) { - if (in_module) return; /* don't process our own RM_Call() */ + if (in_log_command) return; /* don't process our own RM_Call() from HelloFilter_LogCommand() */ /* Fun manipulations: * - Remove @delme @@ -120,12 +120,14 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) if (RedisModule_Init(ctx,"hellofilter",1,REDISMODULE_APIVER_1) == REDISMODULE_ERR) return REDISMODULE_ERR; - if (argc != 1) { + if (argc != 2) { RedisModule_Log(ctx, "warning", "Log key name not specified"); return REDISMODULE_ERR; } + long long noself = 0; log_key_name = RedisModule_CreateStringFromString(ctx, argv[0]); + RedisModule_StringToLongLong(argv[1], &noself); if (RedisModule_CreateCommand(ctx,log_command_name, HelloFilter_LogCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) @@ -139,7 +141,8 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) HelloFilter_UnregisterCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) return REDISMODULE_ERR; - if ((filter = RedisModule_RegisterCommandFilter(ctx, HelloFilter_CommandFilter)) + if ((filter = RedisModule_RegisterCommandFilter(ctx, HelloFilter_CommandFilter, + noself ? REDISMODULE_CMDFILTER_NOSELF : 0)) == NULL) return REDISMODULE_ERR; return REDISMODULE_OK; diff --git a/src/redismodule.h b/src/redismodule.h index 37b7d0d59..e567743a4 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -132,6 +132,11 @@ * of timers that are going to expire, sorted by expire time. */ typedef uint64_t RedisModuleTimerID; +/* CommandFilter Flags */ + +/* Do filter RedisModule_Call() commands initiated by module itself. 
*/ +#define REDISMODULE_CMDFILTER_NOSELF (1<<0) + /* ------------------------- End of common defines ------------------------ */ #ifndef REDISMODULE_CORE @@ -340,7 +345,7 @@ void REDISMODULE_API_FUNC(RedisModule_SetDisconnectCallback)(RedisModuleBlockedC void REDISMODULE_API_FUNC(RedisModule_SetClusterFlags)(RedisModuleCtx *ctx, uint64_t flags); int REDISMODULE_API_FUNC(RedisModule_ExportSharedAPI)(RedisModuleCtx *ctx, const char *apiname, void *func); void *REDISMODULE_API_FUNC(RedisModule_GetSharedAPI)(RedisModuleCtx *ctx, const char *apiname); -RedisModuleCommandFilter *REDISMODULE_API_FUNC(RedisModule_RegisterCommandFilter)(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc cb); +RedisModuleCommandFilter *REDISMODULE_API_FUNC(RedisModule_RegisterCommandFilter)(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc cb, int flags); int REDISMODULE_API_FUNC(RedisModule_UnregisterCommandFilter)(RedisModuleCtx *ctx, RedisModuleCommandFilter *filter); int REDISMODULE_API_FUNC(RedisModule_CommandFilterArgsCount)(RedisModuleCommandFilterCtx *fctx); const RedisModuleString *REDISMODULE_API_FUNC(RedisModule_CommandFilterArgGet)(RedisModuleCommandFilterCtx *fctx, int pos); diff --git a/tests/modules/commandfilter.tcl b/tests/modules/commandfilter.tcl index 8645d8279..1e5c41d2b 100644 --- a/tests/modules/commandfilter.tcl +++ b/tests/modules/commandfilter.tcl @@ -1,7 +1,7 @@ set testmodule [file normalize src/modules/hellofilter.so] start_server {tags {"modules"}} { - r module load $testmodule log-key + r module load $testmodule log-key 0 test {Command Filter handles redirected commands} { r set mykey @log @@ -50,18 +50,35 @@ start_server {tags {"modules"}} { r lrange log-key 0 -1 } {} - r module load $testmodule log-key-2 + r module load $testmodule log-key 0 test {Command Filter unregister works as expected} { # Validate reloading succeeded + r del log-key r set mykey @log - assert_equal "{set mykey @log}" [r lrange log-key-2 0 -1] + assert_equal "{set mykey @log}" [r lrange log-key 0 -1] # Unregister r hellofilter.unregister - r del log-key-2 + r del log-key r set mykey @log - r lrange log-key-2 0 -1 + r lrange log-key 0 -1 } {} + + r module unload hellofilter + r module load $testmodule log-key 1 + + test {Command Filter REDISMODULE_CMDFILTER_NOSELF works as expected} { + r set mykey @log + assert_equal "{set mykey @log}" [r lrange log-key 0 -1] + + r del log-key + r hellofilter.ping + assert_equal {} [r lrange log-key 0 -1] + + r eval "redis.call('hellofilter.ping')" 0 + assert_equal {} [r lrange log-key 0 -1] + } + } From eb40ac6c8e1617962fc1b0d8fcc9f82e3ab0227c Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Tue, 21 Mar 2017 07:20:02 -0700 Subject: [PATCH 037/304] diskless fork kept streaming RDB to a disconnected slave --- src/networking.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/networking.c b/src/networking.c index c08f43e6a..09cbff387 100644 --- a/src/networking.c +++ b/src/networking.c @@ -911,6 +911,16 @@ void unlinkClient(client *c) { c->client_list_node = NULL; } + /* In the case of diskless replication the fork is writing to the + * sockets and just closing the fd isn't enough, if we don't also + * shutdown the socket the fork will continue to write to the slave + * and the salve will only find out that it was disconnected when + * it will finish reading the rdb. */ + if ((c->flags & CLIENT_SLAVE) && + (c->replstate == SLAVE_STATE_WAIT_BGSAVE_END)) { + shutdown(c->fd, SHUT_RDWR); + } + /* Unregister async I/O handlers and close the socket. 
*/ aeDeleteFileEvent(server.el,c->fd,AE_READABLE); aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE); From 1a24f23a50095d95beba39536d59d4e777418252 Mon Sep 17 00:00:00 2001 From: Dvir Volk Date: Thu, 21 Mar 2019 20:33:11 +0200 Subject: [PATCH 038/304] Renamed event name from "miss" to "keymiss" --- src/db.c | 8 ++++---- src/modules/testmodule.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/db.c b/src/db.c index afe181281..b537a29a4 100644 --- a/src/db.c +++ b/src/db.c @@ -83,7 +83,7 @@ robj *lookupKey(redisDb *db, robj *key, int flags) { * 1. A key gets expired if it reached it's TTL. * 2. The key last access time is updated. * 3. The global keys hits/misses stats are updated (reported in INFO). - * 4. If keyspace notifications are enabled, a "miss" notification is fired. + * 4. If keyspace notifications are enabled, a "keymiss" notification is fired. * * This API should not be used when we write to the key after obtaining * the object linked to the key, but only for read only operations. @@ -107,7 +107,7 @@ robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) { * to return NULL ASAP. */ if (server.masterhost == NULL) { server.stat_keyspace_misses++; - notifyKeyspaceEvent(NOTIFY_KEY_MISS, "miss", key, db->id); + notifyKeyspaceEvent(NOTIFY_KEY_MISS, "keymiss", key, db->id); return NULL; } @@ -129,14 +129,14 @@ robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) { server.current_client->cmd->flags & CMD_READONLY) { server.stat_keyspace_misses++; - notifyKeyspaceEvent(NOTIFY_KEY_MISS, "miss", key, db->id); + notifyKeyspaceEvent(NOTIFY_KEY_MISS, "keymiss", key, db->id); return NULL; } } val = lookupKey(db,key,flags); if (val == NULL) { server.stat_keyspace_misses++; - notifyKeyspaceEvent(NOTIFY_KEY_MISS, "miss", key, db->id); + notifyKeyspaceEvent(NOTIFY_KEY_MISS, "keymiss", key, db->id); } else server.stat_keyspace_hits++; diff --git a/src/modules/testmodule.c b/src/modules/testmodule.c index af78d21d7..5381380e5 100644 --- a/src/modules/testmodule.c +++ b/src/modules/testmodule.c @@ -188,7 +188,7 @@ int TestNotifications(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { RedisModule_Call(ctx, "LPUSH", "cc", "l", "y"); RedisModule_Call(ctx, "LPUSH", "cc", "l", "y"); - /* Miss some keys intentionally so we will get a "miss" notification. */ + /* Miss some keys intentionally so we will get a "keymiss" notification. */ RedisModule_Call(ctx, "GET", "c", "nosuchkey"); RedisModule_Call(ctx, "SMEMBERS", "c", "nosuchkey"); From 2d4635b483460c18335089141db306a3bc122123 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Sun, 24 Mar 2019 12:00:33 +0200 Subject: [PATCH 039/304] fix: missing initialization. 
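The uninitialized field is the in_call counter that REDISMODULE_CMDFILTER_NOSELF relies on: moduleCallCommandFilters() skips a NOSELF filter whenever its owning module's in_call is non-zero, and since the module struct is allocated with zmalloc() (which does not zero memory), a filter registered with that flag could otherwise be skipped forever. For reference, a minimal sketch of a module using the flag in the MODULE.SET override style described by the RM_RegisterCommandFilter() documentation; the module name "filtdemo", its command, and the use of a NULL context with RedisModule_CreateString() inside the filter (no RedisModuleCtx is available there) are illustrative assumptions, not part of this patch:

    #define REDISMODULE_EXPERIMENTAL_API
    #include "redismodule.h"
    #include <strings.h>

    /* Redirect client SET calls to FILTDEMO.SET. */
    static void RedirectSetFilter(RedisModuleCommandFilterCtx *fctx) {
        size_t len;
        const char *cmd = RedisModule_StringPtrLen(
            RedisModule_CommandFilterArgGet(fctx, 0), &len);
        if (len == 3 && !strncasecmp(cmd, "set", 3))
            RedisModule_CommandFilterArgReplace(fctx, 0,
                RedisModule_CreateString(NULL, "filtdemo.set", 12));
    }

    static int FiltDemo_Set(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
        if (argc != 3) return RedisModule_WrongArity(ctx);
        /* Registered with REDISMODULE_CMDFILTER_NOSELF, so this RM_Call()
         * is not rewritten back to FILTDEMO.SET (in_call > 0 here). */
        RedisModuleCallReply *r = RedisModule_Call(ctx, "set", "ss", argv[1], argv[2]);
        if (r) RedisModule_FreeCallReply(r);
        return RedisModule_ReplyWithSimpleString(ctx, "OK");
    }

    int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
        REDISMODULE_NOT_USED(argv);
        REDISMODULE_NOT_USED(argc);
        if (RedisModule_Init(ctx, "filtdemo", 1, REDISMODULE_APIVER_1) == REDISMODULE_ERR)
            return REDISMODULE_ERR;
        if (RedisModule_CreateCommand(ctx, "filtdemo.set", FiltDemo_Set,
                "write deny-oom", 1, 1, 1) == REDISMODULE_ERR)
            return REDISMODULE_ERR;
        if (RedisModule_RegisterCommandFilter(ctx, RedirectSetFilter,
                REDISMODULE_CMDFILTER_NOSELF) == NULL)
            return REDISMODULE_ERR;
        return REDISMODULE_OK;
    }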
--- src/module.c | 1 + src/modules/hellofilter.c => tests/modules/commandfilter.c | 0 tests/{modules => unit/moduleapi}/commandfilter.tcl | 0 3 files changed, 1 insertion(+) rename src/modules/hellofilter.c => tests/modules/commandfilter.c (100%) rename tests/{modules => unit/moduleapi}/commandfilter.tcl (100%) diff --git a/src/module.c b/src/module.c index ff7f27cdd..f468d4996 100644 --- a/src/module.c +++ b/src/module.c @@ -753,6 +753,7 @@ void RM_SetModuleAttribs(RedisModuleCtx *ctx, const char *name, int ver, int api module->usedby = listCreate(); module->using = listCreate(); module->filters = listCreate(); + module->in_call = 0; ctx->module = module; } diff --git a/src/modules/hellofilter.c b/tests/modules/commandfilter.c similarity index 100% rename from src/modules/hellofilter.c rename to tests/modules/commandfilter.c diff --git a/tests/modules/commandfilter.tcl b/tests/unit/moduleapi/commandfilter.tcl similarity index 100% rename from tests/modules/commandfilter.tcl rename to tests/unit/moduleapi/commandfilter.tcl From a631f66710954a35f4854a98d395e29b898ff6c6 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Sun, 24 Mar 2019 12:03:03 +0200 Subject: [PATCH 040/304] Add runtest-moduleapi with commandfilter coverage. --- runtest-moduleapi | 16 +++++++++++++++ src/modules/Makefile | 7 +------ tests/modules/Makefile | 24 ++++++++++++++++++++++ tests/modules/commandfilter.c | 28 +++++++++++++------------- tests/test_helper.tcl | 1 - tests/unit/moduleapi/commandfilter.tcl | 16 +++++++-------- 6 files changed, 63 insertions(+), 29 deletions(-) create mode 100755 runtest-moduleapi create mode 100644 tests/modules/Makefile diff --git a/runtest-moduleapi b/runtest-moduleapi new file mode 100755 index 000000000..84cdb9bb8 --- /dev/null +++ b/runtest-moduleapi @@ -0,0 +1,16 @@ +#!/bin/sh +TCL_VERSIONS="8.5 8.6" +TCLSH="" + +for VERSION in $TCL_VERSIONS; do + TCL=`which tclsh$VERSION 2>/dev/null` && TCLSH=$TCL +done + +if [ -z $TCLSH ] +then + echo "You need tcl 8.5 or newer in order to run the Redis test" + exit 1 +fi + +make -C tests/modules && \ +$TCLSH tests/test_helper.tcl --single unit/moduleapi/commandfilter "${@}" diff --git a/src/modules/Makefile b/src/modules/Makefile index 537aa0daf..4f6b50f2e 100644 --- a/src/modules/Makefile +++ b/src/modules/Makefile @@ -13,7 +13,7 @@ endif .SUFFIXES: .c .so .xo .o -all: helloworld.so hellotype.so helloblock.so testmodule.so hellocluster.so hellotimer.so hellodict.so hellofilter.so +all: helloworld.so hellotype.so helloblock.so testmodule.so hellocluster.so hellotimer.so hellodict.so .c.xo: $(CC) -I. 
$(CFLAGS) $(SHOBJ_CFLAGS) -fPIC -c $< -o $@ @@ -47,11 +47,6 @@ hellodict.xo: ../redismodule.h hellodict.so: hellodict.xo -hellofilter.xo: ../redismodule.h - -hellofilter.so: hellofilter.xo - $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lc - testmodule.xo: ../redismodule.h testmodule.so: testmodule.xo diff --git a/tests/modules/Makefile b/tests/modules/Makefile new file mode 100644 index 000000000..014d20afa --- /dev/null +++ b/tests/modules/Makefile @@ -0,0 +1,24 @@ + +# find the OS +uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') + +# Compile flags for linux / osx +ifeq ($(uname_S),Linux) + SHOBJ_CFLAGS ?= -W -Wall -fno-common -g -ggdb -std=c99 -O2 + SHOBJ_LDFLAGS ?= -shared +else + SHOBJ_CFLAGS ?= -W -Wall -dynamic -fno-common -g -ggdb -std=c99 -O2 + SHOBJ_LDFLAGS ?= -bundle -undefined dynamic_lookup +endif + +.SUFFIXES: .c .so .xo .o + +all: commandfilter.so + +.c.xo: + $(CC) -I../../src $(CFLAGS) $(SHOBJ_CFLAGS) -fPIC -c $< -o $@ + +commandfilter.xo: ../../src/redismodule.h + +commandfilter.so: commandfilter.xo + $(LD) -o $@ $< $(SHOBJ_LDFLAGS) $(LIBS) -lc diff --git a/tests/modules/commandfilter.c b/tests/modules/commandfilter.c index 448e12983..d25d49c44 100644 --- a/tests/modules/commandfilter.c +++ b/tests/modules/commandfilter.c @@ -1,18 +1,18 @@ #define REDISMODULE_EXPERIMENTAL_API -#include "../redismodule.h" +#include "redismodule.h" #include static RedisModuleString *log_key_name; -static const char log_command_name[] = "hellofilter.log"; -static const char ping_command_name[] = "hellofilter.ping"; -static const char unregister_command_name[] = "hellofilter.unregister"; +static const char log_command_name[] = "commandfilter.log"; +static const char ping_command_name[] = "commandfilter.ping"; +static const char unregister_command_name[] = "commandfilter.unregister"; static int in_log_command = 0; static RedisModuleCommandFilter *filter = NULL; -int HelloFilter_UnregisterCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) +int CommandFilter_UnregisterCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { (void) argc; (void) argv; @@ -23,7 +23,7 @@ int HelloFilter_UnregisterCommand(RedisModuleCtx *ctx, RedisModuleString **argv, return REDISMODULE_OK; } -int HelloFilter_PingCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) +int CommandFilter_PingCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { (void) argc; (void) argv; @@ -39,7 +39,7 @@ int HelloFilter_PingCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int a return REDISMODULE_OK; } -int HelloFilter_LogCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) +int CommandFilter_LogCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { RedisModuleString *s = RedisModule_CreateString(ctx, "", 0); @@ -74,9 +74,9 @@ int HelloFilter_LogCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int ar return REDISMODULE_OK; } -void HelloFilter_CommandFilter(RedisModuleCommandFilterCtx *filter) +void CommandFilter_CommandFilter(RedisModuleCommandFilterCtx *filter) { - if (in_log_command) return; /* don't process our own RM_Call() from HelloFilter_LogCommand() */ + if (in_log_command) return; /* don't process our own RM_Call() from CommandFilter_LogCommand() */ /* Fun manipulations: * - Remove @delme @@ -117,7 +117,7 @@ void HelloFilter_CommandFilter(RedisModuleCommandFilterCtx *filter) } int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { - if (RedisModule_Init(ctx,"hellofilter",1,REDISMODULE_APIVER_1) + if 
(RedisModule_Init(ctx,"commandfilter",1,REDISMODULE_APIVER_1) == REDISMODULE_ERR) return REDISMODULE_ERR; if (argc != 2) { @@ -130,18 +130,18 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) RedisModule_StringToLongLong(argv[1], &noself); if (RedisModule_CreateCommand(ctx,log_command_name, - HelloFilter_LogCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) + CommandFilter_LogCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) return REDISMODULE_ERR; if (RedisModule_CreateCommand(ctx,ping_command_name, - HelloFilter_PingCommand,"deny-oom",1,1,1) == REDISMODULE_ERR) + CommandFilter_PingCommand,"deny-oom",1,1,1) == REDISMODULE_ERR) return REDISMODULE_ERR; if (RedisModule_CreateCommand(ctx,unregister_command_name, - HelloFilter_UnregisterCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) + CommandFilter_UnregisterCommand,"write deny-oom",1,1,1) == REDISMODULE_ERR) return REDISMODULE_ERR; - if ((filter = RedisModule_RegisterCommandFilter(ctx, HelloFilter_CommandFilter, + if ((filter = RedisModule_RegisterCommandFilter(ctx, CommandFilter_CommandFilter, noself ? REDISMODULE_CMDFILTER_NOSELF : 0)) == NULL) return REDISMODULE_ERR; diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index d2f281526..568eacdee 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -63,7 +63,6 @@ set ::all_tests { unit/lazyfree unit/wait unit/pendingquerybuf - modules/commandfilter } # Index to the next test to run in the ::all_tests list. set ::next_test 0 diff --git a/tests/unit/moduleapi/commandfilter.tcl b/tests/unit/moduleapi/commandfilter.tcl index 1e5c41d2b..6078f64f2 100644 --- a/tests/unit/moduleapi/commandfilter.tcl +++ b/tests/unit/moduleapi/commandfilter.tcl @@ -1,4 +1,4 @@ -set testmodule [file normalize src/modules/hellofilter.so] +set testmodule [file normalize tests/modules/commandfilter.so] start_server {tags {"modules"}} { r module load $testmodule log-key 0 @@ -27,7 +27,7 @@ start_server {tags {"modules"}} { test {Command Filter applies on RM_Call() commands} { r del log-key - r hellofilter.ping + r commandfilter.ping r lrange log-key 0 -1 } "{ping @log}" @@ -39,13 +39,13 @@ start_server {tags {"modules"}} { test {Command Filter applies on Lua redis.call() that calls a module} { r del log-key - r eval "redis.call('hellofilter.ping')" 0 + r eval "redis.call('commandfilter.ping')" 0 r lrange log-key 0 -1 } "{ping @log}" test {Command Filter is unregistered implicitly on module unload} { r del log-key - r module unload hellofilter + r module unload commandfilter r set mykey @log r lrange log-key 0 -1 } {} @@ -59,14 +59,14 @@ start_server {tags {"modules"}} { assert_equal "{set mykey @log}" [r lrange log-key 0 -1] # Unregister - r hellofilter.unregister + r commandfilter.unregister r del log-key r set mykey @log r lrange log-key 0 -1 } {} - r module unload hellofilter + r module unload commandfilter r module load $testmodule log-key 1 test {Command Filter REDISMODULE_CMDFILTER_NOSELF works as expected} { @@ -74,10 +74,10 @@ start_server {tags {"modules"}} { assert_equal "{set mykey @log}" [r lrange log-key 0 -1] r del log-key - r hellofilter.ping + r commandfilter.ping assert_equal {} [r lrange log-key 0 -1] - r eval "redis.call('hellofilter.ping')" 0 + r eval "redis.call('commandfilter.ping')" 0 assert_equal {} [r lrange log-key 0 -1] } From 48d14e5aa71b43de0e7747648908b26e34e59ddb Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Sun, 24 Mar 2019 13:10:55 +0200 Subject: [PATCH 041/304] slave corrupts replication stream when module blocked client uses 
large reply (or POSTPONED_ARRAY) when redis appends the blocked client reply list to the real client, it didn't bother to check if it is in fact the master client. so a slave executing that module command will send replies to the master, causing the master to send the slave error responses, which will mess up the replication offset (slave will advance it's replication offset, and the master does not) --- src/module.c | 7 +------ src/networking.c | 13 +++++++++++++ src/server.h | 1 + 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/module.c b/src/module.c index ff7f27cdd..0c8197ac7 100644 --- a/src/module.c +++ b/src/module.c @@ -3747,12 +3747,7 @@ void moduleHandleBlockedClients(void) { * We need to glue such replies to the client output buffer and * free the temporary client we just used for the replies. */ if (c) { - if (bc->reply_client->bufpos) - addReplyProto(c,bc->reply_client->buf, - bc->reply_client->bufpos); - if (listLength(bc->reply_client->reply)) - listJoin(c->reply,bc->reply_client->reply); - c->reply_bytes += bc->reply_client->reply_bytes; + AddReplyFromClient(c, bc->reply_client); } freeClient(bc->reply_client); diff --git a/src/networking.c b/src/networking.c index 09cbff387..7fdd1984d 100644 --- a/src/networking.c +++ b/src/networking.c @@ -744,6 +744,19 @@ void addReplySubcommandSyntaxError(client *c) { sdsfree(cmd); } +/* Append 'src' client output buffers into 'dst' client output buffers. + * This function clears the output buffers of 'src' */ +void AddReplyFromClient(client *dst, client *src) { + if (prepareClientToWrite(dst) != C_OK) + return; + addReplyProto(dst,src->buf, src->bufpos); + if (listLength(src->reply)) + listJoin(dst->reply,src->reply); + dst->reply_bytes += src->reply_bytes; + src->reply_bytes = 0; + src->bufpos = 0; +} + /* Copy 'src' client output buffers into 'dst' client output buffers. * The function takes care of freeing the old output buffers of the * destination client. */ diff --git a/src/server.h b/src/server.h index 95e0355a6..dfd9f7698 100644 --- a/src/server.h +++ b/src/server.h @@ -1529,6 +1529,7 @@ void addReplyNullArray(client *c); void addReplyBool(client *c, int b); void addReplyVerbatim(client *c, const char *s, size_t len, const char *ext); void addReplyProto(client *c, const char *s, size_t len); +void AddReplyFromClient(client *c, client *src); void addReplyBulk(client *c, robj *obj); void addReplyBulkCString(client *c, const char *s); void addReplyBulkCBuffer(client *c, const void *p, size_t len); From 0e00a99f3275355dcd4f8fb5d6e67b421ce40d17 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Thu, 28 Mar 2019 06:38:16 +0000 Subject: [PATCH 042/304] build fix --- src/networking.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/networking.c b/src/networking.c index 09cbff387..02fc44e75 100644 --- a/src/networking.c +++ b/src/networking.c @@ -29,6 +29,7 @@ #include "server.h" #include "atomicvar.h" +#include #include #include #include From 41fc29512c191363058175b9b255d54e0918b60f Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 8 Apr 2019 17:39:22 +0200 Subject: [PATCH 043/304] Fix assert comparison in fetchClusterSlotsConfiguration(). 
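The change below swaps '=' for '==' inside an assert(). It matters more than it looks: an assignment inside assert() evaluates to the assigned constant, so with a non-zero value like REDIS_REPLY_ARRAY the assertion can never fire, and it also clobbers the field it was meant to verify. A minimal standalone sketch of the pitfall (illustrative only, not part of the patch; the constants stand in for hiredis reply types):

    /* assert_pitfall.c - why assert(x = CONST) silently passes.
     * Build: cc -Wall assert_pitfall.c && ./a.out */
    #include <assert.h>
    #include <stdio.h>

    #define REPLY_ARRAY 2   /* stand-in for REDIS_REPLY_ARRAY */
    #define REPLY_ERROR 6   /* stand-in for REDIS_REPLY_ERROR */

    struct reply { int type; };

    int main(void) {
        struct reply r = { .type = REPLY_ERROR };

        /* BUG: assignment, evaluates to 2 (truthy), never aborts,
         * and corrupts r.type as a side effect. */
        assert(r.type = REPLY_ARRAY);
        printf("after buggy assert: type=%d (was %d)\n", r.type, REPLY_ERROR);

        /* Correct: a real comparison. With a mismatching value this
         * aborts and prints the failing expression. */
        r.type = REPLY_ERROR;
        assert(r.type == REPLY_ERROR);
        return 0;
    }

Compilers can flag the pattern (an "assignment used as truth value" style warning), but only when such warnings are enabled and noticed, which is why typos like this tend to survive until the guarded invariant is actually violated.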
--- src/redis-benchmark.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 12e9f7e41..4e2662f21 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -1192,7 +1192,7 @@ static int fetchClusterSlotsConfiguration(client c) { assert(reply->type == REDIS_REPLY_ARRAY); for (i = 0; i < reply->elements; i++) { redisReply *r = reply->element[i]; - assert(r->type = REDIS_REPLY_ARRAY); + assert(r->type == REDIS_REPLY_ARRAY); assert(r->elements >= 3); int from, to, slot; from = r->element[0]->integer; From b364f3fc21ca0130f56eb1ec13ff608062e67b96 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 8 Apr 2019 18:06:50 +0200 Subject: [PATCH 044/304] ACL: regression test for #5998. --- tests/unit/acl.tcl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/unit/acl.tcl b/tests/unit/acl.tcl index 82c75f82d..90f2c9bbf 100644 --- a/tests/unit/acl.tcl +++ b/tests/unit/acl.tcl @@ -108,4 +108,11 @@ start_server {tags {"acl"}} { assert_match {*+debug|segfault*} $cmdstr assert_match {*+acl*} $cmdstr } + + test {ACL regression: memory leaks adding / removing subcommands} { + r AUTH default "" + r ACL setuser newuser reset -debug +debug|a +debug|b +debug|c + r ACL setuser newuser -debug + # The test framework will detect a leak if any. + } } From 1a505a3ba9b44c0cb420b37458a4ee2f1c4f92b4 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 8 Apr 2019 18:08:37 +0200 Subject: [PATCH 045/304] ACL: Fix memory leak in ACLResetSubcommandsForCommand(). This commit fixes bug reported at #5998. Thanks to @tomcat1102. --- src/acl.c | 2 ++ tests/unit/acl.tcl | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/acl.c b/src/acl.c index d9f431f4f..0205e51ad 100644 --- a/src/acl.c +++ b/src/acl.c @@ -542,6 +542,8 @@ struct redisCommand *ACLLookupCommand(const char *name) { * and command ID. */ void ACLResetSubcommandsForCommand(user *u, unsigned long id) { if (u->allowed_subcommands && u->allowed_subcommands[id]) { + for (int i = 0; u->allowed_subcommands[id][i]; i++) + sdsfree(u->allowed_subcommands[id][i]); zfree(u->allowed_subcommands[id]); u->allowed_subcommands[id] = NULL; } diff --git a/tests/unit/acl.tcl b/tests/unit/acl.tcl index 90f2c9bbf..058441433 100644 --- a/tests/unit/acl.tcl +++ b/tests/unit/acl.tcl @@ -109,7 +109,7 @@ start_server {tags {"acl"}} { assert_match {*+acl*} $cmdstr } - test {ACL regression: memory leaks adding / removing subcommands} { + test {ACL #5998 regression: memory leaks adding / removing subcommands} { r AUTH default "" r ACL setuser newuser reset -debug +debug|a +debug|b +debug|c r ACL setuser newuser -debug From 36fff343fb1052c62d1fbdd0c846782118117fb6 Mon Sep 17 00:00:00 2001 From: yongman Date: Tue, 9 Apr 2019 09:24:22 +0800 Subject: [PATCH 046/304] Fix memleak in bitfieldCommand --- src/bitops.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/bitops.c b/src/bitops.c index 8d03a7699..ee1ce0460 100644 --- a/src/bitops.c +++ b/src/bitops.c @@ -994,12 +994,18 @@ void bitfieldCommand(client *c) { /* Lookup for read is ok if key doesn't exit, but errors * if it's not a string. */ o = lookupKeyRead(c->db,c->argv[1]); - if (o != NULL && checkType(c,o,OBJ_STRING)) return; + if (o != NULL && checkType(c,o,OBJ_STRING)) { + zfree(ops); + return; + } } else { /* Lookup by making room up to the farest bit reached by * this operation. 
*/ if ((o = lookupStringForBitCommand(c, - highest_write_offset)) == NULL) return; + highest_write_offset)) == NULL) { + zfree(ops); + return; + } } addReplyArrayLen(c,numops); From f4ae084e94caab308595824c6ee0857fc1cc3d29 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 10 Apr 2019 18:53:27 +0200 Subject: [PATCH 047/304] Aesthetic change to #5962 to conform to Redis style. --- src/module.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/module.c b/src/module.c index 0c8197ac7..1e7c0eca3 100644 --- a/src/module.c +++ b/src/module.c @@ -3746,9 +3746,7 @@ void moduleHandleBlockedClients(void) { * replies to send to the client in a thread safe context. * We need to glue such replies to the client output buffer and * free the temporary client we just used for the replies. */ - if (c) { - AddReplyFromClient(c, bc->reply_client); - } + if (c) AddReplyFromClient(c, bc->reply_client); freeClient(bc->reply_client); if (c != NULL) { From 154914e6540f165363a2979974c7334271e5eee3 Mon Sep 17 00:00:00 2001 From: James Rouzier Date: Thu, 11 Apr 2019 12:19:02 -0400 Subject: [PATCH 048/304] Fix start and end key initialize --- src/t_stream.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/t_stream.c b/src/t_stream.c index f4ace87a2..9e7d3d126 100644 --- a/src/t_stream.c +++ b/src/t_stream.c @@ -492,14 +492,14 @@ void streamIteratorStart(streamIterator *si, stream *s, streamID *start, streamI streamEncodeID(si->start_key,start); } else { si->start_key[0] = 0; - si->start_key[0] = 0; + si->start_key[1] = 0; } if (end) { streamEncodeID(si->end_key,end); } else { si->end_key[0] = UINT64_MAX; - si->end_key[0] = UINT64_MAX; + si->end_key[1] = UINT64_MAX; } /* Seek the correct node in the radix tree. */ From d21b3b9d9e94932e1c4114717a864fcf10387f25 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 15 Apr 2019 16:50:26 +0200 Subject: [PATCH 049/304] Test: disable module testing for now. --- tests/test_helper.tcl | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index d2f281526..568eacdee 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -63,7 +63,6 @@ set ::all_tests { unit/lazyfree unit/wait unit/pendingquerybuf - modules/commandfilter } # Index to the next test to run in the ::all_tests list. set ::next_test 0 From 1b3f0c047a70bbc4d1dc3d6a2aa72badd416baac Mon Sep 17 00:00:00 2001 From: Itamar Haber Date: Tue, 16 Apr 2019 17:15:23 +0300 Subject: [PATCH 050/304] Adds RedisModule_ReplyWithCString Signed-off-by: Itamar Haber --- src/module.c | 11 +++++++++++ src/redismodule.h | 2 ++ 2 files changed, 13 insertions(+) diff --git a/src/module.c b/src/module.c index c29521670..ed4613af6 100644 --- a/src/module.c +++ b/src/module.c @@ -1242,6 +1242,17 @@ int RM_ReplyWithStringBuffer(RedisModuleCtx *ctx, const char *buf, size_t len) { return REDISMODULE_OK; } +/* Reply with a bulk string, taking in input a C buffer pointer that is + * assumed to be null-terminated. + * + * The function always returns REDISMODULE_OK. */ +int RM_ReplyWithCString(RedisModuleCtx *ctx, const char *buf) { + client *c = moduleGetReplyClient(ctx); + if (c == NULL) return REDISMODULE_OK; + addReplyBulkCBuffer(c,(char*)buf,strlen(buf)); + return REDISMODULE_OK; +} + /* Reply with a bulk string, taking in input a RedisModuleString object. * * The function always returns REDISMODULE_OK. 
*/ diff --git a/src/redismodule.h b/src/redismodule.h index 259a5f1db..5c7643dee 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -226,6 +226,7 @@ int REDISMODULE_API_FUNC(RedisModule_ReplyWithSimpleString)(RedisModuleCtx *ctx, int REDISMODULE_API_FUNC(RedisModule_ReplyWithArray)(RedisModuleCtx *ctx, long len); void REDISMODULE_API_FUNC(RedisModule_ReplySetArrayLength)(RedisModuleCtx *ctx, long len); int REDISMODULE_API_FUNC(RedisModule_ReplyWithStringBuffer)(RedisModuleCtx *ctx, const char *buf, size_t len); +int REDISMODULE_API_FUNC(RedisModule_ReplyWithCString)(RedisModuleCtx *ctx, const char *buf); int REDISMODULE_API_FUNC(RedisModule_ReplyWithString)(RedisModuleCtx *ctx, RedisModuleString *str); int REDISMODULE_API_FUNC(RedisModule_ReplyWithNull)(RedisModuleCtx *ctx); int REDISMODULE_API_FUNC(RedisModule_ReplyWithDouble)(RedisModuleCtx *ctx, double d); @@ -376,6 +377,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(ReplyWithArray); REDISMODULE_GET_API(ReplySetArrayLength); REDISMODULE_GET_API(ReplyWithStringBuffer); + REDISMODULE_GET_API(ReplyWithCString); REDISMODULE_GET_API(ReplyWithString); REDISMODULE_GET_API(ReplyWithNull); REDISMODULE_GET_API(ReplyWithCallReply); From 346355edc1f57492fe431bd567487c28c92cbefb Mon Sep 17 00:00:00 2001 From: Itamar Haber Date: Tue, 16 Apr 2019 17:38:33 +0300 Subject: [PATCH 051/304] Uses addReplyBulkCString Signed-off-by: Itamar Haber --- src/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/module.c b/src/module.c index ed4613af6..d46f484c4 100644 --- a/src/module.c +++ b/src/module.c @@ -1249,7 +1249,7 @@ int RM_ReplyWithStringBuffer(RedisModuleCtx *ctx, const char *buf, size_t len) { int RM_ReplyWithCString(RedisModuleCtx *ctx, const char *buf) { client *c = moduleGetReplyClient(ctx); if (c == NULL) return REDISMODULE_OK; - addReplyBulkCBuffer(c,(char*)buf,strlen(buf)); + addReplyBulkCString(c,(char*)buf); return REDISMODULE_OK; } From 971d9ee0aa7eb6463f2d95d10c8ecde9a7e3221c Mon Sep 17 00:00:00 2001 From: Itamar Haber Date: Tue, 16 Apr 2019 22:16:12 +0300 Subject: [PATCH 052/304] Adds a "Modules" section to `INFO` Fixes #6012. As long as "INFO is broken", this should be adequate IMO. Once we rework `INFO`, perhaps into RESP3, this implementation should be revisited. --- src/module.c | 19 +++++++++++++++++++ src/server.c | 7 +++++++ src/server.h | 1 + 3 files changed, 27 insertions(+) diff --git a/src/module.c b/src/module.c index c29521670..60c9a0464 100644 --- a/src/module.c +++ b/src/module.c @@ -5244,6 +5244,25 @@ void addReplyLoadedModules(client *c) { dictReleaseIterator(di); } +/* Helper function for the INFO command: adds loaded modules as to info's + * output. + * + * After the call, the passed sds info string is no longer valid and all the + * references must be substituted with the new pointer returned by the call. */ +sds genModulesInfoString(sds info) { + dictIterator *di = dictGetIterator(modules); + dictEntry *de; + + while ((de = dictNext(di)) != NULL) { + sds name = dictGetKey(de); + struct RedisModule *module = dictGetVal(de); + + info = sdscatprintf(info, "module:name=%s,ver=%d\r\n", name, module->ver); + } + dictReleaseIterator(di); + return info; +} + /* Redis MODULE command. * * MODULE LOAD [args...] 
*/ diff --git a/src/server.c b/src/server.c index fb5d679cd..49a65ef57 100644 --- a/src/server.c +++ b/src/server.c @@ -4291,6 +4291,13 @@ sds genRedisInfoString(char *section) { (long)c_ru.ru_utime.tv_sec, (long)c_ru.ru_utime.tv_usec); } + /* Modules */ + if (allsections || defsections || !strcasecmp(section,"modules")) { + if (sections++) info = sdscat(info,"\r\n"); + info = sdscatprintf(info,"# Modules\r\n"); + info = genModulesInfoString(info); + } + /* Command statistics */ if (allsections || !strcasecmp(section,"commandstats")) { if (sections++) info = sdscat(info,"\r\n"); diff --git a/src/server.h b/src/server.h index dfd9f7698..d832c6465 100644 --- a/src/server.h +++ b/src/server.h @@ -2268,6 +2268,7 @@ void bugReportStart(void); void serverLogObjectDebugInfo(const robj *o); void sigsegvHandler(int sig, siginfo_t *info, void *secret); sds genRedisInfoString(char *section); +sds genModulesInfoString(sds info); void enableWatchdog(int period); void disableWatchdog(void); void watchdogScheduleSignal(int period); From 1e294031345f12cefbc7434ccb13dfe1e3e42de5 Mon Sep 17 00:00:00 2001 From: chendianqiang Date: Wed, 17 Apr 2019 21:20:10 +0800 Subject: [PATCH 053/304] stop ping when client pause --- src/replication.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/replication.c b/src/replication.c index 9175bb420..237103d90 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2579,7 +2579,7 @@ void replicationCron(void) { /* First, send PING according to ping_slave_period. */ if ((replication_cron_loops % server.repl_ping_slave_period) == 0 && - listLength(server.slaves)) + listLength(server.slaves) && !clientsArePaused()) { ping_argv[0] = createStringObject("PING",4); replicationFeedSlaves(server.slaves, server.slaveseldb, From 7bda78088dbdcb7acefe6b51339bb634adaca096 Mon Sep 17 00:00:00 2001 From: git-hulk Date: Tue, 23 Apr 2019 20:08:14 +0800 Subject: [PATCH 054/304] FIX: core dump in redis-benchmark when the `-r` is the last arg --- src/redis-benchmark.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 4e2662f21..2759e6a3c 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -1294,7 +1294,7 @@ int parseOptions(int argc, const char **argv) { if (*p < '0' || *p > '9') goto invalid; } config.randomkeys = 1; - config.randomkeys_keyspacelen = atoi(argv[++i]); + config.randomkeys_keyspacelen = atoi(next); if (config.randomkeys_keyspacelen < 0) config.randomkeys_keyspacelen = 0; } else if (!strcmp(argv[i],"-q")) { From 03c7e580d70c0b2bf026259cec717e08538ea234 Mon Sep 17 00:00:00 2001 From: vattezhang Date: Fri, 26 Apr 2019 18:50:51 +0800 Subject: [PATCH 055/304] update --- src/redis-benchmark.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 7bac6fdd4..4e2662f21 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -418,11 +418,6 @@ static void readHandler(aeEventLoop *el, int fd, void *privdata, int mask) { if (redisBufferRead(c->context) != REDIS_OK) { fprintf(stderr,"Error: %s\n",c->context->errstr); exit(1); - } - else if (NULL != strstr(c->context->reader->buf,"NOAUTH")) - { - fprintf(stderr,"Error: %s\n",c->context->reader->buf); - exit(1); } else { while(c->pending) { if (redisGetReply(c->context,&reply) != REDIS_OK) { From 42bccb59c5c2bd5adc188de6aa3e68bc3601630d Mon Sep 17 00:00:00 2001 From: vattezhang Date: Fri, 26 Apr 2019 19:47:07 +0800 Subject: [PATCH 056/304] fix: benchmark auth fails when server have 
requirepass --- src/redis-benchmark.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 4e2662f21..e4134c9ea 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -254,6 +254,19 @@ static redisConfig *getRedisConfig(const char *ip, int port, else fprintf(stderr,"%s: %s\n",hostsocket,err); goto fail; } + + if(config.auth){ + void *authReply = NULL; + redisAppendCommand(c, "AUTH %s", config.auth); + if (REDIS_OK != redisGetReply(c, &authReply)) goto fail; + if (reply) freeReplyObject(reply); + reply = ((redisReply *) authReply); + if (reply->type == REDIS_REPLY_ERROR) { + fprintf(stderr, "ERROR: %s\n", reply->str); + goto fail; + } + } + redisAppendCommand(c, "CONFIG GET %s", "save"); redisAppendCommand(c, "CONFIG GET %s", "appendonly"); int i = 0; From b19e3e0e4cb8dfe615a5c11330660fd6804279a6 Mon Sep 17 00:00:00 2001 From: abhay Date: Thu, 25 Apr 2019 13:50:25 +0530 Subject: [PATCH 057/304] removed obsolete warning as per - https://github.com/antirez/redis/issues/5291 --- redis.conf | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/redis.conf b/redis.conf index 5ea915905..060510768 100644 --- a/redis.conf +++ b/redis.conf @@ -942,13 +942,7 @@ aof-use-rdb-preamble yes lua-time-limit 5000 ################################ REDIS CLUSTER ############################### -# -# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -# WARNING EXPERIMENTAL: Redis Cluster is considered to be stable code, however -# in order to mark it as "mature" we need to wait for a non trivial percentage -# of users to deploy it in production. -# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -# + # Normal Redis instances can't be part of a Redis Cluster; only nodes that are # started as cluster nodes can. In order to start a Redis instance as a # cluster node enable the cluster support uncommenting the following: From 7ff5c6011de7749eda50b728d26fa7bb56ea7d88 Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Mon, 29 Apr 2019 14:38:28 +0800 Subject: [PATCH 058/304] aof: enhance AOF_FSYNC_EVERYSEC, more details in #5985 --- src/aof.c | 34 +++++++++++++++++++++++++++++++--- src/server.h | 1 + 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/src/aof.c b/src/aof.c index 615eebd01..4744847d2 100644 --- a/src/aof.c +++ b/src/aof.c @@ -197,6 +197,12 @@ ssize_t aofRewriteBufferWrite(int fd) { * AOF file implementation * ------------------------------------------------------------------------- */ +/* Return true if an AOf fsync is currently already in progress in a + * BIO thread. */ +int aofFsyncInProgress(void) { + return bioPendingJobsOfType(BIO_AOF_FSYNC) != 0; +} + /* Starts a background task that performs fsync() against the specified * file descriptor (the one of the AOF file) in another thread. */ void aof_background_fsync(int fd) { @@ -335,10 +341,24 @@ void flushAppendOnlyFile(int force) { int sync_in_progress = 0; mstime_t latency; - if (sdslen(server.aof_buf) == 0) return; + if (sdslen(server.aof_buf) == 0) { + /* Check if we need to do fsync even the aof buffer is empty, + * because previously in AOF_FSYNC_EVERYSEC mode, fsync is + * called only when aof buffer is not empty, so if users + * stop write commands before fsync called in one second, + * the data in page cache cannot be flushed in time. 
*/ + if (server.aof_fsync == AOF_FSYNC_EVERYSEC && + server.aof_fsync_offset != server.aof_current_size && + server.unixtime > server.aof_last_fsync && + !(sync_in_progress = aofFsyncInProgress())) { + goto try_fsync; + } else { + return; + } + } if (server.aof_fsync == AOF_FSYNC_EVERYSEC) - sync_in_progress = bioPendingJobsOfType(BIO_AOF_FSYNC) != 0; + sync_in_progress = aofFsyncInProgress(); if (server.aof_fsync == AOF_FSYNC_EVERYSEC && !force) { /* With this append fsync policy we do background fsyncing. @@ -470,6 +490,7 @@ void flushAppendOnlyFile(int force) { server.aof_buf = sdsempty(); } +try_fsync: /* Don't fsync if no-appendfsync-on-rewrite is set to yes and there are * children doing I/O in the background. */ if (server.aof_no_fsync_on_rewrite && @@ -484,10 +505,14 @@ void flushAppendOnlyFile(int force) { redis_fsync(server.aof_fd); /* Let's try to get this data on the disk */ latencyEndMonitor(latency); latencyAddSampleIfNeeded("aof-fsync-always",latency); + server.aof_fsync_offset = server.aof_current_size; server.aof_last_fsync = server.unixtime; } else if ((server.aof_fsync == AOF_FSYNC_EVERYSEC && server.unixtime > server.aof_last_fsync)) { - if (!sync_in_progress) aof_background_fsync(server.aof_fd); + if (!sync_in_progress) { + aof_background_fsync(server.aof_fd); + server.aof_fsync_offset = server.aof_current_size; + } server.aof_last_fsync = server.unixtime; } } @@ -694,6 +719,7 @@ int loadAppendOnlyFile(char *filename) { * operation is received. */ if (fp && redis_fstat(fileno(fp),&sb) != -1 && sb.st_size == 0) { server.aof_current_size = 0; + server.aof_fsync_offset = server.aof_current_size; fclose(fp); return C_ERR; } @@ -832,6 +858,7 @@ loaded_ok: /* DB loaded, cleanup and return C_OK to the caller. */ stopLoading(); aofUpdateCurrentSize(); server.aof_rewrite_base_size = server.aof_current_size; + server.aof_fsync_offset = server.aof_current_size; return C_OK; readerr: /* Read error. If feof(fp) is true, fall through to unexpected EOF. */ @@ -1741,6 +1768,7 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) { server.aof_selected_db = -1; /* Make sure SELECT is re-issued */ aofUpdateCurrentSize(); server.aof_rewrite_base_size = server.aof_current_size; + server.aof_current_size = server.aof_current_size; /* Clear regular AOF buffer since its contents was just written to * the new AOF from the background rewrite buffer. */ diff --git a/src/server.h b/src/server.h index dfd9f7698..e7f01b2ef 100644 --- a/src/server.h +++ b/src/server.h @@ -1140,6 +1140,7 @@ struct redisServer { off_t aof_rewrite_min_size; /* the AOF file is at least N bytes. */ off_t aof_rewrite_base_size; /* AOF size on latest startup or rewrite. */ off_t aof_current_size; /* AOF current size. */ + off_t aof_fsync_offset; /* AOF offset which is already synced to disk. */ int aof_rewrite_scheduled; /* Rewrite once BGSAVE terminates. */ pid_t aof_child_pid; /* PID if rewriting process */ list *aof_rewrite_buf_blocks; /* Hold changes during an AOF rewrite. */ From c76bb465f2b7a2d5af77040c08128ce9310b2b44 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Sun, 5 May 2019 08:19:52 +0300 Subject: [PATCH 059/304] make replication tests more stable on slow machines solving few replication related tests race conditions which fail on slow machines bugfix in slave buffers test: since the test is executed twice, each time with a different commands count, the threshold for the delta can't be a constant. 
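Both changes follow the same recipe: never assert replication state immediately (or after a fixed sleep), poll for it with a bounded retry loop, and derive numeric thresholds from the workload instead of hard-coding them. A rough C rendering of the retry idiom that the Tcl helper wait_for_condition implements (illustrative only; check(), the retry count and the interval are made up for the example):

    /* wait_demo.c - poll a condition with bounded retries instead of
     * failing on the first look. Build: cc -Wall wait_demo.c && ./a.out */
    #include <stdio.h>
    #include <unistd.h>

    static int wait_for_condition(int (*check)(void), int retries, int interval_ms) {
        while (retries-- > 0) {
            if (check()) return 1;          /* condition reached in time */
            usleep(interval_ms * 1000);     /* let the server converge */
        }
        return 0;                           /* timed out: a real failure */
    }

    /* Stub standing in for "the replica reports state=online in INFO". */
    static int replica_is_online(void) {
        static int polls = 0;
        return ++polls > 3;
    }

    int main(void) {
        if (!wait_for_condition(replica_is_online, 50, 100))
            fprintf(stderr, "Replica not correctly synchronized\n");
        else
            puts("replica online");
        return 0;
    }

The slave-buffers threshold change is the same idea applied to a number: with cmd_count commands in flight, the acceptable unaccounted delta is taken as cmd_count/2 bytes instead of a fixed 50k, so the same test body works for both workload sizes.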
--- tests/integration/psync2.tcl | 5 ++++- tests/integration/replication-psync.tcl | 26 +++++++++++++++++++++++++ tests/unit/maxmemory.tcl | 7 ++++--- 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/tests/integration/psync2.tcl b/tests/integration/psync2.tcl index 8663d6fcc..d1212b640 100644 --- a/tests/integration/psync2.tcl +++ b/tests/integration/psync2.tcl @@ -166,12 +166,15 @@ start_server {} { # Pick a random slave set slave_id [expr {($master_id+1)%5}] set sync_count [status $R($master_id) sync_full] + set sync_partial [status $R($master_id) sync_partial_ok] catch { $R($slave_id) config rewrite $R($slave_id) debug restart } + # note: just waiting for connected_slaves==4 has a race condition since + # we might do the check before the master realized that the slave disconnected wait_for_condition 50 1000 { - [status $R($master_id) connected_slaves] == 4 + [status $R($master_id) sync_partial_ok] == $sync_partial + 1 } else { fail "Replica not reconnecting" } diff --git a/tests/integration/replication-psync.tcl b/tests/integration/replication-psync.tcl index a3bce2a4c..bf8682446 100644 --- a/tests/integration/replication-psync.tcl +++ b/tests/integration/replication-psync.tcl @@ -79,6 +79,32 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond diskless rec stop_bg_complex_data $load_handle0 stop_bg_complex_data $load_handle1 stop_bg_complex_data $load_handle2 + + # Wait for the slave to reach the "online" + # state from the POV of the master. + set retry 5000 + while {$retry} { + set info [$master info] + if {[string match {*slave0:*state=online*} $info]} { + break + } else { + incr retry -1 + after 100 + } + } + if {$retry == 0} { + error "assertion:Slave not correctly synchronized" + } + + # Wait that slave acknowledge it is online so + # we are sure that DBSIZE and DEBUG DIGEST will not + # fail because of timing issues. 
(-LOADING error) + wait_for_condition 5000 100 { + [lindex [$slave role] 3] eq {connected} + } else { + fail "Slave still not connected after some time" + } + set retry 10 while {$retry && ([$master debug digest] ne [$slave debug digest])}\ { diff --git a/tests/unit/maxmemory.tcl b/tests/unit/maxmemory.tcl index 1def57af5..0f64ddc18 100644 --- a/tests/unit/maxmemory.tcl +++ b/tests/unit/maxmemory.tcl @@ -161,7 +161,7 @@ proc test_slave_buffers {test_name cmd_count payload_len limit_memory pipeline} } # make sure master doesn't disconnect slave because of timeout - $master config set repl-timeout 300 ;# 5 minutes + $master config set repl-timeout 1200 ;# 20 minutes (for valgrind and slow machines) $master config set maxmemory-policy allkeys-random $master config set client-output-buffer-limit "replica 100000000 100000000 300" $master config set repl-backlog-size [expr {10*1024}] @@ -212,7 +212,8 @@ proc test_slave_buffers {test_name cmd_count payload_len limit_memory pipeline} assert {[$master dbsize] == 100} assert {$slave_buf > 2*1024*1024} ;# some of the data may have been pushed to the OS buffers - assert {$delta < 50*1024 && $delta > -50*1024} ;# 1 byte unaccounted for, with 1M commands will consume some 1MB + set delta_max [expr {$cmd_count / 2}] ;# 1 byte unaccounted for, with 1M commands will consume some 1MB + assert {$delta < $delta_max && $delta > -$delta_max} $master client kill type slave set killed_used [s -1 used_memory] @@ -221,7 +222,7 @@ proc test_slave_buffers {test_name cmd_count payload_len limit_memory pipeline} set killed_used_no_repl [expr {$killed_used - $killed_mem_not_counted_for_evict}] set delta_no_repl [expr {$killed_used_no_repl - $used_no_repl}] assert {$killed_slave_buf == 0} - assert {$delta_no_repl > -50*1024 && $delta_no_repl < 50*1024} ;# 1 byte unaccounted for, with 1M commands will consume some 1MB + assert {$delta_no_repl > -$delta_max && $delta_no_repl < $delta_max} } # unfreeze slave process (after the 'test' succeeded or failed, but before we attempt to terminate the server From 7653918c7a265ddb2581c23f035fb97ffb4692e0 Mon Sep 17 00:00:00 2001 From: Yossi Gottlieb Date: Sun, 5 May 2019 20:32:53 +0300 Subject: [PATCH 060/304] Preserve client->id for blocked clients. --- src/module.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/module.c b/src/module.c index c29521670..7dee7e776 100644 --- a/src/module.c +++ b/src/module.c @@ -3866,7 +3866,10 @@ RedisModuleCtx *RM_GetThreadSafeContext(RedisModuleBlockedClient *bc) { * in order to keep things like the currently selected database and similar * things. */ ctx->client = createClient(-1); - if (bc) selectDb(ctx->client,bc->dbid); + if (bc) { + selectDb(ctx->client,bc->dbid); + ctx->client->id = bc->client->id; + } return ctx; } From 0bf3acce381acbf322629d6e00b6815ce449b51d Mon Sep 17 00:00:00 2001 From: WuYunlong Date: Mon, 6 May 2019 11:46:07 +0800 Subject: [PATCH 061/304] Do not active expire keys in the background when the switch is off. --- src/server.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/server.c b/src/server.c index fb5d679cd..eaa7172a7 100644 --- a/src/server.c +++ b/src/server.c @@ -1674,10 +1674,12 @@ void clientsCron(void) { void databasesCron(void) { /* Expire keys by random sampling. Not required for slaves * as master will synthesize DELs for us. 
*/ - if (server.active_expire_enabled && server.masterhost == NULL) { - activeExpireCycle(ACTIVE_EXPIRE_CYCLE_SLOW); - } else if (server.masterhost != NULL) { - expireSlaveKeys(); + if (server.active_expire_enabled) { + if (server.masterhost == NULL) { + activeExpireCycle(ACTIVE_EXPIRE_CYCLE_SLOW); + } else { + expireSlaveKeys(); + } } /* Defrag keys gradually. */ From e374c6eb5503a5eb20f8ed7b5c36fa2549824a4a Mon Sep 17 00:00:00 2001 From: liaotonglang Date: Mon, 6 May 2019 17:15:49 +0800 Subject: [PATCH 062/304] delete sdsTest() from REDIS_TEST sdsTest() defined in sds.c dit not match the call in server.c. remove it from REDIS_TEST, since test-sds defined in Makefile. --- src/server.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/server.c b/src/server.c index fb5d679cd..674eef206 100644 --- a/src/server.c +++ b/src/server.c @@ -4718,8 +4718,6 @@ int main(int argc, char **argv) { return sha1Test(argc, argv); } else if (!strcasecmp(argv[2], "util")) { return utilTest(argc, argv); - } else if (!strcasecmp(argv[2], "sds")) { - return sdsTest(argc, argv); } else if (!strcasecmp(argv[2], "endianconv")) { return endianconvTest(argc, argv); } else if (!strcasecmp(argv[2], "crc64")) { From 62261aa905583015cf30c47bdaabbd637d1da7ac Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 24 Oct 2017 08:35:05 +0200 Subject: [PATCH 063/304] Threaded IO: implement handleClientsWithPendingWritesUsingThreads(). This is just an experiment for now, there are a couple of race conditions, mostly harmless for the performance gain experiment that this commit represents so far. The general idea here is to take Redis single threaded and instead fan-out on expansive kernel calls: write(2) in this case, but the same concept could be easily implemented for read(2) and protcol parsing. However just threading writes like in this commit, is enough to evaluate if the approach is sounding. --- src/networking.c | 156 +++++++++++++++++++++++++++++++++++++++++++++-- src/server.c | 11 ++-- src/server.h | 4 ++ 3 files changed, 162 insertions(+), 9 deletions(-) diff --git a/src/networking.c b/src/networking.c index ffb435625..3958e4f5e 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1065,9 +1065,17 @@ void freeClient(client *c) { * a context where calling freeClient() is not possible, because the client * should be valid for the continuation of the flow of the program. */ void freeClientAsync(client *c) { + /* We need to handle concurrent access to the server.clients_to_close list + * only in the freeClientAsync() function, since it's the only function that + * may access the list while Redis uses I/O threads. All the other accesses + * are in the context of the main thread while the other threads are + * idle. */ + static pthread_mutex_t async_free_queue_mutex = PTHREAD_MUTEX_INITIALIZER; if (c->flags & CLIENT_CLOSE_ASAP || c->flags & CLIENT_LUA) return; c->flags |= CLIENT_CLOSE_ASAP; + pthread_mutex_lock(&async_free_queue_mutex); listAddNodeTail(server.clients_to_close,c); + pthread_mutex_unlock(&async_free_queue_mutex); } void freeClientsInAsyncFreeQueue(void) { @@ -1091,7 +1099,12 @@ client *lookupClientByID(uint64_t id) { } /* Write data in output buffers to client. Return C_OK if the client - * is still valid after the call, C_ERR if it was freed. */ + * is still valid after the call, C_ERR if it was freed because of some + * error. + * + * This function is called by threads, but always with handler_installed + * set to 0. So when handler_installed is set to 0 the function must be + * thread safe. 
*/ int writeToClient(int fd, client *c, int handler_installed) { ssize_t nwritten = 0, totwritten = 0; size_t objlen; @@ -1153,14 +1166,15 @@ int writeToClient(int fd, client *c, int handler_installed) { zmalloc_used_memory() < server.maxmemory) && !(c->flags & CLIENT_SLAVE)) break; } + /* FIXME: Fixme, use atomic var for this. */ server.stat_net_output_bytes += totwritten; if (nwritten == -1) { if (errno == EAGAIN) { nwritten = 0; } else { - serverLog(LL_VERBOSE, - "Error writing to client: %s", strerror(errno)); - freeClient(c); + // serverLog(LL_VERBOSE, + // "Error writing to client: %s", strerror(errno)); + freeClientAsync(c); return C_ERR; } } @@ -1173,11 +1187,15 @@ int writeToClient(int fd, client *c, int handler_installed) { } if (!clientHasPendingReplies(c)) { c->sentlen = 0; + /* Note that writeToClient() is called in a threaded way, but + * adDeleteFileEvent() is not thread safe: however writeToClient() + * is always called with handler_installed set to 0 from threads + * so we are fine. */ if (handler_installed) aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE); /* Close connection after entire reply has been sent. */ if (c->flags & CLIENT_CLOSE_AFTER_REPLY) { - freeClient(c); + freeClientAsync(c); return C_ERR; } } @@ -2452,3 +2470,131 @@ int processEventsWhileBlocked(void) { } return count; } + +/* ============================================================================= + * Threaded I/O + * =========================================================================== */ + +#define SERVER_MAX_IO_THREADS 32 + +pthread_t io_threads[SERVER_MAX_IO_THREADS]; +pthread_mutex_t io_threads_done_mutex = PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t io_threads_done_cond = PTHREAD_COND_INITIALIZER; +pthread_mutex_t io_threads_idle_mutex = PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t io_threads_idle_cond = PTHREAD_COND_INITIALIZER; +pthread_cond_t io_threads_start_cond = PTHREAD_COND_INITIALIZER; +int io_threads_done = 0; /* Number of threads that completed the work. */ +int io_threads_idle = 0; /* Number of threads in idle state ready to go. */ +list *io_threads_list[SERVER_MAX_IO_THREADS]; + +void *IOThreadMain(void *myid) { + /* The ID is the thread number (from 0 to server.iothreads_num-1), and is + * used by the thread to just manipulate a single sub-array of clients. */ + long id = (unsigned long)myid; + + while(1) { + /* ... Wait for start ... */ + pthread_mutex_lock(&io_threads_idle_mutex); + io_threads_idle++; + pthread_cond_signal(&io_threads_idle_cond); + printf("[%ld] Waiting start...\n", id); + pthread_cond_wait(&io_threads_start_cond,&io_threads_idle_mutex); + printf("[%ld] Started\n", id); + pthread_mutex_unlock(&io_threads_idle_mutex); + printf("%d to handle\n", (int)listLength(io_threads_list[id])); + + /* ... Process ... */ + listIter li; + listNode *ln; + listRewind(io_threads_list[id],&li); + while((ln = listNext(&li))) { + client *c = listNodeValue(ln); + writeToClient(c->fd,c,0); + } + listEmpty(io_threads_list[id]); + + /* Report success. */ + pthread_mutex_lock(&io_threads_done_mutex); + io_threads_done++; + pthread_cond_signal(&io_threads_done_cond); + pthread_mutex_unlock(&io_threads_done_mutex); + printf("[%ld] Done\n", id); + } +} + +/* Initialize the data structures needed for threaded I/O. 
*/ +void initThreadedIO(void) { + pthread_t tid; + + server.io_threads_num = 4; + for (int i = 0; i < server.io_threads_num; i++) { + if (pthread_create(&tid,NULL,IOThreadMain,(void*)(long)i) != 0) { + serverLog(LL_WARNING,"Fatal: Can't initialize IO thread."); + exit(1); + } + io_threads[i] = tid; + io_threads_list[i] = listCreate(); + } +} + +int handleClientsWithPendingWritesUsingThreads(void) { + int processed = listLength(server.clients_pending_write); + if (processed == 0) return 0; /* Return ASAP if there are no clients. */ + + printf("%d TOTAL\n", processed); + + /* Wait for all threads to be ready. */ + pthread_mutex_lock(&io_threads_idle_mutex); + while(io_threads_idle < server.io_threads_num) { + pthread_cond_wait(&io_threads_idle_cond,&io_threads_idle_mutex); + } + printf("All threads are idle: %d\n", io_threads_idle); + io_threads_idle = 0; + pthread_mutex_unlock(&io_threads_idle_mutex); + + /* Distribute the clients across N different lists. */ + listIter li; + listNode *ln; + listRewind(server.clients_pending_write,&li); + int item_id = 0; + while((ln = listNext(&li))) { + client *c = listNodeValue(ln); + c->flags &= ~CLIENT_PENDING_WRITE; + int target_id = item_id % server.io_threads_num; + listAddNodeTail(io_threads_list[target_id],c); + item_id++; + } + + /* Start all threads. */ + printf("Send start condition\n"); + pthread_mutex_lock(&io_threads_done_mutex); + io_threads_done = 0; + pthread_cond_broadcast(&io_threads_start_cond); + pthread_mutex_unlock(&io_threads_done_mutex); + + /* Wait for all threads to end their work. */ + pthread_mutex_lock(&io_threads_done_mutex); + while(io_threads_done < server.io_threads_num) { + pthread_cond_wait(&io_threads_done_cond,&io_threads_done_mutex); + } + pthread_mutex_unlock(&io_threads_done_mutex); + printf("All threads finshed\n"); + + /* Run the list of clients again to install the write handler where + * needed. */ + listRewind(server.clients_pending_write,&li); + while((ln = listNext(&li))) { + client *c = listNodeValue(ln); + + /* Install the write handler if there are pending writes in some + * of the clients. */ + if (clientHasPendingReplies(c) && + aeCreateFileEvent(server.el, c->fd, AE_WRITABLE, + sendReplyToClient, c) == AE_ERR) + { + freeClientAsync(c); + } + } + listEmpty(server.clients_pending_write); + return processed; +} diff --git a/src/server.c b/src/server.c index fb5d679cd..c437880d5 100644 --- a/src/server.c +++ b/src/server.c @@ -1981,9 +1981,6 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { flushAppendOnlyFile(0); } - /* Close clients that need to be closed asynchronous */ - freeClientsInAsyncFreeQueue(); - /* Clear the paused clients flag if needed. */ clientsArePaused(); /* Don't check return value, just use the side effect.*/ @@ -2075,7 +2072,12 @@ void beforeSleep(struct aeEventLoop *eventLoop) { flushAppendOnlyFile(0); /* Handle writes with pending output buffers. */ - handleClientsWithPendingWrites(); + /* XXX: Put a condition based on number of waiting clients: if we + * have less than a given number of clients, use non threaded code. */ + handleClientsWithPendingWritesUsingThreads(); + + /* Close clients that need to be closed asynchronous */ + freeClientsInAsyncFreeQueue(); /* Before we are going to sleep, let the threads access the dataset by * releasing the GIL. 
Redis main thread will not touch anything at this @@ -2861,6 +2863,7 @@ void initServer(void) { slowlogInit(); latencyMonitorInit(); bioInit(); + initThreadedIO(); server.initial_memory_usage = zmalloc_used_memory(); } diff --git a/src/server.h b/src/server.h index dfd9f7698..d2a563c96 100644 --- a/src/server.h +++ b/src/server.h @@ -1062,6 +1062,8 @@ struct redisServer { int protected_mode; /* Don't accept external connections. */ int gopher_enabled; /* If true the server will reply to gopher queries. Will still serve RESP2 queries. */ + int io_threads_num; /* Number of IO threads to use. */ + /* RDB / AOF loading information */ int loading; /* We are loading data from disk if true */ off_t loading_total_bytes; @@ -1576,12 +1578,14 @@ void pauseClients(mstime_t duration); int clientsArePaused(void); int processEventsWhileBlocked(void); int handleClientsWithPendingWrites(void); +int handleClientsWithPendingWritesUsingThreads(void); int clientHasPendingReplies(client *c); void unlinkClient(client *c); int writeToClient(int fd, client *c, int handler_installed); void linkClient(client *c); void protectClient(client *c); void unprotectClient(client *c); +void initThreadedIO(void); #ifdef __GNUC__ void addReplyErrorFormat(client *c, const char *fmt, ...) From c1cb2ae69532671c1372cf76be5bb25b07f95e24 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 25 Mar 2019 12:16:13 +0100 Subject: [PATCH 064/304] Threaded IO: allow to disable debug printf. --- src/networking.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/networking.c b/src/networking.c index 3958e4f5e..5dfaaf8ba 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2471,9 +2471,11 @@ int processEventsWhileBlocked(void) { return count; } -/* ============================================================================= +/* ========================================================================== * Threaded I/O - * =========================================================================== */ + * ========================================================================== */ + +int tio_debug = 0; #define SERVER_MAX_IO_THREADS 32 @@ -2497,11 +2499,11 @@ void *IOThreadMain(void *myid) { pthread_mutex_lock(&io_threads_idle_mutex); io_threads_idle++; pthread_cond_signal(&io_threads_idle_cond); - printf("[%ld] Waiting start...\n", id); + if (tio_debug) printf("[%ld] Waiting start...\n", id); pthread_cond_wait(&io_threads_start_cond,&io_threads_idle_mutex); - printf("[%ld] Started\n", id); + if (tio_debug) printf("[%ld] Started\n", id); pthread_mutex_unlock(&io_threads_idle_mutex); - printf("%d to handle\n", (int)listLength(io_threads_list[id])); + if (tio_debug) printf("%d to handle\n", (int)listLength(io_threads_list[id])); /* ... Process ... */ listIter li; @@ -2518,7 +2520,7 @@ void *IOThreadMain(void *myid) { io_threads_done++; pthread_cond_signal(&io_threads_done_cond); pthread_mutex_unlock(&io_threads_done_mutex); - printf("[%ld] Done\n", id); + if (tio_debug) printf("[%ld] Done\n", id); } } @@ -2541,14 +2543,14 @@ int handleClientsWithPendingWritesUsingThreads(void) { int processed = listLength(server.clients_pending_write); if (processed == 0) return 0; /* Return ASAP if there are no clients. */ - printf("%d TOTAL\n", processed); + if (tio_debug) printf("%d TOTAL\n", processed); /* Wait for all threads to be ready. 
*/ pthread_mutex_lock(&io_threads_idle_mutex); while(io_threads_idle < server.io_threads_num) { pthread_cond_wait(&io_threads_idle_cond,&io_threads_idle_mutex); } - printf("All threads are idle: %d\n", io_threads_idle); + if (tio_debug) printf("All threads are idle: %d\n", io_threads_idle); io_threads_idle = 0; pthread_mutex_unlock(&io_threads_idle_mutex); @@ -2566,7 +2568,7 @@ int handleClientsWithPendingWritesUsingThreads(void) { } /* Start all threads. */ - printf("Send start condition\n"); + if (tio_debug) printf("Send start condition\n"); pthread_mutex_lock(&io_threads_done_mutex); io_threads_done = 0; pthread_cond_broadcast(&io_threads_start_cond); @@ -2578,7 +2580,7 @@ int handleClientsWithPendingWritesUsingThreads(void) { pthread_cond_wait(&io_threads_done_cond,&io_threads_done_mutex); } pthread_mutex_unlock(&io_threads_done_mutex); - printf("All threads finshed\n"); + if (tio_debug) printf("All threads finshed\n"); /* Run the list of clients again to install the write handler where * needed. */ From df0a28f661380bd0bad90e8d9fa8eccedcb1d57d Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 25 Mar 2019 12:56:48 +0100 Subject: [PATCH 065/304] Threaded IO: second attempt without signaling conditions. --- src/networking.c | 104 +++++++++++++++++++++++++---------------------- src/server.c | 2 - 2 files changed, 55 insertions(+), 51 deletions(-) diff --git a/src/networking.c b/src/networking.c index 5dfaaf8ba..cd241dac2 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2480,13 +2480,9 @@ int tio_debug = 0; #define SERVER_MAX_IO_THREADS 32 pthread_t io_threads[SERVER_MAX_IO_THREADS]; -pthread_mutex_t io_threads_done_mutex = PTHREAD_MUTEX_INITIALIZER; -pthread_cond_t io_threads_done_cond = PTHREAD_COND_INITIALIZER; -pthread_mutex_t io_threads_idle_mutex = PTHREAD_MUTEX_INITIALIZER; -pthread_cond_t io_threads_idle_cond = PTHREAD_COND_INITIALIZER; -pthread_cond_t io_threads_start_cond = PTHREAD_COND_INITIALIZER; -int io_threads_done = 0; /* Number of threads that completed the work. */ -int io_threads_idle = 0; /* Number of threads in idle state ready to go. */ +pthread_mutex_t io_threads_mutex[SERVER_MAX_IO_THREADS]; +_Atomic unsigned long io_threads_pending[SERVER_MAX_IO_THREADS]; +int io_threads_active; list *io_threads_list[SERVER_MAX_IO_THREADS]; void *IOThreadMain(void *myid) { @@ -2496,30 +2492,23 @@ void *IOThreadMain(void *myid) { while(1) { /* ... Wait for start ... */ - pthread_mutex_lock(&io_threads_idle_mutex); - io_threads_idle++; - pthread_cond_signal(&io_threads_idle_cond); - if (tio_debug) printf("[%ld] Waiting start...\n", id); - pthread_cond_wait(&io_threads_start_cond,&io_threads_idle_mutex); - if (tio_debug) printf("[%ld] Started\n", id); - pthread_mutex_unlock(&io_threads_idle_mutex); - if (tio_debug) printf("%d to handle\n", (int)listLength(io_threads_list[id])); + pthread_mutex_lock(&io_threads_mutex[id]); + if (io_threads_pending[id]) { + if (tio_debug) printf("[%ld] %d to handle\n", id, (int)listLength(io_threads_list[id])); - /* ... Process ... */ - listIter li; - listNode *ln; - listRewind(io_threads_list[id],&li); - while((ln = listNext(&li))) { - client *c = listNodeValue(ln); - writeToClient(c->fd,c,0); + /* ... Process ... */ + listIter li; + listNode *ln; + listRewind(io_threads_list[id],&li); + while((ln = listNext(&li))) { + client *c = listNodeValue(ln); + writeToClient(c->fd,c,0); + io_threads_pending[id]--; + } + listEmpty(io_threads_list[id]); } - listEmpty(io_threads_list[id]); - /* Report success. 
*/ - pthread_mutex_lock(&io_threads_done_mutex); - io_threads_done++; - pthread_cond_signal(&io_threads_done_cond); - pthread_mutex_unlock(&io_threads_done_mutex); + pthread_mutex_unlock(&io_threads_mutex[id]); if (tio_debug) printf("[%ld] Done\n", id); } } @@ -2529,30 +2518,50 @@ void initThreadedIO(void) { pthread_t tid; server.io_threads_num = 4; + io_threads_active = 0; /* We start with threads not active. */ for (int i = 0; i < server.io_threads_num; i++) { + pthread_mutex_init(&io_threads_mutex[i],NULL); + io_threads_pending[i] = 0; + io_threads_list[i] = listCreate(); + pthread_mutex_lock(&io_threads_mutex[i]); /* Thread will be stopped. */ if (pthread_create(&tid,NULL,IOThreadMain,(void*)(long)i) != 0) { serverLog(LL_WARNING,"Fatal: Can't initialize IO thread."); exit(1); } io_threads[i] = tid; - io_threads_list[i] = listCreate(); } } +void startThreadedIO(void) { + if (tio_debug) printf("--- STARTING THREADED IO ---\n"); + serverAssert(io_threads_active == 0); + for (int j = 0; j < server.io_threads_num; j++) + pthread_mutex_unlock(&io_threads_mutex[j]); + io_threads_active = 1; +} + +void stopThreadedIO(void) { + if (tio_debug) printf("--- STOPPING THREADED IO ---\n"); + serverAssert(io_threads_active == 1); + for (int j = 0; j < server.io_threads_num; j++) + pthread_mutex_lock(&io_threads_mutex[j]); + io_threads_active = 0; +} + int handleClientsWithPendingWritesUsingThreads(void) { int processed = listLength(server.clients_pending_write); if (processed == 0) return 0; /* Return ASAP if there are no clients. */ - if (tio_debug) printf("%d TOTAL\n", processed); - - /* Wait for all threads to be ready. */ - pthread_mutex_lock(&io_threads_idle_mutex); - while(io_threads_idle < server.io_threads_num) { - pthread_cond_wait(&io_threads_idle_cond,&io_threads_idle_mutex); + /* If we have just a few clients to serve, don't use I/O threads, but the + * boring synchronous code. */ + if (processed < (server.io_threads_num*2)) { + if (io_threads_active) stopThreadedIO(); + return handleClientsWithPendingWrites(); + } else { + if (!io_threads_active) startThreadedIO(); } - if (tio_debug) printf("All threads are idle: %d\n", io_threads_idle); - io_threads_idle = 0; - pthread_mutex_unlock(&io_threads_idle_mutex); + + if (tio_debug) printf("%d TOTAL pending clients\n", processed); /* Distribute the clients across N different lists. */ listIter li; @@ -2563,23 +2572,20 @@ int handleClientsWithPendingWritesUsingThreads(void) { client *c = listNodeValue(ln); c->flags &= ~CLIENT_PENDING_WRITE; int target_id = item_id % server.io_threads_num; + pthread_mutex_lock(&io_threads_mutex[target_id]); listAddNodeTail(io_threads_list[target_id],c); + io_threads_pending[target_id]++; + pthread_mutex_unlock(&io_threads_mutex[target_id]); item_id++; } - /* Start all threads. */ - if (tio_debug) printf("Send start condition\n"); - pthread_mutex_lock(&io_threads_done_mutex); - io_threads_done = 0; - pthread_cond_broadcast(&io_threads_start_cond); - pthread_mutex_unlock(&io_threads_done_mutex); - /* Wait for all threads to end their work. 
*/ - pthread_mutex_lock(&io_threads_done_mutex); - while(io_threads_done < server.io_threads_num) { - pthread_cond_wait(&io_threads_done_cond,&io_threads_done_mutex); + while(1) { + unsigned long pending = 0; + for (int j = 0; j < server.io_threads_num; j++) + pending += io_threads_pending[j]; + if (pending == 0) break; } - pthread_mutex_unlock(&io_threads_done_mutex); if (tio_debug) printf("All threads finshed\n"); /* Run the list of clients again to install the write handler where diff --git a/src/server.c b/src/server.c index c437880d5..de5a814d0 100644 --- a/src/server.c +++ b/src/server.c @@ -2072,8 +2072,6 @@ void beforeSleep(struct aeEventLoop *eventLoop) { flushAppendOnlyFile(0); /* Handle writes with pending output buffers. */ - /* XXX: Put a condition based on number of waiting clients: if we - * have less than a given number of clients, use non threaded code. */ handleClientsWithPendingWritesUsingThreads(); /* Close clients that need to be closed asynchronous */ From 444b5f2af5f448f35a459bf0619b9168d3897aa7 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 25 Mar 2019 16:33:23 +0100 Subject: [PATCH 066/304] Threaded IO: 3rd version: use the mutex only to stop the thread. --- src/networking.c | 52 ++++++++++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/src/networking.c b/src/networking.c index cd241dac2..17d6b1866 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2491,24 +2491,34 @@ void *IOThreadMain(void *myid) { long id = (unsigned long)myid; while(1) { - /* ... Wait for start ... */ - pthread_mutex_lock(&io_threads_mutex[id]); - if (io_threads_pending[id]) { - if (tio_debug) printf("[%ld] %d to handle\n", id, (int)listLength(io_threads_list[id])); - - /* ... Process ... */ - listIter li; - listNode *ln; - listRewind(io_threads_list[id],&li); - while((ln = listNext(&li))) { - client *c = listNodeValue(ln); - writeToClient(c->fd,c,0); - io_threads_pending[id]--; - } - listEmpty(io_threads_list[id]); + /* Wait for start */ + for (int j = 0; j < 1000000; j++) { + if (io_threads_pending[id] != 0) break; } - pthread_mutex_unlock(&io_threads_mutex[id]); + /* Give the main thread a chance to stop this thread. */ + if (io_threads_pending[id] == 0) { + pthread_mutex_lock(&io_threads_mutex[id]); + pthread_mutex_unlock(&io_threads_mutex[id]); + continue; + } + + serverAssert(io_threads_pending[id] != 0); + + if (tio_debug) printf("[%ld] %d to handle\n", id, (int)listLength(io_threads_list[id])); + + /* Process: note that the main thread will never touch our list + * before we drop the pending count to 0. */ + listIter li; + listNode *ln; + listRewind(io_threads_list[id],&li); + while((ln = listNext(&li))) { + client *c = listNodeValue(ln); + writeToClient(c->fd,c,0); + } + listEmpty(io_threads_list[id]); + io_threads_pending[id] = 0; + if (tio_debug) printf("[%ld] Done\n", id); } } @@ -2572,13 +2582,17 @@ int handleClientsWithPendingWritesUsingThreads(void) { client *c = listNodeValue(ln); c->flags &= ~CLIENT_PENDING_WRITE; int target_id = item_id % server.io_threads_num; - pthread_mutex_lock(&io_threads_mutex[target_id]); listAddNodeTail(io_threads_list[target_id],c); - io_threads_pending[target_id]++; - pthread_mutex_unlock(&io_threads_mutex[target_id]); item_id++; } + /* Give the start condition to the waiting threads, by setting the + * start condition atomic var. 
*/ + for (int j = 0; j < server.io_threads_num; j++) { + int count = listLength(io_threads_list[j]); + io_threads_pending[j] = count; + } + /* Wait for all threads to end their work. */ while(1) { unsigned long pending = 0; From 0353d133217ffe8ba91b4fa2bbc37e48f62f2738 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 25 Mar 2019 17:05:06 +0000 Subject: [PATCH 067/304] Threaded IO: stop threads when no longer needed + C11 in Makefile. Now threads are stopped even when the connections drop immediately to zero, not allowing the networking code to detect the condition and stop the threads. serverCron() will handle that. --- src/Makefile | 2 +- src/networking.c | 29 ++++++++++++++++++++++++----- src/server.c | 3 +++ src/server.h | 1 + 4 files changed, 29 insertions(+), 6 deletions(-) diff --git a/src/Makefile b/src/Makefile index 93cfdc28e..1c80e547f 100644 --- a/src/Makefile +++ b/src/Makefile @@ -20,7 +20,7 @@ DEPENDENCY_TARGETS=hiredis linenoise lua NODEPS:=clean distclean # Default settings -STD=-std=c99 -pedantic -DREDIS_STATIC='' +1TD=-std=c11 -pedantic -DREDIS_STATIC='' ifneq (,$(findstring clang,$(CC))) ifneq (,$(findstring FreeBSD,$(uname_S))) STD+=-Wno-c11-extensions diff --git a/src/networking.c b/src/networking.c index 17d6b1866..d61e1f044 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2527,7 +2527,7 @@ void *IOThreadMain(void *myid) { void initThreadedIO(void) { pthread_t tid; - server.io_threads_num = 4; + server.io_threads_num = 8; io_threads_active = 0; /* We start with threads not active. */ for (int i = 0; i < server.io_threads_num; i++) { pthread_mutex_init(&io_threads_mutex[i],NULL); @@ -2543,6 +2543,7 @@ void initThreadedIO(void) { } void startThreadedIO(void) { + printf("S"); fflush(stdout); if (tio_debug) printf("--- STARTING THREADED IO ---\n"); serverAssert(io_threads_active == 0); for (int j = 0; j < server.io_threads_num; j++) @@ -2551,6 +2552,7 @@ void startThreadedIO(void) { } void stopThreadedIO(void) { + printf("E"); fflush(stdout); if (tio_debug) printf("--- STOPPING THREADED IO ---\n"); serverAssert(io_threads_active == 1); for (int j = 0; j < server.io_threads_num; j++) @@ -2558,19 +2560,36 @@ void stopThreadedIO(void) { io_threads_active = 0; } +/* This function checks if there are not enough pending clients to justify + * taking the I/O threads active: in that case I/O threads are stopped if + * currently active. + * + * The function returns 0 if the I/O threading should be used becuase there + * are enough active threads, otherwise 1 is returned and the I/O threads + * could be possibly stopped (if already active) as a side effect. */ +int stopThreadedIOIfNeeded(void) { + int pending = listLength(server.clients_pending_write); + if (pending < (server.io_threads_num*2)) { + if (io_threads_active) stopThreadedIO(); + return 1; + } else { + return 0; + } +} + int handleClientsWithPendingWritesUsingThreads(void) { int processed = listLength(server.clients_pending_write); if (processed == 0) return 0; /* Return ASAP if there are no clients. */ /* If we have just a few clients to serve, don't use I/O threads, but the * boring synchronous code. */ - if (processed < (server.io_threads_num*2)) { - if (io_threads_active) stopThreadedIO(); + if (stopThreadedIOIfNeeded()) { return handleClientsWithPendingWrites(); - } else { - if (!io_threads_active) startThreadedIO(); } + /* Start threads if needed. 
*/ + if (!io_threads_active) startThreadedIO(); + if (tio_debug) printf("%d TOTAL pending clients\n", processed); /* Distribute the clients across N different lists. */ diff --git a/src/server.c b/src/server.c index de5a814d0..325c9010c 100644 --- a/src/server.c +++ b/src/server.c @@ -2001,6 +2001,9 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { migrateCloseTimedoutSockets(); } + /* Stop the I/O threads if we don't have enough pending work. */ + stopThreadedIOIfNeeded(); + /* Start a scheduled BGSAVE if the corresponding flag is set. This is * useful when we are forced to postpone a BGSAVE because an AOF * rewrite is in progress. diff --git a/src/server.h b/src/server.h index d2a563c96..96ee37887 100644 --- a/src/server.h +++ b/src/server.h @@ -1579,6 +1579,7 @@ int clientsArePaused(void); int processEventsWhileBlocked(void); int handleClientsWithPendingWrites(void); int handleClientsWithPendingWritesUsingThreads(void); +int stopThreadedIOIfNeeded(void); int clientHasPendingReplies(client *c); void unlinkClient(client *c); int writeToClient(int fd, client *c, int handler_installed); From 79626ca3400b7ac2d1f8dd529ecfd8f2d169b73b Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 27 Mar 2019 18:39:13 +0100 Subject: [PATCH 068/304] Threaded IO: use main thread if num of threads is 1. --- src/networking.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/networking.c b/src/networking.c index d61e1f044..916f29ebc 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2525,11 +2525,16 @@ void *IOThreadMain(void *myid) { /* Initialize the data structures needed for threaded I/O. */ void initThreadedIO(void) { - pthread_t tid; - server.io_threads_num = 8; io_threads_active = 0; /* We start with threads not active. */ + + /* Don't spawn any thread if the user selected a single thread: + * we'll handle I/O directly from the main thread. */ + if (server.io_threads_num == 1) return; + + /* Spawn the I/O threads. */ for (int i = 0; i < server.io_threads_num; i++) { + pthread_t tid; pthread_mutex_init(&io_threads_mutex[i],NULL); io_threads_pending[i] = 0; io_threads_list[i] = listCreate(); @@ -2569,6 +2574,10 @@ void stopThreadedIO(void) { * could be possibly stopped (if already active) as a side effect. */ int stopThreadedIOIfNeeded(void) { int pending = listLength(server.clients_pending_write); + + /* Return ASAP if IO threads are disabled (single threaded mode). */ + if (server.io_threads_num == 1) return 0; + if (pending < (server.io_threads_num*2)) { if (io_threads_active) stopThreadedIO(); return 1; From dc4d13e7511bfd82f1bc872e55e4fb1d11cc45a0 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 27 Mar 2019 18:58:45 +0100 Subject: [PATCH 069/304] Threaded IO: make num of I/O threads configurable. 
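
The 'io-threads' directive added below controls how many threads run the
start/wait protocol introduced by the previous patches in this series. As a
rough illustration only, here is a minimal standalone sketch of that protocol;
the names are simplified stand-ins for io_threads_pending, io_threads_mutex
and IOThreadMain rather than the actual networking.c code, and it should build
with something like cc -std=c11 -pthread:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    #define THREADS_NUM 4

    static pthread_t threads[THREADS_NUM];
    static pthread_mutex_t park_mutex[THREADS_NUM];    /* held by main to park a thread */
    static _Atomic unsigned long pending[THREADS_NUM]; /* per-thread start condition */
    static _Atomic int stop_threads = 0;

    static void *io_thread_main(void *arg) {
        long id = (long)arg;
        while (!stop_threads) {
            /* Busy-wait briefly for the start condition, as IOThreadMain() does. */
            for (int j = 0; j < 1000000; j++)
                if (pending[id] != 0) break;
            if (pending[id] == 0) {
                /* Idle: lock/unlock the park mutex so the main thread can
                 * stop this thread simply by keeping the mutex locked. */
                pthread_mutex_lock(&park_mutex[id]);
                pthread_mutex_unlock(&park_mutex[id]);
                continue;
            }
            /* "Process" our share of work, then publish completion by
             * dropping the pending counter back to zero. */
            printf("[thread %ld] handled %lu items\n", id,
                   (unsigned long)pending[id]);
            pending[id] = 0;
        }
        return NULL;
    }

    int main(void) {
        for (long i = 0; i < THREADS_NUM; i++) {
            pthread_mutex_init(&park_mutex[i], NULL);
            pending[i] = 0;
            pthread_create(&threads[i], NULL, io_thread_main, (void *)i);
        }

        /* Fan-out: after distributing the work, give the start condition
         * by setting each thread's atomic counter. */
        for (int i = 0; i < THREADS_NUM; i++) pending[i] = 10;

        /* Fan-in: spin until every thread reports completion. */
        for (;;) {
            unsigned long left = 0;
            for (int i = 0; i < THREADS_NUM; i++) left += pending[i];
            if (left == 0) break;
        }
        printf("all I/O threads finished\n");

        stop_threads = 1;
        for (int i = 0; i < THREADS_NUM; i++) pthread_join(threads[i], NULL);
        return 0;
    }

Spinning on the counters trades some CPU for low wakeup latency, and the
per-thread mutex gives the main thread a cheap way to park idle threads,
which is what lets stopThreadedIOIfNeeded() disable the machinery when only
a few clients have pending writes.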
--- src/config.c | 7 +++++++ src/networking.c | 3 +-- src/server.c | 1 + src/server.h | 1 + 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/config.c b/src/config.c index 1e0525594..c4a18f3bb 100644 --- a/src/config.c +++ b/src/config.c @@ -313,6 +313,11 @@ void loadServerConfigFromString(char *config) { if (server.dbnum < 1) { err = "Invalid number of databases"; goto loaderr; } + } else if (!strcasecmp(argv[0],"io-threads") && argc == 2) { + server.io_threads_num = atoi(argv[1]); + if (server.io_threads_num < 1 || server.io_threads_num > 512) { + err = "Invalid number of I/O threads"; goto loaderr; + } } else if (!strcasecmp(argv[0],"include") && argc == 2) { loadServerConfig(argv[1],NULL); } else if (!strcasecmp(argv[0],"maxclients") && argc == 2) { @@ -1426,6 +1431,7 @@ void configGetCommand(client *c) { config_get_numerical_field("cluster-announce-bus-port",server.cluster_announce_bus_port); config_get_numerical_field("tcp-backlog",server.tcp_backlog); config_get_numerical_field("databases",server.dbnum); + config_get_numerical_field("io-threads",server.io_threads_num); config_get_numerical_field("repl-ping-slave-period",server.repl_ping_slave_period); config_get_numerical_field("repl-ping-replica-period",server.repl_ping_slave_period); config_get_numerical_field("repl-timeout",server.repl_timeout); @@ -2239,6 +2245,7 @@ int rewriteConfig(char *path) { rewriteConfigSaveOption(state); rewriteConfigUserOption(state); rewriteConfigNumericalOption(state,"databases",server.dbnum,CONFIG_DEFAULT_DBNUM); + rewriteConfigNumericalOption(state,"io-threads",server.dbnum,CONFIG_DEFAULT_IO_THREADS_NUM); rewriteConfigYesNoOption(state,"stop-writes-on-bgsave-error",server.stop_writes_on_bgsave_err,CONFIG_DEFAULT_STOP_WRITES_ON_BGSAVE_ERROR); rewriteConfigYesNoOption(state,"rdbcompression",server.rdb_compression,CONFIG_DEFAULT_RDB_COMPRESSION); rewriteConfigYesNoOption(state,"rdbchecksum",server.rdb_checksum,CONFIG_DEFAULT_RDB_CHECKSUM); diff --git a/src/networking.c b/src/networking.c index 916f29ebc..275338a6f 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2525,7 +2525,6 @@ void *IOThreadMain(void *myid) { /* Initialize the data structures needed for threaded I/O. */ void initThreadedIO(void) { - server.io_threads_num = 8; io_threads_active = 0; /* We start with threads not active. */ /* Don't spawn any thread if the user selected a single thread: @@ -2576,7 +2575,7 @@ int stopThreadedIOIfNeeded(void) { int pending = listLength(server.clients_pending_write); /* Return ASAP if IO threads are disabled (single threaded mode). */ - if (server.io_threads_num == 1) return 0; + if (server.io_threads_num == 1) return 1; if (pending < (server.io_threads_num*2)) { if (io_threads_active) stopThreadedIO(); diff --git a/src/server.c b/src/server.c index 325c9010c..f6d2b47f3 100644 --- a/src/server.c +++ b/src/server.c @@ -2317,6 +2317,7 @@ void initServerConfig(void) { server.lazyfree_lazy_server_del = CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL; server.always_show_logo = CONFIG_DEFAULT_ALWAYS_SHOW_LOGO; server.lua_time_limit = LUA_SCRIPT_TIME_LIMIT; + server.io_threads_num = CONFIG_DEFAULT_IO_THREADS_NUM; unsigned int lruclock = getLRUClock(); atomicSet(server.lruclock,lruclock); diff --git a/src/server.h b/src/server.h index 96ee37887..2e4de2bb1 100644 --- a/src/server.h +++ b/src/server.h @@ -87,6 +87,7 @@ typedef long long mstime_t; /* millisecond time type. */ #define CONFIG_DEFAULT_TCP_BACKLOG 511 /* TCP listen backlog. 
*/ #define CONFIG_DEFAULT_CLIENT_TIMEOUT 0 /* Default client timeout: infinite */ #define CONFIG_DEFAULT_DBNUM 16 +#define CONFIG_DEFAULT_IO_THREADS_NUM 1 /* Single threaded by default */ #define CONFIG_MAX_LINE 1024 #define CRON_DBS_PER_CALL 16 #define NET_MAX_WRITES_PER_EVENT (1024*64) From 793134d0e665f7fe04580df0723d5fc4fb49830c Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 27 Mar 2019 18:59:39 +0100 Subject: [PATCH 070/304] Threaded IO: hide more debugging printfs under conditional. --- src/networking.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/networking.c b/src/networking.c index 275338a6f..caffd3be7 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2547,7 +2547,7 @@ void initThreadedIO(void) { } void startThreadedIO(void) { - printf("S"); fflush(stdout); + if (tio_debug) printf("S"); fflush(stdout); if (tio_debug) printf("--- STARTING THREADED IO ---\n"); serverAssert(io_threads_active == 0); for (int j = 0; j < server.io_threads_num; j++) @@ -2556,7 +2556,7 @@ void startThreadedIO(void) { } void stopThreadedIO(void) { - printf("E"); fflush(stdout); + if (tio_debug) printf("E"); fflush(stdout); if (tio_debug) printf("--- STOPPING THREADED IO ---\n"); serverAssert(io_threads_active == 1); for (int j = 0; j < server.io_threads_num; j++) From 7bae8afef8f3e6ded0d626f2d565ef4db43a69ae Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 30 Mar 2019 11:26:58 +0100 Subject: [PATCH 071/304] Threaded IO: read side WIP. --- src/evict.c | 2 +- src/networking.c | 61 ++++++++++++++++++++++++++++++++++++++---------- src/server.c | 30 ++++++++++-------------- src/server.h | 28 +++++++++++----------- 4 files changed, 75 insertions(+), 46 deletions(-) diff --git a/src/evict.c b/src/evict.c index 773916ce8..176f4c362 100644 --- a/src/evict.c +++ b/src/evict.c @@ -78,7 +78,7 @@ unsigned int getLRUClock(void) { unsigned int LRU_CLOCK(void) { unsigned int lruclock; if (1000/server.hz <= LRU_CLOCK_RESOLUTION) { - atomicGet(server.lruclock,lruclock); + lruclock = server.lruclock; } else { lruclock = getLRUClock(); } diff --git a/src/networking.c b/src/networking.c index caffd3be7..fd4e990f4 100644 --- a/src/networking.c +++ b/src/networking.c @@ -35,6 +35,7 @@ #include static void setProtocolError(const char *errstr, client *c); +int postponeClientRead(client *c); /* Return the size consumed from the allocator, for the specified SDS string, * including internal fragmentation. This function is used in order to compute @@ -105,8 +106,7 @@ client *createClient(int fd) { } selectDb(c,0); - uint64_t client_id; - atomicGetIncr(server.next_client_id,client_id,1); + uint64_t client_id = ++server.next_client_id; c->id = client_id; c->resp = 2; c->fd = fd; @@ -950,6 +950,14 @@ void unlinkClient(client *c) { c->flags &= ~CLIENT_PENDING_WRITE; } + /* Remove from the list of pending reads if needed. */ + if (c->flags & CLIENT_PENDING_READ) { + ln = listSearchKey(server.clients_pending_read,c); + serverAssert(ln != NULL); + listDelNode(server.clients_pending_read,ln); + c->flags &= ~CLIENT_PENDING_READ; + } + /* When client was just unblocked because of a blocking operation, * remove it from the list of unblocked clients. */ if (c->flags & CLIENT_UNBLOCKED) { @@ -1642,13 +1650,19 @@ void processInputBuffer(client *c) { } /* This is a wrapper for processInputBuffer that also cares about handling - * the replication forwarding to the sub-slaves, in case the client 'c' + * the replication forwarding to the sub-replicas, in case the client 'c' * is flagged as master. 
Usually you want to call this instead of the * raw processInputBuffer(). */ void processInputBufferAndReplicate(client *c) { if (!(c->flags & CLIENT_MASTER)) { processInputBuffer(c); } else { + /* If the client is a master we need to compute the difference + * between the applied offset before and after processing the buffer, + * to understand how much of the replication stream was actually + * applied to the master state: this quantity, and its corresponding + * part of the replication stream, will be propagated to the + * sub-replicas and to the replication backlog. */ size_t prev_offset = c->reploff; processInputBuffer(c); size_t applied = c->reploff - prev_offset; @@ -1667,6 +1681,10 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { UNUSED(el); UNUSED(mask); + /* Check if we want to read from the client later when exiting from + * the event loop. This is the case if threaded I/O is enabled. */ + if (postponeClientRead(c)) return; + readlen = PROTO_IOBUF_LEN; /* If this is a multi bulk request, and we are processing a bulk reply * that is large enough, try to maximize the probability that the query @@ -1716,20 +1734,21 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { sds ci = catClientInfoString(sdsempty(),c), bytes = sdsempty(); bytes = sdscatrepr(bytes,c->querybuf,64); - serverLog(LL_WARNING,"Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes); +// FIXME: This may be called from an I/O thread and it is not safe to +// log from there for now. +// serverLog(LL_WARNING,"Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes); sdsfree(ci); sdsfree(bytes); freeClient(c); return; } - /* Time to process the buffer. If the client is a master we need to - * compute the difference between the applied offset before and after - * processing the buffer, to understand how much of the replication stream - * was actually applied to the master state: this quantity, and its - * corresponding part of the replication stream, will be propagated to - * the sub-slaves and to the replication backlog. */ - processInputBufferAndReplicate(c); + /* There is more data in the client input buffer, continue parsing it + * in case to check if there is a full command to execute. + * Don't do it if the client is flagged as CLIENT_PENDING_READ: it means + * we are currently in the context of an I/O thread. */ + if (!(c->flags & CLIENT_PENDING_READ)) + processInputBufferAndReplicate(c); } void getClientsMaxBuffers(unsigned long *longest_output_list, @@ -2566,7 +2585,9 @@ void stopThreadedIO(void) { /* This function checks if there are not enough pending clients to justify * taking the I/O threads active: in that case I/O threads are stopped if - * currently active. + * currently active. We track the pending writes as a measure of clients + * we need to handle in parallel, however the I/O threading is disabled + * globally for reads as well if we have too little pending clients. * * The function returns 0 if the I/O threading should be used becuase there * are enough active threads, otherwise 1 is returned and the I/O threads @@ -2647,3 +2668,19 @@ int handleClientsWithPendingWritesUsingThreads(void) { listEmpty(server.clients_pending_write); return processed; } + +/* Return 1 if we want to handle the client read later using threaded I/O. + * This is called by the readable handler of the event loop. 
+ * As a side effect of calling this function the client is put in the + * pending read clients and flagged as such. */ +int postponeClientRead(client *c) { + if (io_threads_active && + !(c->flags & (CLIENT_MASTER|CLIENT_SLAVE|CLIENT_PENDING_READ))) + { + c->flags |= CLIENT_PENDING_READ; + listAddNodeHead(server.clients_pending_read,c); + return 1; + } else { + return 0; + } +} diff --git a/src/server.c b/src/server.c index f6d2b47f3..ef6b85c44 100644 --- a/src/server.c +++ b/src/server.c @@ -1728,16 +1728,17 @@ void databasesCron(void) { * every object access, and accuracy is not needed. To access a global var is * a lot faster than calling time(NULL) */ void updateCachedTime(void) { - time_t unixtime = time(NULL); - atomicSet(server.unixtime,unixtime); + server.unixtime = time(NULL); server.mstime = mstime(); - /* To get information about daylight saving time, we need to call localtime_r - * and cache the result. However calling localtime_r in this context is safe - * since we will never fork() while here, in the main thread. The logging - * function will call a thread safe version of localtime that has no locks. */ + /* To get information about daylight saving time, we need to call + * localtime_r and cache the result. However calling localtime_r in this + * context is safe since we will never fork() while here, in the main + * thread. The logging function will call a thread safe version of + * localtime that has no locks. */ struct tm tm; - localtime_r(&server.unixtime,&tm); + time_t ut = server.unixtime; + localtime_r(&ut,&tm); server.daylight_active = tm.tm_isdst; } @@ -1807,8 +1808,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { * * Note that you can change the resolution altering the * LRU_CLOCK_RESOLUTION define. */ - unsigned long lruclock = getLRUClock(); - atomicSet(server.lruclock,lruclock); + server.lruclock = getLRUClock(); /* Record the max memory used since the server was started. */ if (zmalloc_used_memory() > server.stat_peak_memory) @@ -2202,10 +2202,6 @@ void createSharedObjects(void) { void initServerConfig(void) { int j; - pthread_mutex_init(&server.next_client_id_mutex,NULL); - pthread_mutex_init(&server.lruclock_mutex,NULL); - pthread_mutex_init(&server.unixtime_mutex,NULL); - updateCachedTime(); getRandomHexChars(server.runid,CONFIG_RUN_ID_SIZE); server.runid[CONFIG_RUN_ID_SIZE] = '\0'; @@ -2319,8 +2315,7 @@ void initServerConfig(void) { server.lua_time_limit = LUA_SCRIPT_TIME_LIMIT; server.io_threads_num = CONFIG_DEFAULT_IO_THREADS_NUM; - unsigned int lruclock = getLRUClock(); - atomicSet(server.lruclock,lruclock); + server.lruclock = getLRUClock(); resetServerSaveParams(); appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */ @@ -2718,6 +2713,7 @@ void initServer(void) { server.slaves = listCreate(); server.monitors = listCreate(); server.clients_pending_write = listCreate(); + server.clients_pending_read = listCreate(); server.slaveseldb = -1; /* Force to emit the first SELECT command. */ server.unblocked_clients = listCreate(); server.ready_keys = listCreate(); @@ -3821,8 +3817,6 @@ sds genRedisInfoString(char *section) { call_uname = 0; } - unsigned int lruclock; - atomicGet(server.lruclock,lruclock); info = sdscatprintf(info, "# Server\r\n" "redis_version:%s\r\n" @@ -3866,7 +3860,7 @@ sds genRedisInfoString(char *section) { (intmax_t)(uptime/(3600*24)), server.hz, server.config_hz, - (unsigned long) lruclock, + (unsigned long) server.lruclock, server.executable ? server.executable : "", server.configfile ? 
server.configfile : ""); } diff --git a/src/server.h b/src/server.h index 2e4de2bb1..dcfcb55fb 100644 --- a/src/server.h +++ b/src/server.h @@ -285,6 +285,9 @@ typedef long long mstime_t; /* millisecond time type. */ #define CLIENT_LUA_DEBUG_SYNC (1<<26) /* EVAL debugging without fork() */ #define CLIENT_MODULE (1<<27) /* Non connected client used by some module. */ #define CLIENT_PROTECTED (1<<28) /* Client should not be freed for now. */ +#define CLIENT_PENDING_READ (1<<29) /* The client has pending reads and was put + in the list of clients we can read + from. */ /* Client block type (btype field in client structure) * if CLIENT_BLOCKED flag is set. */ @@ -1018,7 +1021,7 @@ struct redisServer { dict *commands; /* Command table */ dict *orig_commands; /* Command table before command renaming. */ aeEventLoop *el; - unsigned int lruclock; /* Clock for LRU eviction */ + _Atomic unsigned int lruclock; /* Clock for LRU eviction */ int shutdown_asap; /* SHUTDOWN needed ASAP */ int activerehashing; /* Incremental rehash in serverCron() */ int active_defrag_running; /* Active defragmentation running (holds current scan aggressiveness) */ @@ -1052,6 +1055,7 @@ struct redisServer { list *clients; /* List of active clients */ list *clients_to_close; /* Clients to close asynchronously */ list *clients_pending_write; /* There is to write or install handler. */ + list *clients_pending_read; /* Client has pending read socket buffers. */ list *slaves, *monitors; /* List of slaves and MONITORs */ client *current_client; /* Current client, only used on crash report */ rax *clients_index; /* Active clients dictionary by client ID. */ @@ -1059,7 +1063,7 @@ struct redisServer { mstime_t clients_pause_end_time; /* Time when we undo clients_paused */ char neterr[ANET_ERR_LEN]; /* Error buffer for anet.c */ dict *migrate_cached_sockets;/* MIGRATE cached sockets */ - uint64_t next_client_id; /* Next client unique ID. Incremental. */ + _Atomic uint64_t next_client_id; /* Next client unique ID. Incremental. */ int protected_mode; /* Don't accept external connections. */ int gopher_enabled; /* If true the server will reply to gopher queries. Will still serve RESP2 queries. */ @@ -1104,8 +1108,8 @@ struct redisServer { long long slowlog_log_slower_than; /* SLOWLOG time limit (to get logged) */ unsigned long slowlog_max_len; /* SLOWLOG max number of items logged */ struct malloc_stats cron_malloc_stats; /* sampled in serverCron(). */ - long long stat_net_input_bytes; /* Bytes read from network. */ - long long stat_net_output_bytes; /* Bytes written to network. */ + _Atomic long long stat_net_input_bytes; /* Bytes read from network. */ + _Atomic long long stat_net_output_bytes; /* Bytes written to network. */ size_t stat_rdb_cow_bytes; /* Copy on write bytes during RDB saving. */ size_t stat_aof_cow_bytes; /* Copy on write bytes during AOF rewrite. */ /* The following two are used to track instantaneous metrics, like @@ -1128,7 +1132,7 @@ struct redisServer { int active_defrag_cycle_min; /* minimal effort for defrag in CPU percentage */ int active_defrag_cycle_max; /* maximal effort for defrag in CPU percentage */ unsigned long active_defrag_max_scan_fields; /* maximum number of fields of set/hash/zset/list to process from within the main dict scan */ - size_t client_max_querybuf_len; /* Limit for client query buffer length */ + _Atomic size_t client_max_querybuf_len; /* Limit for client query buffer length */ int dbnum; /* Total number of configured DBs */ int supervised; /* 1 if supervised, 0 otherwise. 
*/ int supervised_mode; /* See SUPERVISED_* */ @@ -1297,10 +1301,10 @@ struct redisServer { int list_max_ziplist_size; int list_compress_depth; /* time cache */ - time_t unixtime; /* Unix time sampled every cron cycle. */ - time_t timezone; /* Cached timezone. As set by tzset(). */ - int daylight_active; /* Currently in daylight saving time. */ - long long mstime; /* Like 'unixtime' but with milliseconds resolution. */ + _Atomic time_t unixtime; /* Unix time sampled every cron cycle. */ + time_t timezone; /* Cached timezone. As set by tzset(). */ + int daylight_active; /* Currently in daylight saving time. */ + long long mstime; /* 'unixtime' with milliseconds resolution. */ /* Pubsub */ dict *pubsub_channels; /* Map channels to list of subscribed clients */ list *pubsub_patterns; /* A list of pubsub_patterns */ @@ -1360,12 +1364,6 @@ struct redisServer { int watchdog_period; /* Software watchdog period in ms. 0 = off */ /* System hardware info */ size_t system_memory_size; /* Total memory in system as reported by OS */ - - /* Mutexes used to protect atomic variables when atomic builtins are - * not available. */ - pthread_mutex_t lruclock_mutex; - pthread_mutex_t next_client_id_mutex; - pthread_mutex_t unixtime_mutex; }; typedef struct pubsubPattern { From 1c3b585e78cde384959731171c124d682f28c764 Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 31 Mar 2019 15:58:54 +0200 Subject: [PATCH 072/304] Threaded IO: read side WIP 2. --- src/networking.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/src/networking.c b/src/networking.c index fd4e990f4..7d8470489 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2496,13 +2496,16 @@ int processEventsWhileBlocked(void) { int tio_debug = 0; -#define SERVER_MAX_IO_THREADS 32 +#define IO_THREADS_MAX_NUM 128 +#define IO_THREADS_OP_READ 0 +#define IO_THREADS_OP_WRITE 1 -pthread_t io_threads[SERVER_MAX_IO_THREADS]; -pthread_mutex_t io_threads_mutex[SERVER_MAX_IO_THREADS]; -_Atomic unsigned long io_threads_pending[SERVER_MAX_IO_THREADS]; -int io_threads_active; -list *io_threads_list[SERVER_MAX_IO_THREADS]; +pthread_t io_threads[IO_THREADS_MAX_NUM]; +pthread_mutex_t io_threads_mutex[IO_THREADS_MAX_NUM]; +_Atomic unsigned long io_threads_pending[IO_THREADS_MAX_NUM]; +int io_threads_active; /* Are the threads currently spinning waiting I/O? */ +int io_threads_op; /* IO_THREADS_OP_WRITE or IO_THREADS_OP_READ. */ +list *io_threads_list[IO_THREADS_MAX_NUM]; void *IOThreadMain(void *myid) { /* The ID is the thread number (from 0 to server.iothreads_num-1), and is @@ -2533,7 +2536,11 @@ void *IOThreadMain(void *myid) { listRewind(io_threads_list[id],&li); while((ln = listNext(&li))) { client *c = listNodeValue(ln); - writeToClient(c->fd,c,0); + if (io_threads_op == IO_THREADS_OP_WRITE) { + writeToClient(c->fd,c,0); + } else { + readQueryFromClient(NULL,c->fd,c,0); + } } listEmpty(io_threads_list[id]); io_threads_pending[id] = 0; @@ -2550,6 +2557,12 @@ void initThreadedIO(void) { * we'll handle I/O directly from the main thread. */ if (server.io_threads_num == 1) return; + if (server.io_threads_num > IO_THREADS_MAX_NUM) { + serverLog(LL_WARNING,"Fatal: too many I/O threads configured. " + "The maximum number is %d.", IO_THREADS_MAX_NUM); + exit(1); + } + /* Spawn the I/O threads. 
*/ for (int i = 0; i < server.io_threads_num; i++) { pthread_t tid; @@ -2684,3 +2697,6 @@ int postponeClientRead(client *c) { return 0; } } + +int handleClientsWithPendingReadsUsingThreads(void) { +} From b9b29641cfa4af61abe0fc7d6ab6e6b3ce816af2 Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 31 Mar 2019 21:59:50 +0200 Subject: [PATCH 073/304] Threaded IO: read side WIP 3. --- src/networking.c | 59 +++++++++++++++++++++++++++++++++++++++++++----- src/server.c | 1 + src/server.h | 1 + 3 files changed, 55 insertions(+), 6 deletions(-) diff --git a/src/networking.c b/src/networking.c index 7d8470489..3a36badb8 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1711,12 +1711,12 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { return; } else { serverLog(LL_VERBOSE, "Reading from client: %s",strerror(errno)); - freeClient(c); + freeClientAsync(c); return; } } else if (nread == 0) { serverLog(LL_VERBOSE, "Client closed connection"); - freeClient(c); + freeClientAsync(c); return; } else if (c->flags & CLIENT_MASTER) { /* Append the query buffer to the pending (not applied) buffer @@ -1739,7 +1739,7 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { // serverLog(LL_WARNING,"Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes); sdsfree(ci); sdsfree(bytes); - freeClient(c); + freeClientAsync(c); return; } @@ -2538,8 +2538,10 @@ void *IOThreadMain(void *myid) { client *c = listNodeValue(ln); if (io_threads_op == IO_THREADS_OP_WRITE) { writeToClient(c->fd,c,0); - } else { + } else if (io_threads_op == IO_THREADS_OP_READ) { readQueryFromClient(NULL,c->fd,c,0); + } else { + serverPanic("io_threads_op value is unknown"); } } listEmpty(io_threads_list[id]); @@ -2632,7 +2634,7 @@ int handleClientsWithPendingWritesUsingThreads(void) { /* Start threads if needed. */ if (!io_threads_active) startThreadedIO(); - if (tio_debug) printf("%d TOTAL pending clients\n", processed); + if (tio_debug) printf("%d TOTAL WRITE pending clients\n", processed); /* Distribute the clients across N different lists. */ listIter li; @@ -2649,6 +2651,7 @@ int handleClientsWithPendingWritesUsingThreads(void) { /* Give the start condition to the waiting threads, by setting the * start condition atomic var. */ + io_threads_op = IO_THREADS_OP_WRITE; for (int j = 0; j < server.io_threads_num; j++) { int count = listLength(io_threads_list[j]); io_threads_pending[j] = count; @@ -2661,7 +2664,7 @@ int handleClientsWithPendingWritesUsingThreads(void) { pending += io_threads_pending[j]; if (pending == 0) break; } - if (tio_debug) printf("All threads finshed\n"); + if (tio_debug) printf("I/O WRITE All threads finshed\n"); /* Run the list of clients again to install the write handler where * needed. */ @@ -2699,4 +2702,48 @@ int postponeClientRead(client *c) { } int handleClientsWithPendingReadsUsingThreads(void) { + if (!io_threads_active) return 0; + int processed = listLength(server.clients_pending_read); + if (processed == 0) return 0; + + if (tio_debug) printf("%d TOTAL READ pending clients\n", processed); + + /* Distribute the clients across N different lists. 
*/ + listIter li; + listNode *ln; + listRewind(server.clients_pending_read,&li); + int item_id = 0; + while((ln = listNext(&li))) { + client *c = listNodeValue(ln); + int target_id = item_id % server.io_threads_num; + listAddNodeTail(io_threads_list[target_id],c); + item_id++; + } + + /* Give the start condition to the waiting threads, by setting the + * start condition atomic var. */ + io_threads_op = IO_THREADS_OP_READ; + for (int j = 0; j < server.io_threads_num; j++) { + int count = listLength(io_threads_list[j]); + io_threads_pending[j] = count; + } + + /* Wait for all threads to end their work. */ + while(1) { + unsigned long pending = 0; + for (int j = 0; j < server.io_threads_num; j++) + pending += io_threads_pending[j]; + if (pending == 0) break; + } + if (tio_debug) printf("I/O READ All threads finshed\n"); + + /* Run the list of clients again to process the new buffers. */ + listRewind(server.clients_pending_read,&li); + while((ln = listNext(&li))) { + client *c = listNodeValue(ln); + c->flags &= ~CLIENT_PENDING_READ; + processInputBufferAndReplicate(c); + } + listEmpty(server.clients_pending_read); + return processed; } diff --git a/src/server.c b/src/server.c index ef6b85c44..e0c48b097 100644 --- a/src/server.c +++ b/src/server.c @@ -2092,6 +2092,7 @@ void beforeSleep(struct aeEventLoop *eventLoop) { void afterSleep(struct aeEventLoop *eventLoop) { UNUSED(eventLoop); if (moduleCount()) moduleAcquireGIL(); + handleClientsWithPendingReadsUsingThreads(); } /* =========================== Server initialization ======================== */ diff --git a/src/server.h b/src/server.h index dcfcb55fb..0d7882419 100644 --- a/src/server.h +++ b/src/server.h @@ -1578,6 +1578,7 @@ int clientsArePaused(void); int processEventsWhileBlocked(void); int handleClientsWithPendingWrites(void); int handleClientsWithPendingWritesUsingThreads(void); +int handleClientsWithPendingReadsUsingThreads(void); int stopThreadedIOIfNeeded(void); int clientHasPendingReplies(client *c); void unlinkClient(client *c); From 4e10311fe5fefb4e4831459287ebb82851132554 Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 31 Mar 2019 22:06:00 +0200 Subject: [PATCH 074/304] Threaded IO: process read queue before stopping threads. --- src/networking.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index 3a36badb8..29a56e983 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2590,8 +2590,13 @@ void startThreadedIO(void) { } void stopThreadedIO(void) { + /* We may have still clients with pending reads when this function + * is called: handle them before stopping the threads. */ + handleClientsWithPendingReadsUsingThreads(); if (tio_debug) printf("E"); fflush(stdout); - if (tio_debug) printf("--- STOPPING THREADED IO ---\n"); + if (tio_debug) printf("--- STOPPING THREADED IO [R%d] [W%d] ---\n", + (int) listLength(server.clients_pending_read), + (int) listLength(server.clients_pending_write)); serverAssert(io_threads_active == 1); for (int j = 0; j < server.io_threads_num; j++) pthread_mutex_lock(&io_threads_mutex[j]); From 17b4ac3f8cb12c523676cc1cb6ff7c0ced36b3db Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 8 Apr 2019 13:12:10 +0200 Subject: [PATCH 075/304] Threaded IO: logging should be safe in I/O threads. Potentially it is possible that we get interleaved writes, even if serverLog() makes sure to write into a buffer and then use printf(), so even this should be ok. 
However in general POSIX guarantees that writing to the same file pointer object from multiple threads is safe. Anyway currently we *reopen* the file at each call, but for the standard output logging. The logging functions actually also access global configuration while performing the log (for instance in order to check the log level, the log filename and so forth), however dunring the I/O threads execution we cannot alter such shared state in any way. --- src/networking.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/networking.c b/src/networking.c index 29a56e983..0e11e1f3f 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1174,14 +1174,13 @@ int writeToClient(int fd, client *c, int handler_installed) { zmalloc_used_memory() < server.maxmemory) && !(c->flags & CLIENT_SLAVE)) break; } - /* FIXME: Fixme, use atomic var for this. */ server.stat_net_output_bytes += totwritten; if (nwritten == -1) { if (errno == EAGAIN) { nwritten = 0; } else { - // serverLog(LL_VERBOSE, - // "Error writing to client: %s", strerror(errno)); + serverLog(LL_VERBOSE, + "Error writing to client: %s", strerror(errno)); freeClientAsync(c); return C_ERR; } From 67d79b9b10aa6074524d1f99cc549f18f829536c Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 12 Apr 2019 17:18:10 +0200 Subject: [PATCH 076/304] Threaded IO: parsing WIP 1: set current_client in a better scoped way. --- src/networking.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/networking.c b/src/networking.c index 0e11e1f3f..3faaf4a12 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1563,7 +1563,7 @@ int processMultibulkBuffer(client *c) { * or because a client was blocked and later reactivated, so there could be * pending query buffer, already representing a full command, to process. */ void processInputBuffer(client *c) { - server.current_client = c; + int deadclient = 0; /* Keep processing while there is something in the input buffer */ while(c->qb_pos < sdslen(c->querybuf)) { @@ -1619,6 +1619,7 @@ void processInputBuffer(client *c) { resetClient(c); } else { /* Only reset the client when the command was executed. */ + server.current_client = c; if (processCommand(c) == C_OK) { if (c->flags & CLIENT_MASTER && !(c->flags & CLIENT_MULTI)) { /* Update the applied replication offset of our master. */ @@ -1629,23 +1630,26 @@ void processInputBuffer(client *c) { * module blocking command, so that the reply callback will * still be able to access the client argv and argc field. * The client will be reset in unblockClientFromModule(). */ - if (!(c->flags & CLIENT_BLOCKED) || c->btype != BLOCKED_MODULE) + if (!(c->flags & CLIENT_BLOCKED) || + c->btype != BLOCKED_MODULE) + { resetClient(c); + } } + if (server.current_client == NULL) deadclient = 1; + server.current_client = NULL; /* freeMemoryIfNeeded may flush slave output buffers. This may * result into a slave, that may be the active client, to be * freed. 
*/ - if (server.current_client == NULL) break; + if (deadclient) break; } } /* Trim to pos */ - if (server.current_client != NULL && c->qb_pos) { + if (!deadclient && c->qb_pos) { sdsrange(c->querybuf,c->qb_pos,-1); c->qb_pos = 0; } - - server.current_client = NULL; } /* This is a wrapper for processInputBuffer that also cares about handling @@ -1743,11 +1747,8 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { } /* There is more data in the client input buffer, continue parsing it - * in case to check if there is a full command to execute. - * Don't do it if the client is flagged as CLIENT_PENDING_READ: it means - * we are currently in the context of an I/O thread. */ - if (!(c->flags & CLIENT_PENDING_READ)) - processInputBufferAndReplicate(c); + * in case to check if there is a full command to execute. */ + processInputBufferAndReplicate(c); } void getClientsMaxBuffers(unsigned long *longest_output_list, From 1347c13551f34b05f6fb6e03595047c8d16b8f96 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 26 Apr 2019 19:29:50 +0200 Subject: [PATCH 077/304] Threaded IO: parsing WIP 2: refactoring to parse from thread. --- src/networking.c | 89 ++++++++++++++++++++++++++++++++---------------- src/server.h | 1 + 2 files changed, 61 insertions(+), 29 deletions(-) diff --git a/src/networking.c b/src/networking.c index 3faaf4a12..4361ab1af 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1558,13 +1558,47 @@ int processMultibulkBuffer(client *c) { return C_ERR; } +/* This function calls processCommand(), but also performs a few sub tasks + * that are useful in that context: + * + * 1. It sets the current client to the client 'c'. + * 2. In the case of master clients, the replication offset is updated. + * 3. The client is reset unless there are reasons to avoid doing it. + * + * The function returns C_ERR in case the client was freed as a side effect + * of processing the command, otherwise C_OK is returned. */ +int processCommandAndResetClient(client *c) { + int deadclient = 0; + server.current_client = c; + if (processCommand(c) == C_OK) { + if (c->flags & CLIENT_MASTER && !(c->flags & CLIENT_MULTI)) { + /* Update the applied replication offset of our master. */ + c->reploff = c->read_reploff - sdslen(c->querybuf) + c->qb_pos; + } + + /* Don't reset the client structure for clients blocked in a + * module blocking command, so that the reply callback will + * still be able to access the client argv and argc field. + * The client will be reset in unblockClientFromModule(). */ + if (!(c->flags & CLIENT_BLOCKED) || + c->btype != BLOCKED_MODULE) + { + resetClient(c); + } + } + if (server.current_client == NULL) deadclient = 1; + server.current_client = NULL; + /* freeMemoryIfNeeded may flush slave output buffers. This may + * result into a slave, that may be the active client, to be + * freed. */ + return deadclient ? C_ERR : C_OK; +} + /* This function is called every time, in the client structure 'c', there is * more query buffer to process, because we read more data from the socket * or because a client was blocked and later reactivated, so there could be * pending query buffer, already representing a full command, to process. */ void processInputBuffer(client *c) { - int deadclient = 0; - /* Keep processing while there is something in the input buffer */ while(c->qb_pos < sdslen(c->querybuf)) { /* Return if clients are paused. */ @@ -1573,6 +1607,10 @@ void processInputBuffer(client *c) { /* Immediately abort if the client is in the middle of something. 
*/ if (c->flags & CLIENT_BLOCKED) break; + /* Don't process more buffers from clients that have already pending + * commands to execute in c->argv. */ + if (c->flags & CLIENT_PENDING_COMMAND) break; + /* Don't process input from the master while there is a busy script * condition on the slave. We want just to accumulate the replication * stream (instead of replying -BUSY like we do with other clients) and @@ -1618,35 +1656,26 @@ void processInputBuffer(client *c) { if (c->argc == 0) { resetClient(c); } else { - /* Only reset the client when the command was executed. */ - server.current_client = c; - if (processCommand(c) == C_OK) { - if (c->flags & CLIENT_MASTER && !(c->flags & CLIENT_MULTI)) { - /* Update the applied replication offset of our master. */ - c->reploff = c->read_reploff - sdslen(c->querybuf) + c->qb_pos; - } - - /* Don't reset the client structure for clients blocked in a - * module blocking command, so that the reply callback will - * still be able to access the client argv and argc field. - * The client will be reset in unblockClientFromModule(). */ - if (!(c->flags & CLIENT_BLOCKED) || - c->btype != BLOCKED_MODULE) - { - resetClient(c); - } + /* If we are in the context of an I/O thread, we can't really + * execute the command here. All we can do is to flag the client + * as one that needs to process the command. */ + if (c->flags & CLIENT_PENDING_READ) { + c->flags |= CLIENT_PENDING_COMMAND; + break; + } + + /* We are finally ready to execute the command. */ + if (processCommandAndResetClient(c) == C_ERR) { + /* If the client is no longer valid, we avoid exiting this + * loop and trimming the client buffer later. So we return + * ASAP in that case. */ + return; } - if (server.current_client == NULL) deadclient = 1; - server.current_client = NULL; - /* freeMemoryIfNeeded may flush slave output buffers. This may - * result into a slave, that may be the active client, to be - * freed. */ - if (deadclient) break; } } /* Trim to pos */ - if (!deadclient && c->qb_pos) { + if (c->qb_pos) { sdsrange(c->querybuf,c->qb_pos,-1); c->qb_pos = 0; } @@ -1737,9 +1766,7 @@ void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) { sds ci = catClientInfoString(sdsempty(),c), bytes = sdsempty(); bytes = sdscatrepr(bytes,c->querybuf,64); -// FIXME: This may be called from an I/O thread and it is not safe to -// log from there for now. -// serverLog(LL_WARNING,"Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes); + serverLog(LL_WARNING,"Closing client that reached max query buffer length: %s (qbuf initial bytes: %s)", ci, bytes); sdsfree(ci); sdsfree(bytes); freeClientAsync(c); @@ -2747,6 +2774,10 @@ int handleClientsWithPendingReadsUsingThreads(void) { while((ln = listNext(&li))) { client *c = listNodeValue(ln); c->flags &= ~CLIENT_PENDING_READ; + if (c->flags & CLIENT_PENDING_COMMAND) { + c->flags &= ~ CLIENT_PENDING_COMMAND; + processCommandAndResetClient(c); + } processInputBufferAndReplicate(c); } listEmpty(server.clients_pending_read); diff --git a/src/server.h b/src/server.h index 0d7882419..c088d356a 100644 --- a/src/server.h +++ b/src/server.h @@ -288,6 +288,7 @@ typedef long long mstime_t; /* millisecond time type. */ #define CLIENT_PENDING_READ (1<<29) /* The client has pending reads and was put in the list of clients we can read from. */ +#define CLIENT_PENDING_COMMAND (1<<30) /* */ /* Client block type (btype field in client structure) * if CLIENT_BLOCKED flag is set. 
*/ From 6e29b364a869eb30ec9644d7d7d99acd3026a1de Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 29 Apr 2019 12:46:23 +0200 Subject: [PATCH 078/304] Threaded IO: put fflush() inside tio_debug conditional. --- src/networking.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/networking.c b/src/networking.c index 4361ab1af..74bd0f13d 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2608,7 +2608,7 @@ void initThreadedIO(void) { } void startThreadedIO(void) { - if (tio_debug) printf("S"); fflush(stdout); + if (tio_debug) { printf("S"); fflush(stdout); } if (tio_debug) printf("--- STARTING THREADED IO ---\n"); serverAssert(io_threads_active == 0); for (int j = 0; j < server.io_threads_num; j++) @@ -2620,7 +2620,7 @@ void stopThreadedIO(void) { /* We may have still clients with pending reads when this function * is called: handle them before stopping the threads. */ handleClientsWithPendingReadsUsingThreads(); - if (tio_debug) printf("E"); fflush(stdout); + if (tio_debug) { printf("E"); fflush(stdout); } if (tio_debug) printf("--- STOPPING THREADED IO [R%d] [W%d] ---\n", (int) listLength(server.clients_pending_read), (int) listLength(server.clients_pending_write)); From 50b09bc408eea72e601f5723a331da20ac705302 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 30 Apr 2019 15:39:27 +0200 Subject: [PATCH 079/304] Threaded IO: ability to disable reads from threaded path. --- src/networking.c | 3 ++- src/server.c | 1 + src/server.h | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index 74bd0f13d..651dbdb8a 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2723,6 +2723,7 @@ int handleClientsWithPendingWritesUsingThreads(void) { * pending read clients and flagged as such. */ int postponeClientRead(client *c) { if (io_threads_active && + server.io_threads_do_reads && !(c->flags & (CLIENT_MASTER|CLIENT_SLAVE|CLIENT_PENDING_READ))) { c->flags |= CLIENT_PENDING_READ; @@ -2734,7 +2735,7 @@ int postponeClientRead(client *c) { } int handleClientsWithPendingReadsUsingThreads(void) { - if (!io_threads_active) return 0; + if (!io_threads_active || !server.io_threads_do_reads) return 0; int processed = listLength(server.clients_pending_read); if (processed == 0) return 0; diff --git a/src/server.c b/src/server.c index e0c48b097..2643d7266 100644 --- a/src/server.c +++ b/src/server.c @@ -2315,6 +2315,7 @@ void initServerConfig(void) { server.always_show_logo = CONFIG_DEFAULT_ALWAYS_SHOW_LOGO; server.lua_time_limit = LUA_SCRIPT_TIME_LIMIT; server.io_threads_num = CONFIG_DEFAULT_IO_THREADS_NUM; + server.io_threads_do_reads = CONFIG_DEFAULT_IO_THREADS_DO_READS; server.lruclock = getLRUClock(); resetServerSaveParams(); diff --git a/src/server.h b/src/server.h index c088d356a..3987ab5fc 100644 --- a/src/server.h +++ b/src/server.h @@ -88,6 +88,7 @@ typedef long long mstime_t; /* millisecond time type. */ #define CONFIG_DEFAULT_CLIENT_TIMEOUT 0 /* Default client timeout: infinite */ #define CONFIG_DEFAULT_DBNUM 16 #define CONFIG_DEFAULT_IO_THREADS_NUM 1 /* Single threaded by default */ +#define CONFIG_DEFAULT_IO_THREADS_DO_READS 0 /* Read + parse from threads? */ #define CONFIG_MAX_LINE 1024 #define CRON_DBS_PER_CALL 16 #define NET_MAX_WRITES_PER_EVENT (1024*64) @@ -1069,6 +1070,7 @@ struct redisServer { int gopher_enabled; /* If true the server will reply to gopher queries. Will still serve RESP2 queries. */ int io_threads_num; /* Number of IO threads to use. */ + int io_threads_do_reads; /* Read and parse from IO threads? 
*/ /* RDB / AOF loading information */ int loading; /* We are loading data from disk if true */ From a83dcf766ccd855c6bfb1f11d19f317a5440c978 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 30 Apr 2019 15:55:02 +0200 Subject: [PATCH 080/304] Threaded IO: configuration directive for turning on/off reads. --- src/config.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/config.c b/src/config.c index c4a18f3bb..1686743a0 100644 --- a/src/config.c +++ b/src/config.c @@ -318,6 +318,10 @@ void loadServerConfigFromString(char *config) { if (server.io_threads_num < 1 || server.io_threads_num > 512) { err = "Invalid number of I/O threads"; goto loaderr; } + } else if (!strcasecmp(argv[0],"io-threads-do-reads") && argc == 2) { + if ((server.io_threads_do_reads = yesnotoi(argv[1])) == -1) { + err = "argument must be 'yes' or 'no'"; goto loaderr; + } } else if (!strcasecmp(argv[0],"include") && argc == 2) { loadServerConfig(argv[1],NULL); } else if (!strcasecmp(argv[0],"maxclients") && argc == 2) { @@ -1485,6 +1489,7 @@ void configGetCommand(client *c) { config_get_bool_field("activedefrag", server.active_defrag_enabled); config_get_bool_field("protected-mode", server.protected_mode); config_get_bool_field("gopher-enabled", server.gopher_enabled); + config_get_bool_field("io-threads-do-reads", server.io_threads_do_reads); config_get_bool_field("repl-disable-tcp-nodelay", server.repl_disable_tcp_nodelay); config_get_bool_field("repl-diskless-sync", @@ -2316,6 +2321,7 @@ int rewriteConfig(char *path) { rewriteConfigYesNoOption(state,"activedefrag",server.active_defrag_enabled,CONFIG_DEFAULT_ACTIVE_DEFRAG); rewriteConfigYesNoOption(state,"protected-mode",server.protected_mode,CONFIG_DEFAULT_PROTECTED_MODE); rewriteConfigYesNoOption(state,"gopher-enabled",server.gopher_enabled,CONFIG_DEFAULT_GOPHER_ENABLED); + rewriteConfigYesNoOption(state,"io-threads-do-reads",server.io_threads_do_reads,CONFIG_DEFAULT_IO_THREADS_DO_READS); rewriteConfigClientoutputbufferlimitOption(state); rewriteConfigNumericalOption(state,"hz",server.config_hz,CONFIG_DEFAULT_HZ); rewriteConfigYesNoOption(state,"aof-rewrite-incremental-fsync",server.aof_rewrite_incremental_fsync,CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC); From cd4845c478a792bd0b567fbba4b49cd45e1f776c Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 30 Apr 2019 15:59:23 +0200 Subject: [PATCH 081/304] Threaded IO: handleClientsWithPendingReadsUsingThreads top comment. --- src/networking.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/networking.c b/src/networking.c index 651dbdb8a..6fec97605 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2734,6 +2734,12 @@ int postponeClientRead(client *c) { } } +/* When threaded I/O is also enabled for the reading + parsing side, the + * readable handler will just put normal clients into a queue of clients to + * process (instead of serving them synchronously). This function runs + * the queue using the I/O threads, and process them in order to accumulate + * the reads in the buffers, and also parse the first command available + * rendering it in the client structures. 
*/ int handleClientsWithPendingReadsUsingThreads(void) { if (!io_threads_active || !server.io_threads_do_reads) return 0; int processed = listLength(server.clients_pending_read); From d6da0c14153f54f36bc0dae989a5343f4c633b65 Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Tue, 7 May 2019 13:35:27 +0800 Subject: [PATCH 082/304] Makefile: 1TD -> STD --- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index 1c80e547f..f35685eff 100644 --- a/src/Makefile +++ b/src/Makefile @@ -20,7 +20,7 @@ DEPENDENCY_TARGETS=hiredis linenoise lua NODEPS:=clean distclean # Default settings -1TD=-std=c11 -pedantic -DREDIS_STATIC='' +STD=-std=c11 -pedantic -DREDIS_STATIC='' ifneq (,$(findstring clang,$(CC))) ifneq (,$(findstring FreeBSD,$(uname_S))) STD+=-Wno-c11-extensions From 0ba31aface761e8de215d32fa48238b6f870d23a Mon Sep 17 00:00:00 2001 From: stan011 Date: Tue, 7 May 2019 14:22:40 +0800 Subject: [PATCH 083/304] change the comments there may have a mis type --- src/t_list.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/t_list.c b/src/t_list.c index 45d2e3317..54e4959b9 100644 --- a/src/t_list.c +++ b/src/t_list.c @@ -617,7 +617,7 @@ void rpoplpushCommand(client *c) { * the AOF and replication channel. * * The argument 'where' is LIST_TAIL or LIST_HEAD, and indicates if the - * 'value' element was popped fron the head (BLPOP) or tail (BRPOP) so that + * 'value' element was popped from the head (BLPOP) or tail (BRPOP) so that * we can propagate the command properly. * * The function returns C_OK if we are able to serve the client, otherwise From 4c83175fa4dd4bfab8de4341cf32c9b3dcd6a8eb Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Tue, 7 May 2019 15:59:16 +0800 Subject: [PATCH 084/304] fix memory leak when rewrite config file --- src/config.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/config.c b/src/config.c index 1686743a0..7f0e9af89 100644 --- a/src/config.c +++ b/src/config.c @@ -1711,12 +1711,11 @@ void rewriteConfigMarkAsProcessed(struct rewriteConfigState *state, const char * * If the old file does not exist at all, an empty state is returned. */ struct rewriteConfigState *rewriteConfigReadOldFile(char *path) { FILE *fp = fopen(path,"r"); - struct rewriteConfigState *state = zmalloc(sizeof(*state)); - char buf[CONFIG_MAX_LINE+1]; - int linenum = -1; - if (fp == NULL && errno != ENOENT) return NULL; + char buf[CONFIG_MAX_LINE+1]; + int linenum = -1; + struct rewriteConfigState *state = zmalloc(sizeof(*state)); state->option_to_line = dictCreate(&optionToLineDictType,NULL); state->rewritten = dictCreate(&optionSetDictType,NULL); state->numlines = 0; From 871308297337d29f5922fe7baa244385e1b60105 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=94=90=E6=9D=83?= Date: Wed, 8 May 2019 12:53:56 +0800 Subject: [PATCH 085/304] Update ziplist.c Hi, @antirez In the code, to get the size of ziplist, "unsigned int bytes = ZIPLIST_HEADER_SIZE+1;" is correct, but why not make it more readable and easy to understand --- src/ziplist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ziplist.c b/src/ziplist.c index 1579d1109..ef40d6aa2 100644 --- a/src/ziplist.c +++ b/src/ziplist.c @@ -576,7 +576,7 @@ void zipEntry(unsigned char *p, zlentry *e) { /* Create a new empty ziplist. 
*/ unsigned char *ziplistNew(void) { - unsigned int bytes = ZIPLIST_HEADER_SIZE+1; + unsigned int bytes = ZIPLIST_HEADER_SIZE+ZIPLIST_END_SIZE; unsigned char *zl = zmalloc(bytes); ZIPLIST_BYTES(zl) = intrev32ifbe(bytes); ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(ZIPLIST_HEADER_SIZE); From 58fb679d82d3014b1cc36ccf1917a8ac10054e96 Mon Sep 17 00:00:00 2001 From: yongman Date: Wed, 8 May 2019 16:13:42 +0800 Subject: [PATCH 086/304] Fix uint64_t hash value in active defrag --- src/defrag.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/defrag.c b/src/defrag.c index d67b6e253..ecf0255dc 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -47,7 +47,7 @@ int je_get_defrag_hint(void* ptr, int *bin_util, int *run_util); /* forward declarations*/ void defragDictBucketCallback(void *privdata, dictEntry **bucketref); -dictEntry* replaceSateliteDictKeyPtrAndOrDefragDictEntry(dict *d, sds oldkey, sds newkey, unsigned int hash, long *defragged); +dictEntry* replaceSateliteDictKeyPtrAndOrDefragDictEntry(dict *d, sds oldkey, sds newkey, uint64_t hash, long *defragged); /* Defrag helper for generic allocations. * @@ -355,7 +355,7 @@ long activeDefragSdsListAndDict(list *l, dict *d, int dict_val_type) { sdsele = ln->value; if ((newsds = activeDefragSds(sdsele))) { /* When defragging an sds value, we need to update the dict key */ - unsigned int hash = dictGetHash(d, sdsele); + uint64_t hash = dictGetHash(d, sdsele); replaceSateliteDictKeyPtrAndOrDefragDictEntry(d, sdsele, newsds, hash, &defragged); ln->value = newsds; defragged++; @@ -392,7 +392,7 @@ long activeDefragSdsListAndDict(list *l, dict *d, int dict_val_type) { * moved. Return value is the the dictEntry if found, or NULL if not found. * NOTE: this is very ugly code, but it let's us avoid the complication of * doing a scan on another dict. 
*/ -dictEntry* replaceSateliteDictKeyPtrAndOrDefragDictEntry(dict *d, sds oldkey, sds newkey, unsigned int hash, long *defragged) { +dictEntry* replaceSateliteDictKeyPtrAndOrDefragDictEntry(dict *d, sds oldkey, sds newkey, uint64_t hash, long *defragged) { dictEntry **deref = dictFindEntryRefByPtrAndHash(d, oldkey, hash); if (deref) { dictEntry *de = *deref; From 285bcac99249eb54e0d71a9780a28c98772f3562 Mon Sep 17 00:00:00 2001 From: Angus Pearson Date: Wed, 8 May 2019 11:36:31 +0100 Subject: [PATCH 087/304] Add include to deps/hiredis/read.c to fix Implicit Declaration of strcasecmp warning --- deps/hiredis/read.c | 1 + 1 file changed, 1 insertion(+) diff --git a/deps/hiredis/read.c b/deps/hiredis/read.c index c75c3435f..cc0f3cc72 100644 --- a/deps/hiredis/read.c +++ b/deps/hiredis/read.c @@ -31,6 +31,7 @@ #include "fmacros.h" #include +#include #include #ifndef _MSC_VER #include From 8752868c1d60f4c2f205e03773990726c60ccfd9 Mon Sep 17 00:00:00 2001 From: Angus Pearson Date: Wed, 8 May 2019 12:13:45 +0100 Subject: [PATCH 088/304] Enlarge error buffer in redis-check-aof.c to remove compiler warning of output truncation through snprintf format string --- src/redis-check-aof.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/redis-check-aof.c b/src/redis-check-aof.c index 54ed85f0d..eedb09db5 100644 --- a/src/redis-check-aof.c +++ b/src/redis-check-aof.c @@ -37,7 +37,7 @@ snprintf(error, sizeof(error), "0x%16llx: %s", (long long)epos, __buf); \ } -static char error[1024]; +static char error[1044]; static off_t epos; int consumeNewline(char *buf) { From 1e45354eb6bfc075a98e894c9d601eea82a9eeb4 Mon Sep 17 00:00:00 2001 From: "zhaozhao.zz" Date: Fri, 10 May 2019 16:27:25 +0800 Subject: [PATCH 089/304] test cases: skiptill -> skip-till --- tests/test_helper.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index 568eacdee..1442067f5 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -503,7 +503,7 @@ for {set j 0} {$j < [llength $argv]} {incr j} { } elseif {$opt eq {--only}} { lappend ::only_tests $arg incr j - } elseif {$opt eq {--skiptill}} { + } elseif {$opt eq {--skip-till}} { set ::skip_till $arg incr j } elseif {$opt eq {--list-tests}} { From 5606036fb49819312013efbbd9440da65a68c503 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 13 May 2019 17:27:06 +0200 Subject: [PATCH 090/304] Fix test false positive introduced by threaded I/O. Now clients that are ready to be terminated asynchronously are processed more often in beforeSleep() instead of being processed in serverCron(). This means that the test will not be able to catch the moment the client was terminated, also note that the 'omem' figure now changes in big steps, because of the new client output buffers layout. So we have to change the test range in order to accomodate for that. Yet the test is useful enough to be worth taking, even if its precision is reduced by this commit. Probably if we get more problems, a thing that makes sense is just to check that the limit is < 200k. That's more than enough actually. 
--- tests/unit/obuf-limits.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/obuf-limits.tcl b/tests/unit/obuf-limits.tcl index 5d625cf45..c45bf8e86 100644 --- a/tests/unit/obuf-limits.tcl +++ b/tests/unit/obuf-limits.tcl @@ -15,7 +15,7 @@ start_server {tags {"obuf-limits"}} { if {![regexp {omem=([0-9]+)} $c - omem]} break if {$omem > 200000} break } - assert {$omem >= 90000 && $omem < 200000} + assert {$omem >= 70000 && $omem < 200000} $rd1 close } From d0d1cbbf5c527a64e972957d58fa77aa2d478253 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 13 May 2019 17:30:02 +0200 Subject: [PATCH 091/304] Make comment in getClientOutputBufferMemoryUsage() describing the present. --- src/networking.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/networking.c b/src/networking.c index 6fec97605..4bc22120a 100644 --- a/src/networking.c +++ b/src/networking.c @@ -2295,15 +2295,8 @@ void rewriteClientCommandArgument(client *c, int i, robj *newval) { } } -/* This function returns the number of bytes that Redis is virtually +/* This function returns the number of bytes that Redis is * using to store the reply still not read by the client. - * It is "virtual" since the reply output list may contain objects that - * are shared and are not really using additional memory. - * - * The function returns the total sum of the length of all the objects - * stored in the output list, plus the memory used to allocate every - * list node. The static reply buffer is not taken into account since it - * is allocated anyway. * * Note: this function is very fast so can be called as many time as * the caller wishes. The main usage of this function currently is From 78978eb5e1dee96b94c1d158e42796d8c8a4c305 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 14 May 2019 16:54:59 +0200 Subject: [PATCH 092/304] Test: fix slowlog test false positive. In fast systems "SLOWLOG RESET" is fast enough to don't be logged even when the time limit is "1" sometimes. Leading to false positives such as: [err]: SLOWLOG - can be disabled in tests/unit/slowlog.tcl Expected '1' to be equal to '0' --- tests/unit/slowlog.tcl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/unit/slowlog.tcl b/tests/unit/slowlog.tcl index dbd7a1547..22f088103 100644 --- a/tests/unit/slowlog.tcl +++ b/tests/unit/slowlog.tcl @@ -80,9 +80,11 @@ start_server {tags {"slowlog"} overrides {slowlog-log-slower-than 1000000}} { } test {SLOWLOG - can be disabled} { + r config set slowlog-max-len 1 r config set slowlog-log-slower-than 1 r slowlog reset - assert_equal [r slowlog len] 1 + r debug sleep 0.2 + assert_equal [r slowlog len] 1 r config set slowlog-log-slower-than -1 r slowlog reset r debug sleep 0.2 From 9eea57cc311e62a49358e09af7baebce9da9053f Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 15 May 2019 12:16:43 +0200 Subject: [PATCH 093/304] Narrow the effects of PR #6029 to the exact state. CLIENT PAUSE may be used, in other contexts, for a long time making all the slaves time out. Better for now to be more specific about what should disable senidng PINGs. An alternative to that would be to virtually refresh the slave interactions when clients are paused, however for now I went for this more conservative solution. 
--- src/replication.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/replication.c b/src/replication.c index bfe50c929..63a67a06a 100644 --- a/src/replication.c +++ b/src/replication.c @@ -30,6 +30,7 @@ #include "server.h" +#include "cluster.h" #include #include @@ -2601,12 +2602,23 @@ void replicationCron(void) { /* First, send PING according to ping_slave_period. */ if ((replication_cron_loops % server.repl_ping_slave_period) == 0 && - listLength(server.slaves) && !clientsArePaused()) + listLength(server.slaves)) { - ping_argv[0] = createStringObject("PING",4); - replicationFeedSlaves(server.slaves, server.slaveseldb, - ping_argv, 1); - decrRefCount(ping_argv[0]); + /* Note that we don't send the PING if the clients are paused during + * a Redis Cluster manual failover: the PING we send will otherwise + * alter the replication offsets of master and slave, and will no longer + * match the one stored into 'mf_master_offset' state. */ + int manual_failover_in_progress = + server.cluster_enabled && + server.cluster->mf_end && + clientsArePaused(); + + if (!manual_failover_in_progress) { + ping_argv[0] = createStringObject("PING",4); + replicationFeedSlaves(server.slaves, server.slaveseldb, + ping_argv, 1); + decrRefCount(ping_argv[0]); + } } /* Second, send a newline to all the slaves in pre-synchronization From 6e4635e8fd7e121439235c1271581000728843f5 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 15 May 2019 12:46:01 +0200 Subject: [PATCH 094/304] Update CONTRIBUTING with present info. --- CONTRIBUTING | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/CONTRIBUTING b/CONTRIBUTING index 7dee24c74..5fb038e49 100644 --- a/CONTRIBUTING +++ b/CONTRIBUTING @@ -14,9 +14,7 @@ each source file that you contribute. PLEASE DO NOT POST GENERAL QUESTIONS that are not about bugs or suspected bugs in the Github issues system. We'll be very happy to help you and provide - all the support at the Reddit sub: - - http://reddit.com/r/redis + all the support in the mainling list. There is also an active community of Redis users at Stack Overflow: @@ -24,7 +22,12 @@ each source file that you contribute. # How to provide a patch for a new feature -1. If it is a major feature or a semantical change, please post it as a new submission in r/redis on Reddit at http://reddit.com/r/redis. Try to be passionate about why the feature is needed, make users upvote your proposal to gain traction and so forth. Read feedbacks about the community. But in this first step **please don't write code yet**. +1. If it is a major feature or a semantical change, please don't start coding +straight away: if your feature is not a conceptual fit you'll lose a lot of +time writing the code without any reason. Start by posting in the mailing list +and creating an issue at Github with the description of, excatly, what you want +to accomplish and why. Use cases are important for features to be accepted. +Here you'll see if there is consensus about your idea. 2. If in step 1 you get an acknowledgment from the project leaders, use the following procedure to submit a patch: @@ -35,6 +38,13 @@ each source file that you contribute. d. Initiate a pull request on github ( https://help.github.com/articles/creating-a-pull-request/ ) e. Done :) -For minor fixes just open a pull request on Github. +3. Keep in mind that we are very overloaded, so issues and PRs sometimes wait +for a *very* long time. 
However this is not lack of interest, as the project +gets more and more users, we find ourselves in a constant need to prioritize +certain issues/PRs over others. If you think your issue/PR is very important +try to popularize it, have other users commenting and sharing their point of +view and so forth. This helps. + +4. For minor fixes just open a pull request on Github. Thanks! From 525fc336ef5172f1eddbf18cbbfc1a32b054c9e2 Mon Sep 17 00:00:00 2001 From: Christian Zeller Date: Wed, 15 May 2019 16:10:48 +0200 Subject: [PATCH 095/304] Typo fixes in CONTRIBUTING --- CONTRIBUTING | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING b/CONTRIBUTING index 5fb038e49..000edbeaf 100644 --- a/CONTRIBUTING +++ b/CONTRIBUTING @@ -14,7 +14,7 @@ each source file that you contribute. PLEASE DO NOT POST GENERAL QUESTIONS that are not about bugs or suspected bugs in the Github issues system. We'll be very happy to help you and provide - all the support in the mainling list. + all the support in the mailing list. There is also an active community of Redis users at Stack Overflow: @@ -25,7 +25,7 @@ each source file that you contribute. 1. If it is a major feature or a semantical change, please don't start coding straight away: if your feature is not a conceptual fit you'll lose a lot of time writing the code without any reason. Start by posting in the mailing list -and creating an issue at Github with the description of, excatly, what you want +and creating an issue at Github with the description of, exactly, what you want to accomplish and why. Use cases are important for features to be accepted. Here you'll see if there is consensus about your idea. From 4bbaf621a12f6ec22ec7dad6a2282b7908660497 Mon Sep 17 00:00:00 2001 From: Angus Pearson Date: Wed, 22 May 2019 16:39:04 +0100 Subject: [PATCH 096/304] Implement `SCAN cursor [TYPE type]` modifier suggested in issue #6107. Add tests to check basic functionality of this optional keyword, and also tested with a module (redisgraph). Checked quickly with valgrind, no issues. Copies name the type name canonicalisation code from `typeCommand`, perhaps this would be better factored out to prevent the two diverging and both needing to be edited to add new `OBJ_*` types, but this is a little fiddly with C strings. The [redis-doc](https://github.com/antirez/redis-doc/blob/master/commands.json) repo will need to be updated with this new arg if accepted. A quirk to be aware of here is that the GEO commands are backed by zsets not their own type, so they're not distinguishable from other zsets. Additionally, for sparse types this has the same behaviour as `MATCH` in that it may return many empty results before giving something, even for large `COUNT`s. --- src/db.c | 32 +++++++++++++++++++++++++++++++- tests/unit/scan.tcl | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/src/db.c b/src/db.c index b537a29a4..6623f7f2f 100644 --- a/src/db.c +++ b/src/db.c @@ -613,7 +613,7 @@ int parseScanCursorOrReply(client *c, robj *o, unsigned long *cursor) { } /* This command implements SCAN, HSCAN and SSCAN commands. - * If object 'o' is passed, then it must be a Hash or Set object, otherwise + * If object 'o' is passed, then it must be a Hash, Set or Zset object, otherwise * if 'o' is NULL the command will operate on the dictionary associated with * the current database. 
* @@ -629,6 +629,7 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) { listNode *node, *nextnode; long count = 10; sds pat = NULL; + sds typename = NULL; int patlen = 0, use_pattern = 0; dict *ht; @@ -665,6 +666,10 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) { use_pattern = !(pat[0] == '*' && patlen == 1); i += 2; + } else if (!strcasecmp(c->argv[i]->ptr, "type") && o == NULL && j >= 2) { + /* SCAN for a particular type only applies to the db dict */ + typename = c->argv[i+1]->ptr; + i+= 2; } else { addReply(c,shared.syntaxerr); goto cleanup; @@ -759,6 +764,31 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) { } } + /* Filter an element if it isn't the type we want. */ + if (!filter && o == NULL && typename){ + robj* typecheck; + char *type; + typecheck = lookupKeyReadWithFlags(c->db, kobj, LOOKUP_NOTOUCH); + if (typecheck == NULL) { + type = "none"; + } else { + switch(typecheck->type) { + case OBJ_STRING: type = "string"; break; + case OBJ_LIST: type = "list"; break; + case OBJ_SET: type = "set"; break; + case OBJ_ZSET: type = "zset"; break; + case OBJ_HASH: type = "hash"; break; + case OBJ_STREAM: type = "stream"; break; + case OBJ_MODULE: { + moduleValue *mv = typecheck->ptr; + type = mv->type->name; + }; break; + default: type = "unknown"; break; + } + } + if (strcasecmp((char*) typename, type)) filter = 1; + } + /* Filter element if it is an expired key. */ if (!filter && o == NULL && expireIfNeeded(c->db, kobj)) filter = 1; diff --git a/tests/unit/scan.tcl b/tests/unit/scan.tcl index c0f4349d2..9f9ff4df2 100644 --- a/tests/unit/scan.tcl +++ b/tests/unit/scan.tcl @@ -53,6 +53,51 @@ start_server {tags {"scan"}} { assert_equal 100 [llength $keys] } + test "SCAN TYPE" { + r flushdb + # populate only creates strings + r debug populate 1000 + + # Check non-strings are excluded + set cur 0 + set keys {} + while 1 { + set res [r scan $cur type "list"] + set cur [lindex $res 0] + set k [lindex $res 1] + lappend keys {*}$k + if {$cur == 0} break + } + + assert_equal 0 [llength $keys] + + # Check strings are included + set cur 0 + set keys {} + while 1 { + set res [r scan $cur type "string"] + set cur [lindex $res 0] + set k [lindex $res 1] + lappend keys {*}$k + if {$cur == 0} break + } + + assert_equal 1000 [llength $keys] + + # Check all three args work together + set cur 0 + set keys {} + while 1 { + set res [r scan $cur type "string" match "key:*" count 10] + set cur [lindex $res 0] + set k [lindex $res 1] + lappend keys {*}$k + if {$cur == 0} break + } + + assert_equal 1000 [llength $keys] + } + foreach enc {intset hashtable} { test "SSCAN with encoding $enc" { # Create the Set From 0e0756659128102952c3213a9b4e7161dcc9ca3d Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Thu, 30 May 2019 11:51:58 +0300 Subject: [PATCH 097/304] Jemalloc: Avoid blocking on background thread lock for stats. Background threads may run for a long time, especially when the # of dirty pages is high. Avoid blocking stats calls because of this (which may cause latency spikes). 
see https://github.com/jemalloc/jemalloc/issues/1502 cherry picked from commit 1a71533511027dbe3f9d989659efeec446915d6b --- deps/jemalloc/src/background_thread.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/deps/jemalloc/src/background_thread.c b/deps/jemalloc/src/background_thread.c index 3517a3bb8..457669c9e 100644 --- a/deps/jemalloc/src/background_thread.c +++ b/deps/jemalloc/src/background_thread.c @@ -787,7 +787,13 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { nstime_init(&stats->run_interval, 0); for (unsigned i = 0; i < max_background_threads; i++) { background_thread_info_t *info = &background_thread_info[i]; - malloc_mutex_lock(tsdn, &info->mtx); + if (malloc_mutex_trylock(tsdn, &info->mtx)) { + /* + * Each background thread run may take a long time; + * avoid waiting on the stats if the thread is active. + */ + continue; + } if (info->state != background_thread_stopped) { num_runs += info->tot_n_runs; nstime_add(&stats->run_interval, &info->tot_sleep_time); From f7833d560d80604445ad34ad2ef9d829d0aef7d6 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Thu, 30 May 2019 12:51:32 +0300 Subject: [PATCH 098/304] make redis purge jemalloc after flush, and enable background purging thread jemalloc 5 doesn't immediately release memory back to the OS, instead there's a decaying mechanism, which doesn't work when there's no traffic (no allocations). this is most evident if there's no traffic after flushdb, the RSS will remain high. 1) enable jemalloc background purging 2) explicitly purge in flushdb --- src/config.c | 9 ++++++++ src/db.c | 14 ++++++++++++ src/debug.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/server.c | 2 ++ src/server.h | 1 + src/zmalloc.c | 34 ++++++++++++++++++++++++++++ src/zmalloc.h | 2 ++ 7 files changed, 124 insertions(+) diff --git a/src/config.c b/src/config.c index 7f0e9af89..16850a1fc 100644 --- a/src/config.c +++ b/src/config.c @@ -474,6 +474,10 @@ void loadServerConfigFromString(char *config) { err = "active defrag can't be enabled without proper jemalloc support"; goto loaderr; #endif } + } else if (!strcasecmp(argv[0],"jemalloc-bg-thread") && argc == 2) { + if ((server.jemalloc_bg_thread = yesnotoi(argv[1])) == -1) { + err = "argument must be 'yes' or 'no'"; goto loaderr; + } } else if (!strcasecmp(argv[0],"daemonize") && argc == 2) { if ((server.daemonize = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; @@ -1152,6 +1156,9 @@ void configSetCommand(client *c) { return; } #endif + } config_set_bool_field( + "jemalloc-bg-thread",server.jemalloc_bg_thread) { + set_jemalloc_bg_thread(server.jemalloc_bg_thread); } config_set_bool_field( "protected-mode",server.protected_mode) { } config_set_bool_field( @@ -1487,6 +1494,7 @@ void configGetCommand(client *c) { config_get_bool_field("rdbchecksum", server.rdb_checksum); config_get_bool_field("activerehashing", server.activerehashing); config_get_bool_field("activedefrag", server.active_defrag_enabled); + config_get_bool_field("jemalloc-bg-thread", server.jemalloc_bg_thread); config_get_bool_field("protected-mode", server.protected_mode); config_get_bool_field("gopher-enabled", server.gopher_enabled); config_get_bool_field("io-threads-do-reads", server.io_threads_do_reads); @@ -2318,6 +2326,7 @@ int rewriteConfig(char *path) { rewriteConfigNumericalOption(state,"hll-sparse-max-bytes",server.hll_sparse_max_bytes,CONFIG_DEFAULT_HLL_SPARSE_MAX_BYTES); 
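Both knobs go through jemalloc's mallctl interface. A minimal sketch of the two helpers the patch adds to zmalloc.c (assuming a jemalloc build that exposes the je_-prefixed API; the surrounding #ifdef USE_JEMALLOC plumbing and the non-jemalloc stubs are omitted here):

    /* Let jemalloc run its decay/purge logic from a background thread, so
     * unused pages are returned to the OS even when Redis gets no traffic. */
    void set_jemalloc_bg_thread(int enable) {
        if (enable) {
            char val = 1;
            je_mallctl("background_thread", NULL, 0, &val, 1);
        }
    }

    /* Immediately return all unused (reserved) pages to the OS, called right
     * after a synchronous FLUSHDB/FLUSHALL. Asking for "arena.<narenas>.purge"
     * is jemalloc's convention for purging every arena at once. */
    int jemalloc_purge(void) {
        char tmp[32];
        unsigned narenas = 0;
        size_t sz = sizeof(unsigned);
        if (!je_mallctl("arenas.narenas", &narenas, &sz, NULL, 0)) {
            sprintf(tmp, "arena.%d.purge", narenas);
            if (!je_mallctl(tmp, NULL, 0, NULL, 0))
                return 0;
        }
        return -1;
    }

The first helper is also exposed as the jemalloc-bg-thread configuration directive (settable at startup and via CONFIG SET), which defaults to yes.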
rewriteConfigYesNoOption(state,"activerehashing",server.activerehashing,CONFIG_DEFAULT_ACTIVE_REHASHING); rewriteConfigYesNoOption(state,"activedefrag",server.active_defrag_enabled,CONFIG_DEFAULT_ACTIVE_DEFRAG); + rewriteConfigYesNoOption(state,"jemalloc-bg-thread",server.jemalloc_bg_thread,1); rewriteConfigYesNoOption(state,"protected-mode",server.protected_mode,CONFIG_DEFAULT_PROTECTED_MODE); rewriteConfigYesNoOption(state,"gopher-enabled",server.gopher_enabled,CONFIG_DEFAULT_GOPHER_ENABLED); rewriteConfigYesNoOption(state,"io-threads-do-reads",server.io_threads_do_reads,CONFIG_DEFAULT_IO_THREADS_DO_READS); diff --git a/src/db.c b/src/db.c index b537a29a4..50e23d6b2 100644 --- a/src/db.c +++ b/src/db.c @@ -441,6 +441,13 @@ void flushdbCommand(client *c) { signalFlushedDb(c->db->id); server.dirty += emptyDb(c->db->id,flags,NULL); addReply(c,shared.ok); +#if defined(USE_JEMALLOC) + /* jemalloc 5 doesn't release pages back to the OS when there's no traffic. + * for large databases, flushdb blocks for long anyway, so a bit more won't + * harm and this way the flush and purge will be synchroneus. */ + if (!(flags & EMPTYDB_ASYNC)) + jemalloc_purge(); +#endif } /* FLUSHALL [ASYNC] @@ -464,6 +471,13 @@ void flushallCommand(client *c) { server.dirty = saved_dirty; } server.dirty++; +#if defined(USE_JEMALLOC) + /* jemalloc 5 doesn't release pages back to the OS when there's no traffic. + * for large databases, flushdb blocks for long anyway, so a bit more won't + * harm and this way the flush and purge will be synchroneus. */ + if (!(flags & EMPTYDB_ASYNC)) + jemalloc_purge(); +#endif } /* This command implements DEL and LAZYDEL. */ diff --git a/src/debug.c b/src/debug.c index 0c6b5630c..c82c99b1f 100644 --- a/src/debug.c +++ b/src/debug.c @@ -297,6 +297,56 @@ void computeDatasetDigest(unsigned char *final) { } } +#ifdef USE_JEMALLOC +void mallctl_int(client *c, robj **argv, int argc) { + int ret; + /* start with the biggest size (int64), and if that fails, try smaller sizes (int32, bool) */ + int64_t old = 0, val; + if (argc > 1) { + long long ll; + if (getLongLongFromObjectOrReply(c, argv[1], &ll, NULL) != C_OK) + return; + val = ll; + } + size_t sz = sizeof(old); + while (sz > 0) { + if ((ret=je_mallctl(argv[0]->ptr, &old, &sz, argc > 1? &val: NULL, argc > 1?sz: 0))) { + if (ret==EINVAL) { + /* size might be wrong, try a smaller one */ + sz /= 2; +#if BYTE_ORDER == BIG_ENDIAN + val <<= 8*sz; +#endif + continue; + } + addReplyErrorFormat(c,"%s", strerror(ret)); + return; + } else { +#if BYTE_ORDER == BIG_ENDIAN + old >>= 64 - 8*sz; +#endif + addReplyLongLong(c, old); + return; + } + } + addReplyErrorFormat(c,"%s", strerror(EINVAL)); +} + +void mallctl_string(client *c, robj **argv, int argc) { + int ret; + char *old; + size_t sz = sizeof(old); + /* for strings, it seems we need to first get the old value, before overriding it. 
*/ + if ((ret=je_mallctl(argv[0]->ptr, &old, &sz, NULL, 0))) { + addReplyErrorFormat(c,"%s", strerror(ret)); + return; + } + addReplyBulkCString(c, old); + if(argc > 1) + je_mallctl(argv[0]->ptr, NULL, 0, &argv[1]->ptr, sizeof(char*)); +} +#endif + void debugCommand(client *c) { if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) { const char *help[] = { @@ -323,6 +373,10 @@ void debugCommand(client *c) { "STRUCTSIZE -- Return the size of different Redis core C structures.", "ZIPLIST -- Show low level info about the ziplist encoding.", "STRINGMATCH-TEST -- Run a fuzz tester against the stringmatchlen() function.", +#ifdef USE_JEMALLOC +"MALLCTL [] -- Get or set a malloc tunning integer.", +"MALLCTL-STR [] -- Get or set a malloc tunning string.", +#endif NULL }; addReplyHelp(c, help); @@ -676,6 +730,14 @@ NULL { stringmatchlen_fuzz_test(); addReplyStatus(c,"Apparently Redis did not crash: test passed"); +#ifdef USE_JEMALLOC + } else if(!strcasecmp(c->argv[1]->ptr,"mallctl") && c->argc >= 3) { + mallctl_int(c, c->argv+2, c->argc-2); + return; + } else if(!strcasecmp(c->argv[1]->ptr,"mallctl-str") && c->argc >= 3) { + mallctl_string(c, c->argv+2, c->argc-2); + return; +#endif } else { addReplySubcommandSyntaxError(c); return; diff --git a/src/server.c b/src/server.c index 4b87b6ac2..fa2c7b1ee 100644 --- a/src/server.c +++ b/src/server.c @@ -2230,6 +2230,7 @@ void initServerConfig(void) { server.maxidletime = CONFIG_DEFAULT_CLIENT_TIMEOUT; server.tcpkeepalive = CONFIG_DEFAULT_TCP_KEEPALIVE; server.active_expire_enabled = 1; + server.jemalloc_bg_thread = 1; server.active_defrag_enabled = CONFIG_DEFAULT_ACTIVE_DEFRAG; server.active_defrag_ignore_bytes = CONFIG_DEFAULT_DEFRAG_IGNORE_BYTES; server.active_defrag_threshold_lower = CONFIG_DEFAULT_DEFRAG_THRESHOLD_LOWER; @@ -2866,6 +2867,7 @@ void initServer(void) { latencyMonitorInit(); bioInit(); initThreadedIO(); + set_jemalloc_bg_thread(server.jemalloc_bg_thread); server.initial_memory_usage = zmalloc_used_memory(); } diff --git a/src/server.h b/src/server.h index 0813f8bd1..4ae079ff2 100644 --- a/src/server.h +++ b/src/server.h @@ -1129,6 +1129,7 @@ struct redisServer { int tcpkeepalive; /* Set SO_KEEPALIVE if non-zero. */ int active_expire_enabled; /* Can be disabled for testing purposes. 
*/ int active_defrag_enabled; + int jemalloc_bg_thread; /* Enable jemalloc background thread */ size_t active_defrag_ignore_bytes; /* minimum amount of fragmentation waste to start active defrag */ int active_defrag_threshold_lower; /* minimum percentage of fragmentation to start active defrag */ int active_defrag_threshold_upper; /* maximum percentage of fragmentation at which we use maximum effort */ diff --git a/src/zmalloc.c b/src/zmalloc.c index 5e6010278..58896a727 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -306,6 +306,7 @@ size_t zmalloc_get_rss(void) { #endif #if defined(USE_JEMALLOC) + int zmalloc_get_allocator_info(size_t *allocated, size_t *active, size_t *resident) { @@ -327,13 +328,46 @@ int zmalloc_get_allocator_info(size_t *allocated, je_mallctl("stats.allocated", allocated, &sz, NULL, 0); return 1; } + +void set_jemalloc_bg_thread(int enable) { + /* let jemalloc do purging asynchronously, required when there's no traffic + * after flushdb */ + if (enable) { + char val = 1; + je_mallctl("background_thread", NULL, 0, &val, 1); + } +} + +int jemalloc_purge() { + /* return all unused (reserved) pages to the OS */ + char tmp[32]; + unsigned narenas = 0; + size_t sz = sizeof(unsigned); + if (!je_mallctl("arenas.narenas", &narenas, &sz, NULL, 0)) { + sprintf(tmp, "arena.%d.purge", narenas); + if (!je_mallctl(tmp, NULL, 0, NULL, 0)) + return 0; + } + return -1; +} + #else + int zmalloc_get_allocator_info(size_t *allocated, size_t *active, size_t *resident) { *allocated = *resident = *active = 0; return 1; } + +void set_jemalloc_bg_thread(int enable) { + ((void)(enable)); +} + +int jemalloc_purge() { + return 0; +} + #endif /* Get the sum of the specified field (converted form kb to bytes) in diff --git a/src/zmalloc.h b/src/zmalloc.h index 6fb19b046..b136a910d 100644 --- a/src/zmalloc.h +++ b/src/zmalloc.h @@ -86,6 +86,8 @@ size_t zmalloc_used_memory(void); void zmalloc_set_oom_handler(void (*oom_handler)(size_t)); size_t zmalloc_get_rss(void); int zmalloc_get_allocator_info(size_t *allocated, size_t *active, size_t *resident); +void set_jemalloc_bg_thread(int enable); +int jemalloc_purge(); size_t zmalloc_get_private_dirty(long pid); size_t zmalloc_get_smap_bytes_by_field(char *field, long pid); size_t zmalloc_get_memory_size(void); From ec45f5b39501e44adb8f20764c472d210a21ac54 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 5 Jun 2019 16:34:55 +0200 Subject: [PATCH 099/304] Redis Benchmark: prevent CONFIG failure from exiting program --- src/redis-benchmark.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/redis-benchmark.c b/src/redis-benchmark.c index 2785167a8..1d16fa4ee 100644 --- a/src/redis-benchmark.c +++ b/src/redis-benchmark.c @@ -1540,7 +1540,10 @@ int main(int argc, const char **argv) { if (node->name) printf("%s ", node->name); printf("%s:%d\n", node->ip, node->port); node->redis_config = getRedisConfig(node->ip, node->port, NULL); - if (node->redis_config == NULL) exit(1); + if (node->redis_config == NULL) { + fprintf(stderr, "WARN: could not fetch node CONFIG %s:%d\n", + node->ip, node->port); + } } printf("\n"); /* Automatically set thread number to node count if not specified @@ -1550,7 +1553,8 @@ int main(int argc, const char **argv) { } else { config.redis_config = getRedisConfig(config.hostip, config.hostport, config.hostsocket); - if (config.redis_config == NULL) exit(1); + if (config.redis_config == NULL) + fprintf(stderr, "WARN: could not fetch server CONFIG\n"); } if (config.num_threads > 0) { From 
67a4bcac1bbab76e4e872299b32cdbfd995c0eac Mon Sep 17 00:00:00 2001 From: Madelyn Olson Date: Fri, 7 Jun 2019 13:20:22 -0700 Subject: [PATCH 100/304] Fixed some spelling issues in ACL codepath including user facing error --- src/acl.c | 22 +++++++++++----------- src/server.c | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/acl.c b/src/acl.c index 0205e51ad..a2ee65dd0 100644 --- a/src/acl.c +++ b/src/acl.c @@ -295,7 +295,7 @@ int ACLGetCommandBitCoordinates(uint64_t id, uint64_t *word, uint64_t *bit) { * Note that this function does not check the ALLCOMMANDS flag of the user * but just the lowlevel bitmask. * - * If the bit overflows the user internal represetation, zero is returned + * If the bit overflows the user internal representation, zero is returned * in order to disallow the execution of the command in such edge case. */ int ACLGetUserCommandBit(user *u, unsigned long id) { uint64_t word, bit; @@ -311,7 +311,7 @@ int ACLUserCanExecuteFutureCommands(user *u) { } /* Set the specified command bit for the specified user to 'value' (0 or 1). - * If the bit overflows the user internal represetation, no operation + * If the bit overflows the user internal representation, no operation * is performed. As a side effect of calling this function with a value of * zero, the user flag ALLCOMMANDS is cleared since it is no longer possible * to skip the command bit explicit test. */ @@ -350,7 +350,7 @@ int ACLSetUserCommandBitsForCategory(user *u, const char *category, int value) { /* Return the number of commands allowed (on) and denied (off) for the user 'u' * in the subset of commands flagged with the specified category name. - * If the categoty name is not valid, C_ERR is returend, otherwise C_OK is + * If the category name is not valid, C_ERR is returned, otherwise C_OK is * returned and on and off are populated by reference. */ int ACLCountCategoryBitsForUser(user *u, unsigned long *on, unsigned long *off, const char *category) @@ -626,7 +626,7 @@ void ACLAddAllowedSubcommand(user *u, unsigned long id, const char *sub) { * It is possible to specify multiple patterns. * allkeys Alias for ~* * resetkeys Flush the list of allowed keys patterns. - * > Add this passowrd to the list of valid password for the user. + * > Add this password to the list of valid password for the user. * For example >mypass will add "mypass" to the list. * This directive clears the "nopass" flag (see later). * < Remove this password from the list of valid passwords. @@ -949,9 +949,9 @@ user *ACLGetUserByName(const char *name, size_t namelen) { return myuser; } -/* Check if the command ready to be excuted in the client 'c', and already - * referenced by c->cmd, can be executed by this client according to the - * ACls associated to the client user c->user. +/* Check if the command is ready to be executed in the client 'c', already + * referenced by c->cmd, and can be executed by this client according to the + * ACLs associated to the client user c->user. * * If the user can execute the command ACL_OK is returned, otherwise * ACL_DENIED_CMD or ACL_DENIED_KEY is returned: the first in case the @@ -1122,7 +1122,7 @@ int ACLLoadConfiguredUsers(void) { } /* This function loads the ACL from the specified filename: every line - * is validated and shold be either empty or in the format used to specify + * is validated and should be either empty or in the format used to specify * users in the redis.conf configuration or in the ACL file, that is: * * user ... rules ... 
@@ -1172,7 +1172,7 @@ sds ACLLoadFromFile(const char *filename) { * to the real user mentioned in the ACL line. */ user *fakeuser = ACLCreateUnlinkedUser(); - /* We do all the loading in a fresh insteance of the Users radix tree, + /* We do all the loading in a fresh instance of the Users radix tree, * so if there are errors loading the ACL file we can rollback to the * old version. */ rax *old_users = Users; @@ -1248,7 +1248,7 @@ sds ACLLoadFromFile(const char *filename) { } /* Note that the same rules already applied to the fake user, so - * we just assert that everything goess well: it should. */ + * we just assert that everything goes well: it should. */ for (j = 2; j < argc; j++) serverAssert(ACLSetUser(u,argv[j],sdslen(argv[j])) == C_OK); @@ -1611,7 +1611,7 @@ void addReplyCommandCategories(client *c, struct redisCommand *cmd) { setDeferredSetLen(c, flaglen, flagcount); } -/* AUTH +/* AUTH * AUTH (Redis >= 6.0 form) * * When the user is omitted it means that we are trying to authenticate diff --git a/src/server.c b/src/server.c index 2643d7266..e4df04692 100644 --- a/src/server.c +++ b/src/server.c @@ -3325,7 +3325,7 @@ int processCommand(client *c) { if (acl_retval == ACL_DENIED_CMD) addReplyErrorFormat(c, "-NOPERM this user has no permissions to run " - "the '%s' command or its subcommnad", c->cmd->name); + "the '%s' command or its subcommand", c->cmd->name); else addReplyErrorFormat(c, "-NOPERM this user has no permissions to access " From 5ca48db2e2621984b5312715b0ff66e9a0bdfa92 Mon Sep 17 00:00:00 2001 From: Angus Pearson Date: Mon, 10 Jun 2019 17:41:44 +0100 Subject: [PATCH 101/304] Add char* typeNameCanonicalize(robj*) to remove duplicate code between SCAN and TYPE commands, and to keep OBJ_* enum to string canonicalization in one place. --- src/db.c | 37 +++++++++++-------------------------- src/server.h | 6 ++++++ 2 files changed, 17 insertions(+), 26 deletions(-) diff --git a/src/db.c b/src/db.c index 6623f7f2f..6557ddc3c 100644 --- a/src/db.c +++ b/src/db.c @@ -766,26 +766,8 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) { /* Filter an element if it isn't the type we want. 
*/ if (!filter && o == NULL && typename){ - robj* typecheck; - char *type; - typecheck = lookupKeyReadWithFlags(c->db, kobj, LOOKUP_NOTOUCH); - if (typecheck == NULL) { - type = "none"; - } else { - switch(typecheck->type) { - case OBJ_STRING: type = "string"; break; - case OBJ_LIST: type = "list"; break; - case OBJ_SET: type = "set"; break; - case OBJ_ZSET: type = "zset"; break; - case OBJ_HASH: type = "hash"; break; - case OBJ_STREAM: type = "stream"; break; - case OBJ_MODULE: { - moduleValue *mv = typecheck->ptr; - type = mv->type->name; - }; break; - default: type = "unknown"; break; - } - } + robj* typecheck = lookupKeyReadWithFlags(c->db, kobj, LOOKUP_NOTOUCH); + char* type = typeNameCanonicalize(typecheck); if (strcasecmp((char*) typename, type)) filter = 1; } @@ -845,11 +827,8 @@ void lastsaveCommand(client *c) { addReplyLongLong(c,server.lastsave); } -void typeCommand(client *c) { - robj *o; - char *type; - - o = lookupKeyReadWithFlags(c->db,c->argv[1],LOOKUP_NOTOUCH); +char* typeNameCanonicalize(robj *o) { + char* type; if (o == NULL) { type = "none"; } else { @@ -867,7 +846,13 @@ void typeCommand(client *c) { default: type = "unknown"; break; } } - addReplyStatus(c,type); + return type; +} + +void typeCommand(client *c) { + robj *o; + o = lookupKeyReadWithFlags(c->db,c->argv[1],LOOKUP_NOTOUCH); + addReplyStatus(c, typeNameCanonicalize(o)); } void shutdownCommand(client *c) { diff --git a/src/server.h b/src/server.h index 0813f8bd1..06d0611fd 100644 --- a/src/server.h +++ b/src/server.h @@ -646,6 +646,12 @@ typedef struct redisObject { void *ptr; } robj; +/* The 'cannonical' name for a type as enumerated above is given by the + * below function. Native types are checked against the OBJ_STRING, + * OBJ_LIST, OBJ_* defines, and Module types have their registered name + * returned.*/ +char* typeNameCanonicalize(robj*); + /* Macro used to initialize a Redis object allocated on the stack. * Note that this macro is taken near the structure definition to make sure * we'll update it when the structure is changed, to avoid bugs like From 7077d14afeaba6f65306248a460575a10180ed6c Mon Sep 17 00:00:00 2001 From: swilly22 Date: Wed, 12 Jun 2019 15:37:19 +0300 Subject: [PATCH 102/304] Extend REDISMODULE_CTX_FLAGS to indicate if redis is currently loading from either RDB or AOF --- src/module.c | 3 +++ src/redismodule.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/src/module.c b/src/module.c index 7dee7e776..1cdd94d1d 100644 --- a/src/module.c +++ b/src/module.c @@ -1455,6 +1455,9 @@ int RM_GetContextFlags(RedisModuleCtx *ctx) { if (server.cluster_enabled) flags |= REDISMODULE_CTX_FLAGS_CLUSTER; + if (server.loading) + flags |= REDISMODULE_CTX_FLAGS_LOADING; + /* Maxmemory and eviction policy */ if (server.maxmemory > 0) { flags |= REDISMODULE_CTX_FLAGS_MAXMEMORY; diff --git a/src/redismodule.h b/src/redismodule.h index 259a5f1db..16b8c1937 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -87,6 +87,8 @@ #define REDISMODULE_CTX_FLAGS_OOM_WARNING (1<<11) /* The command was sent over the replication link. */ #define REDISMODULE_CTX_FLAGS_REPLICATED (1<<12) +/* Redis is currently loading either from AOF or RDB. 
*/ +#define REDISMODULE_CTX_FLAGS_LOADING (1<<13) #define REDISMODULE_NOTIFY_GENERIC (1<<2) /* g */ From 49edda41b96b2f4586a1a5020180a0a0941bafbc Mon Sep 17 00:00:00 2001 From: Angus Pearson Date: Thu, 13 Jun 2019 17:49:33 +0100 Subject: [PATCH 103/304] Spelling cannonical -> canonical --- src/server.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server.h b/src/server.h index 06d0611fd..dc02edb5c 100644 --- a/src/server.h +++ b/src/server.h @@ -646,7 +646,7 @@ typedef struct redisObject { void *ptr; } robj; -/* The 'cannonical' name for a type as enumerated above is given by the +/* The 'canonical' name for a type as enumerated above is given by the * below function. Native types are checked against the OBJ_STRING, * OBJ_LIST, OBJ_* defines, and Module types have their registered name * returned.*/ From bf7eb02d3e8841f860119d0e566bed1d82ac5fb6 Mon Sep 17 00:00:00 2001 From: Madelyn Olson Date: Fri, 31 May 2019 12:05:18 -0700 Subject: [PATCH 104/304] Refactored yesno configs so there was less duplication --- src/config.c | 354 +++++++++++++-------------------------------------- 1 file changed, 89 insertions(+), 265 deletions(-) diff --git a/src/config.c b/src/config.c index 7f0e9af89..2e6e9a6b7 100644 --- a/src/config.c +++ b/src/config.c @@ -98,6 +98,48 @@ clientBufferLimitsConfig clientBufferLimitsDefaults[CLIENT_TYPE_OBUF_COUNT] = { {1024*1024*32, 1024*1024*8, 60} /* pubsub */ }; +/* Configuration values that require no special handling to set, get, load or + * rewrite. */ +typedef struct configYesNo { + const char *name; /* The user visible name of this config */ + const char *alias; /* An alias that can also be used for this config */ + int *config; /* The pointer to the server config this value is stored in */ + const int modifiable; /* Can this value be updated by CONFIG SET? 
*/ + const int default_value; /* The default value of the config on rewrite */ +} configYesNo; + +configYesNo configs_yesno[] = { + /* Non-Modifiable */ + {"rdbchecksum",NULL,&server.rdb_checksum,0,CONFIG_DEFAULT_RDB_CHECKSUM}, + {"daemonize",NULL,&server.daemonize,0,0}, + {"io-threads-do-reads",NULL,&server.io_threads_do_reads, 0, CONFIG_DEFAULT_IO_THREADS_DO_READS}, + {"always-show-logo",NULL,&server.always_show_logo,0,CONFIG_DEFAULT_ALWAYS_SHOW_LOGO}, + /* Modifiable */ + {"protected-mode",NULL,&server.protected_mode,1,CONFIG_DEFAULT_PROTECTED_MODE}, + {"rdbcompression",NULL,&server.rdb_compression,1,CONFIG_DEFAULT_RDB_COMPRESSION}, + {"activerehashing",NULL,&server.activerehashing,1,CONFIG_DEFAULT_ACTIVE_REHASHING}, + {"stop-writes-on-bgsave-error",NULL,&server.stop_writes_on_bgsave_err,1,CONFIG_DEFAULT_STOP_WRITES_ON_BGSAVE_ERROR}, + {"dynamic-hz",NULL,&server.dynamic_hz,1,CONFIG_DEFAULT_DYNAMIC_HZ}, + {"lazyfree-lazy-eviction",NULL,&server.lazyfree_lazy_eviction,1,CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION}, + {"lazyfree-lazy-expire",NULL,&server.lazyfree_lazy_expire,1,CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE}, + {"lazyfree-lazy-server-del",NULL,&server.lazyfree_lazy_server_del,1,CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL}, + {"repl-disable-tcp-nodelay",NULL,&server.repl_disable_tcp_nodelay,1,CONFIG_DEFAULT_REPL_DISABLE_TCP_NODELAY}, + {"repl-diskless-sync",NULL,&server.repl_diskless_sync,1,CONFIG_DEFAULT_REPL_DISKLESS_SYNC}, + {"gopher-enabled",NULL,&server.gopher_enabled,1,CONFIG_DEFAULT_GOPHER_ENABLED}, + {"aof-rewrite-incremental-fsync",NULL,&server.aof_rewrite_incremental_fsync,1,CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC}, + {"no-appendfsync-on-rewrite",NULL,&server.aof_no_fsync_on_rewrite,1,CONFIG_DEFAULT_AOF_NO_FSYNC_ON_REWRITE}, + {"cluster-require-full-coverage",NULL,&server.cluster_require_full_coverage,CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE}, + {"rdb-save-incremental-fsync",NULL,&server.rdb_save_incremental_fsync,1,CONFIG_DEFAULT_RDB_SAVE_INCREMENTAL_FSYNC}, + {"aof-load-truncated",NULL,&server.aof_load_truncated,1,CONFIG_DEFAULT_AOF_LOAD_TRUNCATED}, + {"aof-use-rdb-preamble",NULL,&server.aof_use_rdb_preamble,1,CONFIG_DEFAULT_AOF_USE_RDB_PREAMBLE}, + {"cluster-replica-no-failover","cluster-slave-no-failover",&server.cluster_slave_no_failover,1,CLUSTER_DEFAULT_SLAVE_NO_FAILOVER}, + {"replica-lazy-flush","slave-lazy-flush",&server.repl_slave_lazy_flush,1,CONFIG_DEFAULT_SLAVE_LAZY_FLUSH}, + {"replica-serve-stale-data","slave-serve-stale-data",&server.repl_serve_stale_data,1,CONFIG_DEFAULT_SLAVE_SERVE_STALE_DATA}, + {"replica-read-only","slave-read-only",&server.repl_slave_ro,1,CONFIG_DEFAULT_SLAVE_READ_ONLY}, + {"replica-ignore-maxmemory","slave-ignore-maxmemory",&server.repl_slave_ignore_maxmemory,1,CONFIG_DEFAULT_SLAVE_IGNORE_MAXMEMORY}, + {NULL, NULL, 0, 0} +}; + /*----------------------------------------------------------------------------- * Enum access functions *----------------------------------------------------------------------------*/ @@ -201,6 +243,26 @@ void loadServerConfigFromString(char *config) { } sdstolower(argv[0]); + /* Iterate the configs that are standard */ + int match = 0; + for (configYesNo *config = configs_yesno; config->name != NULL; config++) { + if ((!strcasecmp(argv[0],config->name) || + (config->alias && !strcasecmp(argv[0],config->alias))) && + (argc == 2)) + { + if ((*(config->config) = yesnotoi(argv[1])) == -1) { + err = "argument must be 'yes' or 'no'"; goto loaderr; + } + match = 1; + break; + } + } + + if (match) { + sdsfreesplitres(argv,argc); + 
continue; + } + /* Execute config directives */ if (!strcasecmp(argv[0],"timeout") && argc == 2) { server.maxidletime = atoi(argv[1]); @@ -212,14 +274,6 @@ void loadServerConfigFromString(char *config) { if (server.tcpkeepalive < 0) { err = "Invalid tcp-keepalive value"; goto loaderr; } - } else if (!strcasecmp(argv[0],"protected-mode") && argc == 2) { - if ((server.protected_mode = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if (!strcasecmp(argv[0],"gopher-enabled") && argc == 2) { - if ((server.gopher_enabled = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } } else if (!strcasecmp(argv[0],"port") && argc == 2) { server.port = atoi(argv[1]); if (server.port < 0 || server.port > 65535) { @@ -290,10 +344,6 @@ void loadServerConfigFromString(char *config) { } else if (!strcasecmp(argv[0],"aclfile") && argc == 2) { zfree(server.acl_filename); server.acl_filename = zstrdup(argv[1]); - } else if (!strcasecmp(argv[0],"always-show-logo") && argc == 2) { - if ((server.always_show_logo = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } } else if (!strcasecmp(argv[0],"syslog-enabled") && argc == 2) { if ((server.syslog_enabled = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; @@ -318,10 +368,6 @@ void loadServerConfigFromString(char *config) { if (server.io_threads_num < 1 || server.io_threads_num > 512) { err = "Invalid number of I/O threads"; goto loaderr; } - } else if (!strcasecmp(argv[0],"io-threads-do-reads") && argc == 2) { - if ((server.io_threads_do_reads = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } } else if (!strcasecmp(argv[0],"include") && argc == 2) { loadServerConfig(argv[1],NULL); } else if (!strcasecmp(argv[0],"maxclients") && argc == 2) { @@ -381,14 +427,6 @@ void loadServerConfigFromString(char *config) { err = "repl-timeout must be 1 or greater"; goto loaderr; } - } else if (!strcasecmp(argv[0],"repl-disable-tcp-nodelay") && argc==2) { - if ((server.repl_disable_tcp_nodelay = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if (!strcasecmp(argv[0],"repl-diskless-sync") && argc==2) { - if ((server.repl_diskless_sync = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } } else if (!strcasecmp(argv[0],"repl-diskless-sync-delay") && argc==2) { server.repl_diskless_sync_delay = atoi(argv[1]); if (server.repl_diskless_sync_delay < 0) { @@ -414,57 +452,6 @@ void loadServerConfigFromString(char *config) { } else if (!strcasecmp(argv[0],"masterauth") && argc == 2) { zfree(server.masterauth); server.masterauth = argv[1][0] ? 
zstrdup(argv[1]) : NULL; - } else if ((!strcasecmp(argv[0],"slave-serve-stale-data") || - !strcasecmp(argv[0],"replica-serve-stale-data")) - && argc == 2) - { - if ((server.repl_serve_stale_data = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if ((!strcasecmp(argv[0],"slave-read-only") || - !strcasecmp(argv[0],"replica-read-only")) - && argc == 2) - { - if ((server.repl_slave_ro = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if ((!strcasecmp(argv[0],"slave-ignore-maxmemory") || - !strcasecmp(argv[0],"replica-ignore-maxmemory")) - && argc == 2) - { - if ((server.repl_slave_ignore_maxmemory = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if (!strcasecmp(argv[0],"rdbcompression") && argc == 2) { - if ((server.rdb_compression = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if (!strcasecmp(argv[0],"rdbchecksum") && argc == 2) { - if ((server.rdb_checksum = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if (!strcasecmp(argv[0],"activerehashing") && argc == 2) { - if ((server.activerehashing = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if (!strcasecmp(argv[0],"lazyfree-lazy-eviction") && argc == 2) { - if ((server.lazyfree_lazy_eviction = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if (!strcasecmp(argv[0],"lazyfree-lazy-expire") && argc == 2) { - if ((server.lazyfree_lazy_expire = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if (!strcasecmp(argv[0],"lazyfree-lazy-server-del") && argc == 2){ - if ((server.lazyfree_lazy_server_del = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if ((!strcasecmp(argv[0],"slave-lazy-flush") || - !strcasecmp(argv[0],"replica-lazy-flush")) && argc == 2) - { - if ((server.repl_slave_lazy_flush = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } } else if (!strcasecmp(argv[0],"activedefrag") && argc == 2) { if ((server.active_defrag_enabled = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; @@ -474,14 +461,6 @@ void loadServerConfigFromString(char *config) { err = "active defrag can't be enabled without proper jemalloc support"; goto loaderr; #endif } - } else if (!strcasecmp(argv[0],"daemonize") && argc == 2) { - if ((server.daemonize = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if (!strcasecmp(argv[0],"dynamic-hz") && argc == 2) { - if ((server.dynamic_hz = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } } else if (!strcasecmp(argv[0],"hz") && argc == 2) { server.config_hz = atoi(argv[1]); if (server.config_hz < CONFIG_MIN_HZ) server.config_hz = CONFIG_MIN_HZ; @@ -500,11 +479,6 @@ void loadServerConfigFromString(char *config) { } zfree(server.aof_filename); server.aof_filename = zstrdup(argv[1]); - } else if (!strcasecmp(argv[0],"no-appendfsync-on-rewrite") - && argc == 2) { - if ((server.aof_no_fsync_on_rewrite= yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } } else if (!strcasecmp(argv[0],"appendfsync") && argc == 2) { server.aof_fsync = configEnumGetValue(aof_fsync_enum,argv[1]); if (server.aof_fsync == 
INT_MIN) { @@ -523,28 +497,6 @@ void loadServerConfigFromString(char *config) { argc == 2) { server.aof_rewrite_min_size = memtoll(argv[1],NULL); - } else if (!strcasecmp(argv[0],"aof-rewrite-incremental-fsync") && - argc == 2) - { - if ((server.aof_rewrite_incremental_fsync = - yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if (!strcasecmp(argv[0],"rdb-save-incremental-fsync") && - argc == 2) - { - if ((server.rdb_save_incremental_fsync = - yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if (!strcasecmp(argv[0],"aof-load-truncated") && argc == 2) { - if ((server.aof_load_truncated = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } - } else if (!strcasecmp(argv[0],"aof-use-rdb-preamble") && argc == 2) { - if ((server.aof_use_rdb_preamble = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } } else if (!strcasecmp(argv[0],"requirepass") && argc == 2) { if (strlen(argv[1]) > CONFIG_AUTHPASS_MAX_LEN) { err = "Password is longer than CONFIG_AUTHPASS_MAX_LEN"; @@ -678,13 +630,6 @@ void loadServerConfigFromString(char *config) { { err = "Invalid port"; goto loaderr; } - } else if (!strcasecmp(argv[0],"cluster-require-full-coverage") && - argc == 2) - { - if ((server.cluster_require_full_coverage = yesnotoi(argv[1])) == -1) - { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } } else if (!strcasecmp(argv[0],"cluster-node-timeout") && argc == 2) { server.cluster_node_timeout = strtoll(argv[1],NULL,10); if (server.cluster_node_timeout <= 0) { @@ -707,15 +652,6 @@ void loadServerConfigFromString(char *config) { err = "cluster replica validity factor must be zero or positive"; goto loaderr; } - } else if ((!strcasecmp(argv[0],"cluster-slave-no-failover") || - !strcasecmp(argv[0],"cluster-replica-no-failover")) && - argc == 2) - { - server.cluster_slave_no_failover = yesnotoi(argv[1]); - if (server.cluster_slave_no_failover == -1) { - err = "argument must be 'yes' or 'no'"; - goto loaderr; - } } else if (!strcasecmp(argv[0],"lua-time-limit") && argc == 2) { server.lua_time_limit = strtoll(argv[1],NULL,10); } else if (!strcasecmp(argv[0],"lua-replicate-commands") && argc == 2) { @@ -756,11 +692,6 @@ void loadServerConfigFromString(char *config) { server.client_obuf_limits[class].hard_limit_bytes = hard; server.client_obuf_limits[class].soft_limit_bytes = soft; server.client_obuf_limits[class].soft_limit_seconds = soft_seconds; - } else if (!strcasecmp(argv[0],"stop-writes-on-bgsave-error") && - argc == 2) { - if ((server.stop_writes_on_bgsave_err = yesnotoi(argv[1])) == -1) { - err = "argument must be 'yes' or 'no'"; goto loaderr; - } } else if ((!strcasecmp(argv[0],"slave-priority") || !strcasecmp(argv[0],"replica-priority")) && argc == 2) { @@ -941,6 +872,19 @@ void configSetCommand(client *c) { serverAssertWithInfo(c,c->argv[3],sdsEncodedObject(c->argv[3])); o = c->argv[3]; + /* Iterate the configs that are standard */ + for (configYesNo *config = configs_yesno; config->name != NULL; config++) { + if(config->modifiable && (!strcasecmp(c->argv[2]->ptr,config->name) || + (config->alias && !strcasecmp(c->argv[2]->ptr,config->alias)))) + { + int yn = yesnotoi(o->ptr); + if (yn == -1) goto badfmt; + *(config->config) = yn; + addReply(c,shared.ok); + return; + } + } + if (0) { /* this starts the config_set macros else-if chain. */ /* Special fields that can't be handled with general macros. 
*/ @@ -1105,40 +1049,6 @@ void configSetCommand(client *c) { /* Boolean fields. * config_set_bool_field(name,var). */ - } config_set_bool_field( - "rdbcompression", server.rdb_compression) { - } config_set_bool_field( - "repl-disable-tcp-nodelay",server.repl_disable_tcp_nodelay) { - } config_set_bool_field( - "repl-diskless-sync",server.repl_diskless_sync) { - } config_set_bool_field( - "cluster-require-full-coverage",server.cluster_require_full_coverage) { - } config_set_bool_field( - "cluster-slave-no-failover",server.cluster_slave_no_failover) { - } config_set_bool_field( - "cluster-replica-no-failover",server.cluster_slave_no_failover) { - } config_set_bool_field( - "aof-rewrite-incremental-fsync",server.aof_rewrite_incremental_fsync) { - } config_set_bool_field( - "rdb-save-incremental-fsync",server.rdb_save_incremental_fsync) { - } config_set_bool_field( - "aof-load-truncated",server.aof_load_truncated) { - } config_set_bool_field( - "aof-use-rdb-preamble",server.aof_use_rdb_preamble) { - } config_set_bool_field( - "slave-serve-stale-data",server.repl_serve_stale_data) { - } config_set_bool_field( - "replica-serve-stale-data",server.repl_serve_stale_data) { - } config_set_bool_field( - "slave-read-only",server.repl_slave_ro) { - } config_set_bool_field( - "replica-read-only",server.repl_slave_ro) { - } config_set_bool_field( - "slave-ignore-maxmemory",server.repl_slave_ignore_maxmemory) { - } config_set_bool_field( - "replica-ignore-maxmemory",server.repl_slave_ignore_maxmemory) { - } config_set_bool_field( - "activerehashing",server.activerehashing) { } config_set_bool_field( "activedefrag",server.active_defrag_enabled) { #ifndef HAVE_DEFRAG @@ -1152,27 +1062,6 @@ void configSetCommand(client *c) { return; } #endif - } config_set_bool_field( - "protected-mode",server.protected_mode) { - } config_set_bool_field( - "gopher-enabled",server.gopher_enabled) { - } config_set_bool_field( - "stop-writes-on-bgsave-error",server.stop_writes_on_bgsave_err) { - } config_set_bool_field( - "lazyfree-lazy-eviction",server.lazyfree_lazy_eviction) { - } config_set_bool_field( - "lazyfree-lazy-expire",server.lazyfree_lazy_expire) { - } config_set_bool_field( - "lazyfree-lazy-server-del",server.lazyfree_lazy_server_del) { - } config_set_bool_field( - "slave-lazy-flush",server.repl_slave_lazy_flush) { - } config_set_bool_field( - "replica-lazy-flush",server.repl_slave_lazy_flush) { - } config_set_bool_field( - "no-appendfsync-on-rewrite",server.aof_no_fsync_on_rewrite) { - } config_set_bool_field( - "dynamic-hz",server.dynamic_hz) { - /* Numerical fields. 
* config_set_numerical_field(name,var,min,max) */ } config_set_numerical_field( @@ -1460,60 +1349,15 @@ void configGetCommand(client *c) { config_get_numerical_field("tcp-keepalive",server.tcpkeepalive); /* Bool (yes/no) values */ - config_get_bool_field("cluster-require-full-coverage", - server.cluster_require_full_coverage); - config_get_bool_field("cluster-slave-no-failover", - server.cluster_slave_no_failover); - config_get_bool_field("cluster-replica-no-failover", - server.cluster_slave_no_failover); - config_get_bool_field("no-appendfsync-on-rewrite", - server.aof_no_fsync_on_rewrite); - config_get_bool_field("slave-serve-stale-data", - server.repl_serve_stale_data); - config_get_bool_field("replica-serve-stale-data", - server.repl_serve_stale_data); - config_get_bool_field("slave-read-only", - server.repl_slave_ro); - config_get_bool_field("replica-read-only", - server.repl_slave_ro); - config_get_bool_field("slave-ignore-maxmemory", - server.repl_slave_ignore_maxmemory); - config_get_bool_field("replica-ignore-maxmemory", - server.repl_slave_ignore_maxmemory); - config_get_bool_field("stop-writes-on-bgsave-error", - server.stop_writes_on_bgsave_err); - config_get_bool_field("daemonize", server.daemonize); - config_get_bool_field("rdbcompression", server.rdb_compression); - config_get_bool_field("rdbchecksum", server.rdb_checksum); - config_get_bool_field("activerehashing", server.activerehashing); + /* Iterate the configs that are standard */ + for (configYesNo *config = configs_yesno; config->name != NULL; config++) { + config_get_bool_field(config->name, *(config->config)); + if (config->alias) { + config_get_bool_field(config->alias, *(config->config)); + } + } + config_get_bool_field("activedefrag", server.active_defrag_enabled); - config_get_bool_field("protected-mode", server.protected_mode); - config_get_bool_field("gopher-enabled", server.gopher_enabled); - config_get_bool_field("io-threads-do-reads", server.io_threads_do_reads); - config_get_bool_field("repl-disable-tcp-nodelay", - server.repl_disable_tcp_nodelay); - config_get_bool_field("repl-diskless-sync", - server.repl_diskless_sync); - config_get_bool_field("aof-rewrite-incremental-fsync", - server.aof_rewrite_incremental_fsync); - config_get_bool_field("rdb-save-incremental-fsync", - server.rdb_save_incremental_fsync); - config_get_bool_field("aof-load-truncated", - server.aof_load_truncated); - config_get_bool_field("aof-use-rdb-preamble", - server.aof_use_rdb_preamble); - config_get_bool_field("lazyfree-lazy-eviction", - server.lazyfree_lazy_eviction); - config_get_bool_field("lazyfree-lazy-expire", - server.lazyfree_lazy_expire); - config_get_bool_field("lazyfree-lazy-server-del", - server.lazyfree_lazy_server_del); - config_get_bool_field("slave-lazy-flush", - server.repl_slave_lazy_flush); - config_get_bool_field("replica-lazy-flush", - server.repl_slave_lazy_flush); - config_get_bool_field("dynamic-hz", - server.dynamic_hz); /* Enum values */ config_get_enum_field("maxmemory-policy", @@ -1858,7 +1702,7 @@ void rewriteConfigBytesOption(struct rewriteConfigState *state, char *option, lo } /* Rewrite a yes/no option. */ -void rewriteConfigYesNoOption(struct rewriteConfigState *state, char *option, int value, int defvalue) { +void rewriteConfigYesNoOption(struct rewriteConfigState *state, const char *option, int value, int defvalue) { int force = value != defvalue; sds line = sdscatprintf(sdsempty(),"%s %s",option, value ? 
"yes" : "no"); @@ -2228,7 +2072,11 @@ int rewriteConfig(char *path) { /* Step 2: rewrite every single option, replacing or appending it inside * the rewrite state. */ - rewriteConfigYesNoOption(state,"daemonize",server.daemonize,0); + /* Iterate the configs that are standard */ + for (configYesNo *config = configs_yesno; config->name != NULL; config++) { + rewriteConfigYesNoOption(state,config->name,*(config->config),config->default_value); + } + rewriteConfigStringOption(state,"pidfile",server.pidfile,CONFIG_DEFAULT_PID_FILE); rewriteConfigNumericalOption(state,"port",server.port,CONFIG_DEFAULT_SERVER_PORT); rewriteConfigNumericalOption(state,"cluster-announce-port",server.cluster_announce_port,CONFIG_DEFAULT_CLUSTER_ANNOUNCE_PORT); @@ -2250,9 +2098,6 @@ int rewriteConfig(char *path) { rewriteConfigUserOption(state); rewriteConfigNumericalOption(state,"databases",server.dbnum,CONFIG_DEFAULT_DBNUM); rewriteConfigNumericalOption(state,"io-threads",server.dbnum,CONFIG_DEFAULT_IO_THREADS_NUM); - rewriteConfigYesNoOption(state,"stop-writes-on-bgsave-error",server.stop_writes_on_bgsave_err,CONFIG_DEFAULT_STOP_WRITES_ON_BGSAVE_ERROR); - rewriteConfigYesNoOption(state,"rdbcompression",server.rdb_compression,CONFIG_DEFAULT_RDB_COMPRESSION); - rewriteConfigYesNoOption(state,"rdbchecksum",server.rdb_checksum,CONFIG_DEFAULT_RDB_CHECKSUM); rewriteConfigStringOption(state,"dbfilename",server.rdb_filename,CONFIG_DEFAULT_RDB_FILENAME); rewriteConfigDirOption(state); rewriteConfigSlaveofOption(state,"replicaof"); @@ -2260,15 +2105,10 @@ int rewriteConfig(char *path) { rewriteConfigStringOption(state,"masteruser",server.masteruser,NULL); rewriteConfigStringOption(state,"masterauth",server.masterauth,NULL); rewriteConfigStringOption(state,"cluster-announce-ip",server.cluster_announce_ip,NULL); - rewriteConfigYesNoOption(state,"replica-serve-stale-data",server.repl_serve_stale_data,CONFIG_DEFAULT_SLAVE_SERVE_STALE_DATA); - rewriteConfigYesNoOption(state,"replica-read-only",server.repl_slave_ro,CONFIG_DEFAULT_SLAVE_READ_ONLY); - rewriteConfigYesNoOption(state,"replica-ignore-maxmemory",server.repl_slave_ignore_maxmemory,CONFIG_DEFAULT_SLAVE_IGNORE_MAXMEMORY); rewriteConfigNumericalOption(state,"repl-ping-replica-period",server.repl_ping_slave_period,CONFIG_DEFAULT_REPL_PING_SLAVE_PERIOD); rewriteConfigNumericalOption(state,"repl-timeout",server.repl_timeout,CONFIG_DEFAULT_REPL_TIMEOUT); rewriteConfigBytesOption(state,"repl-backlog-size",server.repl_backlog_size,CONFIG_DEFAULT_REPL_BACKLOG_SIZE); rewriteConfigBytesOption(state,"repl-backlog-ttl",server.repl_backlog_time_limit,CONFIG_DEFAULT_REPL_BACKLOG_TIME_LIMIT); - rewriteConfigYesNoOption(state,"repl-disable-tcp-nodelay",server.repl_disable_tcp_nodelay,CONFIG_DEFAULT_REPL_DISABLE_TCP_NODELAY); - rewriteConfigYesNoOption(state,"repl-diskless-sync",server.repl_diskless_sync,CONFIG_DEFAULT_REPL_DISKLESS_SYNC); rewriteConfigNumericalOption(state,"repl-diskless-sync-delay",server.repl_diskless_sync_delay,CONFIG_DEFAULT_REPL_DISKLESS_SYNC_DELAY); rewriteConfigNumericalOption(state,"replica-priority",server.slave_priority,CONFIG_DEFAULT_SLAVE_PRIORITY); rewriteConfigNumericalOption(state,"min-replicas-to-write",server.repl_min_slaves_to_write,CONFIG_DEFAULT_MIN_SLAVES_TO_WRITE); @@ -2291,14 +2131,11 @@ int rewriteConfig(char *path) { rewriteConfigYesNoOption(state,"appendonly",server.aof_state != AOF_OFF,0); rewriteConfigStringOption(state,"appendfilename",server.aof_filename,CONFIG_DEFAULT_AOF_FILENAME); 
rewriteConfigEnumOption(state,"appendfsync",server.aof_fsync,aof_fsync_enum,CONFIG_DEFAULT_AOF_FSYNC); - rewriteConfigYesNoOption(state,"no-appendfsync-on-rewrite",server.aof_no_fsync_on_rewrite,CONFIG_DEFAULT_AOF_NO_FSYNC_ON_REWRITE); rewriteConfigNumericalOption(state,"auto-aof-rewrite-percentage",server.aof_rewrite_perc,AOF_REWRITE_PERC); rewriteConfigBytesOption(state,"auto-aof-rewrite-min-size",server.aof_rewrite_min_size,AOF_REWRITE_MIN_SIZE); rewriteConfigNumericalOption(state,"lua-time-limit",server.lua_time_limit,LUA_SCRIPT_TIME_LIMIT); rewriteConfigYesNoOption(state,"cluster-enabled",server.cluster_enabled,0); rewriteConfigStringOption(state,"cluster-config-file",server.cluster_configfile,CONFIG_DEFAULT_CLUSTER_CONFIG_FILE); - rewriteConfigYesNoOption(state,"cluster-require-full-coverage",server.cluster_require_full_coverage,CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE); - rewriteConfigYesNoOption(state,"cluster-replica-no-failover",server.cluster_slave_no_failover,CLUSTER_DEFAULT_SLAVE_NO_FAILOVER); rewriteConfigNumericalOption(state,"cluster-node-timeout",server.cluster_node_timeout,CLUSTER_DEFAULT_NODE_TIMEOUT); rewriteConfigNumericalOption(state,"cluster-migration-barrier",server.cluster_migration_barrier,CLUSTER_DEFAULT_MIGRATION_BARRIER); rewriteConfigNumericalOption(state,"cluster-replica-validity-factor",server.cluster_slave_validity_factor,CLUSTER_DEFAULT_SLAVE_VALIDITY); @@ -2316,23 +2153,10 @@ int rewriteConfig(char *path) { rewriteConfigNumericalOption(state,"zset-max-ziplist-entries",server.zset_max_ziplist_entries,OBJ_ZSET_MAX_ZIPLIST_ENTRIES); rewriteConfigNumericalOption(state,"zset-max-ziplist-value",server.zset_max_ziplist_value,OBJ_ZSET_MAX_ZIPLIST_VALUE); rewriteConfigNumericalOption(state,"hll-sparse-max-bytes",server.hll_sparse_max_bytes,CONFIG_DEFAULT_HLL_SPARSE_MAX_BYTES); - rewriteConfigYesNoOption(state,"activerehashing",server.activerehashing,CONFIG_DEFAULT_ACTIVE_REHASHING); rewriteConfigYesNoOption(state,"activedefrag",server.active_defrag_enabled,CONFIG_DEFAULT_ACTIVE_DEFRAG); - rewriteConfigYesNoOption(state,"protected-mode",server.protected_mode,CONFIG_DEFAULT_PROTECTED_MODE); - rewriteConfigYesNoOption(state,"gopher-enabled",server.gopher_enabled,CONFIG_DEFAULT_GOPHER_ENABLED); - rewriteConfigYesNoOption(state,"io-threads-do-reads",server.io_threads_do_reads,CONFIG_DEFAULT_IO_THREADS_DO_READS); rewriteConfigClientoutputbufferlimitOption(state); rewriteConfigNumericalOption(state,"hz",server.config_hz,CONFIG_DEFAULT_HZ); - rewriteConfigYesNoOption(state,"aof-rewrite-incremental-fsync",server.aof_rewrite_incremental_fsync,CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC); - rewriteConfigYesNoOption(state,"rdb-save-incremental-fsync",server.rdb_save_incremental_fsync,CONFIG_DEFAULT_RDB_SAVE_INCREMENTAL_FSYNC); - rewriteConfigYesNoOption(state,"aof-load-truncated",server.aof_load_truncated,CONFIG_DEFAULT_AOF_LOAD_TRUNCATED); - rewriteConfigYesNoOption(state,"aof-use-rdb-preamble",server.aof_use_rdb_preamble,CONFIG_DEFAULT_AOF_USE_RDB_PREAMBLE); rewriteConfigEnumOption(state,"supervised",server.supervised_mode,supervised_mode_enum,SUPERVISED_NONE); - rewriteConfigYesNoOption(state,"lazyfree-lazy-eviction",server.lazyfree_lazy_eviction,CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION); - rewriteConfigYesNoOption(state,"lazyfree-lazy-expire",server.lazyfree_lazy_expire,CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE); - rewriteConfigYesNoOption(state,"lazyfree-lazy-server-del",server.lazyfree_lazy_server_del,CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL); - 
rewriteConfigYesNoOption(state,"replica-lazy-flush",server.repl_slave_lazy_flush,CONFIG_DEFAULT_SLAVE_LAZY_FLUSH); - rewriteConfigYesNoOption(state,"dynamic-hz",server.dynamic_hz,CONFIG_DEFAULT_DYNAMIC_HZ); /* Rewrite Sentinel config if in Sentinel mode. */ if (server.sentinel_mode) rewriteConfigSentinelOption(state); From 6441bc04756a178c07e900d9dc82da5ab8c5118e Mon Sep 17 00:00:00 2001 From: "zheng.ren01@mljr.com" Date: Tue, 25 Jun 2019 18:34:35 +0800 Subject: [PATCH 105/304] =?UTF-8?q?fix=20readme.md=EF=BC=8CRedis=20data=20?= =?UTF-8?q?types=20should=20add=20`t=5Fstream.c`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6c9435b53..3442659e6 100644 --- a/README.md +++ b/README.md @@ -406,7 +406,7 @@ replicas, or to continue the replication after a disconnection. Other C files --- -* `t_hash.c`, `t_list.c`, `t_set.c`, `t_string.c` and `t_zset.c` contains the implementation of the Redis data types. They implement both an API to access a given data type, and the client commands implementations for these data types. +* `t_hash.c`, `t_list.c`, `t_set.c`, `t_string.c`, `t_zset.c` and `t_stream.c` contains the implementation of the Redis data types. They implement both an API to access a given data type, and the client commands implementations for these data types. * `ae.c` implements the Redis event loop, it's a self contained library which is simple to read and understand. * `sds.c` is the Redis string library, check http://github.com/antirez/sds for more information. * `anet.c` is a library to use POSIX networking in a simpler way compared to the raw interface exposed by the kernel. From a152f483cddb2d8c5a28c6abc3805ac66140b1e4 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 29 Jun 2019 09:09:38 -0400 Subject: [PATCH 106/304] Client side caching: add new file and description. 
--- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index f35685eff..e608309f8 100644 --- a/src/Makefile +++ b/src/Makefile @@ -164,7 +164,7 @@ endif REDIS_SERVER_NAME=redis-server REDIS_SENTINEL_NAME=redis-sentinel -REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o acl.o gopher.o +REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o acl.o gopher.o tracking.c REDIS_CLI_NAME=redis-cli REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o crc16.o REDIS_BENCHMARK_NAME=redis-benchmark From 7f9de752de8fb5a12b571570386656362c215b60 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 29 Jun 2019 20:08:41 -0400 Subject: [PATCH 107/304] Client side caching: fields and flags for tracking mode. --- src/networking.c | 5 +++++ src/server.h | 21 ++++++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/networking.c b/src/networking.c index 4bc22120a..44979770c 100644 --- a/src/networking.c +++ b/src/networking.c @@ -158,6 +158,7 @@ client *createClient(int fd) { c->pubsub_patterns = listCreate(); c->peerid = NULL; c->client_list_node = NULL; + c->client_tracking_redirection = 0; listSetFreeMethod(c->pubsub_patterns,decrRefCountVoid); listSetMatchMethod(c->pubsub_patterns,listMatchObjects); if (fd != -1) linkClient(c); @@ -966,6 +967,9 @@ void unlinkClient(client *c) { listDelNode(server.unblocked_clients,ln); c->flags &= ~CLIENT_UNBLOCKED; } + + /* Clear the tracking status. */ + if (c->flags & CLIENT_TRACKING) disableTracking(c); } void freeClient(client *c) { @@ -1849,6 +1853,7 @@ sds catClientInfoString(sds s, client *client) { if (client->flags & CLIENT_PUBSUB) *p++ = 'P'; if (client->flags & CLIENT_MULTI) *p++ = 'x'; if (client->flags & CLIENT_BLOCKED) *p++ = 'b'; + if (client->flags & CLIENT_TRACKING) *p++ = 't'; if (client->flags & CLIENT_DIRTY_CAS) *p++ = 'd'; if (client->flags & CLIENT_CLOSE_AFTER_REPLY) *p++ = 'c'; if (client->flags & CLIENT_UNBLOCKED) *p++ = 'u'; diff --git a/src/server.h b/src/server.h index 0813f8bd1..a6c6a4dae 100644 --- a/src/server.h +++ b/src/server.h @@ -254,8 +254,8 @@ typedef long long mstime_t; /* millisecond time type. 
*/ #define AOF_WAIT_REWRITE 2 /* AOF waits rewrite to start appending */ /* Client flags */ -#define CLIENT_SLAVE (1<<0) /* This client is a slave server */ -#define CLIENT_MASTER (1<<1) /* This client is a master server */ +#define CLIENT_SLAVE (1<<0) /* This client is a replica */ +#define CLIENT_MASTER (1<<1) /* This client is a master */ #define CLIENT_MONITOR (1<<2) /* This client is a slave monitor, see MONITOR */ #define CLIENT_MULTI (1<<3) /* This client is in a MULTI context */ #define CLIENT_BLOCKED (1<<4) /* The client is waiting in a blocking operation */ @@ -289,7 +289,12 @@ typedef long long mstime_t; /* millisecond time type. */ #define CLIENT_PENDING_READ (1<<29) /* The client has pending reads and was put in the list of clients we can read from. */ -#define CLIENT_PENDING_COMMAND (1<<30) /* */ +#define CLIENT_PENDING_COMMAND (1<<30) /* Used in threaded I/O to signal after + we return single threaded that the + client has already pending commands + to be executed. */ +#define CLIENT_TRACKING (1<<31) /* Client enabled keys tracking in order to + perform client side caching. */ /* Client block type (btype field in client structure) * if CLIENT_BLOCKED flag is set. */ @@ -845,6 +850,11 @@ typedef struct client { sds peerid; /* Cached peer ID. */ listNode *client_list_node; /* list node in client list */ + /* If this client is in tracking mode and this field is non zero, + * invalidation messages for keys fetched by this client will be sent to + * the specified client ID. */ + uint64_t client_tracking_redirection; + /* Response buffer */ int bufpos; char buf[PROTO_REPLY_CHUNK_BYTES]; @@ -1286,6 +1296,8 @@ struct redisServer { unsigned int blocked_clients_by_type[BLOCKED_NUM]; list *unblocked_clients; /* list of clients to unblock before next loop */ list *ready_keys; /* List of readyList structures for BLPOP & co */ + /* Client side caching. */ + unsigned int tracking_clients; /* # of clients with tracking enabled.*/ /* Sort parameters - qsort_r() is only available under BSD so we * have to take this state global, in order to pass it to sortCompare() */ int sort_desc; @@ -1602,6 +1614,9 @@ void addReplyErrorFormat(client *c, const char *fmt, ...); void addReplyStatusFormat(client *c, const char *fmt, ...); #endif +/* Client side caching (tracking mode) */ +void disableTracking(client *c); + /* List data type */ void listTypeTryConversion(robj *subject, robj *value); void listTypePush(robj *subject, robj *value, int where); From b54789d47252075b0761158c2e120f043eb46a15 Mon Sep 17 00:00:00 2001 From: antirez Date: Sun, 30 Jun 2019 06:19:04 -0400 Subject: [PATCH 108/304] Client side caching: enable tracking mode.
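Only the enableTracking() prototype is added here; the implementation arrives with tracking.c later in this series. As a minimal sketch of the intended behaviour (simplified from that later implementation), enabling tracking flips the new client flag, remembers the optional redirection target and bumps the global counter that lets the server skip the whole feature cheaply when no client uses it:

    /* Simplified sketch; the real function also allocates the tracking table
     * and clears the broken-redirection flag. */
    void enableTracking(client *c, uint64_t redirect_to) {
        if (c->flags & CLIENT_TRACKING) return;
        c->flags |= CLIENT_TRACKING;
        c->client_tracking_redirection = redirect_to;
        server.tracking_clients++;
    }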
--- src/server.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/server.h b/src/server.h index a6c6a4dae..8c97f83f6 100644 --- a/src/server.h +++ b/src/server.h @@ -1615,6 +1615,7 @@ void addReplyStatusFormat(client *c, const char *fmt, ...); #endif /* Client side caching (tracking mode) */ +void enableTracking(client *c, uint64_t redirect_to); void disableTracking(client *c); /* List data type */ From 17e98d88516d72ca9a8716e411e5055750b9b71e Mon Sep 17 00:00:00 2001 From: Angus Pearson Date: Tue, 2 Jul 2019 14:28:48 +0100 Subject: [PATCH 109/304] RESP3 double representation for -infinity is `,-inf\r\n`, not `-inf\r\n` --- src/networking.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/networking.c b/src/networking.c index 4bc22120a..6225229ce 100644 --- a/src/networking.c +++ b/src/networking.c @@ -506,7 +506,7 @@ void addReplyDouble(client *c, double d) { if (c->resp == 2) { addReplyBulkCString(c, d > 0 ? "inf" : "-inf"); } else { - addReplyProto(c, d > 0 ? ",inf\r\n" : "-inf\r\n", + addReplyProto(c, d > 0 ? ",inf\r\n" : ",-inf\r\n", d > 0 ? 6 : 7); } } else { From b826cf89f1e2d9fd082f92b9bf71f886bee17168 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 3 Jul 2019 11:58:20 +0200 Subject: [PATCH 110/304] Client side caching: CLIENT TRACKING subcommand. --- src/networking.c | 41 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/src/networking.c b/src/networking.c index 44979770c..185060267 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1966,6 +1966,7 @@ void clientCommand(client *c) { "reply (on|off|skip) -- Control the replies sent to the current connection.", "setname -- Assign the name to the current connection.", "unblock [TIMEOUT|ERROR] -- Unblock the specified blocked client.", +"tracking (on|off) [REDIRECT <client-id>] -- Enable client keys tracking for client side caching.", NULL }; addReplyHelp(c, help); @@ -2122,20 +2123,56 @@ NULL addReply(c,shared.czero); } } else if (!strcasecmp(c->argv[1]->ptr,"setname") && c->argc == 3) { + /* CLIENT SETNAME */ if (clientSetNameOrReply(c,c->argv[2]) == C_OK) addReply(c,shared.ok); } else if (!strcasecmp(c->argv[1]->ptr,"getname") && c->argc == 2) { + /* CLIENT GETNAME */ if (c->name) addReplyBulk(c,c->name); else addReplyNull(c); } else if (!strcasecmp(c->argv[1]->ptr,"pause") && c->argc == 3) { + /* CLIENT PAUSE */ long long duration; - if (getTimeoutFromObjectOrReply(c,c->argv[2],&duration,UNIT_MILLISECONDS) - != C_OK) return; + if (getTimeoutFromObjectOrReply(c,c->argv[2],&duration, + UNIT_MILLISECONDS) != C_OK) return; pauseClients(duration); addReply(c,shared.ok); + } else if (!strcasecmp(c->argv[1]->ptr,"tracking") && + (c->argc == 3 || c->argc == 5)) + { + /* CLIENT TRACKING (on|off) [REDIRECT <client-id>] */ + long long redir = 0; + + /* Parse the redirection option: we'll require the client with + * the specified ID to exist right now, even if it is possible + * it will get disconnected later.
*/ + if (c->argc == 5) { + if (strcasecmp(c->argv[3]->ptr,"redirect") != 0) { + addReply(c,shared.syntaxerr); + return; + } else { + if (getLongLongFromObjectOrReply(c,c->argv[4],&redir,NULL) != + C_OK) return; + if (lookupClientByID(redir) == NULL) { + addReplyError(c,"The client ID you want redirect to " + "does not exist"); + return; + } + } + } + + if (!strcasecmp(c->argv[2]->ptr,"on")) { + enableTracking(c,redir); + } else if (!strcasecmp(c->argv[2]->ptr,"off")) { + disableTracking(c); + } else { + addReply(c,shared.syntaxerr); + return; + } + addReply(c,shared.ok); } else { addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try CLIENT HELP", (char*)c->argv[1]->ptr); } From a4e44726d02ea63a9a2baf185ff9133b6b2bc867 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 3 Jul 2019 12:42:16 +0200 Subject: [PATCH 111/304] Client side caching: hook inside call() for tracking. --- src/server.c | 11 +++++++++++ src/server.h | 1 + 2 files changed, 12 insertions(+) diff --git a/src/server.c b/src/server.c index 4b87b6ac2..bb891594b 100644 --- a/src/server.c +++ b/src/server.c @@ -3194,6 +3194,7 @@ void call(client *c, int flags) { latencyAddSampleIfNeeded(latency_event,duration/1000); slowlogPushEntryIfNeeded(c,c->argv,c->argc,duration); } + if (flags & CMD_CALL_STATS) { /* use the real command that was executed (cmd and lastamc) may be * different, in case of MULTI-EXEC or re-written commands such as @@ -3261,6 +3262,16 @@ void call(client *c, int flags) { redisOpArrayFree(&server.also_propagate); } server.also_propagate = prev_also_propagate; + + /* If the client has keys tracking enabled for client side caching, + * make sure to remember the keys it fetched via this command. */ + if (c->cmd->flags & CMD_READONLY) { + client *caller = (c->flags & CLIENT_LUA && server.lua_caller) ? + server.lua_caller : c; + if (caller->flags & CLIENT_TRACKING) + trackingRememberKeys(caller); + } + server.stat_numcommands++; } diff --git a/src/server.h b/src/server.h index 8c97f83f6..022e48304 100644 --- a/src/server.h +++ b/src/server.h @@ -1617,6 +1617,7 @@ void addReplyStatusFormat(client *c, const char *fmt, ...); /* Client side caching (tracking mode) */ void enableTracking(client *c, uint64_t redirect_to); void disableTracking(client *c); +void trackingRememberKeys(client *c); /* List data type */ void listTypeTryConversion(robj *subject, robj *value); From 4b5027845e2b3566579a1b5e34b536df2f1c23ee Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 3 Jul 2019 19:16:20 +0200 Subject: [PATCH 112/304] Client side caching: implement trackingInvalidateKey(). 
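The central idea of the implementation below is that invalidation is tracked per hash slot rather than per key: the key name is hashed with crc64 and the least significant 24 bits select one of 2^24 radix trees holding the IDs of the clients that may cache keys in that slot. A small excerpt of that mapping, as added by this commit:

    #define TRACKING_TABLE_SIZE (1<<24)
    /* Map a key name to its invalidation slot. */
    uint64_t hash = crc64(0,(unsigned char*)sdskey,sdslen(sdskey)) &
                    (TRACKING_TABLE_SIZE-1);

Grouping keys into slots keeps the table bounded, at the cost of occasionally invalidating cached keys that merely share a slot with the one that was modified.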
--- src/db.c | 1 + src/debug.c | 2 +- src/expire.c | 1 + src/networking.c | 1 + src/server.h | 5 +- src/tracking.c | 162 +++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 170 insertions(+), 2 deletions(-) create mode 100644 src/tracking.c diff --git a/src/db.c b/src/db.c index b537a29a4..4977873e9 100644 --- a/src/db.c +++ b/src/db.c @@ -399,6 +399,7 @@ int selectDb(client *c, int id) { void signalModifiedKey(redisDb *db, robj *key) { touchWatchedKey(db,key); + if (server.tracking_clients) trackingInvalidateKey(key); } void signalFlushedDb(int dbid) { diff --git a/src/debug.c b/src/debug.c index 0c6b5630c..1f1157d4a 100644 --- a/src/debug.c +++ b/src/debug.c @@ -702,7 +702,7 @@ void _serverAssertPrintClientInfo(const client *c) { bugReportStart(); serverLog(LL_WARNING,"=== ASSERTION FAILED CLIENT CONTEXT ==="); - serverLog(LL_WARNING,"client->flags = %d", c->flags); + serverLog(LL_WARNING,"client->flags = %llu", (unsigned long long)c->flags); serverLog(LL_WARNING,"client->fd = %d", c->fd); serverLog(LL_WARNING,"client->argc = %d", c->argc); for (j=0; j < c->argc; j++) { diff --git a/src/expire.c b/src/expire.c index 0b92ee3fe..b23117a3c 100644 --- a/src/expire.c +++ b/src/expire.c @@ -64,6 +64,7 @@ int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) { dbSyncDelete(db,keyobj); notifyKeyspaceEvent(NOTIFY_EXPIRED, "expired",keyobj,db->id); + if (server.tracking_clients) trackingInvalidateKey(keyobj); decrRefCount(keyobj); server.stat_expiredkeys++; return 1; diff --git a/src/networking.c b/src/networking.c index 185060267..716b35859 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1854,6 +1854,7 @@ sds catClientInfoString(sds s, client *client) { if (client->flags & CLIENT_MULTI) *p++ = 'x'; if (client->flags & CLIENT_BLOCKED) *p++ = 'b'; if (client->flags & CLIENT_TRACKING) *p++ = 't'; + if (client->flags & CLIENT_TRACKING_BROKEN_REDIR) *p++ = 'R'; if (client->flags & CLIENT_DIRTY_CAS) *p++ = 'd'; if (client->flags & CLIENT_CLOSE_AFTER_REPLY) *p++ = 'c'; if (client->flags & CLIENT_UNBLOCKED) *p++ = 'u'; diff --git a/src/server.h b/src/server.h index 022e48304..cd6652257 100644 --- a/src/server.h +++ b/src/server.h @@ -295,6 +295,7 @@ typedef long long mstime_t; /* millisecond time type. */ to be executed. */ #define CLIENT_TRACKING (1<<31) /* Client enabled keys tracking in order to perform client side caching. */ +#define CLIENT_TRACKING_BROKEN_REDIR (1ULL<<32) /* Target client is invalid. */ /* Client block type (btype field in client structure) * if CLIENT_BLOCKED flag is set. */ @@ -821,7 +822,7 @@ typedef struct client { time_t ctime; /* Client creation time. */ time_t lastinteraction; /* Time of the last interaction, used for timeout */ time_t obuf_soft_limit_reached_time; - int flags; /* Client flags: CLIENT_* macros. */ + uint64_t flags; /* Client flags: CLIENT_* macros. */ int authenticated; /* Needed when the default user requires auth. */ int replstate; /* Replication state if this is a slave. */ int repl_put_online_on_ack; /* Install slave write handler on ACK. */ @@ -1603,6 +1604,7 @@ void linkClient(client *c); void protectClient(client *c); void unprotectClient(client *c); void initThreadedIO(void); +client *lookupClientByID(uint64_t id); #ifdef __GNUC__ void addReplyErrorFormat(client *c, const char *fmt, ...) 
@@ -1618,6 +1620,7 @@ void addReplyStatusFormat(client *c, const char *fmt, ...); void enableTracking(client *c, uint64_t redirect_to); void disableTracking(client *c); void trackingRememberKeys(client *c); +void trackingInvalidateKey(robj *keyobj); /* List data type */ void listTypeTryConversion(robj *subject, robj *value); diff --git a/src/tracking.c b/src/tracking.c new file mode 100644 index 000000000..66615ed91 --- /dev/null +++ b/src/tracking.c @@ -0,0 +1,162 @@ +/* tracking.c - Client side caching: keys tracking and invalidation + * + * Copyright (c) 2019, Salvatore Sanfilippo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "server.h" + +/* The tracking table is constituted by 2^24 radix trees (each tree, and the + * table itself, are allocated in a lazy way only when needed) tracking + * clients that may have certain keys in their local, client side, cache. + * + * Keys are grouped into 2^24 slots, in a way similar to Redis Cluster hash + * slots, however here the function we use is crc64, taking the least + * significant 24 bits of the output. + * + * When a client enables tracking with "CLIENT TRACKING on", each key served to + * the client is hashed to one of such slots, and Redis will remember what + * client may have keys about such slot. Later, when a key in a given slot is + * modified, all the clients that may have local copies of keys in that slot + * will receive an invalidation message. There is no distinction of database + * number: a single table is used. + * + * Clients will normally take frequently requested objects in memory, removing + * them when invalidation messages are received. A strategy clients may use is + * to just cache objects in a dictionary, associating to each cached object + * some incremental epoch, or just a timestamp. 
When invalidation messages are + * received clients may store, in a different table, the timestamp (or epoch) + * of the invalidation of such given slot: later when accessing objects, the + * eviction of stale objects may be performed in a lazy way by checking if the + * cached object timestamp is older than the invalidation timestamp for such + * objects. + * + * The output of the 24 bit hash function is very large (more than 16 million + * possible slots), so clients that may want to use less resources may only + * use the most significant bits instead of the full 24 bits. */ +#define TRACKING_TABLE_SIZE (1<<24) +rax **TrackingTable = NULL; + +/* Remove the tracking state from the client 'c'. Note that there is not much + * to do for us here, if not to decrement the counter of the clients in + * tracking mode, because we just store the ID of the client in the tracking + * table, so we'll remove the ID reference in a lazy way. Otherwise when a + * client with many entries in the table is removed, it would cost a lot of + * time to do the cleanup. */ +void disableTracking(client *c) { + if (c->flags & CLIENT_TRACKING) { + server.tracking_clients--; + c->flags &= ~(CLIENT_TRACKING|CLIENT_TRACKING_BROKEN_REDIR); + } +} + +/* Enable the tracking state for the client 'c', and as a side effect allocates + * the tracking table if needed. If the 'redirect_to' argument is non zero, the + * invalidation messages for this client will be sent to the client ID + * specified by the 'redirect_to' argument. Note that if such client will + * eventually get freed, we'll send a message to the original client to + * inform it of the condition. Multiple clients can redirect the invalidation + * messages to the same client ID. */ +void enableTracking(client *c, uint64_t redirect_to) { + if (c->flags & CLIENT_TRACKING) return; + c->flags |= CLIENT_TRACKING; + c->flags &= ~CLIENT_TRACKING_BROKEN_REDIR; + c->client_tracking_redirection = redirect_to; + server.tracking_clients++; + if (TrackingTable == NULL) + TrackingTable = zcalloc(sizeof(rax*) * TRACKING_TABLE_SIZE); +} + +/* This function is called after the execution of a readonly command in the + * case the client 'c' has keys tracking enabled. It will populate the + * tracking invalidation table according to the keys the user fetched, so that + * Redis will know what are the clients that should receive an invalidation + * message when certain groups of keys are modified. */ +void trackingRememberKeys(client *c) { + int numkeys; + int *keys = getKeysFromCommand(c->cmd,c->argv,c->argc,&numkeys); + if (keys == NULL) return; + + for(int j = 0; j < numkeys; j++) { + int idx = keys[j]; + sds sdskey = c->argv[idx]->ptr; + uint64_t hash = crc64(0, + (unsigned char*)sdskey,sdslen(sdskey))&(TRACKING_TABLE_SIZE-1); + if (TrackingTable[hash] == NULL) + TrackingTable[hash] = raxNew(); + raxTryInsert(TrackingTable[hash], + (unsigned char*)&c->id,sizeof(c->id),NULL,NULL); + } + getKeysFreeResult(keys); +} + +/* This function is called from signalModifiedKey() or other places in Redis + * when a key changes value. In the context of keys tracking, our task here is + * to send a notification to every client that may have keys about such slot.
*/ +void trackingInvalidateKey(robj *keyobj) { + sds sdskey = keyobj->ptr; + uint64_t hash = crc64(0, + (unsigned char*)sdskey,sdslen(sdskey))&(TRACKING_TABLE_SIZE-1); + if (TrackingTable == NULL || TrackingTable[hash] == NULL) return; + + raxIterator ri; + raxStart(&ri,TrackingTable[hash]); + raxSeek(&ri,"^",NULL,0); + while(raxNext(&ri)) { + uint64_t id; + memcpy(&id,ri.key,ri.key_len); + client *c = lookupClientByID(id); + if (c->client_tracking_redirection) { + client *redir = lookupClientByID(c->client_tracking_redirection); + if (!redir) { + /* We need to signal to the original connection that we + * are unable to send invalidation messages to the redirected + * connection, because the client no longer exist. */ + if (c->resp > 2) { + addReplyPushLen(c,3); + addReplyBulkCBuffer(c,"tracking-redir-broken",21); + addReplyLongLong(c,c->client_tracking_redirection); + } + continue; + } + c = redir; + } + + /* Only send such info for clients in RESP version 3 or more. */ + if (c->resp > 2) { + addReplyPushLen(c,3); + addReplyBulkCBuffer(c,"invalidate",10); + addReplyBulk(c,keyobj); + } + } + raxStop(&ri); + + /* Free the tracking table: we'll create the radix tree and populate it + * again if more keys will be modified in this hash slot. */ + raxFree(TrackingTable[hash]); + TrackingTable[hash] = NULL; +} From 6aeb2627ded872211c1ccec4e90bc27078aa6b89 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 4 Jul 2019 14:06:44 +0200 Subject: [PATCH 113/304] Client side caching: fix invalidate message len and content. --- src/tracking.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tracking.c b/src/tracking.c index 66615ed91..aade137c4 100644 --- a/src/tracking.c +++ b/src/tracking.c @@ -148,9 +148,9 @@ void trackingInvalidateKey(robj *keyobj) { /* Only send such info for clients in RESP version 3 or more. */ if (c->resp > 2) { - addReplyPushLen(c,3); + addReplyPushLen(c,2); addReplyBulkCBuffer(c,"invalidate",10); - addReplyBulk(c,keyobj); + addReplyLongLong(c,hash); } } raxStop(&ri); From 527c297efb16694f0239ea221a226afecaf310d2 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Thu, 4 Jul 2019 10:02:26 +0300 Subject: [PATCH 114/304] missing per-skiplist overheads in MEMORY USAGE these had severe impact for small zsets, for instance ones with just one element that is longer than 64 (causing it not to be ziplist encoded) --- src/object.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/object.c b/src/object.c index 234e11f8a..10209a6c8 100644 --- a/src/object.c +++ b/src/object.c @@ -834,7 +834,9 @@ size_t objectComputeSize(robj *o, size_t sample_size) { d = ((zset*)o->ptr)->dict; zskiplist *zsl = ((zset*)o->ptr)->zsl; zskiplistNode *znode = zsl->header->level[0].forward; - asize = sizeof(*o)+sizeof(zset)+(sizeof(struct dictEntry*)*dictSlots(d)); + asize = sizeof(*o)+sizeof(zset)+sizeof(zskiplist)+sizeof(dict)+ + (sizeof(struct dictEntry*)*dictSlots(d))+ + zmalloc_size(zsl->header); while(znode != NULL && samples < sample_size) { elesize += sdsAllocSize(znode->ele); elesize += sizeof(struct dictEntry) + zmalloc_size(znode); From f3a8b5001d3dd97eea84f7ed5aaabcec14802f16 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 5 Jul 2019 12:24:28 +0200 Subject: [PATCH 115/304] Client side caching: RESP2 support. 
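RESP2 connections cannot receive out-of-band push replies, so when redirection is used the invalidation messages can instead be delivered as regular Pub/Sub messages on the __redis__:invalidate channel. A possible client-side flow, sketched only for illustration (the exact commands a client library issues are up to the implementer):

    Connection A (invalidation connection, plain RESP2):
        CLIENT ID                        (replies with the connection ID, e.g. 42)
        SUBSCRIBE __redis__:invalidate
    Connection B (data connection):
        CLIENT TRACKING on REDIRECT 42
        GET foo                          (the slot of "foo" is now tracked for B)

Any later modification of a key hashing to that slot makes the server publish the slot number to __redis__:invalidate, which connection A receives as an ordinary Pub/Sub message.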
--- src/server.h | 1 + src/tracking.c | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/server.h b/src/server.h index cd6652257..cb70b93ad 100644 --- a/src/server.h +++ b/src/server.h @@ -1946,6 +1946,7 @@ int pubsubUnsubscribeAllPatterns(client *c, int notify); void freePubsubPattern(void *p); int listMatchPubsubPattern(void *a, void *b); int pubsubPublishMessage(robj *channel, robj *message); +void addReplyPubsubMessage(client *c, robj *channel, robj *msg); /* Keyspace events notification */ void notifyKeyspaceEvent(int type, char *event, robj *key, int dbid); diff --git a/src/tracking.c b/src/tracking.c index aade137c4..9d9585c95 100644 --- a/src/tracking.c +++ b/src/tracking.c @@ -60,6 +60,7 @@ * use the most significant bits instead of the full 24 bits. */ #define TRACKING_TABLE_SIZE (1<<24) rax **TrackingTable = NULL; +robj *TrackingChannelName; /* Remove the tracking state from the client 'c'. Note that there is not much * to do for us here, if not to decrement the counter of the clients in @@ -87,8 +88,10 @@ void enableTracking(client *c, uint64_t redirect_to) { c->flags &= ~CLIENT_TRACKING_BROKEN_REDIR; c->client_tracking_redirection = redirect_to; server.tracking_clients++; - if (TrackingTable == NULL) + if (TrackingTable == NULL) { TrackingTable = zcalloc(sizeof(rax*) * TRACKING_TABLE_SIZE); + TrackingChannelName = createStringObject("__redis__:invalidate",20); + } } /* This function is called after the excution of a readonly command in the @@ -130,6 +133,7 @@ void trackingInvalidateKey(robj *keyobj) { uint64_t id; memcpy(&id,ri.key,ri.key_len); client *c = lookupClientByID(id); + int using_redirection = 0; if (c->client_tracking_redirection) { client *redir = lookupClientByID(c->client_tracking_redirection); if (!redir) { @@ -144,13 +148,21 @@ void trackingInvalidateKey(robj *keyobj) { continue; } c = redir; + using_redirection = 1; } - /* Only send such info for clients in RESP version 3 or more. */ + /* Only send such info for clients in RESP version 3 or more. However + * if redirection is active, and the connection we redirect to is + * in Pub/Sub mode, we can support the feature with RESP 2 as well, + * by sending Pub/Sub messages in the __redis__:invalidate channel. 
*/ if (c->resp > 2) { addReplyPushLen(c,2); addReplyBulkCBuffer(c,"invalidate",10); addReplyLongLong(c,hash); + } else if (using_redirection && c->flags & CLIENT_PUBSUB) { + robj *msg = createStringObjectFromLongLong(hash); + addReplyPubsubMessage(c,TrackingChannelName,msg); + decrRefCount(msg); } } raxStop(&ri); From 664d7d6e84b590564690c453677ac10362ff2ee7 Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Sun, 7 Jul 2019 18:28:15 +0300 Subject: [PATCH 116/304] fix build tracking.c should be tracking.o thanks to @rafie --- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index e608309f8..b6cc69e2f 100644 --- a/src/Makefile +++ b/src/Makefile @@ -164,7 +164,7 @@ endif REDIS_SERVER_NAME=redis-server REDIS_SENTINEL_NAME=redis-sentinel -REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o acl.o gopher.o tracking.c +REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o acl.o gopher.o tracking.o REDIS_CLI_NAME=redis-cli REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o crc16.o REDIS_BENCHMARK_NAME=redis-benchmark From 00c9b4f15dea45e3470655e80f71465070bf728d Mon Sep 17 00:00:00 2001 From: Angus Pearson Date: Mon, 8 Jul 2019 11:04:37 +0100 Subject: [PATCH 117/304] Change typeNameCanonicalize -> getObjectTypeName, and other style changes --- src/db.c | 6 +++--- src/server.h | 9 ++++----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/db.c b/src/db.c index 6557ddc3c..bb53081f6 100644 --- a/src/db.c +++ b/src/db.c @@ -767,7 +767,7 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) { /* Filter an element if it isn't the type we want. 
*/ if (!filter && o == NULL && typename){ robj* typecheck = lookupKeyReadWithFlags(c->db, kobj, LOOKUP_NOTOUCH); - char* type = typeNameCanonicalize(typecheck); + char* type = getObjectTypeName(typecheck); if (strcasecmp((char*) typename, type)) filter = 1; } @@ -827,7 +827,7 @@ void lastsaveCommand(client *c) { addReplyLongLong(c,server.lastsave); } -char* typeNameCanonicalize(robj *o) { +char* getObjectTypeName(robj *o) { char* type; if (o == NULL) { type = "none"; @@ -852,7 +852,7 @@ char* typeNameCanonicalize(robj *o) { void typeCommand(client *c) { robj *o; o = lookupKeyReadWithFlags(c->db,c->argv[1],LOOKUP_NOTOUCH); - addReplyStatus(c, typeNameCanonicalize(o)); + addReplyStatus(c, getObjectTypeName(o)); } void shutdownCommand(client *c) { diff --git a/src/server.h b/src/server.h index dc02edb5c..19ef1ac59 100644 --- a/src/server.h +++ b/src/server.h @@ -646,11 +646,10 @@ typedef struct redisObject { void *ptr; } robj; -/* The 'canonical' name for a type as enumerated above is given by the - * below function. Native types are checked against the OBJ_STRING, - * OBJ_LIST, OBJ_* defines, and Module types have their registered name - * returned.*/ -char* typeNameCanonicalize(robj*); +/* The a string name for an object's type as listed above + * Native types are checked against the OBJ_STRING, OBJ_LIST, OBJ_* defines, + * and Module types have their registered name returned. */ +char *getObjectTypeName(robj*); /* Macro used to initialize a Redis object allocated on the stack. * Note that this macro is taken near the structure definition to make sure From 29754ebe22178f3014227fb38e2e7b4ab4546731 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Mon, 1 Jul 2019 15:22:29 +0300 Subject: [PATCH 118/304] diskless replication on slave side (don't store rdb to file), plus some other related fixes The implementation of the diskless replication was currently diskless only on the master side. The slave side was still storing the received rdb file to the disk before loading it back in and parsing it. This commit adds two modes to load rdb directly from socket: 1) when-empty 2) using "swapdb" the third mode of using diskless slave by flushdb is risky and currently not included. other changes: -------------- distinguish between aof configuration and state so that we can re-enable aof only when sync eventually succeeds (and not when exiting from readSyncBulkPayload after a failed attempt) also a CONFIG GET and INFO during rdb loading would have lied When loading rdb from the network, don't kill the server on short read (that can be a network error) Fix rdb check when performed on preamble AOF tests: run replication tests for diskless slave too make replication test a bit more aggressive Add test for diskless load swapdb --- redis.conf | 16 ++ src/anet.c | 14 + src/anet.h | 1 + src/aof.c | 2 +- src/config.c | 38 ++- src/db.c | 23 +- src/rdb.c | 40 ++- src/redis-check-rdb.c | 4 +- src/replication.c | 340 +++++++++++++++--------- src/rio.c | 109 +++++++- src/rio.h | 10 + src/server.c | 7 +- src/server.h | 19 +- tests/integration/replication-4.tcl | 9 - tests/integration/replication-psync.tcl | 40 ++- tests/integration/replication.tcl | 216 ++++++++++----- tests/support/util.tcl | 12 + 17 files changed, 648 insertions(+), 252 deletions(-) diff --git a/redis.conf b/redis.conf index 060510768..74b6c018f 100644 --- a/redis.conf +++ b/redis.conf @@ -377,6 +377,22 @@ repl-diskless-sync no # it entirely just set it to 0 seconds and the transfer will start ASAP. 
repl-diskless-sync-delay 5 +# Replica can load the rdb it reads from the replication link directly from the +# socket, or store the rdb to a file and read that file after it was completely +# received from the master. +# In many cases the disk is slower than the network, and storing and loading +# the rdb file may increase replication time (and even increase the master's +# Copy on Write memory and slave buffers). +# However, parsing the rdb file directly from the socket may mean that we have +# to flush the contents of the current database before the full rdb was received. +# For this reason we have the following options: +# "disabled" - Don't use diskless load (store the rdb file to the disk first) +# "on-empty-db" - Use diskless load only when it is completely safe. +# "swapdb" - Keep a copy of the current db contents in RAM while parsing +# the data directly from the socket. Note that this requires +# sufficient memory, if you don't have it, you risk an OOM kill. +repl-diskless-load disabled + # Replicas send PINGs to server in a predefined interval. It's possible to change # this interval with the repl_ping_replica_period option. The default value is 10 # seconds. diff --git a/src/anet.c b/src/anet.c index 2981fca13..2088f4fb1 100644 --- a/src/anet.c +++ b/src/anet.c @@ -193,6 +193,20 @@ int anetSendTimeout(char *err, int fd, long long ms) { return ANET_OK; } +/* Set the socket receive timeout (SO_RCVTIMEO socket option) to the specified + * number of milliseconds, or disable it if the 'ms' argument is zero. */ +int anetRecvTimeout(char *err, int fd, long long ms) { + struct timeval tv; + + tv.tv_sec = ms/1000; + tv.tv_usec = (ms%1000)*1000; + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) == -1) { + anetSetError(err, "setsockopt SO_RCVTIMEO: %s", strerror(errno)); + return ANET_ERR; + } + return ANET_OK; +} + /* anetGenericResolve() is called by anetResolve() and anetResolveIP() to * do the actual work. It resolves the hostname "host" and set the string * representation of the IP address into the buffer pointed by "ipbuf". diff --git a/src/anet.h b/src/anet.h index 7142f78d2..dd735240d 100644 --- a/src/anet.h +++ b/src/anet.h @@ -70,6 +70,7 @@ int anetEnableTcpNoDelay(char *err, int fd); int anetDisableTcpNoDelay(char *err, int fd); int anetTcpKeepAlive(char *err, int fd); int anetSendTimeout(char *err, int fd, long long ms); +int anetRecvTimeout(char *err, int fd, long long ms); int anetPeerToString(int fd, char *ip, size_t ip_len, int *port); int anetKeepAlive(char *err, int fd, int interval); int anetSockName(int fd, char *ip, size_t ip_len, int *port); diff --git a/src/aof.c b/src/aof.c index 4744847d2..565ee8073 100644 --- a/src/aof.c +++ b/src/aof.c @@ -729,7 +729,7 @@ int loadAppendOnlyFile(char *filename) { server.aof_state = AOF_OFF; fakeClient = createFakeClient(); - startLoading(fp); + startLoadingFile(fp, filename); /* Check if this AOF file has an RDB preamble. In that case we need to * load the RDB file and later continue loading the AOF tail. */ diff --git a/src/config.c b/src/config.c index 2e6e9a6b7..fde00ddf5 100644 --- a/src/config.c +++ b/src/config.c @@ -91,6 +91,13 @@ configEnum aof_fsync_enum[] = { {NULL, 0} }; +configEnum repl_diskless_load_enum[] = { + {"disabled", REPL_DISKLESS_LOAD_DISABLED}, + {"on-empty-db", REPL_DISKLESS_LOAD_WHEN_DB_EMPTY}, + {"swapdb", REPL_DISKLESS_LOAD_SWAPDB}, + {NULL, 0} +}; + /* Output buffer limits presets.
*/ clientBufferLimitsConfig clientBufferLimitsDefaults[CLIENT_TYPE_OBUF_COUNT] = { {0, 0, 0}, /* normal */ @@ -427,6 +434,11 @@ void loadServerConfigFromString(char *config) { err = "repl-timeout must be 1 or greater"; goto loaderr; } + } else if (!strcasecmp(argv[0],"repl-diskless-load") && argc==2) { + server.repl_diskless_load = configEnumGetValue(repl_diskless_load_enum,argv[1]); + if (server.repl_diskless_load == INT_MIN) { + err = "argument must be 'disabled', 'on-empty-db', 'swapdb' or 'flushdb'"; + } } else if (!strcasecmp(argv[0],"repl-diskless-sync-delay") && argc==2) { server.repl_diskless_sync_delay = atoi(argv[1]); if (server.repl_diskless_sync_delay < 0) { @@ -466,12 +478,10 @@ void loadServerConfigFromString(char *config) { if (server.config_hz < CONFIG_MIN_HZ) server.config_hz = CONFIG_MIN_HZ; if (server.config_hz > CONFIG_MAX_HZ) server.config_hz = CONFIG_MAX_HZ; } else if (!strcasecmp(argv[0],"appendonly") && argc == 2) { - int yes; - - if ((yes = yesnotoi(argv[1])) == -1) { + if ((server.aof_enabled = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; } - server.aof_state = yes ? AOF_ON : AOF_OFF; + server.aof_state = server.aof_enabled ? AOF_ON : AOF_OFF; } else if (!strcasecmp(argv[0],"appendfilename") && argc == 2) { if (!pathIsBaseName(argv[1])) { err = "appendfilename can't be a path, just a filename"; @@ -497,6 +507,12 @@ void loadServerConfigFromString(char *config) { argc == 2) { server.aof_rewrite_min_size = memtoll(argv[1],NULL); + } else if (!strcasecmp(argv[0],"rdb-key-save-delay") && argc==2) { + server.rdb_key_save_delay = atoi(argv[1]); + if (server.rdb_key_save_delay < 0) { + err = "rdb-key-save-delay can't be negative"; + goto loaderr; + } } else if (!strcasecmp(argv[0],"requirepass") && argc == 2) { if (strlen(argv[1]) > CONFIG_AUTHPASS_MAX_LEN) { err = "Password is longer than CONFIG_AUTHPASS_MAX_LEN"; @@ -942,6 +958,7 @@ void configSetCommand(client *c) { int enable = yesnotoi(o->ptr); if (enable == -1) goto badfmt; + server.aof_enabled = enable; if (enable == 0 && server.aof_state != AOF_OFF) { stopAppendOnly(); } else if (enable && server.aof_state == AOF_OFF) { @@ -1132,6 +1149,8 @@ void configSetCommand(client *c) { "slave-priority",server.slave_priority,0,INT_MAX) { } config_set_numerical_field( "replica-priority",server.slave_priority,0,INT_MAX) { + } config_set_numerical_field( + "rdb-key-save-delay",server.rdb_key_save_delay,0,LLONG_MAX) { } config_set_numerical_field( "slave-announce-port",server.slave_announce_port,0,65535) { } config_set_numerical_field( @@ -1199,6 +1218,8 @@ void configSetCommand(client *c) { "maxmemory-policy",server.maxmemory_policy,maxmemory_policy_enum) { } config_set_enum_field( "appendfsync",server.aof_fsync,aof_fsync_enum) { + } config_set_enum_field( + "repl-diskless-load",server.repl_diskless_load,repl_diskless_load_enum) { /* Everyhing else is an error... 
*/ } config_set_else { @@ -1346,6 +1367,7 @@ void configGetCommand(client *c) { config_get_numerical_field("cluster-slave-validity-factor",server.cluster_slave_validity_factor); config_get_numerical_field("cluster-replica-validity-factor",server.cluster_slave_validity_factor); config_get_numerical_field("repl-diskless-sync-delay",server.repl_diskless_sync_delay); + config_get_numerical_field("rdb-key-save-delay",server.rdb_key_save_delay); config_get_numerical_field("tcp-keepalive",server.tcpkeepalive); /* Bool (yes/no) values */ @@ -1370,12 +1392,14 @@ void configGetCommand(client *c) { server.aof_fsync,aof_fsync_enum); config_get_enum_field("syslog-facility", server.syslog_facility,syslog_facility_enum); + config_get_enum_field("repl-diskless-load", + server.repl_diskless_load,repl_diskless_load_enum); /* Everything we can't handle with macros follows. */ if (stringmatch(pattern,"appendonly",1)) { addReplyBulkCString(c,"appendonly"); - addReplyBulkCString(c,server.aof_state == AOF_OFF ? "no" : "yes"); + addReplyBulkCString(c,server.aof_enabled ? "yes" : "no"); matches++; } if (stringmatch(pattern,"dir",1)) { @@ -2109,6 +2133,7 @@ int rewriteConfig(char *path) { rewriteConfigNumericalOption(state,"repl-timeout",server.repl_timeout,CONFIG_DEFAULT_REPL_TIMEOUT); rewriteConfigBytesOption(state,"repl-backlog-size",server.repl_backlog_size,CONFIG_DEFAULT_REPL_BACKLOG_SIZE); rewriteConfigBytesOption(state,"repl-backlog-ttl",server.repl_backlog_time_limit,CONFIG_DEFAULT_REPL_BACKLOG_TIME_LIMIT); + rewriteConfigEnumOption(state,"repl-diskless-load",server.repl_diskless_load,repl_diskless_load_enum,CONFIG_DEFAULT_REPL_DISKLESS_LOAD); rewriteConfigNumericalOption(state,"repl-diskless-sync-delay",server.repl_diskless_sync_delay,CONFIG_DEFAULT_REPL_DISKLESS_SYNC_DELAY); rewriteConfigNumericalOption(state,"replica-priority",server.slave_priority,CONFIG_DEFAULT_SLAVE_PRIORITY); rewriteConfigNumericalOption(state,"min-replicas-to-write",server.repl_min_slaves_to_write,CONFIG_DEFAULT_MIN_SLAVES_TO_WRITE); @@ -2128,7 +2153,7 @@ int rewriteConfig(char *path) { rewriteConfigNumericalOption(state,"active-defrag-cycle-min",server.active_defrag_cycle_min,CONFIG_DEFAULT_DEFRAG_CYCLE_MIN); rewriteConfigNumericalOption(state,"active-defrag-cycle-max",server.active_defrag_cycle_max,CONFIG_DEFAULT_DEFRAG_CYCLE_MAX); rewriteConfigNumericalOption(state,"active-defrag-max-scan-fields",server.active_defrag_max_scan_fields,CONFIG_DEFAULT_DEFRAG_MAX_SCAN_FIELDS); - rewriteConfigYesNoOption(state,"appendonly",server.aof_state != AOF_OFF,0); + rewriteConfigYesNoOption(state,"appendonly",server.aof_enabled,0); rewriteConfigStringOption(state,"appendfilename",server.aof_filename,CONFIG_DEFAULT_AOF_FILENAME); rewriteConfigEnumOption(state,"appendfsync",server.aof_fsync,aof_fsync_enum,CONFIG_DEFAULT_AOF_FSYNC); rewriteConfigNumericalOption(state,"auto-aof-rewrite-percentage",server.aof_rewrite_perc,AOF_REWRITE_PERC); @@ -2157,6 +2182,7 @@ int rewriteConfig(char *path) { rewriteConfigClientoutputbufferlimitOption(state); rewriteConfigNumericalOption(state,"hz",server.config_hz,CONFIG_DEFAULT_HZ); rewriteConfigEnumOption(state,"supervised",server.supervised_mode,supervised_mode_enum,SUPERVISED_NONE); + rewriteConfigNumericalOption(state,"rdb-key-save-delay",server.rdb_key_save_delay,CONFIG_DEFAULT_RDB_KEY_SAVE_DELAY); /* Rewrite Sentinel config if in Sentinel mode. 
*/ if (server.sentinel_mode) rewriteConfigSentinelOption(state); diff --git a/src/db.c b/src/db.c index 07051dad4..8b7656802 100644 --- a/src/db.c +++ b/src/db.c @@ -344,7 +344,7 @@ robj *dbUnshareStringValue(redisDb *db, robj *key, robj *o) { * On success the fuction returns the number of keys removed from the * database(s). Otherwise -1 is returned in the specific case the * DB number is out of range, and errno is set to EINVAL. */ -long long emptyDb(int dbnum, int flags, void(callback)(void*)) { +long long emptyDbGeneric(redisDb *dbarray, int dbnum, int flags, void(callback)(void*)) { int async = (flags & EMPTYDB_ASYNC); long long removed = 0; @@ -362,12 +362,12 @@ long long emptyDb(int dbnum, int flags, void(callback)(void*)) { } for (int j = startdb; j <= enddb; j++) { - removed += dictSize(server.db[j].dict); + removed += dictSize(dbarray[j].dict); if (async) { - emptyDbAsync(&server.db[j]); + emptyDbAsync(&dbarray[j]); } else { - dictEmpty(server.db[j].dict,callback); - dictEmpty(server.db[j].expires,callback); + dictEmpty(dbarray[j].dict,callback); + dictEmpty(dbarray[j].expires,callback); } } if (server.cluster_enabled) { @@ -381,6 +381,10 @@ long long emptyDb(int dbnum, int flags, void(callback)(void*)) { return removed; } +long long emptyDb(int dbnum, int flags, void(callback)(void*)) { + return emptyDbGeneric(server.db, dbnum, flags, callback); +} + int selectDb(client *c, int id) { if (id < 0 || id >= server.dbnum) return C_ERR; @@ -388,6 +392,15 @@ int selectDb(client *c, int id) { return C_OK; } +long long dbTotalServerKeyCount() { + long long total = 0; + int j; + for (j = 0; j < server.dbnum; j++) { + total += dictSize(server.db[j].dict); + } + return total; +} + /*----------------------------------------------------------------------------- * Hooks for key space changes. * diff --git a/src/rdb.c b/src/rdb.c index 95e4766ea..c566378fb 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -44,6 +44,7 @@ #define rdbExitReportCorruptRDB(...) rdbCheckThenExit(__LINE__,__VA_ARGS__) +char* rdbFileBeingLoaded = NULL; /* used for rdb checking on read error */ extern int rdbCheckMode; void rdbCheckError(const char *fmt, ...); void rdbCheckSetError(const char *fmt, ...); @@ -61,11 +62,17 @@ void rdbCheckThenExit(int linenum, char *reason, ...) { if (!rdbCheckMode) { serverLog(LL_WARNING, "%s", msg); - char *argv[2] = {"",server.rdb_filename}; - redis_check_rdb_main(2,argv,NULL); + if (rdbFileBeingLoaded) { + char *argv[2] = {"",rdbFileBeingLoaded}; + redis_check_rdb_main(2,argv,NULL); + } else { + serverLog(LL_WARNING, "Failure loading rdb format from socket, assuming connection error, resuming operation."); + return; + } } else { rdbCheckError("%s",msg); } + serverLog(LL_WARNING, "Terminating server after rdb file reading failure."); exit(1); } @@ -1039,6 +1046,11 @@ int rdbSaveKeyValuePair(rio *rdb, robj *key, robj *val, long long expiretime) { if (rdbSaveObjectType(rdb,val) == -1) return -1; if (rdbSaveStringObject(rdb,key) == -1) return -1; if (rdbSaveObject(rdb,val,key) == -1) return -1; + + /* Delay return if required (for testing) */ + if (server.rdb_key_save_delay) + usleep(server.rdb_key_save_delay); + return 1; } @@ -1800,18 +1812,23 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, robj *key) { /* Mark that we are loading in the global state and setup the fields * needed to provide loading stats. 
*/ -void startLoading(FILE *fp) { - struct stat sb; - +void startLoading(size_t size) { /* Load the DB */ server.loading = 1; server.loading_start_time = time(NULL); server.loading_loaded_bytes = 0; - if (fstat(fileno(fp), &sb) == -1) { - server.loading_total_bytes = 0; - } else { - server.loading_total_bytes = sb.st_size; - } + server.loading_total_bytes = size; +} + +/* Mark that we are loading in the global state and setup the fields + * needed to provide loading stats. + * 'filename' is optional and used for rdb-check on error */ +void startLoadingFile(FILE *fp, char* filename) { + struct stat sb; + if (fstat(fileno(fp), &sb) == -1) + sb.st_size = 0; + rdbFileBeingLoaded = filename; + startLoading(sb.st_size); } /* Refresh the loading progress info */ @@ -1824,6 +1841,7 @@ void loadingProgress(off_t pos) { /* Loading finished */ void stopLoading(void) { server.loading = 0; + rdbFileBeingLoaded = NULL; } /* Track loading progress in order to serve client's from time to time @@ -2089,7 +2107,7 @@ int rdbLoad(char *filename, rdbSaveInfo *rsi) { int retval; if ((fp = fopen(filename,"r")) == NULL) return C_ERR; - startLoading(fp); + startLoadingFile(fp, filename); rioInitWithFile(&rdb,fp); retval = rdbLoadRio(&rdb,rsi,0); fclose(fp); diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c index ec00ee71c..e2d71b5a5 100644 --- a/src/redis-check-rdb.c +++ b/src/redis-check-rdb.c @@ -202,7 +202,7 @@ int redis_check_rdb(char *rdbfilename, FILE *fp) { } expiretime = -1; - startLoading(fp); + startLoadingFile(fp, rdbfilename); while(1) { robj *key, *val; @@ -314,6 +314,7 @@ int redis_check_rdb(char *rdbfilename, FILE *fp) { } if (closefile) fclose(fp); + stopLoading(); return 0; eoferr: /* unexpected end of file is handled here with a fatal exit */ @@ -324,6 +325,7 @@ eoferr: /* unexpected end of file is handled here with a fatal exit */ } err: if (closefile) fclose(fp); + stopLoading(); return 1; } diff --git a/src/replication.c b/src/replication.c index 63a67a06a..e2bac08bd 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1113,11 +1113,22 @@ void restartAOFAfterSYNC() { } } +static int useDisklessLoad() { + /* compute boolean decision to use diskless load */ + return server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB || + (server.repl_diskless_load == REPL_DISKLESS_LOAD_WHEN_DB_EMPTY && dbTotalServerKeyCount()==0); +} + + /* Asynchronously read the SYNC payload we receive from a master */ #define REPL_MAX_WRITTEN_BEFORE_FSYNC (1024*1024*8) /* 8 MB */ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { char buf[4096]; ssize_t nread, readlen, nwritten; + int use_diskless_load; + redisDb *diskless_load_backup = NULL; + int empty_db_flags = server.repl_slave_lazy_flush ? EMPTYDB_ASYNC : EMPTYDB_NO_FLAGS; + int i; off_t left; UNUSED(el); UNUSED(privdata); @@ -1173,90 +1184,177 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { * at the next call. */ server.repl_transfer_size = 0; serverLog(LL_NOTICE, - "MASTER <-> REPLICA sync: receiving streamed RDB from master"); + "MASTER <-> REPLICA sync: receiving streamed RDB from master with EOF %s", + useDisklessLoad()? "to parser":"to disk"); } else { usemark = 0; server.repl_transfer_size = strtol(buf+1,NULL,10); serverLog(LL_NOTICE, - "MASTER <-> REPLICA sync: receiving %lld bytes from master", - (long long) server.repl_transfer_size); + "MASTER <-> REPLICA sync: receiving %lld bytes from master %s", + (long long) server.repl_transfer_size, + useDisklessLoad()? 
"to parser":"to disk"); } return; } - /* Read bulk data */ - if (usemark) { - readlen = sizeof(buf); - } else { - left = server.repl_transfer_size - server.repl_transfer_read; - readlen = (left < (signed)sizeof(buf)) ? left : (signed)sizeof(buf); - } + use_diskless_load = useDisklessLoad(); + if (!use_diskless_load) { - nread = read(fd,buf,readlen); - if (nread <= 0) { - serverLog(LL_WARNING,"I/O error trying to sync with MASTER: %s", - (nread == -1) ? strerror(errno) : "connection lost"); - cancelReplicationHandshake(); - return; - } - server.stat_net_input_bytes += nread; - - /* When a mark is used, we want to detect EOF asap in order to avoid - * writing the EOF mark into the file... */ - int eof_reached = 0; - - if (usemark) { - /* Update the last bytes array, and check if it matches our delimiter.*/ - if (nread >= CONFIG_RUN_ID_SIZE) { - memcpy(lastbytes,buf+nread-CONFIG_RUN_ID_SIZE,CONFIG_RUN_ID_SIZE); + /* read the data from the socket, store it to a file and search for the EOF */ + if (usemark) { + readlen = sizeof(buf); } else { - int rem = CONFIG_RUN_ID_SIZE-nread; - memmove(lastbytes,lastbytes+nread,rem); - memcpy(lastbytes+rem,buf,nread); + left = server.repl_transfer_size - server.repl_transfer_read; + readlen = (left < (signed)sizeof(buf)) ? left : (signed)sizeof(buf); } - if (memcmp(lastbytes,eofmark,CONFIG_RUN_ID_SIZE) == 0) eof_reached = 1; - } - server.repl_transfer_lastio = server.unixtime; - if ((nwritten = write(server.repl_transfer_fd,buf,nread)) != nread) { - serverLog(LL_WARNING,"Write error or short write writing to the DB dump file needed for MASTER <-> REPLICA synchronization: %s", - (nwritten == -1) ? strerror(errno) : "short write"); - goto error; - } - server.repl_transfer_read += nread; + nread = read(fd,buf,readlen); + if (nread <= 0) { + serverLog(LL_WARNING,"I/O error trying to sync with MASTER: %s", + (nread == -1) ? strerror(errno) : "connection lost"); + cancelReplicationHandshake(); + return; + } + server.stat_net_input_bytes += nread; - /* Delete the last 40 bytes from the file if we reached EOF. */ - if (usemark && eof_reached) { - if (ftruncate(server.repl_transfer_fd, - server.repl_transfer_read - CONFIG_RUN_ID_SIZE) == -1) - { - serverLog(LL_WARNING,"Error truncating the RDB file received from the master for SYNC: %s", strerror(errno)); + /* When a mark is used, we want to detect EOF asap in order to avoid + * writing the EOF mark into the file... */ + int eof_reached = 0; + + if (usemark) { + /* Update the last bytes array, and check if it matches our delimiter.*/ + if (nread >= CONFIG_RUN_ID_SIZE) { + memcpy(lastbytes,buf+nread-CONFIG_RUN_ID_SIZE,CONFIG_RUN_ID_SIZE); + } else { + int rem = CONFIG_RUN_ID_SIZE-nread; + memmove(lastbytes,lastbytes+nread,rem); + memcpy(lastbytes+rem,buf,nread); + } + if (memcmp(lastbytes,eofmark,CONFIG_RUN_ID_SIZE) == 0) eof_reached = 1; + } + + server.repl_transfer_lastio = server.unixtime; + if ((nwritten = write(server.repl_transfer_fd,buf,nread)) != nread) { + serverLog(LL_WARNING,"Write error or short write writing to the DB dump file needed for MASTER <-> REPLICA synchronization: %s", + (nwritten == -1) ? strerror(errno) : "short write"); goto error; } + server.repl_transfer_read += nread; + + /* Delete the last 40 bytes from the file if we reached EOF. 
*/ + if (usemark && eof_reached) { + if (ftruncate(server.repl_transfer_fd, + server.repl_transfer_read - CONFIG_RUN_ID_SIZE) == -1) + { + serverLog(LL_WARNING,"Error truncating the RDB file received from the master for SYNC: %s", strerror(errno)); + goto error; + } + } + + /* Sync data on disk from time to time, otherwise at the end of the transfer + * we may suffer a big delay as the memory buffers are copied into the + * actual disk. */ + if (server.repl_transfer_read >= + server.repl_transfer_last_fsync_off + REPL_MAX_WRITTEN_BEFORE_FSYNC) + { + off_t sync_size = server.repl_transfer_read - + server.repl_transfer_last_fsync_off; + rdb_fsync_range(server.repl_transfer_fd, + server.repl_transfer_last_fsync_off, sync_size); + server.repl_transfer_last_fsync_off += sync_size; + } + + /* Check if the transfer is now complete */ + if (!usemark) { + if (server.repl_transfer_read == server.repl_transfer_size) + eof_reached = 1; + } + if (!eof_reached) + return; } - /* Sync data on disk from time to time, otherwise at the end of the transfer - * we may suffer a big delay as the memory buffers are copied into the - * actual disk. */ - if (server.repl_transfer_read >= - server.repl_transfer_last_fsync_off + REPL_MAX_WRITTEN_BEFORE_FSYNC) - { - off_t sync_size = server.repl_transfer_read - - server.repl_transfer_last_fsync_off; - rdb_fsync_range(server.repl_transfer_fd, - server.repl_transfer_last_fsync_off, sync_size); - server.repl_transfer_last_fsync_off += sync_size; + /* We reach here when the slave is using diskless replication, + * or when we are done reading from the socket to the rdb file. */ + serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Flushing old data"); + /* We need to stop any AOFRW fork before flusing and parsing + * RDB, otherwise we'll create a copy-on-write disaster. */ + if (server.aof_state != AOF_OFF) stopAppendOnly(); + signalFlushedDb(-1); + if (use_diskless_load && server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB) { + /* create a backup of the current db */ + diskless_load_backup = zmalloc(sizeof(redisDb)*server.dbnum); + for (i=0; i REPLICA sync: Loading DB in memory"); + rdbSaveInfo rsi = RDB_SAVE_INFO_INIT; + if (use_diskless_load) { + rio rdb; + rioInitWithFd(&rdb,fd,server.repl_transfer_size); + /* Put the socket in blocking mode to simplify RDB transfer. + * We'll restore it when the RDB is received. */ + anetBlock(NULL,fd); + anetRecvTimeout(NULL,fd,server.repl_timeout*1000); - /* Check if the transfer is now complete */ - if (!usemark) { - if (server.repl_transfer_read == server.repl_transfer_size) - eof_reached = 1; - } - - if (eof_reached) { - int aof_is_enabled = server.aof_state != AOF_OFF; - + startLoading(server.repl_transfer_size); + if (rdbLoadRio(&rdb,&rsi,0) != C_OK) { + /* rdbloading failed */ + stopLoading(); + serverLog(LL_WARNING,"Failed trying to load the MASTER synchronization DB from socket"); + cancelReplicationHandshake(); + rioFreeFd(&rdb, NULL); + if (server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB) { + /* restore the backed up db */ + emptyDbGeneric(server.db,-1,empty_db_flags,replicationEmptyDbCallback); + for (i=0; i REPLICA synchronization: %s", - server.rdb_filename, strerror(errno)); + server.rdb_filename, strerror(errno)); cancelReplicationHandshake(); return; } - serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Flushing old data"); - /* We need to stop any AOFRW fork before flusing and parsing - * RDB, otherwise we'll create a copy-on-write disaster. 
*/ - if(aof_is_enabled) stopAppendOnly(); - signalFlushedDb(-1); - emptyDb( - -1, - server.repl_slave_lazy_flush ? EMPTYDB_ASYNC : EMPTYDB_NO_FLAGS, - replicationEmptyDbCallback); - /* Before loading the DB into memory we need to delete the readable - * handler, otherwise it will get called recursively since - * rdbLoad() will call the event loop to process events from time to - * time for non blocking loading. */ - aeDeleteFileEvent(server.el,server.repl_transfer_s,AE_READABLE); - serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Loading DB in memory"); - rdbSaveInfo rsi = RDB_SAVE_INFO_INIT; if (rdbLoad(server.rdb_filename,&rsi) != C_OK) { serverLog(LL_WARNING,"Failed trying to load the MASTER synchronization DB from disk"); cancelReplicationHandshake(); - /* Re-enable the AOF if we disabled it earlier, in order to restore - * the original configuration. */ - if (aof_is_enabled) restartAOFAfterSYNC(); + /* Note that there's no point in restarting the AOF on sync failure, + it'll be restarted when sync succeeds or slave promoted. */ return; } - /* Final setup of the connected slave <- master link */ zfree(server.repl_transfer_tmpfile); close(server.repl_transfer_fd); - replicationCreateMasterClient(server.repl_transfer_s,rsi.repl_stream_db); - server.repl_state = REPL_STATE_CONNECTED; - server.repl_down_since = 0; - /* After a full resynchroniziation we use the replication ID and - * offset of the master. The secondary ID / offset are cleared since - * we are starting a new history. */ - memcpy(server.replid,server.master->replid,sizeof(server.replid)); - server.master_repl_offset = server.master->reploff; - clearReplicationId2(); - /* Let's create the replication backlog if needed. Slaves need to - * accumulate the backlog regardless of the fact they have sub-slaves - * or not, in order to behave correctly if they are promoted to - * masters after a failover. */ - if (server.repl_backlog == NULL) createReplicationBacklog(); - - serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Finished with success"); - /* Restart the AOF subsystem now that we finished the sync. This - * will trigger an AOF rewrite, and when done will start appending - * to the new file. */ - if (aof_is_enabled) restartAOFAfterSYNC(); + server.repl_transfer_fd = -1; + server.repl_transfer_tmpfile = NULL; } + /* Final setup of the connected slave <- master link */ + replicationCreateMasterClient(server.repl_transfer_s,rsi.repl_stream_db); + server.repl_state = REPL_STATE_CONNECTED; + server.repl_down_since = 0; + /* After a full resynchroniziation we use the replication ID and + * offset of the master. The secondary ID / offset are cleared since + * we are starting a new history. */ + memcpy(server.replid,server.master->replid,sizeof(server.replid)); + server.master_repl_offset = server.master->reploff; + clearReplicationId2(); + /* Let's create the replication backlog if needed. Slaves need to + * accumulate the backlog regardless of the fact they have sub-slaves + * or not, in order to behave correctly if they are promoted to + * masters after a failover. */ + if (server.repl_backlog == NULL) createReplicationBacklog(); + + serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Finished with success"); + /* Restart the AOF subsystem now that we finished the sync. This + * will trigger an AOF rewrite, and when done will start appending + * to the new file. 
*/ + if (server.aof_enabled) restartAOFAfterSYNC(); return; error: @@ -1845,16 +1928,20 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) { } /* Prepare a suitable temp file for bulk transfer */ - while(maxtries--) { - snprintf(tmpfile,256, - "temp-%d.%ld.rdb",(int)server.unixtime,(long int)getpid()); - dfd = open(tmpfile,O_CREAT|O_WRONLY|O_EXCL,0644); - if (dfd != -1) break; - sleep(1); - } - if (dfd == -1) { - serverLog(LL_WARNING,"Opening the temp file needed for MASTER <-> REPLICA synchronization: %s",strerror(errno)); - goto error; + if (!useDisklessLoad()) { + while(maxtries--) { + snprintf(tmpfile,256, + "temp-%d.%ld.rdb",(int)server.unixtime,(long int)getpid()); + dfd = open(tmpfile,O_CREAT|O_WRONLY|O_EXCL,0644); + if (dfd != -1) break; + sleep(1); + } + if (dfd == -1) { + serverLog(LL_WARNING,"Opening the temp file needed for MASTER <-> REPLICA synchronization: %s",strerror(errno)); + goto error; + } + server.repl_transfer_tmpfile = zstrdup(tmpfile); + server.repl_transfer_fd = dfd; } /* Setup the non blocking download of the bulk file. */ @@ -1871,15 +1958,19 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) { server.repl_transfer_size = -1; server.repl_transfer_read = 0; server.repl_transfer_last_fsync_off = 0; - server.repl_transfer_fd = dfd; server.repl_transfer_lastio = server.unixtime; - server.repl_transfer_tmpfile = zstrdup(tmpfile); return; error: aeDeleteFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE); if (dfd != -1) close(dfd); close(fd); + if (server.repl_transfer_fd != -1) + close(server.repl_transfer_fd); + if (server.repl_transfer_tmpfile) + zfree(server.repl_transfer_tmpfile); + server.repl_transfer_tmpfile = NULL; + server.repl_transfer_fd = -1; server.repl_transfer_s = -1; server.repl_state = REPL_STATE_CONNECT; return; @@ -1933,9 +2024,13 @@ void undoConnectWithMaster(void) { void replicationAbortSyncTransfer(void) { serverAssert(server.repl_state == REPL_STATE_TRANSFER); undoConnectWithMaster(); - close(server.repl_transfer_fd); - unlink(server.repl_transfer_tmpfile); - zfree(server.repl_transfer_tmpfile); + if (server.repl_transfer_fd!=-1) { + close(server.repl_transfer_fd); + unlink(server.repl_transfer_tmpfile); + zfree(server.repl_transfer_tmpfile); + server.repl_transfer_tmpfile = NULL; + server.repl_transfer_fd = -1; + } } /* This function aborts a non blocking replication attempt if there is one @@ -2045,6 +2140,9 @@ void replicaofCommand(client *c) { serverLog(LL_NOTICE,"MASTER MODE enabled (user request from '%s')", client); sdsfree(client); + /* Restart the AOF subsystem in case we shut it down during a sync when + * we were still a slave. */ + if (server.aof_enabled && server.aof_state == AOF_OFF) restartAOFAfterSYNC(); } } else { long port; diff --git a/src/rio.c b/src/rio.c index c9c76b8f2..993768b56 100644 --- a/src/rio.c +++ b/src/rio.c @@ -157,6 +157,113 @@ void rioInitWithFile(rio *r, FILE *fp) { r->io.file.autosync = 0; } +/* ------------------- File descriptor implementation ------------------- */ + +static size_t rioFdWrite(rio *r, const void *buf, size_t len) { + UNUSED(r); + UNUSED(buf); + UNUSED(len); + return 0; /* Error, this target does not yet support writing. */ +} + +/* Returns 1 or 0 for success/failure. 
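+ * (For this fd-backed target, "success" means the requested 'len' bytes were
+ * copied into 'buf' and 'len' is returned, while zero is returned on error.)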
*/ +static size_t rioFdRead(rio *r, void *buf, size_t len) { + size_t avail = sdslen(r->io.fd.buf)-r->io.fd.pos; + + /* if the buffer is too small for the entire request: realloc */ + if (sdslen(r->io.fd.buf) + sdsavail(r->io.fd.buf) < len) + r->io.fd.buf = sdsMakeRoomFor(r->io.fd.buf, len - sdslen(r->io.fd.buf)); + + /* if the remaining unused buffer is not large enough: memmove so that we can read the rest */ + if (len > avail && sdsavail(r->io.fd.buf) < len - avail) { + sdsrange(r->io.fd.buf, r->io.fd.pos, -1); + r->io.fd.pos = 0; + } + + /* if we don't already have all the data in the sds, read more */ + while (len > sdslen(r->io.fd.buf) - r->io.fd.pos) { + size_t buffered = sdslen(r->io.fd.buf) - r->io.fd.pos; + size_t toread = len - buffered; + /* read either what's missing, or PROTO_IOBUF_LEN, the bigger of the two */ + if (toread < PROTO_IOBUF_LEN) + toread = PROTO_IOBUF_LEN; + if (toread > sdsavail(r->io.fd.buf)) + toread = sdsavail(r->io.fd.buf); + if (r->io.fd.read_limit != 0 && + r->io.fd.read_so_far + buffered + toread > r->io.fd.read_limit) { + if (r->io.fd.read_limit >= r->io.fd.read_so_far - buffered) + toread = r->io.fd.read_limit - r->io.fd.read_so_far - buffered; + else { + errno = EOVERFLOW; + return 0; + } + } + int retval = read(r->io.fd.fd, (char*)r->io.fd.buf + sdslen(r->io.fd.buf), toread); + if (retval <= 0) { + if (errno == EWOULDBLOCK) errno = ETIMEDOUT; + return 0; + } + sdsIncrLen(r->io.fd.buf, retval); + } + + memcpy(buf, (char*)r->io.fd.buf + r->io.fd.pos, len); + r->io.fd.read_so_far += len; + r->io.fd.pos += len; + return len; +} + +/* Returns read/write position in file. */ +static off_t rioFdTell(rio *r) { + return r->io.fd.read_so_far; +} + +/* Flushes any buffer to target device if applicable. Returns 1 on success + * and 0 on failures. */ +static int rioFdFlush(rio *r) { + /* Our flush is implemented by the write method, that recognizes a + * buffer set to NULL with a count of zero as a flush request. */ + return rioFdWrite(r,NULL,0); +} + +static const rio rioFdIO = { + rioFdRead, + rioFdWrite, + rioFdTell, + rioFdFlush, + NULL, /* update_checksum */ + 0, /* current checksum */ + 0, /* bytes read or written */ + 0, /* read/write chunk size */ + { { NULL, 0 } } /* union for io-specific vars */ +}; + +/* create an rio that implements a buffered read from an fd + * read_limit argument stops buffering when the reaching the limit */ +void rioInitWithFd(rio *r, int fd, size_t read_limit) { + *r = rioFdIO; + r->io.fd.fd = fd; + r->io.fd.pos = 0; + r->io.fd.read_limit = read_limit; + r->io.fd.read_so_far = 0; + r->io.fd.buf = sdsnewlen(NULL, PROTO_IOBUF_LEN); + sdsclear(r->io.fd.buf); +} + +/* release the rio stream. + * optionally returns the unread buffered data. */ +void rioFreeFd(rio *r, sds* out_remainingBufferedData) { + if(out_remainingBufferedData && (size_t)r->io.fd.pos < sdslen(r->io.fd.buf)) { + if (r->io.fd.pos > 0) + sdsrange(r->io.fd.buf, r->io.fd.pos, -1); + *out_remainingBufferedData = r->io.fd.buf; + } else { + sdsfree(r->io.fd.buf); + if (out_remainingBufferedData) + *out_remainingBufferedData = NULL; + } + r->io.fd.buf = NULL; +} + /* ------------------- File descriptors set implementation ------------------- */ /* Returns 1 or 0 for success/failure. @@ -300,7 +407,7 @@ void rioGenericUpdateChecksum(rio *r, const void *buf, size_t len) { * disk I/O concentrated in very little time. When we fsync in an explicit * way instead the I/O pressure is more distributed across time. 
*/ void rioSetAutoSync(rio *r, off_t bytes) { - serverAssert(r->read == rioFileIO.read); + if(r->write != rioFileIO.write) return; r->io.file.autosync = bytes; } diff --git a/src/rio.h b/src/rio.h index c996c54f6..beea06888 100644 --- a/src/rio.h +++ b/src/rio.h @@ -73,6 +73,14 @@ struct _rio { off_t buffered; /* Bytes written since last fsync. */ off_t autosync; /* fsync after 'autosync' bytes written. */ } file; + /* file descriptor */ + struct { + int fd; /* File descriptor. */ + off_t pos; /* pos in buf that was returned */ + sds buf; /* buffered data */ + size_t read_limit; /* don't allow to buffer/read more than that */ + size_t read_so_far; /* amount of data read from the rio (not buffered) */ + } fd; /* Multiple FDs target (used to write to N sockets). */ struct { int *fds; /* File descriptors. */ @@ -126,9 +134,11 @@ static inline int rioFlush(rio *r) { void rioInitWithFile(rio *r, FILE *fp); void rioInitWithBuffer(rio *r, sds s); +void rioInitWithFd(rio *r, int fd, size_t read_limit); void rioInitWithFdset(rio *r, int *fds, int numfds); void rioFreeFdset(rio *r); +void rioFreeFd(rio *r, sds* out_remainingBufferedData); size_t rioWriteBulkCount(rio *r, char prefix, long count); size_t rioWriteBulkString(rio *r, const char *buf, size_t len); diff --git a/src/server.c b/src/server.c index 78b8d8f1b..8ed5b591c 100644 --- a/src/server.c +++ b/src/server.c @@ -2265,6 +2265,7 @@ void initServerConfig(void) { server.aof_flush_postponed_start = 0; server.aof_rewrite_incremental_fsync = CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC; server.rdb_save_incremental_fsync = CONFIG_DEFAULT_RDB_SAVE_INCREMENTAL_FSYNC; + server.rdb_key_save_delay = CONFIG_DEFAULT_RDB_KEY_SAVE_DELAY; server.aof_load_truncated = CONFIG_DEFAULT_AOF_LOAD_TRUNCATED; server.aof_use_rdb_preamble = CONFIG_DEFAULT_AOF_USE_RDB_PREAMBLE; server.pidfile = NULL; @@ -2334,6 +2335,9 @@ void initServerConfig(void) { server.cached_master = NULL; server.master_initial_offset = -1; server.repl_state = REPL_STATE_NONE; + server.repl_transfer_tmpfile = NULL; + server.repl_transfer_fd = -1; + server.repl_transfer_s = -1; server.repl_syncio_timeout = CONFIG_REPL_SYNCIO_TIMEOUT; server.repl_serve_stale_data = CONFIG_DEFAULT_SLAVE_SERVE_STALE_DATA; server.repl_slave_ro = CONFIG_DEFAULT_SLAVE_READ_ONLY; @@ -2342,6 +2346,7 @@ void initServerConfig(void) { server.repl_down_since = 0; /* Never connected, repl is down since EVER. */ server.repl_disable_tcp_nodelay = CONFIG_DEFAULT_REPL_DISABLE_TCP_NODELAY; server.repl_diskless_sync = CONFIG_DEFAULT_REPL_DISKLESS_SYNC; + server.repl_diskless_load = CONFIG_DEFAULT_REPL_DISKLESS_LOAD; server.repl_diskless_sync_delay = CONFIG_DEFAULT_REPL_DISKLESS_SYNC_DELAY; server.repl_ping_slave_period = CONFIG_DEFAULT_REPL_PING_SLAVE_PERIOD; server.repl_timeout = CONFIG_DEFAULT_REPL_TIMEOUT; @@ -4053,7 +4058,7 @@ sds genRedisInfoString(char *section) { (server.aof_last_write_status == C_OK) ? "ok" : "err", server.stat_aof_cow_bytes); - if (server.aof_state != AOF_OFF) { + if (server.aof_enabled) { info = sdscatprintf(info, "aof_current_size:%lld\r\n" "aof_base_size:%lld\r\n" diff --git a/src/server.h b/src/server.h index 8686994f6..f81b1010e 100644 --- a/src/server.h +++ b/src/server.h @@ -132,6 +132,7 @@ typedef long long mstime_t; /* millisecond time type. 
*/ #define CONFIG_DEFAULT_RDB_FILENAME "dump.rdb" #define CONFIG_DEFAULT_REPL_DISKLESS_SYNC 0 #define CONFIG_DEFAULT_REPL_DISKLESS_SYNC_DELAY 5 +#define CONFIG_DEFAULT_RDB_KEY_SAVE_DELAY 0 #define CONFIG_DEFAULT_SLAVE_SERVE_STALE_DATA 1 #define CONFIG_DEFAULT_SLAVE_READ_ONLY 1 #define CONFIG_DEFAULT_SLAVE_IGNORE_MAXMEMORY 1 @@ -394,6 +395,12 @@ typedef long long mstime_t; /* millisecond time type. */ #define AOF_FSYNC_EVERYSEC 2 #define CONFIG_DEFAULT_AOF_FSYNC AOF_FSYNC_EVERYSEC +/* Replication diskless load defines */ +#define REPL_DISKLESS_LOAD_DISABLED 0 +#define REPL_DISKLESS_LOAD_WHEN_DB_EMPTY 1 +#define REPL_DISKLESS_LOAD_SWAPDB 2 +#define CONFIG_DEFAULT_REPL_DISKLESS_LOAD REPL_DISKLESS_LOAD_DISABLED + /* Zipped structures related defaults */ #define OBJ_HASH_MAX_ZIPLIST_ENTRIES 512 #define OBJ_HASH_MAX_ZIPLIST_VALUE 64 @@ -1158,6 +1165,7 @@ struct redisServer { int daemonize; /* True if running as a daemon */ clientBufferLimitsConfig client_obuf_limits[CLIENT_TYPE_OBUF_COUNT]; /* AOF persistence */ + int aof_enabled; /* AOF configuration */ int aof_state; /* AOF_(ON|OFF|WAIT_REWRITE) */ int aof_fsync; /* Kind of fsync() policy */ char *aof_filename; /* Name of the AOF file */ @@ -1214,6 +1222,8 @@ struct redisServer { int stop_writes_on_bgsave_err; /* Don't allow writes if can't BGSAVE */ int rdb_pipe_write_result_to_parent; /* RDB pipes used to return the state */ int rdb_pipe_read_result_from_child; /* of each slave in diskless SYNC. */ + int rdb_key_save_delay; /* Delay in microseconds between keys while + * writing the RDB. (for testings) */ /* Pipe and data structures for child -> parent info sharing. */ int child_info_pipe[2]; /* Pipe used to write the child_info_data. */ struct { @@ -1249,7 +1259,9 @@ struct redisServer { int repl_min_slaves_to_write; /* Min number of slaves to write. */ int repl_min_slaves_max_lag; /* Max lag of slaves to write. */ int repl_good_slaves_count; /* Number of slaves with lag <= max_lag. */ - int repl_diskless_sync; /* Send RDB to slaves sockets directly. */ + int repl_diskless_sync; /* Master send RDB to slaves sockets directly. */ + int repl_diskless_load; /* Slave parse RDB directly from the socket. + * see REPL_DISKLESS_LOAD_* enum */ int repl_diskless_sync_delay; /* Delay to start a diskless repl BGSAVE. */ /* Replication (slave) */ char *masteruser; /* AUTH with this user and masterauth with master */ @@ -1739,7 +1751,8 @@ void replicationCacheMasterUsingMyself(void); void feedReplicationBacklog(void *ptr, size_t len); /* Generic persistence functions */ -void startLoading(FILE *fp); +void startLoadingFile(FILE* fp, char* filename); +void startLoading(size_t size); void loadingProgress(off_t pos); void stopLoading(void); @@ -1996,6 +2009,8 @@ robj *dbUnshareStringValue(redisDb *db, robj *key, robj *o); #define EMPTYDB_NO_FLAGS 0 /* No flags. */ #define EMPTYDB_ASYNC (1<<0) /* Reclaim memory in another thread. 
*/ long long emptyDb(int dbnum, int flags, void(callback)(void*)); +long long emptyDbGeneric(redisDb *dbarray, int dbnum, int flags, void(callback)(void*)); +long long dbTotalServerKeyCount(); int selectDb(client *c, int id); void signalModifiedKey(redisDb *db, robj *key); diff --git a/tests/integration/replication-4.tcl b/tests/integration/replication-4.tcl index 3c6df52a8..54891151b 100644 --- a/tests/integration/replication-4.tcl +++ b/tests/integration/replication-4.tcl @@ -1,12 +1,3 @@ -proc start_bg_complex_data {host port db ops} { - set tclsh [info nameofexecutable] - exec $tclsh tests/helpers/bg_complex_data.tcl $host $port $db $ops & -} - -proc stop_bg_complex_data {handle} { - catch {exec /bin/kill -9 $handle} -} - start_server {tags {"repl"}} { start_server {} { diff --git a/tests/integration/replication-psync.tcl b/tests/integration/replication-psync.tcl index bf8682446..3c98723af 100644 --- a/tests/integration/replication-psync.tcl +++ b/tests/integration/replication-psync.tcl @@ -1,12 +1,3 @@ -proc start_bg_complex_data {host port db ops} { - set tclsh [info nameofexecutable] - exec $tclsh tests/helpers/bg_complex_data.tcl $host $port $db $ops & -} - -proc stop_bg_complex_data {handle} { - catch {exec /bin/kill -9 $handle} -} - # Creates a master-slave pair and breaks the link continuously to force # partial resyncs attempts, all this while flooding the master with # write queries. @@ -17,7 +8,7 @@ proc stop_bg_complex_data {handle} { # If reconnect is > 0, the test actually try to break the connection and # reconnect with the master, otherwise just the initial synchronization is # checked for consistency. -proc test_psync {descr duration backlog_size backlog_ttl delay cond diskless reconnect} { +proc test_psync {descr duration backlog_size backlog_ttl delay cond mdl sdl reconnect} { start_server {tags {"repl"}} { start_server {} { @@ -28,8 +19,9 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond diskless rec $master config set repl-backlog-size $backlog_size $master config set repl-backlog-ttl $backlog_ttl - $master config set repl-diskless-sync $diskless + $master config set repl-diskless-sync $mdl $master config set repl-diskless-sync-delay 1 + $slave config set repl-diskless-load $sdl set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000] set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000] @@ -54,7 +46,7 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond diskless rec } } - test "Test replication partial resync: $descr (diskless: $diskless, reconnect: $reconnect)" { + test "Test replication partial resync: $descr (diskless: $mdl, $sdl, reconnect: $reconnect)" { # Now while the clients are writing data, break the maste-slave # link multiple times. 
if ($reconnect) { @@ -132,23 +124,25 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond diskless rec } } -foreach diskless {no yes} { - test_psync {no reconnection, just sync} 6 1000000 3600 0 { - } $diskless 0 +foreach mdl {no yes} { + foreach sdl {disabled swapdb} { + test_psync {no reconnection, just sync} 6 1000000 3600 0 { + } $mdl $sdl 0 - test_psync {ok psync} 6 100000000 3600 0 { + test_psync {ok psync} 6 100000000 3600 0 { assert {[s -1 sync_partial_ok] > 0} - } $diskless 1 + } $mdl $sdl 1 - test_psync {no backlog} 6 100 3600 0.5 { + test_psync {no backlog} 6 100 3600 0.5 { assert {[s -1 sync_partial_err] > 0} - } $diskless 1 + } $mdl $sdl 1 - test_psync {ok after delay} 3 100000000 3600 3 { + test_psync {ok after delay} 3 100000000 3600 3 { assert {[s -1 sync_partial_ok] > 0} - } $diskless 1 + } $mdl $sdl 1 - test_psync {backlog expired} 3 100000000 1 3 { + test_psync {backlog expired} 3 100000000 1 3 { assert {[s -1 sync_partial_err] > 0} - } $diskless 1 + } $mdl $sdl 1 + } } diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl index 0e50c20a9..d69a1761a 100644 --- a/tests/integration/replication.tcl +++ b/tests/integration/replication.tcl @@ -183,85 +183,92 @@ start_server {tags {"repl"}} { } } -foreach dl {no yes} { - start_server {tags {"repl"}} { - set master [srv 0 client] - $master config set repl-diskless-sync $dl - set master_host [srv 0 host] - set master_port [srv 0 port] - set slaves {} - set load_handle0 [start_write_load $master_host $master_port 3] - set load_handle1 [start_write_load $master_host $master_port 5] - set load_handle2 [start_write_load $master_host $master_port 20] - set load_handle3 [start_write_load $master_host $master_port 8] - set load_handle4 [start_write_load $master_host $master_port 4] - start_server {} { - lappend slaves [srv 0 client] +foreach mdl {no yes} { + foreach sdl {disabled swapdb} { + start_server {tags {"repl"}} { + set master [srv 0 client] + $master config set repl-diskless-sync $mdl + $master config set repl-diskless-sync-delay 1 + set master_host [srv 0 host] + set master_port [srv 0 port] + set slaves {} + set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000000] + set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000000] + set load_handle2 [start_bg_complex_data $master_host $master_port 12 100000000] + set load_handle3 [start_write_load $master_host $master_port 8] + set load_handle4 [start_write_load $master_host $master_port 4] + after 5000 ;# wait for some data to accumulate so that we have RDB part for the fork start_server {} { lappend slaves [srv 0 client] start_server {} { lappend slaves [srv 0 client] - test "Connect multiple replicas at the same time (issue #141), diskless=$dl" { - # Send SLAVEOF commands to slaves - [lindex $slaves 0] slaveof $master_host $master_port - [lindex $slaves 1] slaveof $master_host $master_port - [lindex $slaves 2] slaveof $master_host $master_port + start_server {} { + lappend slaves [srv 0 client] + test "Connect multiple replicas at the same time (issue #141), master diskless=$mdl, replica diskless=$sdl" { + # Send SLAVEOF commands to slaves + [lindex $slaves 0] config set repl-diskless-load $sdl + [lindex $slaves 1] config set repl-diskless-load $sdl + [lindex $slaves 2] config set repl-diskless-load $sdl + [lindex $slaves 0] slaveof $master_host $master_port + [lindex $slaves 1] slaveof $master_host $master_port + [lindex $slaves 2] slaveof $master_host $master_port - # Wait for all the 
three slaves to reach the "online" - # state from the POV of the master. - set retry 500 - while {$retry} { - set info [r -3 info] - if {[string match {*slave0:*state=online*slave1:*state=online*slave2:*state=online*} $info]} { - break - } else { - incr retry -1 - after 100 + # Wait for all the three slaves to reach the "online" + # state from the POV of the master. + set retry 500 + while {$retry} { + set info [r -3 info] + if {[string match {*slave0:*state=online*slave1:*state=online*slave2:*state=online*} $info]} { + break + } else { + incr retry -1 + after 100 + } + } + if {$retry == 0} { + error "assertion:Slaves not correctly synchronized" } - } - if {$retry == 0} { - error "assertion:Replicas not correctly synchronized" - } - # Wait that slaves acknowledge they are online so - # we are sure that DBSIZE and DEBUG DIGEST will not - # fail because of timing issues. - wait_for_condition 500 100 { - [lindex [[lindex $slaves 0] role] 3] eq {connected} && - [lindex [[lindex $slaves 1] role] 3] eq {connected} && - [lindex [[lindex $slaves 2] role] 3] eq {connected} - } else { - fail "Replicas still not connected after some time" + # Wait that slaves acknowledge they are online so + # we are sure that DBSIZE and DEBUG DIGEST will not + # fail because of timing issues. + wait_for_condition 500 100 { + [lindex [[lindex $slaves 0] role] 3] eq {connected} && + [lindex [[lindex $slaves 1] role] 3] eq {connected} && + [lindex [[lindex $slaves 2] role] 3] eq {connected} + } else { + fail "Slaves still not connected after some time" + } + + # Stop the write load + stop_bg_complex_data $load_handle0 + stop_bg_complex_data $load_handle1 + stop_bg_complex_data $load_handle2 + stop_write_load $load_handle3 + stop_write_load $load_handle4 + + # Make sure that slaves and master have same + # number of keys + wait_for_condition 500 100 { + [$master dbsize] == [[lindex $slaves 0] dbsize] && + [$master dbsize] == [[lindex $slaves 1] dbsize] && + [$master dbsize] == [[lindex $slaves 2] dbsize] + } else { + fail "Different number of keys between master and replica after too long time." + } + + # Check digests + set digest [$master debug digest] + set digest0 [[lindex $slaves 0] debug digest] + set digest1 [[lindex $slaves 1] debug digest] + set digest2 [[lindex $slaves 2] debug digest] + assert {$digest ne 0000000000000000000000000000000000000000} + assert {$digest eq $digest0} + assert {$digest eq $digest1} + assert {$digest eq $digest2} } - - # Stop the write load - stop_write_load $load_handle0 - stop_write_load $load_handle1 - stop_write_load $load_handle2 - stop_write_load $load_handle3 - stop_write_load $load_handle4 - - # Make sure that slaves and master have same - # number of keys - wait_for_condition 500 100 { - [$master dbsize] == [[lindex $slaves 0] dbsize] && - [$master dbsize] == [[lindex $slaves 1] dbsize] && - [$master dbsize] == [[lindex $slaves 2] dbsize] - } else { - fail "Different number of keys between masted and replica after too long time." 
- } - - # Check digests - set digest [$master debug digest] - set digest0 [[lindex $slaves 0] debug digest] - set digest1 [[lindex $slaves 1] debug digest] - set digest2 [[lindex $slaves 2] debug digest] - assert {$digest ne 0000000000000000000000000000000000000000} - assert {$digest eq $digest0} - assert {$digest eq $digest1} - assert {$digest eq $digest2} - } - } + } + } } } } @@ -309,3 +316,70 @@ start_server {tags {"repl"}} { } } } + +test {slave fails full sync and diskless load swapdb recoveres it} { + start_server {tags {"repl"}} { + set slave [srv 0 client] + set slave_host [srv 0 host] + set slave_port [srv 0 port] + set slave_log [srv 0 stdout] + start_server {} { + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + + # Put different data sets on the master and slave + # we need to put large keys on the master since the slave replies to info only once in 2mb + $slave debug populate 2000 slave 10 + $master debug populate 200 master 100000 + $master config set rdbcompression no + + # Set master and slave to use diskless replication + $master config set repl-diskless-sync yes + $master config set repl-diskless-sync-delay 0 + $slave config set repl-diskless-load swapdb + + # Set master with a slow rdb generation, so that we can easily disconnect it mid sync + # 10ms per key, with 200 keys is 2 seconds + $master config set rdb-key-save-delay 10000 + + # Start the replication process... + $slave slaveof $master_host $master_port + + # wait for the slave to start reading the rdb + wait_for_condition 50 100 { + [s -1 loading] eq 1 + } else { + fail "Replica didn't get into loading mode" + } + + # make sure that next sync will not start immediately so that we can catch the slave in betweeen syncs + $master config set repl-diskless-sync-delay 5 + # for faster server shutdown, make rdb saving fast again (the fork is already uses the slow one) + $master config set rdb-key-save-delay 0 + + # waiting slave to do flushdb (key count drop) + wait_for_condition 50 100 { + 2000 != [scan [regexp -inline {keys\=([\d]*)} [$slave info keyspace]] keys=%d] + } else { + fail "Replica didn't flush" + } + + # make sure we're still loading + assert_equal [s -1 loading] 1 + + # kill the slave connection on the master + set killed [$master client kill type slave] + + # wait for loading to stop (fail) + wait_for_condition 50 100 { + [s -1 loading] eq 0 + } else { + fail "Replica didn't disconnect" + } + + # make sure the original keys were restored + assert_equal [$slave dbsize] 2000 + } + } +} diff --git a/tests/support/util.tcl b/tests/support/util.tcl index 74f491e48..41cc5612a 100644 --- a/tests/support/util.tcl +++ b/tests/support/util.tcl @@ -399,3 +399,15 @@ proc lshuffle {list} { } return $slist } + +# Execute a background process writing complex data for the specified number +# of ops to the specified Redis instance. +proc start_bg_complex_data {host port db ops} { + set tclsh [info nameofexecutable] + exec $tclsh tests/helpers/bg_complex_data.tcl $host $port $db $ops & +} + +# Stop a process generating write load executed with start_bg_complex_data. +proc stop_bg_complex_data {handle} { + catch {exec /bin/kill -9 $handle} +} From 6e9ffa93ebdb8da02401c72dde0c5dd0e3cdb48d Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 8 Jul 2019 18:32:47 +0200 Subject: [PATCH 119/304] Diskless replica: a few aesthetic changes to replication.c. 
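
For context on the code paths being tidied up here: the replica side of diskless
replication is driven by the master's repl-diskless-sync setting together with the
new repl-diskless-load directive on the replica (swapdb keeps a backup of the
current databases until the socket transfer succeeds). A minimal sketch of such a
sync, reusing only helpers that already appear in the test suite above
(start_server, srv, wait_for_condition), could look as follows; it is an
illustration, not part of the committed tests.

    start_server {tags {"repl"}} {
        set replica [srv 0 client]
        start_server {} {
            set master [srv 0 client]
            set master_host [srv 0 host]
            set master_port [srv 0 port]

            # Master streams the RDB straight into the replication socket;
            # the replica parses it from the socket while keeping its old
            # databases aside in case the transfer fails.
            $master config set repl-diskless-sync yes
            $master config set repl-diskless-sync-delay 0
            $replica config set repl-diskless-load swapdb

            $replica slaveof $master_host $master_port
            wait_for_condition 50 100 {
                [lindex [$replica role] 3] eq {connected}
            } else {
                fail "Replica did not reach the connected state"
            }
        }
    }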
--- src/replication.c | 126 ++++++++++++++++++++++++++++++++-------------- src/rio.c | 12 +++-- 2 files changed, 96 insertions(+), 42 deletions(-) diff --git a/src/replication.c b/src/replication.c index e2bac08bd..a7c1c0d6a 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1127,7 +1127,8 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { ssize_t nread, readlen, nwritten; int use_diskless_load; redisDb *diskless_load_backup = NULL; - int empty_db_flags = server.repl_slave_lazy_flush ? EMPTYDB_ASYNC : EMPTYDB_NO_FLAGS; + int empty_db_flags = server.repl_slave_lazy_flush ? EMPTYDB_ASYNC : + EMPTYDB_NO_FLAGS; int i; off_t left; UNUSED(el); @@ -1199,8 +1200,8 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { use_diskless_load = useDisklessLoad(); if (!use_diskless_load) { - - /* read the data from the socket, store it to a file and search for the EOF */ + /* Read the data from the socket, store it to a file and search + * for the EOF. */ if (usemark) { readlen = sizeof(buf); } else { @@ -1222,20 +1223,28 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { int eof_reached = 0; if (usemark) { - /* Update the last bytes array, and check if it matches our delimiter.*/ + /* Update the last bytes array, and check if it matches our + * delimiter. */ if (nread >= CONFIG_RUN_ID_SIZE) { - memcpy(lastbytes,buf+nread-CONFIG_RUN_ID_SIZE,CONFIG_RUN_ID_SIZE); + memcpy(lastbytes,buf+nread-CONFIG_RUN_ID_SIZE, + CONFIG_RUN_ID_SIZE); } else { int rem = CONFIG_RUN_ID_SIZE-nread; memmove(lastbytes,lastbytes+nread,rem); memcpy(lastbytes+rem,buf,nread); } - if (memcmp(lastbytes,eofmark,CONFIG_RUN_ID_SIZE) == 0) eof_reached = 1; + if (memcmp(lastbytes,eofmark,CONFIG_RUN_ID_SIZE) == 0) + eof_reached = 1; } + /* Update the last I/O time for the replication transfer (used in + * order to detect timeouts during replication), and write what we + * got from the socket to the dump file on disk. */ server.repl_transfer_lastio = server.unixtime; if ((nwritten = write(server.repl_transfer_fd,buf,nread)) != nread) { - serverLog(LL_WARNING,"Write error or short write writing to the DB dump file needed for MASTER <-> REPLICA synchronization: %s", + serverLog(LL_WARNING, + "Write error or short write writing to the DB dump file " + "needed for MASTER <-> REPLICA synchronization: %s", (nwritten == -1) ? strerror(errno) : "short write"); goto error; } @@ -1246,14 +1255,16 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { if (ftruncate(server.repl_transfer_fd, server.repl_transfer_read - CONFIG_RUN_ID_SIZE) == -1) { - serverLog(LL_WARNING,"Error truncating the RDB file received from the master for SYNC: %s", strerror(errno)); + serverLog(LL_WARNING, + "Error truncating the RDB file received from the master " + "for SYNC: %s", strerror(errno)); goto error; } } - /* Sync data on disk from time to time, otherwise at the end of the transfer - * we may suffer a big delay as the memory buffers are copied into the - * actual disk. */ + /* Sync data on disk from time to time, otherwise at the end of the + * transfer we may suffer a big delay as the memory buffers are copied + * into the actual disk. 
*/ if (server.repl_transfer_read >= server.repl_transfer_last_fsync_off + REPL_MAX_WRITTEN_BEFORE_FSYNC) { @@ -1269,19 +1280,34 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { if (server.repl_transfer_read == server.repl_transfer_size) eof_reached = 1; } - if (!eof_reached) - return; + + /* If the transfer is yet not complete, we need to read more, so + * return ASAP and wait for the handler to be called again. */ + if (!eof_reached) return; } - /* We reach here when the slave is using diskless replication, - * or when we are done reading from the socket to the rdb file. */ + /* We reach this point in one of the following cases: + * + * 1. The replica is using diskless replication, that is, it reads data + * directly from the socket to the Redis memory, without using + * a temporary RDB file on disk. In that case we just block and + * read everything from the socket. + * + * 2. Or when we are done reading from the socket to the RDB file, in + * such case we want just to read the RDB file in memory. */ serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Flushing old data"); - /* We need to stop any AOFRW fork before flusing and parsing - * RDB, otherwise we'll create a copy-on-write disaster. */ + + /* We need to stop any AOF rewriting child before flusing and parsing + * the RDB, otherwise we'll create a copy-on-write disaster. */ if (server.aof_state != AOF_OFF) stopAppendOnly(); signalFlushedDb(-1); - if (use_diskless_load && server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB) { - /* create a backup of the current db */ + + /* When diskless RDB loading is used by replicas, it may be configured + * in order to save the current DB instead of throwing it away, + * so that we can restore it in case of failed transfer. */ + if (use_diskless_load && + server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB) + { diskless_load_backup = zmalloc(sizeof(redisDb)*server.dbnum); for (i=0; i REPLICA synchronization: %s", + serverLog(LL_WARNING, + "Failed trying to rename the temp DB into %s in " + "MASTER <-> REPLICA synchronization: %s", server.rdb_filename, strerror(errno)); cancelReplicationHandshake(); return; } if (rdbLoad(server.rdb_filename,&rsi) != C_OK) { - serverLog(LL_WARNING,"Failed trying to load the MASTER synchronization DB from disk"); + serverLog(LL_WARNING, + "Failed trying to load the MASTER synchronization " + "DB from disk"); cancelReplicationHandshake(); /* Note that there's no point in restarting the AOF on sync failure, - it'll be restarted when sync succeeds or slave promoted. */ + it'll be restarted when sync succeeds or replica promoted. */ return; } + + /* Cleanup. */ zfree(server.repl_transfer_tmpfile); close(server.repl_transfer_fd); server.repl_transfer_fd = -1; server.repl_transfer_tmpfile = NULL; } + /* Final setup of the connected slave <- master link */ replicationCreateMasterClient(server.repl_transfer_s,rsi.repl_stream_db); server.repl_state = REPL_STATE_CONNECTED; server.repl_down_since = 0; + /* After a full resynchroniziation we use the replication ID and * offset of the master. The secondary ID / offset are cleared since * we are starting a new history. */ memcpy(server.replid,server.master->replid,sizeof(server.replid)); server.master_repl_offset = server.master->reploff; clearReplicationId2(); + /* Let's create the replication backlog if needed. Slaves need to * accumulate the backlog regardless of the fact they have sub-slaves * or not, in order to behave correctly if they are promoted to * masters after a failover. 
*/ if (server.repl_backlog == NULL) createReplicationBacklog(); - serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Finished with success"); + /* Restart the AOF subsystem now that we finished the sync. This * will trigger an AOF rewrite, and when done will start appending * to the new file. */ diff --git a/src/rio.c b/src/rio.c index 993768b56..9327c17a8 100644 --- a/src/rio.c +++ b/src/rio.c @@ -173,13 +173,13 @@ static size_t rioFdRead(rio *r, void *buf, size_t len) { /* if the buffer is too small for the entire request: realloc */ if (sdslen(r->io.fd.buf) + sdsavail(r->io.fd.buf) < len) r->io.fd.buf = sdsMakeRoomFor(r->io.fd.buf, len - sdslen(r->io.fd.buf)); - + /* if the remaining unused buffer is not large enough: memmove so that we can read the rest */ if (len > avail && sdsavail(r->io.fd.buf) < len - avail) { sdsrange(r->io.fd.buf, r->io.fd.pos, -1); r->io.fd.pos = 0; } - + /* if we don't already have all the data in the sds, read more */ while (len > sdslen(r->io.fd.buf) - r->io.fd.pos) { size_t buffered = sdslen(r->io.fd.buf) - r->io.fd.pos; @@ -251,8 +251,10 @@ void rioInitWithFd(rio *r, int fd, size_t read_limit) { /* release the rio stream. * optionally returns the unread buffered data. */ -void rioFreeFd(rio *r, sds* out_remainingBufferedData) { - if(out_remainingBufferedData && (size_t)r->io.fd.pos < sdslen(r->io.fd.buf)) { +void rioFreeFd(rio *r, sds *out_remainingBufferedData) { + if (out_remainingBufferedData && + (size_t)r->io.fd.pos < sdslen(r->io.fd.buf)) + { if (r->io.fd.pos > 0) sdsrange(r->io.fd.buf, r->io.fd.pos, -1); *out_remainingBufferedData = r->io.fd.buf; @@ -264,7 +266,7 @@ void rioFreeFd(rio *r, sds* out_remainingBufferedData) { r->io.fd.buf = NULL; } -/* ------------------- File descriptors set implementation ------------------- */ +/* ------------------- File descriptors set implementation ------------------ */ /* Returns 1 or 0 for success/failure. * The function returns success as long as we are able to correctly write From 65e463d7a15fd0e7bddc357323a99b55f7b786fc Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 8 Jul 2019 18:39:59 +0200 Subject: [PATCH 120/304] Diskless replica: a few aesthetic changes to rio.c --- src/rio.c | 57 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/src/rio.c b/src/rio.c index 9327c17a8..1a53992a2 100644 --- a/src/rio.c +++ b/src/rio.c @@ -157,7 +157,11 @@ void rioInitWithFile(rio *r, FILE *fp) { r->io.file.autosync = 0; } -/* ------------------- File descriptor implementation ------------------- */ +/* ------------------- File descriptor implementation ------------------- + * We use this RIO implemetnation when reading an RDB file directly from + * the socket to the memory via rdbLoadRio(), thus this implementation + * only implements reading from a file descriptor that is, normally, + * just a socket. */ static size_t rioFdWrite(rio *r, const void *buf, size_t len) { UNUSED(r); @@ -170,27 +174,28 @@ static size_t rioFdWrite(rio *r, const void *buf, size_t len) { static size_t rioFdRead(rio *r, void *buf, size_t len) { size_t avail = sdslen(r->io.fd.buf)-r->io.fd.pos; - /* if the buffer is too small for the entire request: realloc */ + /* If the buffer is too small for the entire request: realloc. 
*/ if (sdslen(r->io.fd.buf) + sdsavail(r->io.fd.buf) < len) r->io.fd.buf = sdsMakeRoomFor(r->io.fd.buf, len - sdslen(r->io.fd.buf)); - /* if the remaining unused buffer is not large enough: memmove so that we can read the rest */ + /* If the remaining unused buffer is not large enough: memmove so that we + * can read the rest. */ if (len > avail && sdsavail(r->io.fd.buf) < len - avail) { sdsrange(r->io.fd.buf, r->io.fd.pos, -1); r->io.fd.pos = 0; } - /* if we don't already have all the data in the sds, read more */ + /* If we don't already have all the data in the sds, read more */ while (len > sdslen(r->io.fd.buf) - r->io.fd.pos) { size_t buffered = sdslen(r->io.fd.buf) - r->io.fd.pos; size_t toread = len - buffered; - /* read either what's missing, or PROTO_IOBUF_LEN, the bigger of the two */ - if (toread < PROTO_IOBUF_LEN) - toread = PROTO_IOBUF_LEN; - if (toread > sdsavail(r->io.fd.buf)) - toread = sdsavail(r->io.fd.buf); + /* Read either what's missing, or PROTO_IOBUF_LEN, the bigger of + * the two. */ + if (toread < PROTO_IOBUF_LEN) toread = PROTO_IOBUF_LEN; + if (toread > sdsavail(r->io.fd.buf)) toread = sdsavail(r->io.fd.buf); if (r->io.fd.read_limit != 0 && - r->io.fd.read_so_far + buffered + toread > r->io.fd.read_limit) { + r->io.fd.read_so_far + buffered + toread > r->io.fd.read_limit) + { if (r->io.fd.read_limit >= r->io.fd.read_so_far - buffered) toread = r->io.fd.read_limit - r->io.fd.read_so_far - buffered; else { @@ -198,7 +203,9 @@ static size_t rioFdRead(rio *r, void *buf, size_t len) { return 0; } } - int retval = read(r->io.fd.fd, (char*)r->io.fd.buf + sdslen(r->io.fd.buf), toread); + int retval = read(r->io.fd.fd, + (char*)r->io.fd.buf + sdslen(r->io.fd.buf), + toread); if (retval <= 0) { if (errno == EWOULDBLOCK) errno = ETIMEDOUT; return 0; @@ -237,8 +244,8 @@ static const rio rioFdIO = { { { NULL, 0 } } /* union for io-specific vars */ }; -/* create an rio that implements a buffered read from an fd - * read_limit argument stops buffering when the reaching the limit */ +/* Create an RIO that implements a buffered read from an fd + * read_limit argument stops buffering when the reaching the limit. */ void rioInitWithFd(rio *r, int fd, size_t read_limit) { *r = rioFdIO; r->io.fd.fd = fd; @@ -249,24 +256,24 @@ void rioInitWithFd(rio *r, int fd, size_t read_limit) { sdsclear(r->io.fd.buf); } -/* release the rio stream. - * optionally returns the unread buffered data. */ -void rioFreeFd(rio *r, sds *out_remainingBufferedData) { - if (out_remainingBufferedData && - (size_t)r->io.fd.pos < sdslen(r->io.fd.buf)) - { - if (r->io.fd.pos > 0) - sdsrange(r->io.fd.buf, r->io.fd.pos, -1); - *out_remainingBufferedData = r->io.fd.buf; +/* Release the RIO tream. Optionally returns the unread buffered data + * when the SDS pointer 'remaining' is passed. 
*/ +void rioFreeFd(rio *r, sds *remaining) { + if (remaining && (size_t)r->io.fd.pos < sdslen(r->io.fd.buf)) { + if (r->io.fd.pos > 0) sdsrange(r->io.fd.buf, r->io.fd.pos, -1); + *remaining = r->io.fd.buf; } else { sdsfree(r->io.fd.buf); - if (out_remainingBufferedData) - *out_remainingBufferedData = NULL; + if (out_remainingBufferedData) *remaining = NULL; } r->io.fd.buf = NULL; } -/* ------------------- File descriptors set implementation ------------------ */ +/* ------------------- File descriptors set implementation ------------------ + * This target is used to write the RDB file to N different replicas via + * sockets, when the master just streams the data to the replicas without + * creating an RDB on-disk image (diskless replication option). + * It only implements writes. */ /* Returns 1 or 0 for success/failure. * The function returns success as long as we are able to correctly write From 23063bf23e3206bc947d7048b513a453a8277c6f Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 10 Jul 2019 09:34:21 +0200 Subject: [PATCH 121/304] Diskless replica: fix mispelled var name. --- src/rio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rio.c b/src/rio.c index 1a53992a2..5359bc3d6 100644 --- a/src/rio.c +++ b/src/rio.c @@ -264,7 +264,7 @@ void rioFreeFd(rio *r, sds *remaining) { *remaining = r->io.fd.buf; } else { sdsfree(r->io.fd.buf); - if (out_remainingBufferedData) *remaining = NULL; + if (remaining) *remaining = NULL; } r->io.fd.buf = NULL; } From 26c4e8a9656a9aac1ed502f02334b627e59f1a4b Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 10 Jul 2019 11:42:26 +0200 Subject: [PATCH 122/304] Diskless replica: refactoring of DBs backups. --- src/replication.c | 69 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 22 deletions(-) diff --git a/src/replication.c b/src/replication.c index a7c1c0d6a..a89552a8d 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1119,6 +1119,49 @@ static int useDisklessLoad() { (server.repl_diskless_load == REPL_DISKLESS_LOAD_WHEN_DB_EMPTY && dbTotalServerKeyCount()==0); } +/* Helper function for readSyncBulkPayload() to make backups of the current + * DBs before socket-loading the new ones. The backups may be restored later + * or freed by disklessLoadRestoreBackups(). */ +redisDb *disklessLoadMakeBackups(void) { + redisDb *backups = zmalloc(sizeof(redisDb)*server.dbnum); + for (int i=0; i Date: Wed, 10 Jul 2019 12:36:14 +0200 Subject: [PATCH 123/304] Diskless replica: fix disklessLoadRestoreBackups() bug. --- src/replication.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/replication.c b/src/replication.c index a89552a8d..26e7cf8f0 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1138,16 +1138,16 @@ redisDb *disklessLoadMakeBackups(void) { * * If the socket loading went wrong, we want to restore the old backups * into the server databases. This function does just that in the case - * the 'count' argument (the number of DBs to replace) is non-zero. + * the 'restore' argument (the number of DBs to replace) is non-zero. * * When instead the loading succeeded we want just to free our old backups, - * in that case the funciton will do just that when 'count' is 0. */ -void disklessLoadRestoreBackups(redisDb *backup, int count, int empty_db_flags) + * in that case the funciton will do just that when 'restore' is 0. */ +void disklessLoadRestoreBackups(redisDb *backup, int restore, int empty_db_flags) { - if (count) { + if (restore) { /* Restore. 
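          * Flush whatever was partially loaded from the socket, then move the
          * backed up databases back into place.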
*/ emptyDbGeneric(server.db,-1,empty_db_flags,replicationEmptyDbCallback); - for (int i=0; i Date: Wed, 10 Jul 2019 18:08:31 +0200 Subject: [PATCH 124/304] Client side caching: add tracking clients in INFO. --- src/server.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/server.c b/src/server.c index 8ed5b591c..4337b8f01 100644 --- a/src/server.c +++ b/src/server.c @@ -3895,10 +3895,12 @@ sds genRedisInfoString(char *section) { "connected_clients:%lu\r\n" "client_recent_max_input_buffer:%zu\r\n" "client_recent_max_output_buffer:%zu\r\n" - "blocked_clients:%d\r\n", + "blocked_clients:%d\r\n" + "tracking_clients:%d\r\n", listLength(server.clients)-listLength(server.slaves), maxin, maxout, - server.blocked_clients); + server.blocked_clients, + server.tracking_clients); } /* Memory */ From c3d91f5d8b9e5207f984080507b146435fdd172a Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 10 Jul 2019 18:17:07 +0200 Subject: [PATCH 125/304] Client side caching: implement CLIENT GETREDIR. This subcommand may simplify the writing of Redis client libraries using the tracking feature and/or improve observability and debugging capabilities. --- src/networking.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/src/networking.c b/src/networking.c index 716b35859..1a8e3530a 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1954,20 +1954,21 @@ void clientCommand(client *c) { if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) { const char *help[] = { -"id -- Return the ID of the current connection.", -"getname -- Return the name of the current connection.", -"kill -- Kill connection made from .", -"kill