diff --git a/redis.conf b/redis.conf
index 0ec3321a5..39e21b5e7 100644
--- a/redis.conf
+++ b/redis.conf
@@ -813,11 +813,11 @@ replica-priority 100
 # MAXMEMORY POLICY: how Redis will select what to remove when maxmemory
 # is reached. You can select among five behaviors:
 #
-# volatile-lru -> Evict using approximated LRU among the keys with an expire set.
+# volatile-lru -> Evict using approximated LRU, only keys with an expire set.
 # allkeys-lru -> Evict any key using approximated LRU.
-# volatile-lfu -> Evict using approximated LFU among the keys with an expire set.
+# volatile-lfu -> Evict using approximated LFU, only keys with an expire set.
 # allkeys-lfu -> Evict any key using approximated LFU.
-# volatile-random -> Remove a random key among the ones with an expire set.
+# volatile-random -> Remove a random key having an expire set.
 # allkeys-random -> Remove a random key, any key.
 # volatile-ttl -> Remove the key with the nearest expire time (minor TTL)
 # noeviction -> Don't evict anything, just return an error on write operations.
@@ -872,6 +872,23 @@ replica-priority 100
 #
 # replica-ignore-maxmemory yes
 
+# Redis reclaims expired keys in two ways: upon access when those keys are
+# found to be expired, and also in background, in what is called the
+# "active expire cycle". The key space is slowly and incrementally scanned
+# looking for expired keys to reclaim, so that it is possible to free memory
+# of keys that are expired and will never be accessed again in a short time.
+#
+# The default effort of the expire cycle will try to avoid having more than
+# ten percent of expired keys still in memory, will try to avoid consuming
+# more than 25% of the CPU, and will try not to add latency to the system.
+# However it is possible to increase the expire "effort" that is normally set
+# to "1", to a greater value, up to the value "10". At its maximum value the
+# system will use more CPU, longer cycles (and technically may introduce
+# more latency), and will tolerate fewer already expired keys still present
+# in the system. It's a tradeoff between memory, CPU and latency.
+#
+# active-expire-effort 1
+
 ############################# LAZY FREEING ####################################
 
 # Redis has two primitives to delete keys.
One is called DEL and is a blocking diff --git a/runtest-moduleapi b/runtest-moduleapi index 3eb6b21b2..32d5c2b8d 100755 --- a/runtest-moduleapi +++ b/runtest-moduleapi @@ -23,4 +23,5 @@ $TCLSH tests/test_helper.tcl \ --single unit/moduleapi/misc \ --single unit/moduleapi/blockonkeys \ --single unit/moduleapi/scan \ +--single unit/moduleapi/datatype \ "${@}" diff --git a/src/config.c b/src/config.c index 505dabc9c..de8e8d8cf 100644 --- a/src/config.c +++ b/src/config.c @@ -256,7 +256,7 @@ void loadServerConfigFromString(char *config) { for (configYesNo *config = configs_yesno; config->name != NULL; config++) { if ((!strcasecmp(argv[0],config->name) || (config->alias && !strcasecmp(argv[0],config->alias))) && - (argc == 2)) + (argc == 2)) { if ((*(config->config) = yesnotoi(argv[1])) == -1) { err = "argument must be 'yes' or 'no'"; goto loaderr; @@ -580,6 +580,14 @@ void loadServerConfigFromString(char *config) { err = "active-defrag-max-scan-fields must be positive"; goto loaderr; } + } else if (!strcasecmp(argv[0],"active-expire-effort") && argc == 2) { + server.active_expire_effort = atoi(argv[1]); + if (server.active_expire_effort < 1 || + server.active_expire_effort > 10) + { + err = "active-expire-effort must be between 1 and 10"; + goto loaderr; + } } else if (!strcasecmp(argv[0],"hash-max-ziplist-entries") && argc == 2) { server.hash_max_ziplist_entries = memtoll(argv[1], NULL); } else if (!strcasecmp(argv[0],"hash-max-ziplist-value") && argc == 2) { @@ -1165,6 +1173,8 @@ void configSetCommand(client *c) { "active-defrag-cycle-max",server.active_defrag_cycle_max,1,99) { } config_set_numerical_field( "active-defrag-max-scan-fields",server.active_defrag_max_scan_fields,1,LONG_MAX) { + } config_set_numerical_field( + "active-expire-effort",server.active_expire_effort,1,10) { } config_set_numerical_field( "auto-aof-rewrite-percentage",server.aof_rewrite_perc,0,INT_MAX){ } config_set_numerical_field( @@ -1478,6 +1488,7 @@ void configGetCommand(client *c) { config_get_numerical_field("active-defrag-cycle-min",server.active_defrag_cycle_min); config_get_numerical_field("active-defrag-cycle-max",server.active_defrag_cycle_max); config_get_numerical_field("active-defrag-max-scan-fields",server.active_defrag_max_scan_fields); + config_get_numerical_field("active-expire-effort",server.active_expire_effort); config_get_numerical_field("auto-aof-rewrite-percentage", server.aof_rewrite_perc); config_get_numerical_field("auto-aof-rewrite-min-size", @@ -2327,6 +2338,7 @@ int rewriteConfig(char *path) { rewriteConfigNumericalOption(state,"active-defrag-cycle-min",server.active_defrag_cycle_min,CONFIG_DEFAULT_DEFRAG_CYCLE_MIN); rewriteConfigNumericalOption(state,"active-defrag-cycle-max",server.active_defrag_cycle_max,CONFIG_DEFAULT_DEFRAG_CYCLE_MAX); rewriteConfigNumericalOption(state,"active-defrag-max-scan-fields",server.active_defrag_max_scan_fields,CONFIG_DEFAULT_DEFRAG_MAX_SCAN_FIELDS); + rewriteConfigNumericalOption(state,"active-expire-effort",server.active_expire_effort,CONFIG_DEFAULT_ACTIVE_EXPIRE_EFFORT); rewriteConfigYesNoOption(state,"appendonly",server.aof_enabled,0); rewriteConfigStringOption(state,"appendfilename",server.aof_filename,CONFIG_DEFAULT_AOF_FILENAME); rewriteConfigEnumOption(state,"appendfsync",server.aof_fsync,aof_fsync_enum,CONFIG_DEFAULT_AOF_FSYNC); diff --git a/src/db.c b/src/db.c index 8a7ea98ae..e47a4a681 100644 --- a/src/db.c +++ b/src/db.c @@ -1077,10 +1077,12 @@ int dbSwapDatabases(long id1, long id2) { db1->dict = db2->dict; db1->expires = db2->expires; 
     db1->avg_ttl = db2->avg_ttl;
+    db1->expires_cursor = db2->expires_cursor;
     db2->dict = aux.dict;
     db2->expires = aux.expires;
     db2->avg_ttl = aux.avg_ttl;
+    db2->expires_cursor = aux.expires_cursor;
 
     /* Now we need to handle clients blocked on lists: as an effect
      * of swapping the two DBs, a client that was waiting for list
diff --git a/src/expire.c b/src/expire.c
index 598b27f96..b4ab9ab18 100644
--- a/src/expire.c
+++ b/src/expire.c
@@ -78,24 +78,63 @@ int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) {
  * it will get more aggressive to avoid that too much memory is used by
  * keys that can be removed from the keyspace.
  *
- * No more than CRON_DBS_PER_CALL databases are tested at every
- * iteration.
+ * Every expire cycle tests multiple databases: the next call will start
+ * again from the next db, except when we exited because of the time limit: in
+ * that case we restart again from the last database we were processing. In any
+ * case no more than CRON_DBS_PER_CALL databases are tested at every iteration.
  *
- * This kind of call is used when Redis detects that timelimit_exit is
- * true, so there is more work to do, and we do it more incrementally from
- * the beforeSleep() function of the event loop.
+ * The function can perform more or less work, depending on the "type"
+ * argument. It can execute a "fast cycle" or a "slow cycle". The slow
+ * cycle is the main way we collect expired keys: this happens with
+ * the "server.hz" frequency (usually 10 hertz).
  *
- * Expire cycle type:
+ * However the slow cycle can exit for timeout, once it has used too much time.
+ * For this reason the function is also invoked to perform a fast cycle
+ * at every event loop cycle, in the beforeSleep() function. The fast cycle
+ * will try to perform less work, but will do it much more often.
+ *
+ * The following are the details of the two expire cycles and their stop
+ * conditions:
  *
  * If type is ACTIVE_EXPIRE_CYCLE_FAST the function will try to run a
  * "fast" expire cycle that takes no longer than EXPIRE_FAST_CYCLE_DURATION
  * microseconds, and is not repeated again before the same amount of time.
+ * The cycle will also refuse to run at all if the latest slow cycle did not
+ * terminate because of a time limit condition.
  *
  * If type is ACTIVE_EXPIRE_CYCLE_SLOW, that normal expire cycle is
  * executed, where the time limit is a percentage of the REDIS_HZ period
- * as specified by the ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC define. */
+ * as specified by the ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC define. In the
+ * fast cycle, the check of every database is interrupted once the number
+ * of already expired keys in the database is estimated to be lower than
+ * a given percentage, in order to avoid doing too much work to gain too
+ * little memory.
+ *
+ * The configured expire "effort" will modify the baseline parameters in
+ * order to do more work in both the fast and slow expire cycles.
+ */
+
+#define ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP 20 /* Keys for each DB loop. */
+#define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds. */
+#define ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC 25 /* Max % of CPU to use. */
+#define ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE 10 /* % of stale keys after which
+                                                   we do extra efforts. */
 void activeExpireCycle(int type) {
+    /* Adjust the running parameters according to the configured expire
+     * effort. The default effort is 1, and the maximum configurable effort
+     * is 10. */
+    unsigned long
+    effort = server.active_expire_effort-1, /* Rescale from 0 to 9.
*/ + config_keys_per_loop = ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP + + ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP/4*effort, + config_cycle_fast_duration = ACTIVE_EXPIRE_CYCLE_FAST_DURATION + + ACTIVE_EXPIRE_CYCLE_FAST_DURATION/4*effort, + config_cycle_slow_time_perc = ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC + + 2*effort, + config_cycle_acceptable_stale = ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE- + effort; + /* This function has some global state in order to continue the work * incrementally across calls. */ static unsigned int current_db = 0; /* Last DB tested. */ @@ -113,10 +152,16 @@ void activeExpireCycle(int type) { if (type == ACTIVE_EXPIRE_CYCLE_FAST) { /* Don't start a fast cycle if the previous cycle did not exit - * for time limit. Also don't repeat a fast cycle for the same period + * for time limit, unless the percentage of estimated stale keys is + * too high. Also never repeat a fast cycle for the same period * as the fast cycle total duration itself. */ - if (!timelimit_exit) return; - if (start < last_fast_cycle + ACTIVE_EXPIRE_CYCLE_FAST_DURATION*2) return; + if (!timelimit_exit && + server.stat_expired_stale_perc < config_cycle_acceptable_stale) + return; + + if (start < last_fast_cycle + (long long)config_cycle_fast_duration*2) + return; + last_fast_cycle = start; } @@ -130,16 +175,16 @@ void activeExpireCycle(int type) { if (dbs_per_call > server.dbnum || timelimit_exit) dbs_per_call = server.dbnum; - /* We can use at max ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC percentage of CPU time - * per iteration. Since this function gets called with a frequency of + /* We can use at max 'config_cycle_slow_time_perc' percentage of CPU + * time per iteration. Since this function gets called with a frequency of * server.hz times per second, the following is the max amount of * microseconds we can spend in this function. */ - timelimit = 1000000*ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC/server.hz/100; + timelimit = config_cycle_slow_time_perc*1000000/server.hz/100; timelimit_exit = 0; if (timelimit <= 0) timelimit = 1; if (type == ACTIVE_EXPIRE_CYCLE_FAST) - timelimit = ACTIVE_EXPIRE_CYCLE_FAST_DURATION; /* in microseconds. */ + timelimit = config_cycle_fast_duration; /* in microseconds. */ /* Accumulate some global stats as we expire keys, to have some idea * about the number of keys that are already logically expired, but still @@ -148,7 +193,9 @@ void activeExpireCycle(int type) { long total_expired = 0; for (j = 0; j < dbs_per_call && timelimit_exit == 0; j++) { - int expired; + /* Expired and checked in a single loop. */ + unsigned long expired, sampled; + redisDb *db = server.db+(current_db % server.dbnum); /* Increment the DB now so we are sure if we run out of time @@ -172,8 +219,8 @@ void activeExpireCycle(int type) { slots = dictSlots(db->expires); now = mstime(); - /* When there are less than 1% filled slots getting random - * keys is expensive, so stop here waiting for better times... + /* When there are less than 1% filled slots, sampling the key + * space is expensive, so stop here waiting for better times... * The dictionary will be resized asap. */ if (num && slots > DICT_HT_INITIAL_SIZE && (num*100/slots < 1)) break; @@ -181,27 +228,58 @@ void activeExpireCycle(int type) { /* The main collection cycle. Sample random keys among keys * with an expire set, checking for expired ones. 
             */
             expired = 0;
+            sampled = 0;
             ttl_sum = 0;
             ttl_samples = 0;
 
-            if (num > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP)
-                num = ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP;
+            if (num > config_keys_per_loop)
+                num = config_keys_per_loop;
 
-            while (num--) {
-                dictEntry *de;
-                long long ttl;
+            /* Here we access the low level representation of the hash table
+             * for speed concerns: this makes this code coupled with dict.c,
+             * but it hardly changed in ten years.
+             *
+             * Note that certain places of the hash table may be empty,
+             * so we want also a stop condition about the number of
+             * buckets that we scanned. However scanning for free buckets
+             * is very fast: we are in the cache line scanning a sequential
+             * array of NULL pointers, so we can scan a lot more buckets
+             * than keys in the same time. */
+            long max_buckets = num*20;
+            long checked_buckets = 0;
 
-                if ((de = dictGetRandomKey(db->expires)) == NULL) break;
-                ttl = dictGetSignedIntegerVal(de)-now;
-                if (activeExpireCycleTryExpire(db,de,now)) expired++;
-                if (ttl > 0) {
-                    /* We want the average TTL of keys yet not expired. */
-                    ttl_sum += ttl;
-                    ttl_samples++;
+            while (sampled < num && checked_buckets < max_buckets) {
+                for (int table = 0; table < 2; table++) {
+                    if (table == 1 && !dictIsRehashing(db->expires)) break;
+
+                    unsigned long idx = db->expires_cursor;
+                    idx &= db->expires->ht[table].sizemask;
+                    dictEntry *de = db->expires->ht[table].table[idx];
+                    long long ttl;
+
+                    /* Scan the current bucket of the current table. */
+                    checked_buckets++;
+                    while(de) {
+                        /* Get the next entry now since this entry may get
+                         * deleted. */
+                        dictEntry *e = de;
+                        de = de->next;
+
+                        ttl = dictGetSignedIntegerVal(e)-now;
+                        if (activeExpireCycleTryExpire(db,e,now)) expired++;
+                        if (ttl > 0) {
+                            /* We want the average TTL of keys yet
+                             * not expired. */
+                            ttl_sum += ttl;
+                            ttl_samples++;
+                        }
+                        sampled++;
+                    }
                 }
-                total_sampled++;
+                db->expires_cursor++;
             }
             total_expired += expired;
+            total_sampled += sampled;
 
             /* Update the average TTL stats for this database. */
             if (ttl_samples) {
@@ -225,12 +303,14 @@ void activeExpireCycle(int type) {
                     break;
                 }
             }
-            /* We don't repeat the cycle if there are less than 25% of keys
-             * found expired in the current DB. */
-        } while (expired > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP/4);
+            /* We don't repeat the cycle for the current database if there is
+             * an acceptable amount of stale keys (logically expired but yet
+             * not reclaimed). */
+        } while (sampled == 0 || (expired*100/sampled) > config_cycle_acceptable_stale);
     }
 
     elapsed = ustime()-start;
+    server.stat_expire_cycle_time_used += elapsed;
     latencyAddSampleIfNeeded("expire-cycle",elapsed/1000);
 
     /* Update our estimate of keys existing but yet to be expired.
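For reference, the effort-derived parameters above follow a simple linear scaling. The standalone sketch below is not part of the patch: it only replays that integer arithmetic, plus the per-call time limit that the function later derives from config_cycle_slow_time_perc and server.hz, so the effect of each active-expire-effort setting can be seen at a glance. server.hz is assumed to be the default 10.

#include <stdio.h>

/* Standalone sketch: reproduce the parameter scaling performed at the top of
 * activeExpireCycle() for every active-expire-effort value (1..10), plus the
 * CPU time budget of one slow cycle at server.hz = 10. Mirrors the patch's
 * integer arithmetic; not code from the patch itself. */
int main(void) {
    const unsigned long hz = 10;                /* assumed default server.hz */
    for (unsigned long cfg = 1; cfg <= 10; cfg++) {
        unsigned long effort = cfg - 1;         /* rescaled 0..9, as in the patch */
        unsigned long keys_per_loop = 20 + 20/4*effort;     /* 20 .. 65 keys per DB loop */
        unsigned long fast_duration = 1000 + 1000/4*effort; /* 1000 .. 3250 microseconds */
        unsigned long slow_time_perc = 25 + 2*effort;       /* 25% .. 43% of CPU */
        unsigned long acceptable_stale = 10 - effort;       /* 10% .. 1% stale keys */
        unsigned long slow_timelimit = slow_time_perc*1000000/hz/100; /* usec per slow call */
        printf("effort=%2lu keys/loop=%2lu fast=%4luus slow<=%2lu%% (%luus/call) stale<=%2lu%%\n",
               cfg, keys_per_loop, fast_duration, slow_time_perc,
               slow_timelimit, acceptable_stale);
    }
    return 0;
}

At the default effort this bounds one slow cycle at 25000 microseconds per invocation (roughly 25% of CPU with 10 calls per second), while a fast cycle is always capped by config_cycle_fast_duration instead.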
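The bucket-scanning loop above bounds its work both by the number of sampled keys (num) and by the number of visited buckets (num*20), and resumes from db->expires_cursor on the next call. The toy model below only illustrates those two stop conditions over a plain array standing in for the hash table; it is not dict.c, and the table contents and function names are made up for the example.

#include <stdio.h>

/* Toy illustration of the scan bound used above: starting from a persistent
 * cursor, visit buckets of a power-of-two sized table until either 'num'
 * entries have been sampled or num*20 buckets have been checked. Each array
 * slot is just a count of entries standing in for a dictEntry chain. */
#define TABLE_SIZE 1024UL   /* power of two, like dict hash tables */

static unsigned long scan_from_cursor(const int *bucket_len,
                                      unsigned long *cursor,
                                      unsigned long num) {
    unsigned long sampled = 0, checked_buckets = 0;
    unsigned long max_buckets = num * 20;
    while (sampled < num && checked_buckets < max_buckets) {
        unsigned long idx = *cursor & (TABLE_SIZE - 1); /* the "sizemask" step */
        sampled += bucket_len[idx];  /* stand-in for expiring every chained entry */
        checked_buckets++;
        (*cursor)++;                 /* persists across calls, like expires_cursor */
    }
    return sampled;
}

int main(void) {
    int bucket_len[TABLE_SIZE] = {0};
    bucket_len[3] = 2; bucket_len[40] = 1; bucket_len[41] = 5; /* sparse table */
    unsigned long cursor = 0;
    /* First call stops after 20*20 = 400 buckets with only 8 entries found;
     * the second call continues from bucket 400. */
    printf("sampled %lu entries, cursor now %lu\n",
           scan_from_cursor(bucket_len, &cursor, 20), cursor);
    printf("sampled %lu entries, cursor now %lu\n",
           scan_from_cursor(bucket_len, &cursor, 20), cursor);
    return 0;
}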
diff --git a/src/mkreleasehdr.sh b/src/mkreleasehdr.sh index e6d558b17..236c26c2b 100755 --- a/src/mkreleasehdr.sh +++ b/src/mkreleasehdr.sh @@ -3,7 +3,7 @@ GIT_SHA1=`(git show-ref --head --hash=8 2> /dev/null || echo 00000000) | head -n GIT_DIRTY=`git diff --no-ext-diff 2> /dev/null | wc -l` BUILD_ID=`uname -n`"-"`date +%s` if [ -n "$SOURCE_DATE_EPOCH" ]; then - BUILD_ID=$(date -u -d "@$SOURCE_DATE_EPOCH" +%s 2>/dev/null || date -u -r "$SOURCE_DATE_EPOCH" +%s 2>/dev/null || date -u %s) + BUILD_ID=$(date -u -d "@$SOURCE_DATE_EPOCH" +%s 2>/dev/null || date -u -r "$SOURCE_DATE_EPOCH" +%s 2>/dev/null || date -u +%s) fi test -f release.h || touch release.h (cat release.h | grep SHA1 | grep $GIT_SHA1) && \ diff --git a/src/module.c b/src/module.c index b92a9e692..e1ec81e54 100644 --- a/src/module.c +++ b/src/module.c @@ -1389,7 +1389,7 @@ int RM_ReplyWithString(RedisModuleCtx *ctx, RedisModuleString *str) { int RM_ReplyWithEmptyString(RedisModuleCtx *ctx) { client *c = moduleGetReplyClient(ctx); if (c == NULL) return REDISMODULE_OK; - addReplyBulkCBuffer(c, "", 0); + addReply(c,shared.emptybulk); return REDISMODULE_OK; } @@ -1404,8 +1404,7 @@ int RM_ReplyWithVerbatimString(RedisModuleCtx *ctx, const char *buf, size_t len) return REDISMODULE_OK; } -/* Reply to the client with a NULL. In the RESP protocol a NULL is encoded - * as the string "$-1\r\n". +/* Reply to the client with a NULL. * * The function always returns REDISMODULE_OK. */ int RM_ReplyWithNull(RedisModuleCtx *ctx) { @@ -1749,6 +1748,8 @@ int RM_GetSelectedDb(RedisModuleCtx *ctx) { * * REDISMODULE_CTX_FLAGS_OOM_WARNING: Less than 25% of memory remains before * reaching the maxmemory level. * + * * REDISMODULE_CTX_FLAGS_LOADING: Server is loading RDB/AOF + * * * REDISMODULE_CTX_FLAGS_REPLICA_IS_STALE: No active link with the master. * * * REDISMODULE_CTX_FLAGS_REPLICA_IS_CONNECTING: The replica is trying to @@ -1960,7 +1961,7 @@ int RM_DeleteKey(RedisModuleKey *key) { return REDISMODULE_OK; } -/* If the key is open for writing, unlink it (that is delete it in a +/* If the key is open for writing, unlink it (that is delete it in a * non-blocking way, not reclaiming memory immediately) and setup the key to * accept new writes as an empty key (that will be created on demand). * On success REDISMODULE_OK is returned. If the key is not open for @@ -3894,6 +3895,59 @@ void RM_DigestEndSequence(RedisModuleDigest *md) { memset(md->o,0,sizeof(md->o)); } +/* Decode a serialized representation of a module data type 'mt' from string + * 'str' and return a newly allocated value, or NULL if decoding failed. + * + * This call basically reuses the 'rdb_load' callback which module data types + * implement in order to allow a module to arbitrarily serialize/de-serialize + * keys, similar to how the Redis 'DUMP' and 'RESTORE' commands are implemented. + * + * Modules should generally use the REDISMODULE_OPTIONS_HANDLE_IO_ERRORS flag and + * make sure the de-serialization code properly checks and handles IO errors + * (freeing allocated buffers and returning a NULL). + * + * If this is NOT done, Redis will handle corrupted (or just truncated) serialized + * data by producing an error message and terminating the process. 
+ */ + +void *RM_LoadDataTypeFromString(const RedisModuleString *str, const moduleType *mt) { + rio payload; + RedisModuleIO io; + + rioInitWithBuffer(&payload, str->ptr); + moduleInitIOContext(io,(moduleType *)mt,&payload,NULL); + + /* All RM_Save*() calls always write a version 2 compatible format, so we + * need to make sure we read the same. + */ + io.ver = 2; + return mt->rdb_load(&io,0); +} + +/* Encode a module data type 'mt' value 'data' into serialized form, and return it + * as a newly allocated RedisModuleString. + * + * This call basically reuses the 'rdb_save' callback which module data types + * implement in order to allow a module to arbitrarily serialize/de-serialize + * keys, similar to how the Redis 'DUMP' and 'RESTORE' commands are implemented. + */ + +RedisModuleString *RM_SaveDataTypeToString(RedisModuleCtx *ctx, void *data, const moduleType *mt) { + rio payload; + RedisModuleIO io; + + rioInitWithBuffer(&payload,sdsempty()); + moduleInitIOContext(io,(moduleType *)mt,&payload,NULL); + mt->rdb_save(&io,data); + if (io.error) { + return NULL; + } else { + robj *str = createObject(OBJ_STRING,payload.io.buffer.ptr); + autoMemoryAdd(ctx,REDISMODULE_AM_STRING,str); + return str; + } +} + /* -------------------------------------------------------------------------- * AOF API for modules data types * -------------------------------------------------------------------------- */ @@ -7011,6 +7065,32 @@ int RM_GetLRUOrLFU(RedisModuleKey *key, long long *lfu_freq, long long *lru_idle return REDISMODULE_OK; } +/* Replace the value assigned to a module type. + * + * The key must be open for writing, have an existing value, and have a moduleType + * that matches the one specified by the caller. + * + * Unlike RM_ModuleTypeSetValue() which will free the old value, this function + * simply swaps the old value with the new value. + * + * The function returns the old value, or NULL if any of the above conditions is + * not met. + */ +void *RM_ModuleTypeReplaceValue(RedisModuleKey *key, moduleType *mt, void *new_value) { + if (!(key->mode & REDISMODULE_WRITE) || key->iter) + return NULL; + if (!key->value || key->value->type != OBJ_MODULE) + return NULL; + + moduleValue *mv = key->value->ptr; + if (mv->type != mt) + return NULL; + + void *old_val = mv->value; + mv->value = new_value; + return old_val; +} + /* Register all the APIs we export. Keep this function at the end of the * file so that's easy to seek it to add new entries. */ void moduleRegisterCoreAPI(void) { @@ -7100,6 +7180,7 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(PoolAlloc); REGISTER_API(CreateDataType); REGISTER_API(ModuleTypeSetValue); + REGISTER_API(ModuleTypeReplaceValue); REGISTER_API(ModuleTypeGetType); REGISTER_API(ModuleTypeGetValue); REGISTER_API(IsIOError); @@ -7119,6 +7200,8 @@ void moduleRegisterCoreAPI(void) { REGISTER_API(LoadFloat); REGISTER_API(SaveLongDouble); REGISTER_API(LoadLongDouble); + REGISTER_API(SaveDataTypeToString); + REGISTER_API(LoadDataTypeFromString); REGISTER_API(EmitAOF); REGISTER_API(Log); REGISTER_API(LogIOError); diff --git a/src/rax.c b/src/rax.c index be474b058..29b74ae90 100644 --- a/src/rax.c +++ b/src/rax.c @@ -1673,6 +1673,7 @@ int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len) { * node, but will be our match, representing the key "f". * * So in that case, we don't seek backward. 
*/ + it->data = raxGetData(it->node); } else { if (gt && !raxIteratorNextStep(it,0)) return 0; if (lt && !raxIteratorPrevStep(it,0)) return 0; @@ -1791,7 +1792,7 @@ int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key if (eq && key_len == iter->key_len) return 1; else if (lt) return iter->key_len < key_len; else if (gt) return iter->key_len > key_len; - return 0; + else return 0; /* Avoid warning, just 'eq' is handled before. */ } else if (cmp > 0) { return gt ? 1 : 0; } else /* (cmp < 0) */ { diff --git a/src/redismodule.h b/src/redismodule.h index 07e1452e1..64be5a32b 100644 --- a/src/redismodule.h +++ b/src/redismodule.h @@ -520,6 +520,7 @@ int REDISMODULE_API_FUNC(RedisModule_GetContextFlags)(RedisModuleCtx *ctx); void *REDISMODULE_API_FUNC(RedisModule_PoolAlloc)(RedisModuleCtx *ctx, size_t bytes); RedisModuleType *REDISMODULE_API_FUNC(RedisModule_CreateDataType)(RedisModuleCtx *ctx, const char *name, int encver, RedisModuleTypeMethods *typemethods); int REDISMODULE_API_FUNC(RedisModule_ModuleTypeSetValue)(RedisModuleKey *key, RedisModuleType *mt, void *value); +void *REDISMODULE_API_FUNC(RedisModule_ModuleTypeReplaceValue)(RedisModuleKey *key, RedisModuleType *mt, void *new_value); RedisModuleType *REDISMODULE_API_FUNC(RedisModule_ModuleTypeGetType)(RedisModuleKey *key); void *REDISMODULE_API_FUNC(RedisModule_ModuleTypeGetValue)(RedisModuleKey *key); int REDISMODULE_API_FUNC(RedisModule_IsIOError)(RedisModuleIO *io); @@ -540,6 +541,8 @@ void REDISMODULE_API_FUNC(RedisModule_SaveFloat)(RedisModuleIO *io, float value) float REDISMODULE_API_FUNC(RedisModule_LoadFloat)(RedisModuleIO *io); void REDISMODULE_API_FUNC(RedisModule_SaveLongDouble)(RedisModuleIO *io, long double value); long double REDISMODULE_API_FUNC(RedisModule_LoadLongDouble)(RedisModuleIO *io); +void *REDISMODULE_API_FUNC(RedisModule_LoadDataTypeFromString)(const RedisModuleString *str, const RedisModuleType *mt); +RedisModuleString *REDISMODULE_API_FUNC(RedisModule_SaveDataTypeToString)(RedisModuleCtx *ctx, void *data, const RedisModuleType *mt); void REDISMODULE_API_FUNC(RedisModule_Log)(RedisModuleCtx *ctx, const char *level, const char *fmt, ...); void REDISMODULE_API_FUNC(RedisModule_LogIOError)(RedisModuleIO *io, const char *levelstr, const char *fmt, ...); void REDISMODULE_API_FUNC(RedisModule__Assert)(const char *estr, const char *file, int line); @@ -734,6 +737,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(PoolAlloc); REDISMODULE_GET_API(CreateDataType); REDISMODULE_GET_API(ModuleTypeSetValue); + REDISMODULE_GET_API(ModuleTypeReplaceValue); REDISMODULE_GET_API(ModuleTypeGetType); REDISMODULE_GET_API(ModuleTypeGetValue); REDISMODULE_GET_API(IsIOError); @@ -753,6 +757,8 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int REDISMODULE_GET_API(LoadFloat); REDISMODULE_GET_API(SaveLongDouble); REDISMODULE_GET_API(LoadLongDouble); + REDISMODULE_GET_API(SaveDataTypeToString); + REDISMODULE_GET_API(LoadDataTypeFromString); REDISMODULE_GET_API(EmitAOF); REDISMODULE_GET_API(Log); REDISMODULE_GET_API(LogIOError); diff --git a/src/server.c b/src/server.c index 113d92cbb..c811b869d 100644 --- a/src/server.c +++ b/src/server.c @@ -2294,6 +2294,7 @@ void initServerConfig(void) { server.maxidletime = CONFIG_DEFAULT_CLIENT_TIMEOUT; server.tcpkeepalive = CONFIG_DEFAULT_TCP_KEEPALIVE; server.active_expire_enabled = 1; + server.active_expire_effort = CONFIG_DEFAULT_ACTIVE_EXPIRE_EFFORT; server.jemalloc_bg_thread = 1; 
server.active_defrag_enabled = CONFIG_DEFAULT_ACTIVE_DEFRAG; server.active_defrag_ignore_bytes = CONFIG_DEFAULT_DEFRAG_IGNORE_BYTES; @@ -2745,6 +2746,7 @@ void resetServerStats(void) { server.stat_expiredkeys = 0; server.stat_expired_stale_perc = 0; server.stat_expired_time_cap_reached_count = 0; + server.stat_expire_cycle_time_used = 0; server.stat_evictedkeys = 0; server.stat_keyspace_misses = 0; server.stat_keyspace_hits = 0; @@ -2848,6 +2850,7 @@ void initServer(void) { for (j = 0; j < server.dbnum; j++) { server.db[j].dict = dictCreate(&dbDictType,NULL); server.db[j].expires = dictCreate(&keyptrDictType,NULL); + server.db[j].expires_cursor = 0; server.db[j].blocking_keys = dictCreate(&keylistDictType,NULL); server.db[j].ready_keys = dictCreate(&objectKeyPointerValueDictType,NULL); server.db[j].watched_keys = dictCreate(&keylistDictType,NULL); @@ -4268,6 +4271,7 @@ sds genRedisInfoString(char *section) { "expired_keys:%lld\r\n" "expired_stale_perc:%.2f\r\n" "expired_time_cap_reached_count:%lld\r\n" + "expire_cycle_cpu_milliseconds:%lld\r\n" "evicted_keys:%lld\r\n" "keyspace_hits:%lld\r\n" "keyspace_misses:%lld\r\n" @@ -4295,6 +4299,7 @@ sds genRedisInfoString(char *section) { server.stat_expiredkeys, server.stat_expired_stale_perc*100, server.stat_expired_time_cap_reached_count, + server.stat_expire_cycle_time_used/1000, server.stat_evictedkeys, server.stat_keyspace_hits, server.stat_keyspace_misses, diff --git a/src/server.h b/src/server.h index a2c94a22a..51dce955d 100644 --- a/src/server.h +++ b/src/server.h @@ -179,10 +179,8 @@ typedef long long ustime_t; /* microsecond time type. */ #define CONFIG_DEFAULT_DEFRAG_MAX_SCAN_FIELDS 1000 /* keys with more than 1000 fields will be processed separately */ #define CONFIG_DEFAULT_PROTO_MAX_BULK_LEN (512ll*1024*1024) /* Bulk request max size */ #define CONFIG_DEFAULT_TRACKING_TABLE_MAX_FILL 10 /* 10% tracking table max fill. */ +#define CONFIG_DEFAULT_ACTIVE_EXPIRE_EFFORT 1 /* From 1 to 10. */ -#define ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP 20 /* Loopkups per loop. */ -#define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds */ -#define ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC 25 /* CPU max % for keys collection */ #define ACTIVE_EXPIRE_CYCLE_SLOW 0 #define ACTIVE_EXPIRE_CYCLE_FAST 1 @@ -721,6 +719,7 @@ typedef struct redisDb { dict *watched_keys; /* WATCHED keys for MULTI/EXEC CAS */ int id; /* Database ID */ long long avg_ttl; /* Average TTL, just for stats */ + unsigned long expires_cursor; /* Cursor of the active expire cycle. */ list *defrag_later; /* List of key names to attempt to defrag one by one, gradually. */ } redisDb; @@ -1167,6 +1166,7 @@ struct redisServer { long long stat_expiredkeys; /* Number of expired keys */ double stat_expired_stale_perc; /* Percentage of keys probably expired */ long long stat_expired_time_cap_reached_count; /* Early expire cylce stops.*/ + long long stat_expire_cycle_time_used; /* Cumulative microseconds used. */ long long stat_evictedkeys; /* Number of evicted keys (maxmemory) */ long long stat_keyspace_hits; /* Number of successful lookups of keys */ long long stat_keyspace_misses; /* Number of failed lookups of keys */ @@ -1205,6 +1205,7 @@ struct redisServer { int maxidletime; /* Client timeout in seconds */ int tcpkeepalive; /* Set SO_KEEPALIVE if non-zero. */ int active_expire_enabled; /* Can be disabled for testing purposes. */ + int active_expire_effort; /* From 1 (default) to 10, active effort. 
 */
     int active_defrag_enabled;
     int jemalloc_bg_thread;          /* Enable jemalloc background thread */
     size_t active_defrag_ignore_bytes; /* minimum amount of fragmentation waste to start active defrag */
diff --git a/src/t_stream.c b/src/t_stream.c
index 58b59f521..9bf87831f 100644
--- a/src/t_stream.c
+++ b/src/t_stream.c
@@ -1220,6 +1220,14 @@ void xaddCommand(client *c) {
         return;
     }
 
+    /* Return ASAP if the minimal ID (0-0) was given, so that we avoid possibly
+     * creating a new stream and having streamAppendItem fail, leaving an empty
+     * key in the database. */
+    if (id_given && id.ms == 0 && id.seq == 0) {
+        addReplyError(c,"The ID specified in XADD must be greater than 0-0");
+        return;
+    }
+
     /* Lookup the stream at key. */
     robj *o;
     stream *s;
diff --git a/tests/modules/Makefile b/tests/modules/Makefile
index 07c3cb829..f33f9e80e 100644
--- a/tests/modules/Makefile
+++ b/tests/modules/Makefile
@@ -20,7 +20,8 @@ TEST_MODULES = \
     misc.so \
     hooks.so \
     blockonkeys.so \
-    scan.so
+    scan.so \
+    datatype.so
 
 .PHONY: all
diff --git a/tests/modules/datatype.c b/tests/modules/datatype.c
new file mode 100644
index 000000000..7c39ab457
--- /dev/null
+++ b/tests/modules/datatype.c
@@ -0,0 +1,161 @@
+/* This module currently tests a small subset but should be extended in the future
+ * for general ModuleDataType coverage.
+ */
+
+#include "redismodule.h"
+
+static RedisModuleType *datatype = NULL;
+
+typedef struct {
+    long long intval;
+    RedisModuleString *strval;
+} DataType;
+
+static void *datatype_load(RedisModuleIO *io, int encver) {
+    (void) encver;
+
+    long long intval = RedisModule_LoadSigned(io);
+    if (RedisModule_IsIOError(io)) return NULL;
+
+    RedisModuleString *strval = RedisModule_LoadString(io);
+    if (RedisModule_IsIOError(io)) return NULL;
+
+    DataType *dt = (DataType *) RedisModule_Alloc(sizeof(DataType));
+    dt->intval = intval;
+    dt->strval = strval;
+    return dt;
+}
+
+static void datatype_save(RedisModuleIO *io, void *value) {
+    DataType *dt = (DataType *) value;
+    RedisModule_SaveSigned(io, dt->intval);
+    RedisModule_SaveString(io, dt->strval);
+}
+
+static void datatype_free(void *value) {
+    if (value) {
+        DataType *dt = (DataType *) value;
+
+        if (dt->strval) RedisModule_FreeString(NULL, dt->strval);
+        RedisModule_Free(dt);
+    }
+}
+
+static int datatype_set(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+    if (argc != 4) {
+        RedisModule_WrongArity(ctx);
+        return REDISMODULE_OK;
+    }
+
+    long long intval;
+
+    if (RedisModule_StringToLongLong(argv[2], &intval) != REDISMODULE_OK) {
+        RedisModule_ReplyWithError(ctx, "Invalid integer value");
+        return REDISMODULE_OK;
+    }
+
+    RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE);
+    DataType *dt = RedisModule_Calloc(sizeof(DataType), 1);
+    dt->intval = intval;
+    dt->strval = argv[3];
+    RedisModule_RetainString(ctx, dt->strval);
+
+    RedisModule_ModuleTypeSetValue(key, datatype, dt);
+    RedisModule_CloseKey(key);
+    RedisModule_ReplyWithSimpleString(ctx, "OK");
+
+    return REDISMODULE_OK;
+}
+
+static int datatype_restore(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
+    if (argc != 3) {
+        RedisModule_WrongArity(ctx);
+        return REDISMODULE_OK;
+    }
+
+    DataType *dt = RedisModule_LoadDataTypeFromString(argv[2], datatype);
+    if (!dt) {
+        RedisModule_ReplyWithError(ctx, "Invalid data");
+        return REDISMODULE_OK;
+    }
+
+    RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE);
+    RedisModule_ModuleTypeSetValue(key, datatype, dt);
+    RedisModule_CloseKey(key);
+    RedisModule_ReplyWithSimpleString(ctx, "OK");
+
+    return
REDISMODULE_OK; +} + +static int datatype_get(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + if (argc != 2) { + RedisModule_WrongArity(ctx); + return REDISMODULE_OK; + } + + RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_READ); + DataType *dt = RedisModule_ModuleTypeGetValue(key); + RedisModule_CloseKey(key); + + RedisModule_ReplyWithArray(ctx, 2); + RedisModule_ReplyWithLongLong(ctx, dt->intval); + RedisModule_ReplyWithString(ctx, dt->strval); + return REDISMODULE_OK; +} + +static int datatype_dump(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + if (argc != 2) { + RedisModule_WrongArity(ctx); + return REDISMODULE_OK; + } + + RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_READ); + DataType *dt = RedisModule_ModuleTypeGetValue(key); + RedisModule_CloseKey(key); + + RedisModuleString *reply = RedisModule_SaveDataTypeToString(ctx, dt, datatype); + if (!reply) { + RedisModule_ReplyWithError(ctx, "Failed to save"); + return REDISMODULE_OK; + } + + RedisModule_ReplyWithString(ctx, reply); + RedisModule_FreeString(ctx, reply); + return REDISMODULE_OK; +} + + +int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + + if (RedisModule_Init(ctx,"datatype",1,REDISMODULE_APIVER_1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + + RedisModule_SetModuleOptions(ctx, REDISMODULE_OPTIONS_HANDLE_IO_ERRORS); + + RedisModuleTypeMethods datatype_methods = { + .version = REDISMODULE_TYPE_METHOD_VERSION, + .rdb_load = datatype_load, + .rdb_save = datatype_save, + .free = datatype_free, + }; + + datatype = RedisModule_CreateDataType(ctx, "test___dt", 1, &datatype_methods); + if (datatype == NULL) + return REDISMODULE_ERR; + + if (RedisModule_CreateCommand(ctx,"datatype.set", datatype_set,"deny-oom",1,1,1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + + if (RedisModule_CreateCommand(ctx,"datatype.get", datatype_get,"",1,1,1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + + if (RedisModule_CreateCommand(ctx,"datatype.restore", datatype_restore,"deny-oom",1,1,1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + + if (RedisModule_CreateCommand(ctx,"datatype.dump", datatype_dump,"",1,1,1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + + return REDISMODULE_OK; +} diff --git a/tests/unit/moduleapi/datatype.tcl b/tests/unit/moduleapi/datatype.tcl new file mode 100644 index 000000000..c1da696d3 --- /dev/null +++ b/tests/unit/moduleapi/datatype.tcl @@ -0,0 +1,27 @@ +set testmodule [file normalize tests/modules/datatype.so] + +start_server {tags {"modules"}} { + r module load $testmodule + + test {DataType: Test module is sane, GET/SET work.} { + r datatype.set dtkey 100 stringval + assert {[r datatype.get dtkey] eq {100 stringval}} + } + + test {DataType: RM_SaveDataTypeToString(), RM_LoadDataTypeFromString() work} { + r datatype.set dtkey -1111 MyString + set encoded [r datatype.dump dtkey] + + r datatype.restore dtkeycopy $encoded + assert {[r datatype.get dtkeycopy] eq {-1111 MyString}} + } + + test {DataType: Handle truncated RM_LoadDataTypeFromString()} { + r datatype.set dtkey -1111 MyString + set encoded [r datatype.dump dtkey] + set truncated [string range $encoded 0 end-1] + + catch {r datatype.restore dtkeycopy $truncated} e + set e + } {*Invalid*} +} diff --git a/tests/unit/type/stream.tcl b/tests/unit/type/stream.tcl index aa9c5f3a9..a4431c654 100644 --- a/tests/unit/type/stream.tcl +++ b/tests/unit/type/stream.tcl @@ -123,6 +123,12 @@ start_server { 
assert {[r xlen mystream] == $j} } + test {XADD with ID 0-0} { + r DEL mystream + catch {r XADD mystream 0-0 k v} err + assert {[r EXISTS mystream] == 0} + } + test {XRANGE COUNT works as expected} { assert {[llength [r xrange mystream - + COUNT 10]] == 10} }
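A possible usage pattern for the new RedisModule_ModuleTypeReplaceValue() API, written as if added to the datatype.c test module above. The dt_replace helper is hypothetical and not part of the patch; it only illustrates that on success the previous value is handed back to the caller (which must free it), while on failure the caller still owns the new value.

/* Hypothetical helper for the datatype.c test module: replace the value
 * stored at 'keyname' with 'newval' in place. On success the previous value
 * is returned by RedisModule_ModuleTypeReplaceValue() and freed here; on
 * failure (key not writable, empty, or of another type) nothing is freed and
 * the caller still owns 'newval'. */
static int dt_replace(RedisModuleCtx *ctx, RedisModuleString *keyname,
                      DataType *newval) {
    RedisModuleKey *key = RedisModule_OpenKey(ctx, keyname, REDISMODULE_WRITE);
    void *old = RedisModule_ModuleTypeReplaceValue(key, datatype, newval);
    RedisModule_CloseKey(key);
    if (old == NULL) return 0;  /* conditions not met, newval was not installed */
    datatype_free(old);         /* unlike ModuleTypeSetValue(), the old value is
                                 * handed back instead of being freed for us */
    return 1;
}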
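Similarly, a hypothetical command built on RedisModule_SaveDataTypeToString(): the DATATYPE.BACKUP name and the idea of storing the serialized blob into a plain string key are assumptions made for this sketch only, not part of the patch; the module-type and string-key calls used are existing module APIs.

/* Hypothetical command, DATATYPE.BACKUP <src> <dst>: serialize the module
 * value stored at <src> with the new RedisModule_SaveDataTypeToString() API
 * and store the resulting blob into the plain string key <dst>. */
static int datatype_backup(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
    if (argc != 3) return RedisModule_WrongArity(ctx);

    RedisModuleKey *src = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_READ);
    if (RedisModule_ModuleTypeGetType(src) != datatype) {
        RedisModule_CloseKey(src);
        return RedisModule_ReplyWithError(ctx, REDISMODULE_ERRORMSG_WRONGTYPE);
    }
    DataType *dt = RedisModule_ModuleTypeGetValue(src);
    RedisModule_CloseKey(src);

    RedisModuleString *blob = RedisModule_SaveDataTypeToString(ctx, dt, datatype);
    if (blob == NULL)
        return RedisModule_ReplyWithError(ctx, "ERR failed to serialize value");

    RedisModuleKey *dst = RedisModule_OpenKey(ctx, argv[2], REDISMODULE_WRITE);
    RedisModule_StringSet(dst, blob);   /* the key keeps its own reference */
    RedisModule_CloseKey(dst);
    RedisModule_FreeString(ctx, blob);  /* release our reference, as
                                         * datatype_dump does above */
    return RedisModule_ReplyWithSimpleString(ctx, "OK");
}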