diff --git a/src/dict.c b/src/dict.c
index 328c2dc81..1b7b2138f 100644
--- a/src/dict.c
+++ b/src/dict.c
@@ -78,7 +78,8 @@ typedef struct {

 /* -------------------------- private prototypes ---------------------------- */

-static int _dictExpandIfNeeded(dict *d);
+static void _dictExpandIfNeeded(dict *d);
+static void _dictShrinkIfNeeded(dict *d);
 static signed char _dictNextExp(unsigned long size);
 static int _dictInit(dict *d, dictType *type);
 static dictEntry *dictGetNext(const dictEntry *de);
@@ -208,12 +209,13 @@ int _dictInit(dict *d, dictType *type)
     d->type = type;
     d->rehashidx = -1;
     d->pauserehash = 0;
+    d->pauseAutoResize = 0;
     return DICT_OK;
 }

 /* Resize the table to the minimal size that contains all the elements,
  * but with the invariant of a USED/BUCKETS ratio near to <= 1 */
-int dictResize(dict *d)
+int dictShrinkToFit(dict *d)
 {
     unsigned long minimal;

@@ -221,20 +223,18 @@ int dictResize(dict *d)
     minimal = d->ht_used[0];
     if (minimal < DICT_HT_INITIAL_SIZE)
         minimal = DICT_HT_INITIAL_SIZE;
-    return dictExpand(d, minimal);
+    return dictShrink(d, minimal);
 }

-/* Expand or create the hash table,
+/* Resize or create the hash table,
  * when malloc_failed is non-NULL, it'll avoid panic if malloc fails (in which case it'll be set to 1).
- * Returns DICT_OK if expand was performed, and DICT_ERR if skipped. */
-int _dictExpand(dict *d, unsigned long size, int* malloc_failed)
+ * Returns DICT_OK if resize was performed, and DICT_ERR if skipped. */
+int _dictResize(dict *d, unsigned long size, int* malloc_failed)
 {
     if (malloc_failed) *malloc_failed = 0;

-    /* the size is invalid if it is smaller than the number of
-     * elements already inside the hash table */
-    if (dictIsRehashing(d) || d->ht_used[0] > size)
-        return DICT_ERR;
+    /* We can't rehash twice if rehashing is ongoing. */
+    assert(!dictIsRehashing(d));

     /* the new hash table */
     dictEntry **new_ht_table;
@@ -286,6 +286,14 @@ int _dictExpand(dict *d, unsigned long size, int* malloc_failed)
     return DICT_OK;
 }

+int _dictExpand(dict *d, unsigned long size, int* malloc_failed) {
+    /* the size is invalid if it is smaller than the size of the hash table
+     * or smaller than the number of elements already inside the hash table */
+    if (dictIsRehashing(d) || d->ht_used[0] > size || DICTHT_SIZE(d->ht_size_exp[0]) >= size)
+        return DICT_ERR;
+    return _dictResize(d, size, malloc_failed);
+}
+
 /* return DICT_ERR if expand was not performed */
 int dictExpand(dict *d, unsigned long size) {
     return _dictExpand(d, size, NULL);
@@ -293,11 +301,20 @@ int dictExpand(dict *d, unsigned long size) {

 /* return DICT_ERR if expand failed due to memory allocation failure */
 int dictTryExpand(dict *d, unsigned long size) {
-    int malloc_failed;
+    int malloc_failed = 0;
     _dictExpand(d, size, &malloc_failed);
     return malloc_failed? DICT_ERR : DICT_OK;
 }

+/* return DICT_ERR if shrink was not performed */
+int dictShrink(dict *d, unsigned long size) {
+    /* the size is invalid if it is bigger than the size of the hash table
+     * or smaller than the number of elements already inside the hash table */
+    if (dictIsRehashing(d) || d->ht_used[0] > size || DICTHT_SIZE(d->ht_size_exp[0]) <= size)
+        return DICT_ERR;
+    return _dictResize(d, size, NULL);
+}
+
 /* Performs N steps of incremental rehashing. Returns 1 if there are still
  * keys to move from the old to the new hash table, otherwise 0 is returned.
 *
@@ -588,6 +605,7 @@ static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
                     dictFreeUnlinkedEntry(d, he);
                 }
                 d->ht_used[table]--;
+                _dictShrinkIfNeeded(d);
                 return he;
             }
             prevHe = he;
@@ -752,6 +770,7 @@ void dictTwoPhaseUnlinkFree(dict *d, dictEntry *he, dictEntry **plink, int table
     dictFreeKey(d, he);
     dictFreeVal(d, he);
     if (!entryIsKey(he)) zfree(decodeMaskedPtr(he));
+    _dictShrinkIfNeeded(d);
     dictResumeRehashing(d);
 }

@@ -1401,21 +1420,27 @@ unsigned long dictScanDefrag(dict *d,
 /* Because we may need to allocate huge memory chunk at once when dict
  * expands, we will check this allocation is allowed or not if the dict
  * type has expandAllowed member function. */
-static int dictTypeExpandAllowed(dict *d) {
-    if (d->type->expandAllowed == NULL) return 1;
-    return d->type->expandAllowed(
+static int dictTypeResizeAllowed(dict *d) {
+    if (d->type->resizeAllowed == NULL) return 1;
+    return d->type->resizeAllowed(
                     DICTHT_SIZE(_dictNextExp(d->ht_used[0] + 1)) * sizeof(dictEntry*),
                     (double)d->ht_used[0] / DICTHT_SIZE(d->ht_size_exp[0]));
 }

 /* Expand the hash table if needed */
-static int _dictExpandIfNeeded(dict *d)
+static void _dictExpandIfNeeded(dict *d)
 {
+    /* Automatic resizing is disallowed. Return */
+    if (d->pauseAutoResize > 0) return;
+
     /* Incremental rehashing already in progress. Return. */
-    if (dictIsRehashing(d)) return DICT_OK;
+    if (dictIsRehashing(d)) return;

     /* If the hash table is empty expand it to the initial size. */
-    if (DICTHT_SIZE(d->ht_size_exp[0]) == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);
+    if (DICTHT_SIZE(d->ht_size_exp[0]) == 0) {
+        dictExpand(d, DICT_HT_INITIAL_SIZE);
+        return;
+    }

     /* If we reached the 1:1 ratio, and we are allowed to resize the hash
      * table (global setting) or we should avoid it but the ratio between
@@ -1426,11 +1451,35 @@ static int _dictExpandIfNeeded(dict *d)
         (dict_can_resize != DICT_RESIZE_FORBID &&
          d->ht_used[0] / DICTHT_SIZE(d->ht_size_exp[0]) > dict_force_resize_ratio))
     {
-        if (!dictTypeExpandAllowed(d))
-            return DICT_OK;
-        return dictExpand(d, d->ht_used[0] + 1);
+        if (!dictTypeResizeAllowed(d))
+            return;
+        dictExpand(d, d->ht_used[0] + 1);
+    }
+}
+
+static void _dictShrinkIfNeeded(dict *d)
+{
+    /* Automatic resizing is disallowed. Return */
+    if (d->pauseAutoResize > 0) return;
+
+    /* Incremental rehashing already in progress. Return. */
+    if (dictIsRehashing(d)) return;
+
+    /* If the size of hash table is DICT_HT_INITIAL_SIZE, don't shrink it. */
+    if (DICTHT_SIZE(d->ht_size_exp[0]) == DICT_HT_INITIAL_SIZE) return;
+
+    /* If we reached below 1:10 elements/buckets ratio, and we are allowed to resize
+     * the hash table (global setting) or we should avoid it but the ratio is below 1:50,
+     * we'll trigger a resize of the hash table. */
+    if ((dict_can_resize == DICT_RESIZE_ENABLE &&
+         d->ht_used[0] * 100 / DICTHT_SIZE(d->ht_size_exp[0]) < HASHTABLE_MIN_FILL) ||
+        (dict_can_resize != DICT_RESIZE_FORBID &&
+         d->ht_used[0] * 100 / DICTHT_SIZE(d->ht_size_exp[0]) < HASHTABLE_MIN_FILL / dict_force_resize_ratio))
+    {
+        if (!dictTypeResizeAllowed(d))
+            return;
+        dictShrink(d, d->ht_used[0]);
     }
-    return DICT_OK;
 }

 /* Our hash table capability is a power of two */
@@ -1454,8 +1503,7 @@ void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing)
     if (dictIsRehashing(d)) _dictRehashStep(d);

     /* Expand the hash table if needed */
-    if (_dictExpandIfNeeded(d) == DICT_ERR)
-        return NULL;
+    _dictExpandIfNeeded(d);
     for (table = 0; table <= 1; table++) {
         idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
         if (table == 0 && (long)idx < d->rehashidx) continue;
@@ -1483,6 +1531,7 @@ void dictEmpty(dict *d, void(callback)(dict*)) {
     _dictClear(d,1,callback);
     d->rehashidx = -1;
     d->pauserehash = 0;
+    d->pauseAutoResize = 0;
 }

 void dictSetResizeEnabled(dictResizeEnable enable) {
diff --git a/src/dict.h b/src/dict.h
index 3d4de3be2..cebbe1498 100644
--- a/src/dict.h
+++ b/src/dict.h
@@ -44,6 +44,9 @@
 #define DICT_OK 0
 #define DICT_ERR 1

+/* Hash table parameters */
+#define HASHTABLE_MIN_FILL 10 /* Minimal hash table fill 10% */
+
 typedef struct dictEntry dictEntry; /* opaque */
 typedef struct dict dict;

@@ -54,7 +57,7 @@ typedef struct dictType {
     int (*keyCompare)(dict *d, const void *key1, const void *key2);
     void (*keyDestructor)(dict *d, void *key);
     void (*valDestructor)(dict *d, void *obj);
-    int (*expandAllowed)(size_t moreMem, double usedRatio);
+    int (*resizeAllowed)(size_t moreMem, double usedRatio);
     /* Invoked at the start of dict initialization/rehashing (old and new ht are already created) */
     void (*rehashingStarted)(dict *d);
     /* Invoked at the end of dict initialization/rehashing of all the entries from old to new ht. Both ht still exists
@@ -91,6 +94,7 @@ struct dict {
     /* Keep small vars at end for optimal (minimal) struct padding */
     int16_t pauserehash; /* If >0 rehashing is paused (<0 indicates coding error) */
     signed char ht_size_exp[2]; /* exponent of size. (size = 1<<exp) */
+    int16_t pauseAutoResize;  /* If >0 automatic resizing is disallowed (<0 indicates coding error) */
     void *metadata[];
 };

@@ -155,6 +159,8 @@ typedef struct {
 #define dictIsRehashing(d) ((d)->rehashidx != -1)
 #define dictPauseRehashing(d) ((d)->pauserehash++)
 #define dictResumeRehashing(d) ((d)->pauserehash--)
+#define dictPauseAutoResize(d) ((d)->pauseAutoResize++)
+#define dictResumeAutoResize(d) ((d)->pauseAutoResize--)

 /* If our unsigned long type can store a 64 bit number, use a 64 bit PRNG. */
 #if ULONG_MAX >= 0xffffffffffffffff
@@ -174,6 +180,7 @@ dict *dictCreate(dictType *type);
 dict **dictCreateMultiple(dictType *type, int count);
 int dictExpand(dict *d, unsigned long size);
 int dictTryExpand(dict *d, unsigned long size);
+int dictShrink(dict *d, unsigned long size);
 int dictAdd(dict *d, void *key, void *val);
 dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing);
 void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing);
@@ -188,7 +195,7 @@ void dictTwoPhaseUnlinkFree(dict *d, dictEntry *he, dictEntry **plink, int table
 void dictRelease(dict *d);
 dictEntry * dictFind(dict *d, const void *key);
 void *dictFetchValue(dict *d, const void *key);
-int dictResize(dict *d);
+int dictShrinkToFit(dict *d);
 void dictSetKey(dict *d, dictEntry* de, void *key);
 void dictSetVal(dict *d, dictEntry *de, void *val);
 void dictSetSignedIntegerVal(dictEntry *de, int64_t val);
diff --git a/src/server.c b/src/server.c
index 280644e7f..30ec199c0 100644
--- a/src/server.c
+++ b/src/server.c
@@ -428,7 +428,7 @@ uint64_t dictEncObjHash(const void *key) {
  * provisionally if used memory will be over maxmemory after dict expands,
  * but to guarantee the performance of redis, we still allow dict to expand
  * if dict load factor exceeds HASHTABLE_MAX_LOAD_FACTOR. */
-int dictExpandAllowed(size_t moreMem, double usedRatio) {
+int dictResizeAllowed(size_t moreMem, double usedRatio) {
     if (usedRatio <= HASHTABLE_MAX_LOAD_FACTOR) {
         return !overMaxmemoryAfterAlloc(moreMem);
     } else {
@@ -547,7 +547,7 @@ dictType dbDictType = {
     dictSdsKeyCompare,          /* key compare */
     dictSdsDestructor,          /* key destructor */
     dictObjectDestructor,       /* val destructor */
-    dictExpandAllowed,          /* allow to expand */
+    dictResizeAllowed,          /* allow to resize */
     dbDictRehashingStarted,
     dbDictRehashingCompleted,
     dbDictMetadataSize,
@@ -561,7 +561,7 @@ dictType dbExpiresDictType = {
     dictSdsKeyCompare,          /* key compare */
     NULL,                       /* key destructor */
     NULL,                       /* val destructor */
-    dictExpandAllowed,          /* allow to expand */
+    dictResizeAllowed,          /* allow to resize */
     dbExpiresRehashingStarted,
     dbExpiresRehashingCompleted,
     dbDictMetadataSize,
@@ -693,7 +693,7 @@ dictType clientDictType = {
     .no_value = 1               /* no values in this dict */
 };

-int htNeedsResize(dict *dict) {
+int htNeedsShrink(dict *dict) {
     long long size, used;

     size = dictBuckets(dict);
@@ -718,8 +718,8 @@ void tryResizeHashTables(int dbid) {
         for (int i = 0; i < CRON_DBS_PER_CALL && db->sub_dict[subdict].resize_cursor != -1; i++) {
             int slot = db->sub_dict[subdict].resize_cursor;
             dict *d = (subdict == DB_MAIN ? db->dict[slot] : db->expires[slot]);
-            if (htNeedsResize(d))
-                dictResize(d);
+            if (htNeedsShrink(d))
+                dictShrinkToFit(d);
             db->sub_dict[subdict].resize_cursor = dbGetNextNonEmptySlot(db, slot, subdict);
         }
     }
diff --git a/src/server.h b/src/server.h
index be33bf803..a913a1c8b 100644
--- a/src/server.h
+++ b/src/server.h
@@ -198,7 +198,6 @@ struct hdr_histogram;
 extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT];

 /* Hash table parameters */
-#define HASHTABLE_MIN_FILL 10 /* Minimal hash table fill 10% */
 #define HASHTABLE_MAX_LOAD_FACTOR 1.618 /* Maximum hash table load factor. */

 /* Command flags. Please check the definition of struct redisCommand in this file
@@ -3112,7 +3111,7 @@ void serverLogRaw(int level, const char *msg);
 void serverLogRawFromHandler(int level, const char *msg);
 void usage(void);
 void updateDictResizePolicy(void);
-int htNeedsResize(dict *dict);
+int htNeedsShrink(dict *dict);
 void populateCommandTable(void);
 void resetCommandTableStats(dict* commands);
 void resetErrorTableStats(void);
diff --git a/src/t_hash.c b/src/t_hash.c
index 9242d27cc..ff8746384 100644
--- a/src/t_hash.c
+++ b/src/t_hash.c
@@ -292,9 +292,6 @@ int hashTypeDelete(robj *o, sds field) {
     } else if (o->encoding == OBJ_ENCODING_HT) {
         if (dictDelete((dict*)o->ptr, field) == C_OK) {
             deleted = 1;
-
-            /* Always check if the dictionary needs a resize after a delete. */
-            if (htNeedsResize(o->ptr)) dictResize(o->ptr);
         }

     } else {
diff --git a/src/t_set.c b/src/t_set.c
index c2729105d..24e7b0e7d 100644
--- a/src/t_set.c
+++ b/src/t_set.c
@@ -256,7 +256,6 @@ int setTypeRemoveAux(robj *setobj, char *str, size_t len, int64_t llval, int str
     if (setobj->encoding == OBJ_ENCODING_HT) {
         sds sdsval = str_is_sds ? (sds)str : sdsnewlen(str, len);
         int deleted = (dictDelete(setobj->ptr, sdsval) == DICT_OK);
-        if (deleted && htNeedsResize(setobj->ptr)) dictResize(setobj->ptr);
         if (sdsval != str) sdsfree(sdsval); /* free temp copy */
         return deleted;
     } else if (setobj->encoding == OBJ_ENCODING_LISTPACK) {
diff --git a/src/t_zset.c b/src/t_zset.c
index 762f4aee7..4bff5eb78 100644
--- a/src/t_zset.c
+++ b/src/t_zset.c
@@ -1596,7 +1596,6 @@ int zsetDel(robj *zobj, sds ele) {
     } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
         zset *zs = zobj->ptr;
         if (zsetRemoveFromSkiplist(zs, ele)) {
-            if (htNeedsResize(zs->dict)) dictResize(zs->dict);
             return 1;
         }
     } else {
@@ -2011,6 +2010,7 @@ void zremrangeGenericCommand(client *c, zrange_type rangetype) {
         }
     } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
         zset *zs = zobj->ptr;
+        dictPauseAutoResize(zs->dict);
         switch(rangetype) {
         case ZRANGE_AUTO:
         case ZRANGE_RANK:
@@ -2023,7 +2023,8 @@ void zremrangeGenericCommand(client *c, zrange_type rangetype) {
             deleted = zslDeleteRangeByLex(zs->zsl,&lexrange,zs->dict);
             break;
         }
-        if (htNeedsResize(zs->dict)) dictResize(zs->dict);
+        dictResumeAutoResize(zs->dict);
+        if (htNeedsShrink(zs->dict)) dictShrinkToFit(zs->dict);
         if (dictSize(zs->dict) == 0) {
             dbDelete(c->db,key);
             keyremoved = 1;
@@ -2535,10 +2536,12 @@ static void zdiffAlgorithm2(zsetopsrc *src, long setnum, zset *dstzset, size_t *
                 dictAdd(dstzset->dict,tmp,&znode->score);
                 cardinality++;
             } else {
+                dictPauseAutoResize(dstzset->dict);
                 tmp = zuiSdsFromValue(&zval);
                 if (zsetRemoveFromSkiplist(dstzset, tmp)) {
                     cardinality--;
                 }
+                dictResumeAutoResize(dstzset->dict);
             }

             /* Exit if result set is empty as any additional removal
@@ -2551,7 +2554,7 @@ static void zdiffAlgorithm2(zsetopsrc *src, long setnum, zset *dstzset, size_t *
     }

     /* Resize dict if needed after removing multiple elements */
-    if (htNeedsResize(dstzset->dict)) dictResize(dstzset->dict);
+    if (htNeedsShrink(dstzset->dict)) dictShrinkToFit(dstzset->dict);

     /* Using this algorithm, we can't calculate the max element as we go,
      * we have to iterate through all elements to find the max one after. */
diff --git a/tests/unit/type/set.tcl b/tests/unit/type/set.tcl
index 29275622d..4ffc89281 100644
--- a/tests/unit/type/set.tcl
+++ b/tests/unit/type/set.tcl
@@ -1101,6 +1101,11 @@ foreach type {single multiple single_multiple} {
         assert_equal [r scard myset] 30
         assert {[is_rehashing myset]}

+        # Wait for the hash set rehashing to finish.
+        while {[is_rehashing myset]} {
+            r srandmember myset 10
+        }
+
         # Now that we have a hash set with only one long chain bucket.
         set htstats [r debug HTSTATS-KEY myset full]
         assert {[regexp {different slots: ([0-9]+)} $htstats - different_slots]}
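
Editor's note on the new thresholds (not part of the patch): with this change a dict still expands once it reaches roughly a 1:1 used/buckets ratio, and it now also shrinks automatically once its fill drops below HASHTABLE_MIN_FILL (10%), with a much stricter bound when resizing should be avoided (DICT_RESIZE_AVOID, e.g. while a fork-based save is running). The standalone sketch below restates only those trigger conditions so they can be eyeballed in isolation; it omits the DICT_HT_INITIAL_SIZE guard and the pauserehash/pauseAutoResize checks. FORCE_RESIZE_RATIO is an assumed stand-in for dict.c's dict_force_resize_ratio (the "below 1:50" wording in the patch comment suggests a value of 5 at the time), and auto_resize_action with its resize_action values are illustrative names, not part of the dict API.

#include <stdio.h>

/* Mirrors dict.h's HASHTABLE_MIN_FILL as added by this patch. */
#define HASHTABLE_MIN_FILL 10
/* Assumed stand-in for dict.c's dict_force_resize_ratio (see note above). */
#define FORCE_RESIZE_RATIO 5

typedef enum { DICT_RESIZE_ENABLE, DICT_RESIZE_AVOID, DICT_RESIZE_FORBID } resize_policy;
typedef enum { RESIZE_NONE, RESIZE_EXPAND, RESIZE_SHRINK } resize_action;

/* What the automatic hooks (_dictExpandIfNeeded / _dictShrinkIfNeeded) would
 * decide for a table holding `used` entries spread over `buckets` slots. */
static resize_action auto_resize_action(unsigned long used, unsigned long buckets,
                                        resize_policy policy) {
    /* Expand at >= 1:1 fill when resizing is enabled, or only once the load
     * factor exceeds the force ratio when resizing should be avoided. */
    if ((policy == DICT_RESIZE_ENABLE && used >= buckets) ||
        (policy != DICT_RESIZE_FORBID && used / buckets > FORCE_RESIZE_RATIO))
        return RESIZE_EXPAND;

    /* Shrink below 10% fill when enabled, or below 10% divided by the force
     * ratio (i.e. a much emptier table) when resizing should be avoided. */
    if ((policy == DICT_RESIZE_ENABLE &&
         used * 100 / buckets < HASHTABLE_MIN_FILL) ||
        (policy != DICT_RESIZE_FORBID &&
         used * 100 / buckets < HASHTABLE_MIN_FILL / FORCE_RESIZE_RATIO))
        return RESIZE_SHRINK;

    return RESIZE_NONE;
}

int main(void) {
    /* 50 entries in 1024 buckets is ~5% fill: shrinks when resizing is fully
     * enabled, but is left alone while resizing should only happen reluctantly. */
    printf("%d\n", auto_resize_action(50, 1024, DICT_RESIZE_ENABLE));   /* 2 = RESIZE_SHRINK */
    printf("%d\n", auto_resize_action(50, 1024, DICT_RESIZE_AVOID));    /* 0 = RESIZE_NONE */
    printf("%d\n", auto_resize_action(2048, 1024, DICT_RESIZE_ENABLE)); /* 1 = RESIZE_EXPAND */
    return 0;
}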