diff --git a/src/db.c b/src/db.c
index d40b1599a..03e5cc5f1 100644
--- a/src/db.c
+++ b/src/db.c
@@ -692,7 +692,7 @@ long long emptyDbStructure(redisDb *dbarray, int dbnum, int async,
         for (dbKeyType subdict = DB_MAIN; subdict <= DB_EXPIRES; subdict++) {
             dbarray[j].sub_dict[subdict].non_empty_slots = 0;
             dbarray[j].sub_dict[subdict].key_count = 0;
-            dbarray[j].sub_dict[subdict].resize_cursor = -1;
+            dbarray[j].sub_dict[subdict].resize_cursor = 0;
             if (server.cluster_enabled) {
                 dbarray[j].sub_dict[subdict].bucket_count = 0;
                 unsigned long long *slot_size_index = dbarray[j].sub_dict[subdict].slot_size_index;
diff --git a/src/dict.c b/src/dict.c
index bc9243ce4..03659381c 100644
--- a/src/dict.c
+++ b/src/dict.c
@@ -216,19 +216,6 @@ int _dictInit(dict *d, dictType *type)
     return DICT_OK;
 }
 
-/* Resize the table to the minimal size that contains all the elements,
- * but with the invariant of a USED/BUCKETS ratio near to <= 1 */
-int dictShrinkToFit(dict *d)
-{
-    unsigned long minimal;
-
-    if (dict_can_resize != DICT_RESIZE_ENABLE || dictIsRehashing(d)) return DICT_ERR;
-    minimal = d->ht_used[0];
-    if (minimal < DICT_HT_INITIAL_SIZE)
-        minimal = DICT_HT_INITIAL_SIZE;
-    return dictShrink(d, minimal);
-}
-
 /* Resize or create the hash table,
  * when malloc_failed is non-NULL, it'll avoid panic if malloc fails (in which case it'll be set to 1).
  * Returns DICT_OK if resize was performed, and DICT_ERR if skipped. */
@@ -1484,19 +1471,17 @@ static int dictTypeResizeAllowed(dict *d, size_t size) {
                     (double)d->ht_used[0] / DICTHT_SIZE(d->ht_size_exp[0]));
 }
 
-/* Expand the hash table if needed */
-static void _dictExpandIfNeeded(dict *d)
-{
-    /* Automatic resizing is disallowed. Return */
-    if (d->pauseAutoResize > 0) return;
-
+/* Returns DICT_OK when the hash table was expanded, or when it is already
+ * rehashing and nothing more needs to be done for it right now. Returns
+ * DICT_ERR when no expand was triggered (the caller may try shrinking). */
+int dictExpandIfNeeded(dict *d) {
     /* Incremental rehashing already in progress. Return. */
-    if (dictIsRehashing(d)) return;
+    if (dictIsRehashing(d)) return DICT_OK;
 
     /* If the hash table is empty expand it to the initial size. */
     if (DICTHT_SIZE(d->ht_size_exp[0]) == 0) {
         dictExpand(d, DICT_HT_INITIAL_SIZE);
-        return;
+        return DICT_OK;
     }
 
     /* If we reached the 1:1 ratio, and we are allowed to resize the hash
@@ -1508,22 +1493,30 @@ static void _dictExpandIfNeeded(dict *d)
         (dict_can_resize != DICT_RESIZE_FORBID &&
          d->ht_used[0] >= dict_force_resize_ratio * DICTHT_SIZE(d->ht_size_exp[0])))
     {
-        if (!dictTypeResizeAllowed(d, d->ht_used[0] + 1))
-            return;
-        dictExpand(d, d->ht_used[0] + 1);
+        if (dictTypeResizeAllowed(d, d->ht_used[0] + 1))
+            dictExpand(d, d->ht_used[0] + 1);
+        return DICT_OK;
     }
+    return DICT_ERR;
 }
 
-static void _dictShrinkIfNeeded(dict *d)
-{
+/* Expand the hash table if needed */
+static void _dictExpandIfNeeded(dict *d) {
     /* Automatic resizing is disallowed. Return */
     if (d->pauseAutoResize > 0) return;
 
+    dictExpandIfNeeded(d);
+}
+
+/* Returns DICT_OK when the hash table was shrunk, or when it is already
+ * rehashing and nothing more needs to be done for it right now. Returns
+ * DICT_ERR when no shrink was triggered (the caller may try expanding). */
+int dictShrinkIfNeeded(dict *d) {
     /* Incremental rehashing already in progress. Return. */
-    if (dictIsRehashing(d)) return;
+    if (dictIsRehashing(d)) return DICT_OK;
 
     /* If the size of hash table is DICT_HT_INITIAL_SIZE, don't shrink it. */
-    if (DICTHT_SIZE(d->ht_size_exp[0]) == DICT_HT_INITIAL_SIZE) return;
+    if (DICTHT_SIZE(d->ht_size_exp[0]) <= DICT_HT_INITIAL_SIZE) return DICT_OK;
 
     /* If we reached below 1:8 elements/buckets ratio, and we are allowed to resize
      * the hash table (global setting) or we should avoid it but the ratio is below 1:32,
@@ -1533,10 +1526,19 @@ static void _dictShrinkIfNeeded(dict *d)
         (dict_can_resize != DICT_RESIZE_FORBID &&
          d->ht_used[0] * HASHTABLE_MIN_FILL * dict_force_resize_ratio <= DICTHT_SIZE(d->ht_size_exp[0])))
     {
-        if (!dictTypeResizeAllowed(d, d->ht_used[0]))
-            return;
-        dictShrink(d, d->ht_used[0]);
+        if (dictTypeResizeAllowed(d, d->ht_used[0]))
+            dictShrink(d, d->ht_used[0]);
+        return DICT_OK;
     }
+    return DICT_ERR;
+}
+
+static void _dictShrinkIfNeeded(dict *d)
+{
+    /* Automatic resizing is disallowed. Return */
+    if (d->pauseAutoResize > 0) return;
+
+    dictShrinkIfNeeded(d);
 }
 
 /* Our hash table capability is a power of two */
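The dict.c changes replace the one-shot dictShrinkToFit() with a complementary dictExpandIfNeeded()/dictShrinkIfNeeded() pair, keeping thin static wrappers so existing call sites still honor pauseAutoResize. The DICT_OK/DICT_ERR contract is what lets a caller probe one direction and fall back to the other. A minimal sketch of that calling pattern, mirroring the cron path in server.c below (the helper name resizeDictIfNeeded is hypothetical):

    #include "dict.h"

    /* dictShrinkIfNeeded() returns DICT_OK when it shrank the table, or when
     * the table is already rehashing or too small to shrink; DICT_ERR means
     * no shrink was triggered, so expanding is the only action left to try. */
    static void resizeDictIfNeeded(dict *d) {
        if (dictShrinkIfNeeded(d) == DICT_ERR)
            dictExpandIfNeeded(d);
    }
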
diff --git a/src/dict.h b/src/dict.h
index 5b0831906..7a7338279 100644
--- a/src/dict.h
+++ b/src/dict.h
@@ -195,7 +195,8 @@ void dictTwoPhaseUnlinkFree(dict *d, dictEntry *he, dictEntry **plink, int table_index);
 void dictRelease(dict *d);
 dictEntry * dictFind(dict *d, const void *key);
 void *dictFetchValue(dict *d, const void *key);
-int dictShrinkToFit(dict *d);
+int dictShrinkIfNeeded(dict *d);
+int dictExpandIfNeeded(dict *d);
 void dictSetKey(dict *d, dictEntry* de, void *key);
 void dictSetVal(dict *d, dictEntry *de, void *val);
 void dictSetSignedIntegerVal(dictEntry *de, int64_t val);
diff --git a/src/server.c b/src/server.c
index db4841940..5fd98c893 100644
--- a/src/server.c
+++ b/src/server.c
@@ -693,34 +693,23 @@ dictType clientDictType = {
     .no_value = 1 /* no values in this dict */
 };
 
-int htNeedsShrink(dict *dict) {
-    long long size, used;
-
-    size = dictBuckets(dict);
-    used = dictSize(dict);
-    return (size > DICT_HT_INITIAL_SIZE &&
-            (used * HASHTABLE_MIN_FILL <= size));
-}
-
 /* In cluster-enabled setup, this method traverses through all main/expires dictionaries (CLUSTER_SLOTS)
  * and triggers a resize if the percentage of used buckets in the HT reaches (100 / HASHTABLE_MIN_FILL)
- * we resize the hash table to save memory.
+ * we shrink the hash table to save memory, or expand it when the percentage of
+ * used buckets reaches 100.
  *
  * In non cluster-enabled setup, it resize main/expires dictionary based on the same condition described above. */
 void tryResizeHashTables(int dbid) {
     redisDb *db = &server.db[dbid];
+    int dicts_per_call = min(CRON_DICTS_PER_DB, db->dict_count);
     for (dbKeyType subdict = DB_MAIN; subdict <= DB_EXPIRES; subdict++) {
-        if (dbSize(db, subdict) == 0) continue;
-
-        if (db->sub_dict[subdict].resize_cursor == -1)
-            db->sub_dict[subdict].resize_cursor = findSlotByKeyIndex(db, 1, subdict);
-
-        for (int i = 0; i < CRON_DBS_PER_CALL && db->sub_dict[subdict].resize_cursor != -1; i++) {
+        for (int i = 0; i < dicts_per_call; i++) {
             int slot = db->sub_dict[subdict].resize_cursor;
             dict *d = (subdict == DB_MAIN ? db->dict[slot] : db->expires[slot]);
-            if (htNeedsShrink(d))
-                dictShrinkToFit(d);
-            db->sub_dict[subdict].resize_cursor = dbGetNextNonEmptySlot(db, slot, subdict);
+            if (dictShrinkIfNeeded(d) == DICT_ERR) {
+                dictExpandIfNeeded(d);
+            }
+            db->sub_dict[subdict].resize_cursor = (slot + 1) % db->dict_count;
         }
     }
 }
@@ -2685,7 +2674,7 @@ void initDbState(redisDb *db){
     for (dbKeyType subdict = DB_MAIN; subdict <= DB_EXPIRES; subdict++) {
         db->sub_dict[subdict].non_empty_slots = 0;
         db->sub_dict[subdict].key_count = 0;
-        db->sub_dict[subdict].resize_cursor = -1;
+        db->sub_dict[subdict].resize_cursor = 0;
         db->sub_dict[subdict].slot_size_index = server.cluster_enabled ? zcalloc(sizeof(unsigned long long) * (CLUSTER_SLOTS + 1)) : NULL;
         db->sub_dict[subdict].bucket_count = 0;
     }
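With htNeedsShrink() gone, tryResizeHashTables() no longer skips empty sub-dicts or chases non-empty slots via findSlotByKeyIndex()/dbGetNextNonEmptySlot(); the cursor now simply wraps over all dicts, visiting at most CRON_DICTS_PER_DB per call, which is what lets even a completely empty dict get shrunk (see the new test at the end). A hedged sketch of the cursor arithmetic, standalone case in mind (a dict_count of 1 pins the cursor to slot 0; the names cronTick and cursor are illustrative only):

    #include "dict.h"

    static int cursor = 0;                    /* persists across cron ticks */

    static void cronTick(dict **dicts, int dict_count) {
        /* Visit at most CRON_DICTS_PER_DB (16) slots per tick, resuming
         * where the previous tick stopped. */
        int dicts_per_call = dict_count < 16 ? dict_count : 16;
        for (int i = 0; i < dicts_per_call; i++) {
            int slot = cursor;
            if (dictShrinkIfNeeded(dicts[slot]) == DICT_ERR)
                dictExpandIfNeeded(dicts[slot]);
            cursor = (slot + 1) % dict_count; /* wrap instead of parking at -1 */
        }
    }
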
diff --git a/src/server.h b/src/server.h
index ba55e3dee..1b1e3c627 100644
--- a/src/server.h
+++ b/src/server.h
@@ -115,6 +115,7 @@ struct hdr_histogram;
 #define CONFIG_MAX_HZ 500
 #define MAX_CLIENTS_PER_CLOCK_TICK 200 /* HZ is adapted based on that. */
 #define CRON_DBS_PER_CALL 16
+#define CRON_DICTS_PER_DB 16
 #define NET_MAX_WRITES_PER_EVENT (1024*64)
 #define PROTO_SHARED_SELECT_CMDS 10
 #define OBJ_SHARED_INTEGERS 10000
@@ -970,7 +971,7 @@ typedef struct replBufBlock {
 
 /* When adding fields, please check the swap db related logic. */
 typedef struct dbDictState {
-    int resize_cursor;                    /* Cron job uses this cursor to gradually resize dictionaries (only used for cluster-enabled). */
+    int resize_cursor;                    /* Cron job uses this cursor to gradually resize all dictionaries. */
     int non_empty_slots;                  /* The number of non-empty slots. */
     unsigned long long key_count;         /* Total number of keys in this DB. */
     unsigned long long bucket_count;      /* Total number of buckets in this DB across dictionaries (only used for cluster-enabled). */
@@ -3111,7 +3112,6 @@ void serverLogRaw(int level, const char *msg);
 void serverLogRawFromHandler(int level, const char *msg);
 void usage(void);
 void updateDictResizePolicy(void);
-int htNeedsShrink(dict *dict);
 void populateCommandTable(void);
 void resetCommandTableStats(dict* commands);
 void resetErrorTableStats(void);
diff --git a/src/t_zset.c b/src/t_zset.c
index 4bff5eb78..eb1477457 100644
--- a/src/t_zset.c
+++ b/src/t_zset.c
@@ -2024,7 +2024,7 @@ void zremrangeGenericCommand(client *c, zrange_type rangetype) {
         break;
     }
     dictResumeAutoResize(zs->dict);
-    if (htNeedsShrink(zs->dict)) dictShrinkToFit(zs->dict);
+    dictShrinkIfNeeded(zs->dict);
     if (dictSize(zs->dict) == 0) {
         dbDelete(c->db,key);
         keyremoved = 1;
@@ -2554,7 +2554,7 @@ static void zdiffAlgorithm2(zsetopsrc *src, long setnum, zset *dstzset, size_t *maxelelen, size_t *totelelen) {
     }
 
     /* Resize dict if needed after removing multiple elements */
-    if (htNeedsShrink(dstzset->dict)) dictShrinkToFit(dstzset->dict);
+    dictShrinkIfNeeded(dstzset->dict);
 
     /* Using this algorithm, we can't calculate the max element as we go,
      * we have to iterate through all elements to find the max one after. */
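The two t_zset.c call sites can drop their external htNeedsShrink() guard because the fill-ratio check now lives inside dictShrinkIfNeeded() itself. For reference, the trigger condition spelled out as standalone arithmetic — a sketch assuming HASHTABLE_MIN_FILL is 8 and dict_force_resize_ratio is 4, the values implied by the 1:8/1:32 comments in the dict.c hunk above:

    #include "dict.h"

    /* Shrink is considered when the fill ratio drops to 1:8 (resizing
     * enabled) or 1:32 (resizing avoided, e.g. while a child process runs). */
    static int shrinkTriggered(unsigned long used, unsigned long buckets,
                               int resize_policy) {
        if (resize_policy == DICT_RESIZE_ENABLE)
            return used * 8 <= buckets;          /* fill <= 1:8  */
        if (resize_policy == DICT_RESIZE_AVOID)
            return used * 8 * 4 <= buckets;      /* fill <= 1:32 */
        return 0;                                /* DICT_RESIZE_FORBID */
    }
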
diff --git a/tests/unit/other.tcl b/tests/unit/other.tcl
index ab19be19f..18a54d4e8 100644
--- a/tests/unit/other.tcl
+++ b/tests/unit/other.tcl
@@ -373,12 +373,14 @@ start_server {tags {"other external:skip"}} {
         assert_no_match "*table size: 8192*" [r debug HTSTATS 9]
         exec kill -9 [get_child_pid 0]
         waitForBgsave r
-        after 200 ;# waiting for serverCron
 
         # Hash table should rehash since there is no child process,
         # size is power of two and over 4098, so it is 8192
-        r set k3 v3
-        assert_match "*table size: 8192*" [r debug HTSTATS 9]
+        wait_for_condition 50 100 {
+            [string match "*table size: 8192*" [r debug HTSTATS 9]]
+        } else {
+            fail "hash table did not rehash after the child process was killed"
+        }
     } {} {needs:debug needs:local-process}
 }
 
@@ -487,3 +489,38 @@ start_cluster 1 0 {tags {"other external:skip cluster slow"}} {
         assert_match "*table size: 16*" [r debug HTSTATS 0]
     } {} {needs:debug}
 }
+
+proc get_overhead_hashtable_main {} {
+    set main 0
+    set stats [r memory stats]
+    set list_stats [split $stats " "]
+    for {set j 0} {$j < [llength $list_stats]} {incr j} {
+        if {[string equal -nocase "\{overhead.hashtable.main" [lindex $list_stats $j]]} {
+            set main [lindex $list_stats [expr $j+1]]
+            break
+        }
+    }
+    return $main
+}
+
+start_server {tags {"other external:skip"}} {
+    test "Redis can resize empty dict" {
+        # Write and then delete 128 keys, creating an empty dict
+        r flushall
+        for {set j 1} {$j <= 128} {incr j} {
+            r set $j{b} a
+        }
+        for {set j 1} {$j <= 128} {incr j} {
+            r del $j{b}
+        }
+        # Set a key to enable overhead display of db 0
+        r set a b
+        # The dict that held 128 keys must have expanded;
+        # its hash table alone takes far more than 200 bytes
+        wait_for_condition 100 50 {
+            [get_overhead_hashtable_main] < 200
+        } else {
+            fail "dict did not resize in time"
+        }
+    }
+}