Change the threshold of dict expand, shrink and rehash (#12948)

Before this change (most recently modified in
https://github.com/redis/redis/pull/12850#discussion_r1421406393), The
trigger for normal expand threshold was 100% utilization and the trigger
for normal shrink threshold was 10% (HASHTABLE_MIN_FILL).
While during fork (DICT_RESIZE_AVOID), when we want to avoid rehash, the
trigger thresholds were multiplied by 5 (`dict_force_resize_ratio`),
meaning 500% for expand and 2% (100/10/5) for shrink.

However, in `dictRehash` (the incremental rehashing), the rehashing
threshold for shrinking during fork (DICT_RESIZE_AVOID) was 20% by
mistake.
This meant that if a shrinking is triggered when `dict_can_resize` is
`DICT_RESIZE_ENABLE` which the threshold is 10%, the rehashing can
continue when `dict_can_resize` is `DICT_RESIZE_AVOID`.
This would cause unwanted CopyOnWrite damage.

It'll make sense to change the thresholds of the rehash trigger and the
thresholds of the incremental rehashing the same, however, in one we
compare the size of the hash table to the number of records, and in the
other we compare the size of ht[0] to the size of ht[1], so the formula
is not exactly the same.

to make things easier we change all the thresholds to powers of 2, so
the normal shrinking threshold is changed from 100/10 (i.e. 10%) to
100/8 (i.e. 12.5%), and we change the threshold during forks from 5 to
4, i.e. from 500% to 400% for expand, and from 2% (100/10/5) to 3.125%
(100/8/4)
This commit is contained in:
Yanqi Lv 2024-01-19 23:00:43 +08:00 committed by GitHub
parent d0640029dc
commit b07174afc2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 60 additions and 54 deletions

View File

@ -56,11 +56,11 @@
* Note that even when dict_can_resize is set to DICT_RESIZE_AVOID, not all
* resizes are prevented:
* - A hash table is still allowed to expand if the ratio between the number
* of elements and the buckets > dict_force_resize_ratio.
* of elements and the buckets >= dict_force_resize_ratio.
* - A hash table is still allowed to shrink if the ratio between the number
* of elements and the buckets < HASHTABLE_MIN_FILL / dict_force_resize_ratio. */
* of elements and the buckets <= 1 / (HASHTABLE_MIN_FILL * dict_force_resize_ratio). */
static dictResizeEnable dict_can_resize = DICT_RESIZE_ENABLE;
static unsigned int dict_force_resize_ratio = 5;
static unsigned int dict_force_resize_ratio = 4;
/* -------------------------- types ----------------------------------------- */
struct dictEntry {
@ -332,9 +332,12 @@ int dictRehash(dict *d, int n) {
unsigned long s0 = DICTHT_SIZE(d->ht_size_exp[0]);
unsigned long s1 = DICTHT_SIZE(d->ht_size_exp[1]);
if (dict_can_resize == DICT_RESIZE_FORBID || !dictIsRehashing(d)) return 0;
/* If dict_can_resize is DICT_RESIZE_AVOID, we want to avoid rehashing.
* - If expanding, the threshold is dict_force_resize_ratio which is 4.
* - If shrinking, the threshold is 1 / (HASHTABLE_MIN_FILL * dict_force_resize_ratio) which is 1/32. */
if (dict_can_resize == DICT_RESIZE_AVOID &&
((s1 > s0 && s1 < dict_force_resize_ratio * s0) ||
(s1 < s0 && s0 < dict_force_resize_ratio * s1)))
(s1 < s0 && s0 < HASHTABLE_MIN_FILL * dict_force_resize_ratio * s1)))
{
return 0;
}
@ -1471,13 +1474,13 @@ static void _dictShrinkIfNeeded(dict *d)
/* If the size of hash table is DICT_HT_INITIAL_SIZE, don't shrink it. */
if (DICTHT_SIZE(d->ht_size_exp[0]) == DICT_HT_INITIAL_SIZE) return;
/* If we reached below 1:10 elements/buckets ratio, and we are allowed to resize
* the hash table (global setting) or we should avoid it but the ratio is below 1:50,
/* If we reached below 1:8 elements/buckets ratio, and we are allowed to resize
* the hash table (global setting) or we should avoid it but the ratio is below 1:32,
* we'll trigger a resize of the hash table. */
if ((dict_can_resize == DICT_RESIZE_ENABLE &&
d->ht_used[0] * 100 <= HASHTABLE_MIN_FILL * DICTHT_SIZE(d->ht_size_exp[0])) ||
d->ht_used[0] * HASHTABLE_MIN_FILL <= DICTHT_SIZE(d->ht_size_exp[0])) ||
(dict_can_resize != DICT_RESIZE_FORBID &&
d->ht_used[0] * 100 * dict_force_resize_ratio <= HASHTABLE_MIN_FILL * DICTHT_SIZE(d->ht_size_exp[0])))
d->ht_used[0] * HASHTABLE_MIN_FILL * dict_force_resize_ratio <= DICTHT_SIZE(d->ht_size_exp[0])))
{
if (!dictTypeResizeAllowed(d, d->ht_used[0]))
return;
@ -1750,6 +1753,7 @@ int dictTest(int argc, char **argv, int flags) {
int retval;
dict *dict = dictCreate(&BenchmarkDictType);
long count = 0;
unsigned long new_dict_size, current_dict_used, remain_keys;
int accurate = (flags & REDIS_TEST_ACCURATE);
if (argc == 4) {
@ -1773,67 +1777,68 @@ int dictTest(int argc, char **argv, int flags) {
assert(dictBuckets(dict) == 16);
}
TEST("Use DICT_RESIZE_AVOID to disable the dict resize and pad to 80") {
TEST("Use DICT_RESIZE_AVOID to disable the dict resize and pad to (dict_force_resize_ratio * 16)") {
/* Use DICT_RESIZE_AVOID to disable the dict resize, and pad
* the number of keys to 80, now is 16:80, so we can satisfy
* dict_force_resize_ratio. */
* the number of keys to (dict_force_resize_ratio * 16), so we can satisfy
* dict_force_resize_ratio in next test. */
dictSetResizeEnabled(DICT_RESIZE_AVOID);
for (j = 16; j < 80; j++) {
for (j = 16; j < (long)dict_force_resize_ratio * 16; j++) {
retval = dictAdd(dict,stringFromLongLong(j),(void*)j);
assert(retval == DICT_OK);
}
assert(dictSize(dict) == 80);
current_dict_used = dict_force_resize_ratio * 16;
assert(dictSize(dict) == current_dict_used);
assert(dictBuckets(dict) == 16);
}
TEST("Add one more key, trigger the dict resize") {
retval = dictAdd(dict,stringFromLongLong(80),(void*)80);
retval = dictAdd(dict,stringFromLongLong(current_dict_used),(void*)(current_dict_used));
assert(retval == DICT_OK);
assert(dictSize(dict) == 81);
current_dict_used++;
new_dict_size = 1UL << _dictNextExp(current_dict_used);
assert(dictSize(dict) == current_dict_used);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == 16);
assert(DICTHT_SIZE(dict->ht_size_exp[1]) == 128);
assert(dictBuckets(dict) == 144);
assert(DICTHT_SIZE(dict->ht_size_exp[1]) == new_dict_size);
/* Wait for rehashing. */
dictSetResizeEnabled(DICT_RESIZE_ENABLE);
while (dictIsRehashing(dict)) dictRehashMicroseconds(dict,1000);
assert(dictSize(dict) == 81);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == 128);
assert(dictSize(dict) == current_dict_used);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == new_dict_size);
assert(DICTHT_SIZE(dict->ht_size_exp[1]) == 0);
assert(dictBuckets(dict) == 128);
}
TEST("Delete keys until 13 keys remain") {
/* Delete keys until 13 keys remain, now is 13:128, so we can
* satisfy HASHTABLE_MIN_FILL in the next test. */
for (j = 0; j < 68; j++) {
TEST("Delete keys until we can trigger shrink in next test") {
/* Delete keys until we can satisfy (1 / HASHTABLE_MIN_FILL) in the next test. */
for (j = new_dict_size / HASHTABLE_MIN_FILL + 1; j < (long)current_dict_used; j++) {
char *key = stringFromLongLong(j);
retval = dictDelete(dict, key);
zfree(key);
assert(retval == DICT_OK);
}
assert(dictSize(dict) == 13);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == 128);
current_dict_used = new_dict_size / HASHTABLE_MIN_FILL + 1;
assert(dictSize(dict) == current_dict_used);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == new_dict_size);
assert(DICTHT_SIZE(dict->ht_size_exp[1]) == 0);
assert(dictBuckets(dict) == 128);
}
TEST("Delete one more key, trigger the dict resize") {
char *key = stringFromLongLong(68);
current_dict_used--;
char *key = stringFromLongLong(current_dict_used);
retval = dictDelete(dict, key);
zfree(key);
unsigned long oldDictSize = new_dict_size;
new_dict_size = 1UL << _dictNextExp(current_dict_used);
assert(retval == DICT_OK);
assert(dictSize(dict) == 12);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == 128);
assert(DICTHT_SIZE(dict->ht_size_exp[1]) == 16);
assert(dictBuckets(dict) == 144);
assert(dictSize(dict) == current_dict_used);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == oldDictSize);
assert(DICTHT_SIZE(dict->ht_size_exp[1]) == new_dict_size);
/* Wait for rehashing. */
while (dictIsRehashing(dict)) dictRehashMicroseconds(dict,1000);
assert(dictSize(dict) == 12);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == 16);
assert(dictSize(dict) == current_dict_used);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == new_dict_size);
assert(DICTHT_SIZE(dict->ht_size_exp[1]) == 0);
assert(dictBuckets(dict) == 16);
}
TEST("Empty the dictionary and add 128 keys") {
@ -1849,36 +1854,37 @@ int dictTest(int argc, char **argv, int flags) {
TEST("Use DICT_RESIZE_AVOID to disable the dict resize and reduce to 3") {
/* Use DICT_RESIZE_AVOID to disable the dict reset, and reduce
* the number of keys to 3, now is 3:128, so we can satisfy
* HASHTABLE_MIN_FILL / dict_force_resize_ratio. */
* the number of keys until we can trigger shrinking in next test. */
dictSetResizeEnabled(DICT_RESIZE_AVOID);
for (j = 0; j < 125; j++) {
remain_keys = DICTHT_SIZE(dict->ht_size_exp[0]) / (HASHTABLE_MIN_FILL * dict_force_resize_ratio) + 1;
for (j = remain_keys; j < 128; j++) {
char *key = stringFromLongLong(j);
retval = dictDelete(dict, key);
zfree(key);
assert(retval == DICT_OK);
}
assert(dictSize(dict) == 3);
current_dict_used = remain_keys;
assert(dictSize(dict) == remain_keys);
assert(dictBuckets(dict) == 128);
}
TEST("Delete one more key, trigger the dict resize") {
char *key = stringFromLongLong(125);
current_dict_used--;
char *key = stringFromLongLong(current_dict_used);
retval = dictDelete(dict, key);
zfree(key);
new_dict_size = 1UL << _dictNextExp(current_dict_used);
assert(retval == DICT_OK);
assert(dictSize(dict) == 2);
assert(dictSize(dict) == current_dict_used);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == 128);
assert(DICTHT_SIZE(dict->ht_size_exp[1]) == 4);
assert(dictBuckets(dict) == 132);
assert(DICTHT_SIZE(dict->ht_size_exp[1]) == new_dict_size);
/* Wait for rehashing. */
dictSetResizeEnabled(DICT_RESIZE_ENABLE);
while (dictIsRehashing(dict)) dictRehashMicroseconds(dict,1000);
assert(dictSize(dict) == 2);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == 4);
assert(dictSize(dict) == current_dict_used);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == new_dict_size);
assert(DICTHT_SIZE(dict->ht_size_exp[1]) == 0);
assert(dictBuckets(dict) == 4);
}
TEST("Restore to original state") {

View File

@ -45,7 +45,7 @@
#define DICT_ERR 1
/* Hash table parameters */
#define HASHTABLE_MIN_FILL 10 /* Minimal hash table fill 10% */
#define HASHTABLE_MIN_FILL 8 /* Minimal hash table fill 12.5%(100/8) */
typedef struct dictEntry dictEntry; /* opaque */
typedef struct dict dict;

View File

@ -699,11 +699,11 @@ int htNeedsShrink(dict *dict) {
size = dictBuckets(dict);
used = dictSize(dict);
return (size > DICT_HT_INITIAL_SIZE &&
(used*100 <= HASHTABLE_MIN_FILL*size));
(used * HASHTABLE_MIN_FILL <= size));
}
/* In cluster-enabled setup, this method traverses through all main/expires dictionaries (CLUSTER_SLOTS)
* and triggers a resize if the percentage of used buckets in the HT reaches HASHTABLE_MIN_FILL
* and triggers a resize if the percentage of used buckets in the HT reaches (100 / HASHTABLE_MIN_FILL)
* we resize the hash table to save memory.
*
* In non cluster-enabled setup, it resize main/expires dictionary based on the same condition described above. */

View File

@ -439,8 +439,8 @@ start_cluster 1 0 {tags {"other external:skip cluster slow"}} {
r config set rdb-key-save-delay 10000000
r bgsave
# delete data to have lot's (98%) of empty buckets
for {set j 1} {$j <= 125} {incr j} {
# delete data to have lot's (96%) of empty buckets
for {set j 1} {$j <= 123} {incr j} {
r del "{foo}$j"
}
assert_match "*table size: 128*" [r debug HTSTATS 0]
@ -455,7 +455,7 @@ start_cluster 1 0 {tags {"other external:skip cluster slow"}} {
}
after 200;# waiting for serverCron
assert_match "*table size: 4*" [r debug HTSTATS 0]
assert_match "*table size: 8*" [r debug HTSTATS 0]
} {} {needs:debug}
test "Redis can rewind and trigger smaller slot resizing" {
@ -470,7 +470,7 @@ start_cluster 1 0 {tags {"other external:skip cluster slow"}} {
r config set rdb-key-save-delay 10000000
r bgsave
for {set j 1} {$j <= 125} {incr j} {
for {set j 1} {$j <= 123} {incr j} {
r del "{alice}$j"
}
@ -484,6 +484,6 @@ start_cluster 1 0 {tags {"other external:skip cluster slow"}} {
}
after 200;# waiting for serverCron
assert_match "*table size: 8*" [r debug HTSTATS 0]
assert_match "*table size: 16*" [r debug HTSTATS 0]
} {} {needs:debug}
}