From 165afc5f2a2bc8ef18af85a6e5cb345c7af614ec Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Thu, 22 Feb 2024 16:02:38 +0800 Subject: [PATCH] Determine the large limit of the quicklist node based on fill (#12659) Following #12568 In issue #9357, when inserting an element larger than 1GB, we currently store it in a plain node instead of a listpack. Presently, when we insert an element that exceeds the maximum size of a packed node, it cannot be accommodated in any other nodes, thus ending up isolated like a large element. I.e. it's a node with only one element, but it's listpack encoded rather than a plain buffer. This PR lowers the threshold for considering an element as 'large' from 1GB to the maximum size of a node. While this change doesn't completely resolve the bug mentioned in the previous PR, it does mitigate its potential impact. As a result of this change, we can now only use LSET to replace an element with another element that falls below the maximum size threshold. In the worst-case scenario, with a fill of -5, the largest packed node we can create is 2GB (32k * 64k): * 32k: The smallest element in a listpack is 2 bytes, which allows us to store up to 32k elements. * 64k: This is the maximum size for a single quicklist node. ## Others To fully fix #9357, we need more work, as discussed in #12568, when we insert an element into a quicklistNode, it may be created in a new node, put into another node, or merged, and we can't correctly delete the node that was supposed to be deleted. I'm not sure it's worth it, since it involves a lot of modifications. --- src/debug.c | 2 +- src/quicklist.c | 113 ++++++++++++++++++++++------------ src/quicklist.h | 2 +- tests/unit/type/list.tcl | 130 ++++++++++++++++----------------------- 4 files changed, 127 insertions(+), 120 deletions(-) diff --git a/src/debug.c b/src/debug.c index fcd62855c..c1327ed48 100644 --- a/src/debug.c +++ b/src/debug.c @@ -857,7 +857,7 @@ NULL { int memerr; unsigned long long sz = memtoull((const char *)c->argv[2]->ptr, &memerr); - if (memerr || !quicklistisSetPackedThreshold(sz)) { + if (memerr || !quicklistSetPackedThreshold(sz)) { addReplyError(c, "argument must be a memory value bigger than 1 and smaller than 4gb"); } else { addReply(c,shared.ok); diff --git a/src/quicklist.c b/src/quicklist.c index a08431bff..3776c9b85 100644 --- a/src/quicklist.c +++ b/src/quicklist.c @@ -48,18 +48,14 @@ * just one byte, it still won't overflow the 16 bit count field. */ static const size_t optimization_level[] = {4096, 8192, 16384, 32768, 65536}; -/* packed_threshold is initialized to 1gb*/ -static size_t packed_threshold = (1 << 30); +/* This is for test suite development purposes only, 0 means disabled. */ +static size_t packed_threshold = 0; -/* set threshold for PLAIN nodes, the real limit is 4gb */ -#define isLargeElement(size) ((size) >= packed_threshold) - -int quicklistisSetPackedThreshold(size_t sz) { +/* set threshold for PLAIN nodes for test suit, the real limit is based on `fill` */ +int quicklistSetPackedThreshold(size_t sz) { /* Don't allow threshold to be set above or even slightly below 4GB */ if (sz > (1ull<<32) - (1<<20)) { return 0; - } else if (sz == 0) { /* 0 means restore threshold */ - sz = (1 << 30); } packed_threshold = sz; return 1; @@ -462,6 +458,15 @@ REDIS_STATIC void _quicklistInsertNodeAfter(quicklist *quicklist, #define sizeMeetsSafetyLimit(sz) ((sz) <= SIZE_SAFETY_LIMIT) +/* Calculate the size limit of the quicklist node based on negative 'fill'. */ +static size_t quicklistNodeNegFillLimit(int fill) { + assert(fill < 0); + size_t offset = (-fill) - 1; + size_t max_level = sizeof(optimization_level) / sizeof(*optimization_level); + if (offset >= max_level) offset = max_level - 1; + return optimization_level[offset]; +} + /* Calculate the size limit or length limit of the quicklist node * based on 'fill', and is also used to limit list listpack. */ void quicklistNodeLimit(int fill, size_t *size, unsigned int *count) { @@ -472,10 +477,7 @@ void quicklistNodeLimit(int fill, size_t *size, unsigned int *count) { /* Ensure that one node have at least one entry */ *count = (fill == 0) ? 1 : fill; } else { - size_t offset = (-fill) - 1; - size_t max_level = sizeof(optimization_level) / sizeof(*optimization_level); - if (offset >= max_level) offset = max_level - 1; - *size = optimization_level[offset]; + *size = quicklistNodeNegFillLimit(fill); } } @@ -500,12 +502,23 @@ int quicklistNodeExceedsLimit(int fill, size_t new_sz, unsigned int new_count) { redis_unreachable(); } +/* Determines whether a given size qualifies as a large element based on a threshold + * determined by the 'fill'. If the size is considered large, it will be stored in + * a plain node. */ +static int isLargeElement(size_t sz, int fill) { + if (unlikely(packed_threshold != 0)) return sz >= packed_threshold; + if (fill >= 0) + return !sizeMeetsSafetyLimit(sz); + else + return sz > quicklistNodeNegFillLimit(fill); +} + REDIS_STATIC int _quicklistNodeAllowInsert(const quicklistNode *node, const int fill, const size_t sz) { if (unlikely(!node)) return 0; - if (unlikely(QL_NODE_IS_PLAIN(node) || isLargeElement(sz))) + if (unlikely(QL_NODE_IS_PLAIN(node) || isLargeElement(sz, fill))) return 0; /* Estimate how many bytes will be added to the listpack by this one entry. @@ -570,7 +583,7 @@ static void __quicklistInsertPlainNode(quicklist *quicklist, quicklistNode *old_ int quicklistPushHead(quicklist *quicklist, void *value, size_t sz) { quicklistNode *orig_head = quicklist->head; - if (unlikely(isLargeElement(sz))) { + if (unlikely(isLargeElement(sz, quicklist->fill))) { __quicklistInsertPlainNode(quicklist, quicklist->head, value, sz, 0); return 1; } @@ -597,7 +610,7 @@ int quicklistPushHead(quicklist *quicklist, void *value, size_t sz) { * Returns 1 if new tail created. */ int quicklistPushTail(quicklist *quicklist, void *value, size_t sz) { quicklistNode *orig_tail = quicklist->tail; - if (unlikely(isLargeElement(sz))) { + if (unlikely(isLargeElement(sz, quicklist->fill))) { __quicklistInsertPlainNode(quicklist, quicklist->tail, value, sz, 1); return 1; } @@ -762,7 +775,7 @@ void quicklistReplaceEntry(quicklistIter *iter, quicklistEntry *entry, quicklistNode *node = entry->node; unsigned char *newentry; - if (likely(!QL_NODE_IS_PLAIN(entry->node) && !isLargeElement(sz) && + if (likely(!QL_NODE_IS_PLAIN(entry->node) && !isLargeElement(sz, quicklist->fill) && (newentry = lpReplace(entry->node->entry, &entry->zi, data, sz)) != NULL)) { entry->node->entry = newentry; @@ -770,7 +783,7 @@ void quicklistReplaceEntry(quicklistIter *iter, quicklistEntry *entry, /* quicklistNext() and quicklistGetIteratorEntryAtIdx() provide an uncompressed node */ quicklistCompress(quicklist, entry->node); } else if (QL_NODE_IS_PLAIN(entry->node)) { - if (isLargeElement(sz)) { + if (isLargeElement(sz, quicklist->fill)) { zfree(entry->node->entry); entry->node->entry = zmalloc(sz); entry->node->sz = sz; @@ -790,7 +803,7 @@ void quicklistReplaceEntry(quicklistIter *iter, quicklistEntry *entry, /* Create a new node and insert it after the original node. * If the original node was split, insert the split node after the new node. */ - new_node = __quicklistCreateNode(isLargeElement(sz) ? + new_node = __quicklistCreateNode(isLargeElement(sz, quicklist->fill) ? QUICKLIST_NODE_CONTAINER_PLAIN : QUICKLIST_NODE_CONTAINER_PACKED, data, sz); __quicklistInsertNode(quicklist, node, new_node, 1); if (split_node) __quicklistInsertNode(quicklist, new_node, split_node, 1); @@ -1005,7 +1018,7 @@ REDIS_STATIC void _quicklistInsert(quicklistIter *iter, quicklistEntry *entry, if (!node) { /* we have no reference node, so let's create only node in the list */ D("No node given!"); - if (unlikely(isLargeElement(sz))) { + if (unlikely(isLargeElement(sz, quicklist->fill))) { __quicklistInsertPlainNode(quicklist, quicklist->tail, value, sz, after); return; } @@ -1042,7 +1055,7 @@ REDIS_STATIC void _quicklistInsert(quicklistIter *iter, quicklistEntry *entry, } } - if (unlikely(isLargeElement(sz))) { + if (unlikely(isLargeElement(sz, quicklist->fill))) { if (QL_NODE_IS_PLAIN(node) || (at_tail && after) || (at_head && !after)) { __quicklistInsertPlainNode(quicklist, node, value, sz, after); } else { @@ -2107,20 +2120,23 @@ int quicklistTest(int argc, char *argv[], int flags) { } TEST("Comprassion Plain node") { - char buf[256]; - quicklistisSetPackedThreshold(1); - quicklist *ql = quicklistNew(-2, 1); + for (int f = 0; f < fill_count; f++) { + size_t large_limit = (fills[f] < 0) ? quicklistNodeNegFillLimit(fills[f]) + 1 : SIZE_SAFETY_LIMIT + 1; + + char buf[large_limit]; + quicklist *ql = quicklistNew(fills[f], 1); for (int i = 0; i < 500; i++) { /* Set to 256 to allow the node to be triggered to compress, * if it is less than 48(nocompress), the test will be successful. */ snprintf(buf, sizeof(buf), "hello%d", i); - quicklistPushHead(ql, buf, 256); + quicklistPushHead(ql, buf, large_limit); } quicklistIter *iter = quicklistGetIterator(ql, AL_START_TAIL); quicklistEntry entry; int i = 0; while (quicklistNext(iter, &entry)) { + assert(QL_NODE_IS_PLAIN(entry.node)); snprintf(buf, sizeof(buf), "hello%d", i); if (strcmp((char *)entry.value, buf)) ERR("value [%s] didn't match [%s] at position %d", @@ -2130,42 +2146,57 @@ int quicklistTest(int argc, char *argv[], int flags) { ql_release_iterator(iter); quicklistRelease(ql); } + } - TEST("NEXT plain node") - { - packed_threshold = 3; - quicklist *ql = quicklistNew(-2, options[_i]); - char *strings[] = {"hello1", "hello2", "h3", "h4", "hello5"}; + TEST("NEXT plain node") { + for (int f = 0; f < fill_count; f++) { + size_t large_limit = (fills[f] < 0) ? quicklistNodeNegFillLimit(fills[f]) + 1 : SIZE_SAFETY_LIMIT + 1; + quicklist *ql = quicklistNew(fills[f], options[_i]); - for (int i = 0; i < 5; ++i) - quicklistPushHead(ql, strings[i], strlen(strings[i])); + char buf[large_limit]; + memcpy(buf, "plain", 5); + quicklistPushHead(ql, buf, large_limit); + quicklistPushHead(ql, buf, large_limit); + quicklistPushHead(ql, "packed3", 7); + quicklistPushHead(ql, "packed4", 7); + quicklistPushHead(ql, buf, large_limit); quicklistEntry entry; quicklistIter *iter = quicklistGetIterator(ql, AL_START_TAIL); - int j = 0; while(quicklistNext(iter, &entry) != 0) { - assert(strncmp(strings[j], (char *)entry.value, strlen(strings[j])) == 0); - j++; + if (QL_NODE_IS_PLAIN(entry.node)) + assert(!memcmp(entry.value, "plain", 5)); + else + assert(!memcmp(entry.value, "packed", 6)); } ql_release_iterator(iter); quicklistRelease(ql); } + } TEST("rotate plain node ") { + for (int f = 0; f < fill_count; f++) { + size_t large_limit = (fills[f] < 0) ? quicklistNodeNegFillLimit(fills[f]) + 1 : SIZE_SAFETY_LIMIT + 1; + unsigned char *data = NULL; size_t sz; long long lv; int i =0; - packed_threshold = 5; - quicklist *ql = quicklistNew(-2, options[_i]); - quicklistPushHead(ql, "hello1", 6); - quicklistPushHead(ql, "hello4", 6); - quicklistPushHead(ql, "hello3", 6); - quicklistPushHead(ql, "hello2", 6); + quicklist *ql = quicklistNew(fills[f], options[_i]); + char buf[large_limit]; + memcpy(buf, "hello1", 6); + quicklistPushHead(ql, buf, large_limit); + memcpy(buf, "hello4", 6); + quicklistPushHead(ql, buf, large_limit); + memcpy(buf, "hello3", 6); + quicklistPushHead(ql, buf, large_limit); + memcpy(buf, "hello2", 6); + quicklistPushHead(ql, buf, large_limit); quicklistRotate(ql); for(i = 1 ; i < 5; i++) { + assert(QL_NODE_IS_PLAIN(ql->tail)); quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv); int temp_char = data[5]; zfree(data); @@ -2174,7 +2205,7 @@ int quicklistTest(int argc, char *argv[], int flags) { ql_verify(ql, 0, 0, 0, 0); quicklistRelease(ql); - packed_threshold = (1 << 30); + } } TEST("rotate one val once") { diff --git a/src/quicklist.h b/src/quicklist.h index e17bc1f57..c4b07e0c0 100644 --- a/src/quicklist.h +++ b/src/quicklist.h @@ -202,7 +202,7 @@ int quicklistBookmarkCreate(quicklist **ql_ref, const char *name, quicklistNode int quicklistBookmarkDelete(quicklist *ql, const char *name); quicklistNode *quicklistBookmarkFind(quicklist *ql, const char *name); void quicklistBookmarksClear(quicklist *ql); -int quicklistisSetPackedThreshold(size_t sz); +int quicklistSetPackedThreshold(size_t sz); #ifdef REDIS_TEST int quicklistTest(int argc, char *argv[], int flags); diff --git a/tests/unit/type/list.tcl b/tests/unit/type/list.tcl index 2344b26fd..5ea62cb91 100644 --- a/tests/unit/type/list.tcl +++ b/tests/unit/type/list.tcl @@ -1,95 +1,54 @@ -# check functionality compression of plain and zipped nodes +# check functionality compression of plain and packed nodes start_server [list overrides [list save ""] ] { r config set list-compress-depth 2 r config set list-max-ziplist-size 1 - # 3 test to check compression with regular ziplist nodes + # 3 test to check compression with plain and packed nodes # 1. using push + insert # 2. using push + insert + trim # 3. using push + insert + set - test {reg node check compression with insert and pop} { - r lpush list1 [string repeat a 500] - r lpush list1 [string repeat b 500] - r lpush list1 [string repeat c 500] - r lpush list1 [string repeat d 500] - r linsert list1 after [string repeat d 500] [string repeat e 500] - r linsert list1 after [string repeat d 500] [string repeat f 500] - r linsert list1 after [string repeat d 500] [string repeat g 500] - r linsert list1 after [string repeat d 500] [string repeat j 500] - assert_equal [r lpop list1] [string repeat d 500] - assert_equal [r lpop list1] [string repeat j 500] - assert_equal [r lpop list1] [string repeat g 500] - assert_equal [r lpop list1] [string repeat f 500] - assert_equal [r lpop list1] [string repeat e 500] - assert_equal [r lpop list1] [string repeat c 500] - assert_equal [r lpop list1] [string repeat b 500] - assert_equal [r lpop list1] [string repeat a 500] + foreach {container size} {packed 500 plain 8193} { + test "$container node check compression with insert and pop" { + r flushdb + r lpush list1 [string repeat a $size] + r lpush list1 [string repeat b $size] + r lpush list1 [string repeat c $size] + r lpush list1 [string repeat d $size] + r linsert list1 after [string repeat d $size] [string repeat e $size] + r linsert list1 after [string repeat d $size] [string repeat f $size] + r linsert list1 after [string repeat d $size] [string repeat g $size] + r linsert list1 after [string repeat d $size] [string repeat j $size] + assert_equal [r lpop list1] [string repeat d $size] + assert_equal [r lpop list1] [string repeat j $size] + assert_equal [r lpop list1] [string repeat g $size] + assert_equal [r lpop list1] [string repeat f $size] + assert_equal [r lpop list1] [string repeat e $size] + assert_equal [r lpop list1] [string repeat c $size] + assert_equal [r lpop list1] [string repeat b $size] + assert_equal [r lpop list1] [string repeat a $size] }; - test {reg node check compression combined with trim} { - r lpush list2 [string repeat a 500] - r linsert list2 after [string repeat a 500] [string repeat b 500] - r rpush list2 [string repeat c 500] - assert_equal [string repeat b 500] [r lindex list2 1] + test "$container node check compression combined with trim" { + r flushdb + r lpush list2 [string repeat a $size] + r linsert list2 after [string repeat a $size] [string repeat b $size] + r rpush list2 [string repeat c $size] + assert_equal [string repeat b $size] [r lindex list2 1] r LTRIM list2 1 -1 r llen list2 } {2} - test {reg node check compression with lset} { - r lpush list3 [string repeat a 500] - r LSET list3 0 [string repeat b 500] - assert_equal [string repeat b 500] [r lindex list3 0] - r lpush list3 [string repeat c 500] - r LSET list3 0 [string repeat d 500] - assert_equal [string repeat d 500] [r lindex list3 0] + test "$container node check compression with lset" { + r flushdb + r lpush list3 [string repeat a $size] + r LSET list3 0 [string repeat b $size] + assert_equal [string repeat b $size] [r lindex list3 0] + r lpush list3 [string repeat c $size] + r LSET list3 0 [string repeat d $size] + assert_equal [string repeat d $size] [r lindex list3 0] } - - # repeating the 3 tests with plain nodes - # (by adjusting quicklist-packed-threshold) - - test {plain node check compression} { - r debug quicklist-packed-threshold 1b - r lpush list4 [string repeat a 500] - r lpush list4 [string repeat b 500] - r lpush list4 [string repeat c 500] - r lpush list4 [string repeat d 500] - r linsert list4 after [string repeat d 500] [string repeat e 500] - r linsert list4 after [string repeat d 500] [string repeat f 500] - r linsert list4 after [string repeat d 500] [string repeat g 500] - r linsert list4 after [string repeat d 500] [string repeat j 500] - assert_equal [r lpop list4] [string repeat d 500] - assert_equal [r lpop list4] [string repeat j 500] - assert_equal [r lpop list4] [string repeat g 500] - assert_equal [r lpop list4] [string repeat f 500] - assert_equal [r lpop list4] [string repeat e 500] - assert_equal [r lpop list4] [string repeat c 500] - assert_equal [r lpop list4] [string repeat b 500] - assert_equal [r lpop list4] [string repeat a 500] - r debug quicklist-packed-threshold 0 - } {OK} {needs:debug} - - test {plain node check compression with ltrim} { - r debug quicklist-packed-threshold 1b - r lpush list5 [string repeat a 500] - r linsert list5 after [string repeat a 500] [string repeat b 500] - r rpush list5 [string repeat c 500] - assert_equal [string repeat b 500] [r lindex list5 1] - r LTRIM list5 1 -1 - assert_equal [r llen list5] 2 - r debug quicklist-packed-threshold 0 - } {OK} {needs:debug} - - test {plain node check compression using lset} { - r debug quicklist-packed-threshold 1b - r lpush list6 [string repeat a 500] - r LSET list6 0 [string repeat b 500] - assert_equal [string repeat b 500] [r lindex list6 0] - r lpush list6 [string repeat c 500] - r LSET list6 0 [string repeat d 500] - assert_equal [string repeat d 500] [r lindex list6 0] - r debug quicklist-packed-threshold 0 - } {OK} {needs:debug} + } ;# foreach # revert config for external mode tests. r config set list-compress-depth 0 @@ -97,6 +56,13 @@ start_server [list overrides [list save ""] ] { # check functionality of plain nodes using low packed-threshold start_server [list overrides [list save ""] ] { +foreach type {listpack quicklist} { + if {$type eq "listpack"} { + r config set list-max-listpack-size -2 + } else { + r config set list-max-listpack-size 1 + } + # basic command check for plain nodes - "LPUSH & LPOP" test {Test LPUSH and LPOP on plain nodes} { r flushdb @@ -104,6 +70,7 @@ start_server [list overrides [list save ""] ] { r lpush lst 9 r lpush lst xxxxxxxxxx r lpush lst xxxxxxxxxx + assert_encoding $type lst set s0 [s used_memory] assert {$s0 > 10} assert {[r llen lst] == 3} @@ -128,6 +95,7 @@ start_server [list overrides [list save ""] ] { r lpush lst xxxxxxxxxxx r lpush lst 9 r lpush lst xxxxxxxxxxx + assert_encoding $type lst r linsert lst before "9" "8" assert {[r lindex lst 1] eq "8"} r linsert lst BEFORE "9" "7" @@ -143,6 +111,7 @@ start_server [list overrides [list save ""] ] { r lpush lst1 9 r lpush lst1 xxxxxxxxxxx r lpush lst1 9 + assert_encoding $type lst1 r LTRIM lst1 1 -1 assert_equal [r llen lst1] 2 r debug quicklist-packed-threshold 0 @@ -154,6 +123,7 @@ start_server [list overrides [list save ""] ] { r debug quicklist-packed-threshold 1b r lpush lst one r lpush lst xxxxxxxxxxx + assert_encoding $type lst set s0 [s used_memory] assert {$s0 > 10} r lpush lst 9 @@ -169,6 +139,7 @@ start_server [list overrides [list save ""] ] { r RPUSH lst "aa" r RPUSH lst "bb" r RPUSH lst "cc" + assert_encoding $type lst r LSET lst 0 "xxxxxxxxxxx" assert_equal [r LPOS lst "xxxxxxxxxxx"] 0 r debug quicklist-packed-threshold 0 @@ -180,6 +151,7 @@ start_server [list overrides [list save ""] ] { r debug quicklist-packed-threshold 1b r RPUSH lst2{t} "aa" r RPUSH lst2{t} "bb" + assert_encoding $type lst2{t} r LSET lst2{t} 0 xxxxxxxxxxx r RPUSH lst2{t} "cc" r RPUSH lst2{t} "dd" @@ -200,6 +172,7 @@ start_server [list overrides [list save ""] ] { r debug quicklist-packed-threshold 5b r RPUSH lst "aa" r RPUSH lst "bb" + assert_encoding $type lst r lset lst 0 [string repeat d 50001] set s1 [r lpop lst] assert_equal $s1 [string repeat d 50001] @@ -281,6 +254,7 @@ start_server [list overrides [list save ""] ] { # Insert two elements and keep them in the same node r RPUSH lst $small_ele r RPUSH lst $small_ele + assert_encoding $type lst # When setting the position of -1 to a large element, we first insert # a large element at the end and then delete its previous element. @@ -297,6 +271,7 @@ start_server [list overrides [list save ""] ] { r LPUSH lst "aa" r LPUSH lst "bb" + assert_encoding $type lst r LSET lst -2 [string repeat x 10] r RPOP lst assert_equal [string repeat x 10] [r LRANGE lst 0 -1] @@ -304,6 +279,7 @@ start_server [list overrides [list save ""] ] { r debug quicklist-packed-threshold 0 } {OK} {needs:debug} } +} run_solo {list-large-memory} { start_server [list overrides [list save ""] ] {