Determine the large limit of the quicklist node based on fill (#12659)

Following #12568

In issue #9357, when inserting an element larger than 1GB, we currently
store it in a plain node instead of a listpack.
Presently, when we insert an element that exceeds the maximum size of a
packed node, it cannot be accommodated in any other nodes, thus ending
up isolated like a large element.
I.e. it's a node with only one element, but it's listpack encoded rather
than a plain buffer.

This PR lowers the threshold for considering an element as 'large' from
1GB to the maximum size of a node.
While this change doesn't completely resolve the bug mentioned in the
previous PR, it does mitigate its potential impact.

As a result of this change, we can now only use LSET to replace an
element with another element that falls below the maximum size
threshold.
In the worst-case scenario, with a fill of -5, the largest packed node
we can create is 2GB (32k * 64k):
* 32k: The smallest element in a listpack is 2 bytes, which allows us to
store up to 32k elements.
* 64k: This is the maximum size for a single quicklist node.

## Others
To fully fix #9357, we need more work, as discussed in #12568, when we
insert an element into a quicklistNode, it may be created in a new node,
put into another node, or merged, and we can't correctly delete the node
that was supposed to be deleted.
I'm not sure it's worth it, since it involves a lot of modifications.
This commit is contained in:
debing.sun 2024-02-22 16:02:38 +08:00 committed by GitHub
parent 820a4e45f1
commit 165afc5f2a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 127 additions and 120 deletions

View File

@ -857,7 +857,7 @@ NULL
{
int memerr;
unsigned long long sz = memtoull((const char *)c->argv[2]->ptr, &memerr);
if (memerr || !quicklistisSetPackedThreshold(sz)) {
if (memerr || !quicklistSetPackedThreshold(sz)) {
addReplyError(c, "argument must be a memory value bigger than 1 and smaller than 4gb");
} else {
addReply(c,shared.ok);

View File

@ -48,18 +48,14 @@
* just one byte, it still won't overflow the 16 bit count field. */
static const size_t optimization_level[] = {4096, 8192, 16384, 32768, 65536};
/* packed_threshold is initialized to 1gb*/
static size_t packed_threshold = (1 << 30);
/* This is for test suite development purposes only, 0 means disabled. */
static size_t packed_threshold = 0;
/* set threshold for PLAIN nodes, the real limit is 4gb */
#define isLargeElement(size) ((size) >= packed_threshold)
int quicklistisSetPackedThreshold(size_t sz) {
/* set threshold for PLAIN nodes for test suit, the real limit is based on `fill` */
int quicklistSetPackedThreshold(size_t sz) {
/* Don't allow threshold to be set above or even slightly below 4GB */
if (sz > (1ull<<32) - (1<<20)) {
return 0;
} else if (sz == 0) { /* 0 means restore threshold */
sz = (1 << 30);
}
packed_threshold = sz;
return 1;
@ -462,6 +458,15 @@ REDIS_STATIC void _quicklistInsertNodeAfter(quicklist *quicklist,
#define sizeMeetsSafetyLimit(sz) ((sz) <= SIZE_SAFETY_LIMIT)
/* Calculate the size limit of the quicklist node based on negative 'fill'. */
static size_t quicklistNodeNegFillLimit(int fill) {
assert(fill < 0);
size_t offset = (-fill) - 1;
size_t max_level = sizeof(optimization_level) / sizeof(*optimization_level);
if (offset >= max_level) offset = max_level - 1;
return optimization_level[offset];
}
/* Calculate the size limit or length limit of the quicklist node
* based on 'fill', and is also used to limit list listpack. */
void quicklistNodeLimit(int fill, size_t *size, unsigned int *count) {
@ -472,10 +477,7 @@ void quicklistNodeLimit(int fill, size_t *size, unsigned int *count) {
/* Ensure that one node have at least one entry */
*count = (fill == 0) ? 1 : fill;
} else {
size_t offset = (-fill) - 1;
size_t max_level = sizeof(optimization_level) / sizeof(*optimization_level);
if (offset >= max_level) offset = max_level - 1;
*size = optimization_level[offset];
*size = quicklistNodeNegFillLimit(fill);
}
}
@ -500,12 +502,23 @@ int quicklistNodeExceedsLimit(int fill, size_t new_sz, unsigned int new_count) {
redis_unreachable();
}
/* Determines whether a given size qualifies as a large element based on a threshold
* determined by the 'fill'. If the size is considered large, it will be stored in
* a plain node. */
static int isLargeElement(size_t sz, int fill) {
if (unlikely(packed_threshold != 0)) return sz >= packed_threshold;
if (fill >= 0)
return !sizeMeetsSafetyLimit(sz);
else
return sz > quicklistNodeNegFillLimit(fill);
}
REDIS_STATIC int _quicklistNodeAllowInsert(const quicklistNode *node,
const int fill, const size_t sz) {
if (unlikely(!node))
return 0;
if (unlikely(QL_NODE_IS_PLAIN(node) || isLargeElement(sz)))
if (unlikely(QL_NODE_IS_PLAIN(node) || isLargeElement(sz, fill)))
return 0;
/* Estimate how many bytes will be added to the listpack by this one entry.
@ -570,7 +583,7 @@ static void __quicklistInsertPlainNode(quicklist *quicklist, quicklistNode *old_
int quicklistPushHead(quicklist *quicklist, void *value, size_t sz) {
quicklistNode *orig_head = quicklist->head;
if (unlikely(isLargeElement(sz))) {
if (unlikely(isLargeElement(sz, quicklist->fill))) {
__quicklistInsertPlainNode(quicklist, quicklist->head, value, sz, 0);
return 1;
}
@ -597,7 +610,7 @@ int quicklistPushHead(quicklist *quicklist, void *value, size_t sz) {
* Returns 1 if new tail created. */
int quicklistPushTail(quicklist *quicklist, void *value, size_t sz) {
quicklistNode *orig_tail = quicklist->tail;
if (unlikely(isLargeElement(sz))) {
if (unlikely(isLargeElement(sz, quicklist->fill))) {
__quicklistInsertPlainNode(quicklist, quicklist->tail, value, sz, 1);
return 1;
}
@ -762,7 +775,7 @@ void quicklistReplaceEntry(quicklistIter *iter, quicklistEntry *entry,
quicklistNode *node = entry->node;
unsigned char *newentry;
if (likely(!QL_NODE_IS_PLAIN(entry->node) && !isLargeElement(sz) &&
if (likely(!QL_NODE_IS_PLAIN(entry->node) && !isLargeElement(sz, quicklist->fill) &&
(newentry = lpReplace(entry->node->entry, &entry->zi, data, sz)) != NULL))
{
entry->node->entry = newentry;
@ -770,7 +783,7 @@ void quicklistReplaceEntry(quicklistIter *iter, quicklistEntry *entry,
/* quicklistNext() and quicklistGetIteratorEntryAtIdx() provide an uncompressed node */
quicklistCompress(quicklist, entry->node);
} else if (QL_NODE_IS_PLAIN(entry->node)) {
if (isLargeElement(sz)) {
if (isLargeElement(sz, quicklist->fill)) {
zfree(entry->node->entry);
entry->node->entry = zmalloc(sz);
entry->node->sz = sz;
@ -790,7 +803,7 @@ void quicklistReplaceEntry(quicklistIter *iter, quicklistEntry *entry,
/* Create a new node and insert it after the original node.
* If the original node was split, insert the split node after the new node. */
new_node = __quicklistCreateNode(isLargeElement(sz) ?
new_node = __quicklistCreateNode(isLargeElement(sz, quicklist->fill) ?
QUICKLIST_NODE_CONTAINER_PLAIN : QUICKLIST_NODE_CONTAINER_PACKED, data, sz);
__quicklistInsertNode(quicklist, node, new_node, 1);
if (split_node) __quicklistInsertNode(quicklist, new_node, split_node, 1);
@ -1005,7 +1018,7 @@ REDIS_STATIC void _quicklistInsert(quicklistIter *iter, quicklistEntry *entry,
if (!node) {
/* we have no reference node, so let's create only node in the list */
D("No node given!");
if (unlikely(isLargeElement(sz))) {
if (unlikely(isLargeElement(sz, quicklist->fill))) {
__quicklistInsertPlainNode(quicklist, quicklist->tail, value, sz, after);
return;
}
@ -1042,7 +1055,7 @@ REDIS_STATIC void _quicklistInsert(quicklistIter *iter, quicklistEntry *entry,
}
}
if (unlikely(isLargeElement(sz))) {
if (unlikely(isLargeElement(sz, quicklist->fill))) {
if (QL_NODE_IS_PLAIN(node) || (at_tail && after) || (at_head && !after)) {
__quicklistInsertPlainNode(quicklist, node, value, sz, after);
} else {
@ -2107,20 +2120,23 @@ int quicklistTest(int argc, char *argv[], int flags) {
}
TEST("Comprassion Plain node") {
char buf[256];
quicklistisSetPackedThreshold(1);
quicklist *ql = quicklistNew(-2, 1);
for (int f = 0; f < fill_count; f++) {
size_t large_limit = (fills[f] < 0) ? quicklistNodeNegFillLimit(fills[f]) + 1 : SIZE_SAFETY_LIMIT + 1;
char buf[large_limit];
quicklist *ql = quicklistNew(fills[f], 1);
for (int i = 0; i < 500; i++) {
/* Set to 256 to allow the node to be triggered to compress,
* if it is less than 48(nocompress), the test will be successful. */
snprintf(buf, sizeof(buf), "hello%d", i);
quicklistPushHead(ql, buf, 256);
quicklistPushHead(ql, buf, large_limit);
}
quicklistIter *iter = quicklistGetIterator(ql, AL_START_TAIL);
quicklistEntry entry;
int i = 0;
while (quicklistNext(iter, &entry)) {
assert(QL_NODE_IS_PLAIN(entry.node));
snprintf(buf, sizeof(buf), "hello%d", i);
if (strcmp((char *)entry.value, buf))
ERR("value [%s] didn't match [%s] at position %d",
@ -2130,42 +2146,57 @@ int quicklistTest(int argc, char *argv[], int flags) {
ql_release_iterator(iter);
quicklistRelease(ql);
}
}
TEST("NEXT plain node")
{
packed_threshold = 3;
quicklist *ql = quicklistNew(-2, options[_i]);
char *strings[] = {"hello1", "hello2", "h3", "h4", "hello5"};
TEST("NEXT plain node") {
for (int f = 0; f < fill_count; f++) {
size_t large_limit = (fills[f] < 0) ? quicklistNodeNegFillLimit(fills[f]) + 1 : SIZE_SAFETY_LIMIT + 1;
quicklist *ql = quicklistNew(fills[f], options[_i]);
for (int i = 0; i < 5; ++i)
quicklistPushHead(ql, strings[i], strlen(strings[i]));
char buf[large_limit];
memcpy(buf, "plain", 5);
quicklistPushHead(ql, buf, large_limit);
quicklistPushHead(ql, buf, large_limit);
quicklistPushHead(ql, "packed3", 7);
quicklistPushHead(ql, "packed4", 7);
quicklistPushHead(ql, buf, large_limit);
quicklistEntry entry;
quicklistIter *iter = quicklistGetIterator(ql, AL_START_TAIL);
int j = 0;
while(quicklistNext(iter, &entry) != 0) {
assert(strncmp(strings[j], (char *)entry.value, strlen(strings[j])) == 0);
j++;
if (QL_NODE_IS_PLAIN(entry.node))
assert(!memcmp(entry.value, "plain", 5));
else
assert(!memcmp(entry.value, "packed", 6));
}
ql_release_iterator(iter);
quicklistRelease(ql);
}
}
TEST("rotate plain node ") {
for (int f = 0; f < fill_count; f++) {
size_t large_limit = (fills[f] < 0) ? quicklistNodeNegFillLimit(fills[f]) + 1 : SIZE_SAFETY_LIMIT + 1;
unsigned char *data = NULL;
size_t sz;
long long lv;
int i =0;
packed_threshold = 5;
quicklist *ql = quicklistNew(-2, options[_i]);
quicklistPushHead(ql, "hello1", 6);
quicklistPushHead(ql, "hello4", 6);
quicklistPushHead(ql, "hello3", 6);
quicklistPushHead(ql, "hello2", 6);
quicklist *ql = quicklistNew(fills[f], options[_i]);
char buf[large_limit];
memcpy(buf, "hello1", 6);
quicklistPushHead(ql, buf, large_limit);
memcpy(buf, "hello4", 6);
quicklistPushHead(ql, buf, large_limit);
memcpy(buf, "hello3", 6);
quicklistPushHead(ql, buf, large_limit);
memcpy(buf, "hello2", 6);
quicklistPushHead(ql, buf, large_limit);
quicklistRotate(ql);
for(i = 1 ; i < 5; i++) {
assert(QL_NODE_IS_PLAIN(ql->tail));
quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv);
int temp_char = data[5];
zfree(data);
@ -2174,7 +2205,7 @@ int quicklistTest(int argc, char *argv[], int flags) {
ql_verify(ql, 0, 0, 0, 0);
quicklistRelease(ql);
packed_threshold = (1 << 30);
}
}
TEST("rotate one val once") {

View File

@ -202,7 +202,7 @@ int quicklistBookmarkCreate(quicklist **ql_ref, const char *name, quicklistNode
int quicklistBookmarkDelete(quicklist *ql, const char *name);
quicklistNode *quicklistBookmarkFind(quicklist *ql, const char *name);
void quicklistBookmarksClear(quicklist *ql);
int quicklistisSetPackedThreshold(size_t sz);
int quicklistSetPackedThreshold(size_t sz);
#ifdef REDIS_TEST
int quicklistTest(int argc, char *argv[], int flags);

View File

@ -1,95 +1,54 @@
# check functionality compression of plain and zipped nodes
# check functionality compression of plain and packed nodes
start_server [list overrides [list save ""] ] {
r config set list-compress-depth 2
r config set list-max-ziplist-size 1
# 3 test to check compression with regular ziplist nodes
# 3 test to check compression with plain and packed nodes
# 1. using push + insert
# 2. using push + insert + trim
# 3. using push + insert + set
test {reg node check compression with insert and pop} {
r lpush list1 [string repeat a 500]
r lpush list1 [string repeat b 500]
r lpush list1 [string repeat c 500]
r lpush list1 [string repeat d 500]
r linsert list1 after [string repeat d 500] [string repeat e 500]
r linsert list1 after [string repeat d 500] [string repeat f 500]
r linsert list1 after [string repeat d 500] [string repeat g 500]
r linsert list1 after [string repeat d 500] [string repeat j 500]
assert_equal [r lpop list1] [string repeat d 500]
assert_equal [r lpop list1] [string repeat j 500]
assert_equal [r lpop list1] [string repeat g 500]
assert_equal [r lpop list1] [string repeat f 500]
assert_equal [r lpop list1] [string repeat e 500]
assert_equal [r lpop list1] [string repeat c 500]
assert_equal [r lpop list1] [string repeat b 500]
assert_equal [r lpop list1] [string repeat a 500]
foreach {container size} {packed 500 plain 8193} {
test "$container node check compression with insert and pop" {
r flushdb
r lpush list1 [string repeat a $size]
r lpush list1 [string repeat b $size]
r lpush list1 [string repeat c $size]
r lpush list1 [string repeat d $size]
r linsert list1 after [string repeat d $size] [string repeat e $size]
r linsert list1 after [string repeat d $size] [string repeat f $size]
r linsert list1 after [string repeat d $size] [string repeat g $size]
r linsert list1 after [string repeat d $size] [string repeat j $size]
assert_equal [r lpop list1] [string repeat d $size]
assert_equal [r lpop list1] [string repeat j $size]
assert_equal [r lpop list1] [string repeat g $size]
assert_equal [r lpop list1] [string repeat f $size]
assert_equal [r lpop list1] [string repeat e $size]
assert_equal [r lpop list1] [string repeat c $size]
assert_equal [r lpop list1] [string repeat b $size]
assert_equal [r lpop list1] [string repeat a $size]
};
test {reg node check compression combined with trim} {
r lpush list2 [string repeat a 500]
r linsert list2 after [string repeat a 500] [string repeat b 500]
r rpush list2 [string repeat c 500]
assert_equal [string repeat b 500] [r lindex list2 1]
test "$container node check compression combined with trim" {
r flushdb
r lpush list2 [string repeat a $size]
r linsert list2 after [string repeat a $size] [string repeat b $size]
r rpush list2 [string repeat c $size]
assert_equal [string repeat b $size] [r lindex list2 1]
r LTRIM list2 1 -1
r llen list2
} {2}
test {reg node check compression with lset} {
r lpush list3 [string repeat a 500]
r LSET list3 0 [string repeat b 500]
assert_equal [string repeat b 500] [r lindex list3 0]
r lpush list3 [string repeat c 500]
r LSET list3 0 [string repeat d 500]
assert_equal [string repeat d 500] [r lindex list3 0]
test "$container node check compression with lset" {
r flushdb
r lpush list3 [string repeat a $size]
r LSET list3 0 [string repeat b $size]
assert_equal [string repeat b $size] [r lindex list3 0]
r lpush list3 [string repeat c $size]
r LSET list3 0 [string repeat d $size]
assert_equal [string repeat d $size] [r lindex list3 0]
}
# repeating the 3 tests with plain nodes
# (by adjusting quicklist-packed-threshold)
test {plain node check compression} {
r debug quicklist-packed-threshold 1b
r lpush list4 [string repeat a 500]
r lpush list4 [string repeat b 500]
r lpush list4 [string repeat c 500]
r lpush list4 [string repeat d 500]
r linsert list4 after [string repeat d 500] [string repeat e 500]
r linsert list4 after [string repeat d 500] [string repeat f 500]
r linsert list4 after [string repeat d 500] [string repeat g 500]
r linsert list4 after [string repeat d 500] [string repeat j 500]
assert_equal [r lpop list4] [string repeat d 500]
assert_equal [r lpop list4] [string repeat j 500]
assert_equal [r lpop list4] [string repeat g 500]
assert_equal [r lpop list4] [string repeat f 500]
assert_equal [r lpop list4] [string repeat e 500]
assert_equal [r lpop list4] [string repeat c 500]
assert_equal [r lpop list4] [string repeat b 500]
assert_equal [r lpop list4] [string repeat a 500]
r debug quicklist-packed-threshold 0
} {OK} {needs:debug}
test {plain node check compression with ltrim} {
r debug quicklist-packed-threshold 1b
r lpush list5 [string repeat a 500]
r linsert list5 after [string repeat a 500] [string repeat b 500]
r rpush list5 [string repeat c 500]
assert_equal [string repeat b 500] [r lindex list5 1]
r LTRIM list5 1 -1
assert_equal [r llen list5] 2
r debug quicklist-packed-threshold 0
} {OK} {needs:debug}
test {plain node check compression using lset} {
r debug quicklist-packed-threshold 1b
r lpush list6 [string repeat a 500]
r LSET list6 0 [string repeat b 500]
assert_equal [string repeat b 500] [r lindex list6 0]
r lpush list6 [string repeat c 500]
r LSET list6 0 [string repeat d 500]
assert_equal [string repeat d 500] [r lindex list6 0]
r debug quicklist-packed-threshold 0
} {OK} {needs:debug}
} ;# foreach
# revert config for external mode tests.
r config set list-compress-depth 0
@ -97,6 +56,13 @@ start_server [list overrides [list save ""] ] {
# check functionality of plain nodes using low packed-threshold
start_server [list overrides [list save ""] ] {
foreach type {listpack quicklist} {
if {$type eq "listpack"} {
r config set list-max-listpack-size -2
} else {
r config set list-max-listpack-size 1
}
# basic command check for plain nodes - "LPUSH & LPOP"
test {Test LPUSH and LPOP on plain nodes} {
r flushdb
@ -104,6 +70,7 @@ start_server [list overrides [list save ""] ] {
r lpush lst 9
r lpush lst xxxxxxxxxx
r lpush lst xxxxxxxxxx
assert_encoding $type lst
set s0 [s used_memory]
assert {$s0 > 10}
assert {[r llen lst] == 3}
@ -128,6 +95,7 @@ start_server [list overrides [list save ""] ] {
r lpush lst xxxxxxxxxxx
r lpush lst 9
r lpush lst xxxxxxxxxxx
assert_encoding $type lst
r linsert lst before "9" "8"
assert {[r lindex lst 1] eq "8"}
r linsert lst BEFORE "9" "7"
@ -143,6 +111,7 @@ start_server [list overrides [list save ""] ] {
r lpush lst1 9
r lpush lst1 xxxxxxxxxxx
r lpush lst1 9
assert_encoding $type lst1
r LTRIM lst1 1 -1
assert_equal [r llen lst1] 2
r debug quicklist-packed-threshold 0
@ -154,6 +123,7 @@ start_server [list overrides [list save ""] ] {
r debug quicklist-packed-threshold 1b
r lpush lst one
r lpush lst xxxxxxxxxxx
assert_encoding $type lst
set s0 [s used_memory]
assert {$s0 > 10}
r lpush lst 9
@ -169,6 +139,7 @@ start_server [list overrides [list save ""] ] {
r RPUSH lst "aa"
r RPUSH lst "bb"
r RPUSH lst "cc"
assert_encoding $type lst
r LSET lst 0 "xxxxxxxxxxx"
assert_equal [r LPOS lst "xxxxxxxxxxx"] 0
r debug quicklist-packed-threshold 0
@ -180,6 +151,7 @@ start_server [list overrides [list save ""] ] {
r debug quicklist-packed-threshold 1b
r RPUSH lst2{t} "aa"
r RPUSH lst2{t} "bb"
assert_encoding $type lst2{t}
r LSET lst2{t} 0 xxxxxxxxxxx
r RPUSH lst2{t} "cc"
r RPUSH lst2{t} "dd"
@ -200,6 +172,7 @@ start_server [list overrides [list save ""] ] {
r debug quicklist-packed-threshold 5b
r RPUSH lst "aa"
r RPUSH lst "bb"
assert_encoding $type lst
r lset lst 0 [string repeat d 50001]
set s1 [r lpop lst]
assert_equal $s1 [string repeat d 50001]
@ -281,6 +254,7 @@ start_server [list overrides [list save ""] ] {
# Insert two elements and keep them in the same node
r RPUSH lst $small_ele
r RPUSH lst $small_ele
assert_encoding $type lst
# When setting the position of -1 to a large element, we first insert
# a large element at the end and then delete its previous element.
@ -297,6 +271,7 @@ start_server [list overrides [list save ""] ] {
r LPUSH lst "aa"
r LPUSH lst "bb"
assert_encoding $type lst
r LSET lst -2 [string repeat x 10]
r RPOP lst
assert_equal [string repeat x 10] [r LRANGE lst 0 -1]
@ -304,6 +279,7 @@ start_server [list overrides [list save ""] ] {
r debug quicklist-packed-threshold 0
} {OK} {needs:debug}
}
}
run_solo {list-large-memory} {
start_server [list overrides [list save ""] ] {