From 985430b4fca5ac55b121a98ac0407909c6767530 Mon Sep 17 00:00:00 2001 From: sundb Date: Tue, 16 Nov 2021 19:12:25 +0800 Subject: [PATCH] Change lzf to handle values larger than UINT32_MAX (#9776) Redis supports inserting data over 4GB into string (and recently for lists too, see #9357), But LZF compression used in RDB files (see `rdbcompression` config), and in quicklist (see `list-compress-depth` config) does not support compress/decompress data over UINT32_MAX, which will result in corrupting the rdb after compression. Internal changes: 1. Modify the `unsigned int` parameter of `lzf_compress/lzf_decompress` to `size_t`. 2. Modify the variable types in `lzf_compress` involving offsets and lengths to `size_t`. 3. Set LZF_USE_OFFSETS to 0. When LZF_USE_OFFSETS is 1, lzf store offset into `LZF_HSLOT`(32bit). Even in 64-bit, `LZF_USE_OFFSETS` defaults to 1, because lzf assumes that it only compresses and decompresses data smaller than UINT32_MAX. But now we need to make lzf support 64-bit, turning on `LZF_USE_OFFSETS` will make it impossible to store 64-bit offsets or pointers. BTW, disable LZF_USE_OFFSETS also brings a few performance improvements. Tests: 1. Add test for compress/decompress string large than UINT32_MAX. 2. Add unittest for compress/decompress quicklistNode. --- src/lzf.h | 12 +++---- src/lzfP.h | 5 +-- src/lzf_c.c | 13 ++++---- src/lzf_d.c | 6 ++-- src/quicklist.c | 76 +++++++++++++++++++++++++++++++++++++++++++ tests/unit/bitops.tcl | 14 ++++++++ 6 files changed, 109 insertions(+), 17 deletions(-) diff --git a/src/lzf.h b/src/lzf.h index 98e038f31..45ddfa8da 100644 --- a/src/lzf.h +++ b/src/lzf.h @@ -73,9 +73,9 @@ * and lzf_c.c. * */ -unsigned int -lzf_compress (const void *const in_data, unsigned int in_len, - void *out_data, unsigned int out_len); +size_t +lzf_compress (const void *const in_data, size_t in_len, + void *out_data, size_t out_len); /* * Decompress data compressed with some version of the lzf_compress @@ -92,9 +92,9 @@ lzf_compress (const void *const in_data, unsigned int in_len, * * This function is very fast, about as fast as a copying loop. */ -unsigned int -lzf_decompress (const void *const in_data, unsigned int in_len, - void *out_data, unsigned int out_len); +size_t +lzf_decompress (const void *const in_data, size_t in_len, + void *out_data, size_t out_len); #endif diff --git a/src/lzfP.h b/src/lzfP.h index 78c858fad..567f5a201 100644 --- a/src/lzfP.h +++ b/src/lzfP.h @@ -129,8 +129,9 @@ * Whether to store pointers or offsets inside the hash table. On * 64 bit architectures, pointers take up twice as much space, * and might also be slower. Default is to autodetect. - */ -/*#define LZF_USER_OFFSETS autodetect */ + * Notice: Don't set this value to 1, it will result in 'LZF_HSLOT' + * not being able to store offset above UINT32_MAX in 64bit. */ +#define LZF_USE_OFFSETS 0 /*****************************************************************************/ /* nothing should be changed below */ diff --git a/src/lzf_c.c b/src/lzf_c.c index 6c117dab1..7cbbc82fa 100644 --- a/src/lzf_c.c +++ b/src/lzf_c.c @@ -105,9 +105,9 @@ * */ NO_SANITIZE("alignment") -unsigned int -lzf_compress (const void *const in_data, unsigned int in_len, - void *out_data, unsigned int out_len +size_t +lzf_compress (const void *const in_data, size_t in_len, + void *out_data, size_t out_len #if LZF_STATE_ARG , LZF_STATE htab #endif @@ -132,7 +132,7 @@ lzf_compress (const void *const in_data, unsigned int in_len, #if defined (WIN32) && defined (_M_X64) unsigned _int64 off; /* workaround for missing POSIX compliance */ #else - unsigned long off; + size_t off; #endif unsigned int hval; int lit; @@ -153,7 +153,8 @@ lzf_compress (const void *const in_data, unsigned int in_len, hval = NEXT (hval, ip); hslot = htab + IDX (hval); - ref = *hslot + LZF_HSLOT_BIAS; *hslot = ip - LZF_HSLOT_BIAS; + ref = *hslot ? (*hslot + LZF_HSLOT_BIAS) : NULL; /* avoid applying zero offset to null pointer */ + *hslot = ip - LZF_HSLOT_BIAS; if (1 #if INIT_HTAB @@ -171,7 +172,7 @@ lzf_compress (const void *const in_data, unsigned int in_len, { /* match found at *ref++ */ unsigned int len = 2; - unsigned int maxlen = in_end - ip - len; + size_t maxlen = in_end - ip - len; maxlen = maxlen > MAX_REF ? MAX_REF : maxlen; if (expect_false (op + 3 + 1 >= out_end)) /* first a faster conservative test */ diff --git a/src/lzf_d.c b/src/lzf_d.c index dbf1ae772..ff32be892 100644 --- a/src/lzf_d.c +++ b/src/lzf_d.c @@ -56,9 +56,9 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wimplicit-fallthrough" #endif -unsigned int -lzf_decompress (const void *const in_data, unsigned int in_len, - void *out_data, unsigned int out_len) +size_t +lzf_decompress (const void *const in_data, size_t in_len, + void *out_data, size_t out_len) { u8 const *ip = (const u8 *)in_data; u8 *op = (u8 *)out_data; diff --git a/src/quicklist.c b/src/quicklist.c index b1051bd9b..22bd9b516 100644 --- a/src/quicklist.c +++ b/src/quicklist.c @@ -1721,6 +1721,7 @@ void quicklistBookmarksClear(quicklist *ql) { #include #include #include "testhelp.h" +#include #define yell(str, ...) printf("ERROR! " str "\n\n", __VA_ARGS__) @@ -1902,6 +1903,30 @@ static char *genstr(char *prefix, int i) { return result; } +static void randstring(unsigned char *target, size_t sz) { + size_t p = 0; + int minval, maxval; + switch(rand() % 3) { + case 0: + minval = 'a'; + maxval = 'z'; + break; + case 1: + minval = '0'; + maxval = '9'; + break; + case 2: + minval = 'A'; + maxval = 'Z'; + break; + default: + assert(NULL); + } + + while(p < sz) + target[p++] = minval+rand()%(maxval-minval+1); +} + /* main test, but callable from other files */ int quicklistTest(int argc, char *argv[], int flags) { UNUSED(argc); @@ -3125,6 +3150,57 @@ int quicklistTest(int argc, char *argv[], int flags) { quicklistRelease(ql); } + if (flags & REDIS_TEST_LARGE_MEMORY) { + TEST("compress and decompress quicklist ziplist node") { + quicklistNode *node = quicklistCreateNode(); + node->entry = ziplistNew(); + + /* Create a rand string */ + size_t sz = (1 << 25); /* 32MB per one entry */ + unsigned char *s = zmalloc(sz); + randstring(s, sz); + + /* Keep filling the node, until it reaches 1GB */ + for (int i = 0; i < 32; i++) { + node->entry = ziplistPush(node->entry, s, sz, ZIPLIST_TAIL); + quicklistNodeUpdateSz(node); + + long long start = mstime(); + assert(__quicklistCompressNode(node)); + assert(__quicklistDecompressNode(node)); + printf("Compress and decompress: %zu MB in %.2f seconds.\n", + node->sz/1024/1024, (float)(mstime() - start) / 1000); + } + + zfree(s); + zfree(node->entry); + zfree(node); + } + +#if ULONG_MAX >= 0xffffffffffffffff + TEST("compress and decomress quicklist plain node large than UINT32_MAX") { + size_t sz = (1ull << 32); + unsigned char *s = zmalloc(sz); + randstring(s, sz); + memcpy(s, "helloworld", 10); + memcpy(s + sz - 10, "1234567890", 10); + + quicklistNode *node = __quicklistCreatePlainNode(s, sz); + + long long start = mstime(); + assert(__quicklistCompressNode(node)); + assert(__quicklistDecompressNode(node)); + printf("Compress and decompress: %zu MB in %.2f seconds.\n", + node->sz/1024/1024, (float)(mstime() - start) / 1000); + + assert(memcmp(node->entry, "helloworld", 10) == 0); + assert(memcmp(node->entry + sz - 10, "1234567890", 10) == 0); + zfree(node->entry); + zfree(node); + } +#endif + } + if (!err) printf("ALL TESTS PASSED!\n"); else diff --git a/tests/unit/bitops.tcl b/tests/unit/bitops.tcl index 1a3c3a54e..89431c81f 100644 --- a/tests/unit/bitops.tcl +++ b/tests/unit/bitops.tcl @@ -566,4 +566,18 @@ start_server {tags {"bitops"}} { r config set proto-max-bulk-len $oldval r del mykey } {1} {large-memory} + + test "SETBIT values larger than UINT32_MAX and lzf_compress/lzf_decompress correctly" { + set bytes [expr (1 << 32) + 1] + set bitpos [expr (1 << 35)] + set oldval [lindex [r config get proto-max-bulk-len] 1] + r config set proto-max-bulk-len $bytes + r setbit mykey $bitpos 1 + assert_equal $bytes [r strlen mykey] + assert_equal 1 [r getbit mykey $bitpos] + r debug reload ;# lzf_compress/lzf_decompress when RDB saving/loading. + assert_equal 1 [r getbit mykey $bitpos] + r config set proto-max-bulk-len $oldval + r del mykey + } {1} {large-memory needs:debug} }