Change lzf to handle values larger than UINT32_MAX (#9776)
Redis supports inserting data over 4GB into string (and recently for lists too, see #9357), But LZF compression used in RDB files (see `rdbcompression` config), and in quicklist (see `list-compress-depth` config) does not support compress/decompress data over UINT32_MAX, which will result in corrupting the rdb after compression. Internal changes: 1. Modify the `unsigned int` parameter of `lzf_compress/lzf_decompress` to `size_t`. 2. Modify the variable types in `lzf_compress` involving offsets and lengths to `size_t`. 3. Set LZF_USE_OFFSETS to 0. When LZF_USE_OFFSETS is 1, lzf store offset into `LZF_HSLOT`(32bit). Even in 64-bit, `LZF_USE_OFFSETS` defaults to 1, because lzf assumes that it only compresses and decompresses data smaller than UINT32_MAX. But now we need to make lzf support 64-bit, turning on `LZF_USE_OFFSETS` will make it impossible to store 64-bit offsets or pointers. BTW, disable LZF_USE_OFFSETS also brings a few performance improvements. Tests: 1. Add test for compress/decompress string large than UINT32_MAX. 2. Add unittest for compress/decompress quicklistNode.
This commit is contained in:
parent
e725d737fb
commit
985430b4fc
12
src/lzf.h
12
src/lzf.h
@ -73,9 +73,9 @@
|
||||
* and lzf_c.c.
|
||||
*
|
||||
*/
|
||||
unsigned int
|
||||
lzf_compress (const void *const in_data, unsigned int in_len,
|
||||
void *out_data, unsigned int out_len);
|
||||
size_t
|
||||
lzf_compress (const void *const in_data, size_t in_len,
|
||||
void *out_data, size_t out_len);
|
||||
|
||||
/*
|
||||
* Decompress data compressed with some version of the lzf_compress
|
||||
@ -92,9 +92,9 @@ lzf_compress (const void *const in_data, unsigned int in_len,
|
||||
*
|
||||
* This function is very fast, about as fast as a copying loop.
|
||||
*/
|
||||
unsigned int
|
||||
lzf_decompress (const void *const in_data, unsigned int in_len,
|
||||
void *out_data, unsigned int out_len);
|
||||
size_t
|
||||
lzf_decompress (const void *const in_data, size_t in_len,
|
||||
void *out_data, size_t out_len);
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -129,8 +129,9 @@
|
||||
* Whether to store pointers or offsets inside the hash table. On
|
||||
* 64 bit architectures, pointers take up twice as much space,
|
||||
* and might also be slower. Default is to autodetect.
|
||||
*/
|
||||
/*#define LZF_USER_OFFSETS autodetect */
|
||||
* Notice: Don't set this value to 1, it will result in 'LZF_HSLOT'
|
||||
* not being able to store offset above UINT32_MAX in 64bit. */
|
||||
#define LZF_USE_OFFSETS 0
|
||||
|
||||
/*****************************************************************************/
|
||||
/* nothing should be changed below */
|
||||
|
13
src/lzf_c.c
13
src/lzf_c.c
@ -105,9 +105,9 @@
|
||||
*
|
||||
*/
|
||||
NO_SANITIZE("alignment")
|
||||
unsigned int
|
||||
lzf_compress (const void *const in_data, unsigned int in_len,
|
||||
void *out_data, unsigned int out_len
|
||||
size_t
|
||||
lzf_compress (const void *const in_data, size_t in_len,
|
||||
void *out_data, size_t out_len
|
||||
#if LZF_STATE_ARG
|
||||
, LZF_STATE htab
|
||||
#endif
|
||||
@ -132,7 +132,7 @@ lzf_compress (const void *const in_data, unsigned int in_len,
|
||||
#if defined (WIN32) && defined (_M_X64)
|
||||
unsigned _int64 off; /* workaround for missing POSIX compliance */
|
||||
#else
|
||||
unsigned long off;
|
||||
size_t off;
|
||||
#endif
|
||||
unsigned int hval;
|
||||
int lit;
|
||||
@ -153,7 +153,8 @@ lzf_compress (const void *const in_data, unsigned int in_len,
|
||||
|
||||
hval = NEXT (hval, ip);
|
||||
hslot = htab + IDX (hval);
|
||||
ref = *hslot + LZF_HSLOT_BIAS; *hslot = ip - LZF_HSLOT_BIAS;
|
||||
ref = *hslot ? (*hslot + LZF_HSLOT_BIAS) : NULL; /* avoid applying zero offset to null pointer */
|
||||
*hslot = ip - LZF_HSLOT_BIAS;
|
||||
|
||||
if (1
|
||||
#if INIT_HTAB
|
||||
@ -171,7 +172,7 @@ lzf_compress (const void *const in_data, unsigned int in_len,
|
||||
{
|
||||
/* match found at *ref++ */
|
||||
unsigned int len = 2;
|
||||
unsigned int maxlen = in_end - ip - len;
|
||||
size_t maxlen = in_end - ip - len;
|
||||
maxlen = maxlen > MAX_REF ? MAX_REF : maxlen;
|
||||
|
||||
if (expect_false (op + 3 + 1 >= out_end)) /* first a faster conservative test */
|
||||
|
@ -56,9 +56,9 @@
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
|
||||
#endif
|
||||
unsigned int
|
||||
lzf_decompress (const void *const in_data, unsigned int in_len,
|
||||
void *out_data, unsigned int out_len)
|
||||
size_t
|
||||
lzf_decompress (const void *const in_data, size_t in_len,
|
||||
void *out_data, size_t out_len)
|
||||
{
|
||||
u8 const *ip = (const u8 *)in_data;
|
||||
u8 *op = (u8 *)out_data;
|
||||
|
@ -1721,6 +1721,7 @@ void quicklistBookmarksClear(quicklist *ql) {
|
||||
#include <stdint.h>
|
||||
#include <sys/time.h>
|
||||
#include "testhelp.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
#define yell(str, ...) printf("ERROR! " str "\n\n", __VA_ARGS__)
|
||||
|
||||
@ -1902,6 +1903,30 @@ static char *genstr(char *prefix, int i) {
|
||||
return result;
|
||||
}
|
||||
|
||||
static void randstring(unsigned char *target, size_t sz) {
|
||||
size_t p = 0;
|
||||
int minval, maxval;
|
||||
switch(rand() % 3) {
|
||||
case 0:
|
||||
minval = 'a';
|
||||
maxval = 'z';
|
||||
break;
|
||||
case 1:
|
||||
minval = '0';
|
||||
maxval = '9';
|
||||
break;
|
||||
case 2:
|
||||
minval = 'A';
|
||||
maxval = 'Z';
|
||||
break;
|
||||
default:
|
||||
assert(NULL);
|
||||
}
|
||||
|
||||
while(p < sz)
|
||||
target[p++] = minval+rand()%(maxval-minval+1);
|
||||
}
|
||||
|
||||
/* main test, but callable from other files */
|
||||
int quicklistTest(int argc, char *argv[], int flags) {
|
||||
UNUSED(argc);
|
||||
@ -3125,6 +3150,57 @@ int quicklistTest(int argc, char *argv[], int flags) {
|
||||
quicklistRelease(ql);
|
||||
}
|
||||
|
||||
if (flags & REDIS_TEST_LARGE_MEMORY) {
|
||||
TEST("compress and decompress quicklist ziplist node") {
|
||||
quicklistNode *node = quicklistCreateNode();
|
||||
node->entry = ziplistNew();
|
||||
|
||||
/* Create a rand string */
|
||||
size_t sz = (1 << 25); /* 32MB per one entry */
|
||||
unsigned char *s = zmalloc(sz);
|
||||
randstring(s, sz);
|
||||
|
||||
/* Keep filling the node, until it reaches 1GB */
|
||||
for (int i = 0; i < 32; i++) {
|
||||
node->entry = ziplistPush(node->entry, s, sz, ZIPLIST_TAIL);
|
||||
quicklistNodeUpdateSz(node);
|
||||
|
||||
long long start = mstime();
|
||||
assert(__quicklistCompressNode(node));
|
||||
assert(__quicklistDecompressNode(node));
|
||||
printf("Compress and decompress: %zu MB in %.2f seconds.\n",
|
||||
node->sz/1024/1024, (float)(mstime() - start) / 1000);
|
||||
}
|
||||
|
||||
zfree(s);
|
||||
zfree(node->entry);
|
||||
zfree(node);
|
||||
}
|
||||
|
||||
#if ULONG_MAX >= 0xffffffffffffffff
|
||||
TEST("compress and decomress quicklist plain node large than UINT32_MAX") {
|
||||
size_t sz = (1ull << 32);
|
||||
unsigned char *s = zmalloc(sz);
|
||||
randstring(s, sz);
|
||||
memcpy(s, "helloworld", 10);
|
||||
memcpy(s + sz - 10, "1234567890", 10);
|
||||
|
||||
quicklistNode *node = __quicklistCreatePlainNode(s, sz);
|
||||
|
||||
long long start = mstime();
|
||||
assert(__quicklistCompressNode(node));
|
||||
assert(__quicklistDecompressNode(node));
|
||||
printf("Compress and decompress: %zu MB in %.2f seconds.\n",
|
||||
node->sz/1024/1024, (float)(mstime() - start) / 1000);
|
||||
|
||||
assert(memcmp(node->entry, "helloworld", 10) == 0);
|
||||
assert(memcmp(node->entry + sz - 10, "1234567890", 10) == 0);
|
||||
zfree(node->entry);
|
||||
zfree(node);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
if (!err)
|
||||
printf("ALL TESTS PASSED!\n");
|
||||
else
|
||||
|
@ -566,4 +566,18 @@ start_server {tags {"bitops"}} {
|
||||
r config set proto-max-bulk-len $oldval
|
||||
r del mykey
|
||||
} {1} {large-memory}
|
||||
|
||||
test "SETBIT values larger than UINT32_MAX and lzf_compress/lzf_decompress correctly" {
|
||||
set bytes [expr (1 << 32) + 1]
|
||||
set bitpos [expr (1 << 35)]
|
||||
set oldval [lindex [r config get proto-max-bulk-len] 1]
|
||||
r config set proto-max-bulk-len $bytes
|
||||
r setbit mykey $bitpos 1
|
||||
assert_equal $bytes [r strlen mykey]
|
||||
assert_equal 1 [r getbit mykey $bitpos]
|
||||
r debug reload ;# lzf_compress/lzf_decompress when RDB saving/loading.
|
||||
assert_equal 1 [r getbit mykey $bitpos]
|
||||
r config set proto-max-bulk-len $oldval
|
||||
r del mykey
|
||||
} {1} {large-memory needs:debug}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user