diff --git a/src/listpack.c b/src/listpack.c index f36d9d64d..8b3209be2 100644 --- a/src/listpack.c +++ b/src/listpack.c @@ -1221,6 +1221,15 @@ size_t lpBytes(unsigned char *lp) { return lpGetTotalBytes(lp); } +/* Returns the size of a listpack consisting of an integer repeated 'rep' times. */ +size_t lpEstimateBytesRepeatedInteger(long long lval, unsigned long rep) { + uint64_t enclen; + unsigned char intenc[LP_MAX_INT_ENCODING_LEN]; + lpEncodeIntegerGetType(lval, intenc, &enclen); + unsigned long backlen = lpEncodeBacklen(NULL, enclen); + return LP_HDR_SIZE + (enclen + backlen) * rep + 1; +} + /* Seek the specified element and returns the pointer to the seeked element. * Positive indexes specify the zero-based element to seek from the head to * the tail, negative indexes specify elements starting from the tail, where diff --git a/src/listpack.h b/src/listpack.h index 304ccd32c..a60f089f9 100644 --- a/src/listpack.h +++ b/src/listpack.h @@ -82,6 +82,7 @@ unsigned char *lpLast(unsigned char *lp); unsigned char *lpNext(unsigned char *lp, unsigned char *p); unsigned char *lpPrev(unsigned char *lp, unsigned char *p); size_t lpBytes(unsigned char *lp); +size_t lpEstimateBytesRepeatedInteger(long long lval, unsigned long rep); unsigned char *lpSeek(unsigned char *lp, long index); typedef int (*listpackValidateEntryCB)(unsigned char *p, unsigned int head_count, void *userdata); int lpValidateIntegrity(unsigned char *lp, size_t size, int deep, diff --git a/src/rdb.c b/src/rdb.c index 733181ac6..8a99d4aa2 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -1875,14 +1875,11 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { * of many small ones. It's OK since lpSafeToAdd doesn't * care about individual elements, only the total size. */ setTypeConvert(o, OBJ_ENCODING_LISTPACK); - } else { - setTypeConvert(o,OBJ_ENCODING_HT); - if (dictTryExpand(o->ptr,len) != DICT_OK) { - rdbReportCorruptRDB("OOM in dictTryExpand %llu", (unsigned long long)len); - sdsfree(sdsele); - decrRefCount(o); - return NULL; - } + } else if (setTypeConvertAndExpand(o, OBJ_ENCODING_HT, len, 0) != C_OK) { + rdbReportCorruptRDB("OOM in dictTryExpand %llu", (unsigned long long)len); + sdsfree(sdsele); + decrRefCount(o); + return NULL; } } @@ -1901,15 +1898,12 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { return NULL; } o->ptr = lpAppend(o->ptr, (unsigned char *)sdsele, elelen); - } else { - setTypeConvert(o, OBJ_ENCODING_HT); - if (dictTryExpand(o->ptr, len) != DICT_OK) { - rdbReportCorruptRDB("OOM in dictTryExpand %llu", - (unsigned long long)len); - sdsfree(sdsele); - decrRefCount(o); - return NULL; - } + } else if (setTypeConvertAndExpand(o, OBJ_ENCODING_HT, len, 0) != C_OK) { + rdbReportCorruptRDB("OOM in dictTryExpand %llu", + (unsigned long long)len); + sdsfree(sdsele); + decrRefCount(o); + return NULL; } } diff --git a/src/server.h b/src/server.h index 105b2db4e..1ce5ff781 100644 --- a/src/server.h +++ b/src/server.h @@ -3022,6 +3022,7 @@ int setTypeRandomElement(robj *setobj, char **str, size_t *len, int64_t *llele); unsigned long setTypeRandomElements(robj *set, unsigned long count, robj *aux_set); unsigned long setTypeSize(const robj *subject); void setTypeConvert(robj *subject, int enc); +int setTypeConvertAndExpand(robj *setobj, int enc, unsigned long cap, int panic); robj *setTypeDup(robj *o); /* Hash data type */ diff --git a/src/t_set.c b/src/t_set.c index 197cabcd9..ae5f26540 100644 --- a/src/t_set.c +++ b/src/t_set.c @@ -159,7 +159,7 @@ int setTypeAddAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sd set->ptr = lp; } else { /* Size limit is reached. Convert to hashtable and add. */ - setTypeConvert(set, OBJ_ENCODING_HT); + setTypeConvertAndExpand(set, OBJ_ENCODING_HT, lpLength(lp) + 1, 1); serverAssert(dictAdd(set->ptr,sdsnewlen(str,len),NULL) == DICT_OK); } return 1; @@ -174,23 +174,34 @@ int setTypeAddAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sd return 1; } } else { - size_t maxelelen = intsetLen(set->ptr) == 0 ? - 0 : max(sdigits10(intsetMax(set->ptr)), - sdigits10(intsetMin(set->ptr))); + /* Check if listpack encoding is safe not to cross any threshold. */ + size_t maxelelen = 0, totsize = 0; + unsigned long n = intsetLen(set->ptr); + if (n != 0) { + size_t elelen1 = sdigits10(intsetMax(set->ptr)); + size_t elelen2 = sdigits10(intsetMin(set->ptr)); + maxelelen = max(elelen1, elelen2); + size_t s1 = lpEstimateBytesRepeatedInteger(intsetMax(set->ptr), n); + size_t s2 = lpEstimateBytesRepeatedInteger(intsetMin(set->ptr), n); + totsize = max(s1, s2); + } if (intsetLen((const intset*)set->ptr) < server.set_max_listpack_entries && len <= server.set_max_listpack_value && maxelelen <= server.set_max_listpack_value && - lpSafeToAdd(NULL, maxelelen * intsetLen(set->ptr) + len)) + lpSafeToAdd(NULL, totsize + len)) { /* In the "safe to add" check above we assumed all elements in * the intset are of size maxelelen. This is an upper bound. */ - setTypeConvert(set, OBJ_ENCODING_LISTPACK); + setTypeConvertAndExpand(set, OBJ_ENCODING_LISTPACK, + intsetLen(set->ptr) + 1, 1); unsigned char *lp = set->ptr; lp = lpAppend(lp, (unsigned char *)str, len); + lp = lpShrinkToFit(lp); set->ptr = lp; return 1; } else { - setTypeConvert(set, OBJ_ENCODING_HT); + setTypeConvertAndExpand(set, OBJ_ENCODING_HT, + intsetLen(set->ptr) + 1, 1); /* The set *was* an intset and this value is not integer * encodable, so dictAdd should always work. */ serverAssert(dictAdd(set->ptr,sdsnewlen(str,len),NULL) == DICT_OK); @@ -468,6 +479,14 @@ unsigned long setTypeSize(const robj *subject) { * to a hash table) is presized to hold the number of elements in the original * set. */ void setTypeConvert(robj *setobj, int enc) { + setTypeConvertAndExpand(setobj, enc, setTypeSize(setobj), 1); +} + +/* Converts a set to the specified encoding, pre-sizing it for 'cap' elements. + * The 'panic' argument controls whether to panic on OOM (panic=1) or return + * C_ERR on OOM (panic=0). If panic=1 is given, this function always returns + * C_OK. */ +int setTypeConvertAndExpand(robj *setobj, int enc, unsigned long cap, int panic) { setTypeIterator *si; serverAssertWithInfo(NULL,setobj,setobj->type == OBJ_SET && setobj->encoding != enc); @@ -477,7 +496,12 @@ void setTypeConvert(robj *setobj, int enc) { sds element; /* Presize the dict to avoid rehashing */ - dictExpand(d, setTypeSize(setobj)); + if (panic) { + dictExpand(d, cap); + } else if (dictTryExpand(d, cap) != DICT_OK) { + dictRelease(d); + return C_ERR; + } /* To add the elements we extract integers and create redis objects */ si = setTypeInitIterator(setobj); @@ -490,8 +514,14 @@ void setTypeConvert(robj *setobj, int enc) { setobj->encoding = OBJ_ENCODING_HT; setobj->ptr = d; } else if (enc == OBJ_ENCODING_LISTPACK) { - /* Preallocate the minimum one byte per element */ - size_t estcap = setTypeSize(setobj); + /* Preallocate the minimum two bytes per element (enc/value + backlen) */ + size_t estcap = cap * 2; + if (setobj->encoding == OBJ_ENCODING_INTSET && setTypeSize(setobj) > 0) { + /* If we're converting from intset, we have a better estimate. */ + size_t s1 = lpEstimateBytesRepeatedInteger(intsetMin(setobj->ptr), cap); + size_t s2 = lpEstimateBytesRepeatedInteger(intsetMax(setobj->ptr), cap); + estcap = max(s1, s2); + } unsigned char *lp = lpNew(estcap); char *str; size_t len; @@ -511,6 +541,7 @@ void setTypeConvert(robj *setobj, int enc) { } else { serverPanic("Unsupported set conversion"); } + return C_OK; } /* This is a helper function for the COPY command.