diff --git a/redis.conf b/redis.conf index dfc9de308..408d0c258 100644 --- a/redis.conf +++ b/redis.conf @@ -1951,13 +1951,20 @@ list-max-listpack-size -2 # etc. list-compress-depth 0 -# Sets have a special encoding in just one case: when a set is composed +# Sets have a special encoding when a set is composed # of just strings that happen to be integers in radix 10 in the range # of 64 bit signed integers. # The following configuration setting sets the limit in the size of the # set in order to use this special memory saving encoding. set-max-intset-entries 512 +# Sets containing non-integer values are also encoded using a memory efficient +# data structure when they have a small number of entries, and the biggest entry +# does not exceed a given threshold. These thresholds can be configured using +# the following directives. +set-max-listpack-entries 128 +set-max-listpack-value 64 + # Similarly to hashes and lists, sorted sets are also specially encoded in # order to save a lot of space. This encoding is only used when the length and # elements of a sorted set are below the following limits: diff --git a/src/aof.c b/src/aof.c index 52026ab24..f1bf9a1a6 100644 --- a/src/aof.c +++ b/src/aof.c @@ -1818,56 +1818,31 @@ int rewriteListObject(rio *r, robj *key, robj *o) { * The function returns 0 on error, 1 on success. */ int rewriteSetObject(rio *r, robj *key, robj *o) { long long count = 0, items = setTypeSize(o); - - if (o->encoding == OBJ_ENCODING_INTSET) { - int ii = 0; - int64_t llval; - - while(intsetGet(o->ptr,ii++,&llval)) { - if (count == 0) { - int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ? - AOF_REWRITE_ITEMS_PER_CMD : items; - - if (!rioWriteBulkCount(r,'*',2+cmd_items) || - !rioWriteBulkString(r,"SADD",4) || - !rioWriteBulkObject(r,key)) - { - return 0; - } + setTypeIterator *si = setTypeInitIterator(o); + char *str; + size_t len; + int64_t llval; + while (setTypeNext(si, &str, &len, &llval) != -1) { + if (count == 0) { + int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ? + AOF_REWRITE_ITEMS_PER_CMD : items; + if (!rioWriteBulkCount(r,'*',2+cmd_items) || + !rioWriteBulkString(r,"SADD",4) || + !rioWriteBulkObject(r,key)) + { + return 0; } - if (!rioWriteBulkLongLong(r,llval)) return 0; - if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0; - items--; } - } else if (o->encoding == OBJ_ENCODING_HT) { - dictIterator *di = dictGetIterator(o->ptr); - dictEntry *de; - - while((de = dictNext(di)) != NULL) { - sds ele = dictGetKey(de); - if (count == 0) { - int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ? - AOF_REWRITE_ITEMS_PER_CMD : items; - - if (!rioWriteBulkCount(r,'*',2+cmd_items) || - !rioWriteBulkString(r,"SADD",4) || - !rioWriteBulkObject(r,key)) - { - dictReleaseIterator(di); - return 0; - } - } - if (!rioWriteBulkString(r,ele,sdslen(ele))) { - dictReleaseIterator(di); - return 0; - } - if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0; - items--; + size_t written = str ? + rioWriteBulkString(r, str, len) : rioWriteBulkLongLong(r, llval); + if (!written) { + setTypeReleaseIterator(si); + return 0; } - dictReleaseIterator(di); - } else { - serverPanic("Unknown set encoding"); + if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0; + items--; } + setTypeReleaseIterator(si); return 1; } diff --git a/src/config.c b/src/config.c index b11e4dda4..c5eab41a9 100644 --- a/src/config.c +++ b/src/config.c @@ -3130,6 +3130,8 @@ standardConfig static_configs[] = { /* Size_t configs */ createSizeTConfig("hash-max-listpack-entries", "hash-max-ziplist-entries", MODIFIABLE_CONFIG, 0, LONG_MAX, server.hash_max_listpack_entries, 512, INTEGER_CONFIG, NULL, NULL), createSizeTConfig("set-max-intset-entries", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.set_max_intset_entries, 512, INTEGER_CONFIG, NULL, NULL), + createSizeTConfig("set-max-listpack-entries", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.set_max_listpack_entries, 128, INTEGER_CONFIG, NULL, NULL), + createSizeTConfig("set-max-listpack-value", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.set_max_listpack_value, 64, INTEGER_CONFIG, NULL, NULL), createSizeTConfig("zset-max-listpack-entries", "zset-max-ziplist-entries", MODIFIABLE_CONFIG, 0, LONG_MAX, server.zset_max_listpack_entries, 128, INTEGER_CONFIG, NULL, NULL), createSizeTConfig("active-defrag-ignore-bytes", NULL, MODIFIABLE_CONFIG, 1, LLONG_MAX, server.active_defrag_ignore_bytes, 100<<20, MEMORY_CONFIG, NULL, NULL), /* Default: don't defrag if frag overhead is below 100mb */ createSizeTConfig("hash-max-listpack-value", "hash-max-ziplist-value", MODIFIABLE_CONFIG, 0, LONG_MAX, server.hash_max_listpack_value, 64, MEMORY_CONFIG, NULL, NULL), diff --git a/src/db.c b/src/db.c index a560ce052..5ebd21e4b 100644 --- a/src/db.c +++ b/src/db.c @@ -915,14 +915,16 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) { } while (cursor && maxiterations-- && listLength(keys) < (unsigned long)count); - } else if (o->type == OBJ_SET) { + } else if (o->type == OBJ_SET && o->encoding == OBJ_ENCODING_INTSET) { int pos = 0; int64_t ll; while(intsetGet(o->ptr,pos++,&ll)) listAddNodeTail(keys,createStringObjectFromLongLong(ll)); cursor = 0; - } else if (o->type == OBJ_HASH || o->type == OBJ_ZSET) { + } else if ((o->type == OBJ_HASH || o->type == OBJ_ZSET || o->type == OBJ_SET) && + o->encoding == OBJ_ENCODING_LISTPACK) + { unsigned char *p = lpFirst(o->ptr); unsigned char *vstr; int64_t vlen; diff --git a/src/defrag.c b/src/defrag.c index ced4fd20a..e78c07929 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -874,10 +874,12 @@ long defragKey(redisDb *db, dictEntry *de) { } else if (ob->type == OBJ_SET) { if (ob->encoding == OBJ_ENCODING_HT) { defragged += defragSet(db, de); - } else if (ob->encoding == OBJ_ENCODING_INTSET) { - intset *newis, *is = ob->ptr; - if ((newis = activeDefragAlloc(is))) - defragged++, ob->ptr = newis; + } else if (ob->encoding == OBJ_ENCODING_INTSET || + ob->encoding == OBJ_ENCODING_LISTPACK) + { + void *newptr, *ptr = ob->ptr; + if ((newptr = activeDefragAlloc(ptr))) + defragged++, ob->ptr = newptr; } else { serverPanic("Unknown set encoding"); } diff --git a/src/intset.c b/src/intset.c index e96037da8..621a74283 100644 --- a/src/intset.c +++ b/src/intset.c @@ -265,6 +265,17 @@ int64_t intsetRandom(intset *is) { return _intsetGet(is,rand()%len); } +/* Return the largest member. */ +int64_t intsetMax(intset *is) { + uint32_t len = intrev32ifbe(is->length); + return _intsetGet(is, len - 1); +} + +/* Return the smallest member. */ +int64_t intsetMin(intset *is) { + return _intsetGet(is, 0); +} + /* Get the value at the given position. When this position is * out of range the function returns 0, when in range it returns 1. */ uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) { @@ -425,6 +436,8 @@ int intsetTest(int argc, char **argv, int flags) { is = intsetAdd(is,6,&success); assert(success); is = intsetAdd(is,4,&success); assert(success); is = intsetAdd(is,4,&success); assert(!success); + assert(6 == intsetMax(is)); + assert(4 == intsetMin(is)); ok(); zfree(is); } diff --git a/src/intset.h b/src/intset.h index 772f847b6..41cc7b822 100644 --- a/src/intset.h +++ b/src/intset.h @@ -43,6 +43,8 @@ intset *intsetAdd(intset *is, int64_t value, uint8_t *success); intset *intsetRemove(intset *is, int64_t value, int *success); uint8_t intsetFind(intset *is, int64_t value); int64_t intsetRandom(intset *is); +int64_t intsetMax(intset *is); +int64_t intsetMin(intset *is); uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value); uint32_t intsetLen(const intset *is); size_t intsetBlobLen(intset *is); diff --git a/src/listpack.c b/src/listpack.c index db9a1b4ab..bfa364f98 100644 --- a/src/listpack.c +++ b/src/listpack.c @@ -1063,6 +1063,55 @@ unsigned char *lpDeleteRange(unsigned char *lp, long index, unsigned long num) { return lp; } +/* Delete the elements 'ps' passed as an array of 'count' element pointers and + * return the resulting listpack. The elements must be given in the same order + * as they apper in the listpack. */ +unsigned char *lpBatchDelete(unsigned char *lp, unsigned char **ps, unsigned long count) { + if (count == 0) return lp; + unsigned char *dst = ps[0]; + size_t total_bytes = lpGetTotalBytes(lp); + unsigned char *lp_end = lp + total_bytes; /* After the EOF element. */ + assert(lp_end[-1] == LP_EOF); + /* + * ----+--------+-----------+--------+---------+-----+---+ + * ... | Delete | Keep | Delete | Keep | ... |EOF| + * ... |xxxxxxxx| |xxxxxxxx| | ... | | + * ----+--------+-----------+--------+---------+-----+---+ + * ^ ^ ^ ^ + * | | | | + * ps[i] | ps[i+1] | + * skip keep_start keep_end lp_end + * + * The loop memmoves the bytes between keep_start and keep_end to dst. + */ + for (unsigned long i = 0; i < count; i++) { + unsigned char *skip = ps[i]; + assert(skip != NULL && skip[0] != LP_EOF); + unsigned char *keep_start = lpSkip(skip); + unsigned char *keep_end; + if (i + 1 < count) { + keep_end = ps[i + 1]; + /* Deleting consecutive elements. Nothing to keep between them. */ + if (keep_start == keep_end) continue; + } else { + /* Keep the rest of the listpack including the EOF marker. */ + keep_end = lp_end; + } + assert(keep_end > keep_start); + size_t bytes_to_keep = keep_end - keep_start; + memmove(dst, keep_start, bytes_to_keep); + dst += bytes_to_keep; + } + /* Update total size and num elements. */ + size_t deleted_bytes = lp_end - dst; + total_bytes -= deleted_bytes; + assert(lp[total_bytes - 1] == LP_EOF); + lpSetTotalBytes(lp, total_bytes); + uint32_t numele = lpGetNumElements(lp); + if (numele != LP_HDR_NUMELE_UNKNOWN) lpSetNumElements(lp, numele - count); + return lpShrinkToFit(lp); +} + /* Merge listpacks 'first' and 'second' by appending 'second' to 'first'. * * NOTE: The larger listpack is reallocated to contain the new merged listpack. @@ -1383,6 +1432,43 @@ void lpRandomPair(unsigned char *lp, unsigned long total_count, listpackEntry *k val->sval = lpGetValue(p, &(val->slen), &(val->lval)); } +/* Randomly select 'count' entries and store them in the 'entries' array, which + * needs to have space for 'count' listpackEntry structs. The order is random + * and duplicates are possible. */ +void lpRandomEntries(unsigned char *lp, unsigned int count, listpackEntry *entries) { + struct pick { + unsigned int index; + unsigned int order; + } *picks = lp_malloc(count * sizeof(struct pick)); + unsigned int total_size = lpLength(lp); + assert(total_size); + for (unsigned int i = 0; i < count; i++) { + picks[i].index = rand() % total_size; + picks[i].order = i; + } + + /* Sort by index. */ + qsort(picks, count, sizeof(struct pick), uintCompare); + + /* Iterate over listpack in index order and store the values in the entries + * array respecting the original order. */ + unsigned char *p = lpFirst(lp); + unsigned int j = 0; /* index in listpack */ + for (unsigned int i = 0; i < count; i++) { + /* Advance listpack pointer to until we reach 'index' listpack. */ + while (j < picks[i].index) { + p = lpNext(lp, p); + j++; + } + int storeorder = picks[i].order; + unsigned int len = 0; + long long llval = 0; + unsigned char *str = lpGetValue(p, &len, &llval); + lpSaveValue(str, len, llval, &entries[storeorder]); + } + lp_free(picks); +} + /* Randomly select count of key value pairs and store into 'keys' and * 'vals' args. The order of the picked entries is random, and the selections * are non-unique (repetitions are possible). @@ -1449,34 +1535,83 @@ unsigned int lpRandomPairsUnique(unsigned char *lp, unsigned int count, listpack if (count > total_size) count = total_size; - /* To only iterate once, every time we try to pick a member, the probability - * we pick it is the quotient of the count left we want to pick and the - * count still we haven't visited in the dict, this way, we could make every - * member be equally picked.*/ p = lpFirst(lp); unsigned int picked = 0, remaining = count; while (picked < count && p) { - double randomDouble = ((double)rand()) / RAND_MAX; - double threshold = ((double)remaining) / (total_size - index); - if (randomDouble <= threshold) { + assert((p = lpNextRandom(lp, p, &index, remaining, 1))); + key = lpGetValue(p, &klen, &klval); + lpSaveValue(key, klen, klval, &keys[picked]); + assert((p = lpNext(lp, p))); + index++; + if (vals) { key = lpGetValue(p, &klen, &klval); - lpSaveValue(key, klen, klval, &keys[picked]); - assert((p = lpNext(lp, p))); - if (vals) { - key = lpGetValue(p, &klen, &klval); - lpSaveValue(key, klen, klval, &vals[picked]); - } - remaining--; - picked++; - } else { - assert((p = lpNext(lp, p))); + lpSaveValue(key, klen, klval, &vals[picked]); } p = lpNext(lp, p); + remaining--; + picked++; index++; } return picked; } +/* Iterates forward to the "next random" element, given we are yet to pick + * 'remaining' unique elements between the starting element 'p' (inclusive) and + * the end of the list. The 'index' needs to be initialized according to the + * current zero-based index matching the position of the starting element 'p' + * and is updated to match the returned element's zero-based index. If + * 'even_only' is nonzero, an element with an even index is picked, which is + * useful if the listpack represents a key-value pair sequence. + * + * Note that this function can return p. In order to skip the previously + * returned element, you need to call lpNext() or lpDelete() after each call to + * lpNextRandom(). Idea: + * + * assert(remaining <= lpLength(lp)); + * p = lpFirst(lp); + * i = 0; + * while (remaining > 0) { + * p = lpNextRandom(lp, p, &i, remaining--, 0); + * + * // ... Do stuff with p ... + * + * p = lpNext(lp, p); + * i++; + * } + */ +unsigned char *lpNextRandom(unsigned char *lp, unsigned char *p, unsigned int *index, + unsigned int remaining, int even_only) +{ + /* To only iterate once, every time we try to pick a member, the probability + * we pick it is the quotient of the count left we want to pick and the + * count still we haven't visited. This way, we could make every member be + * equally likely to be picked. */ + unsigned int i = *index; + unsigned int total_size = lpLength(lp); + while (i < total_size && p != NULL) { + if (even_only && i % 2 != 0) { + p = lpNext(lp, p); + i++; + continue; + } + + /* Do we pick this element? */ + unsigned int available = total_size - i; + if (even_only) available /= 2; + double randomDouble = ((double)rand()) / RAND_MAX; + double threshold = ((double)remaining) / available; + if (randomDouble <= threshold) { + *index = i; + return p; + } + + p = lpNext(lp, p); + i++; + } + + return NULL; +} + /* Print info of listpack which is used in debugCommand */ void lpRepr(unsigned char *lp) { unsigned char *p, *vstr; @@ -1902,6 +2037,21 @@ int listpackTest(int argc, char *argv[], int flags) { zfree(lp); } + TEST("Batch delete") { + unsigned char *lp = createList(); /* char *mixlist[] = {"hello", "foo", "quux", "1024"} */ + assert(lpLength(lp) == 4); /* Pre-condition */ + unsigned char *p0 = lpFirst(lp), + *p1 = lpNext(lp, p0), + *p2 = lpNext(lp, p1), + *p3 = lpNext(lp, p2); + unsigned char *ps[] = {p0, p1, p3}; + lp = lpBatchDelete(lp, ps, 3); + assert(lpLength(lp) == 1); + verifyEntry(lpFirst(lp), (unsigned char*)mixlist[2], strlen(mixlist[2])); + assert(lpValidateIntegrity(lp, lpBytes(lp), 1, NULL, NULL) == 1); + lpFree(lp); + } + TEST("Delete foo while iterating") { lp = createList(); p = lpFirst(lp); @@ -2048,6 +2198,82 @@ int listpackTest(int argc, char *argv[], int flags) { zfree(lp3); } + TEST("lpNextRandom normal usage") { + /* Create some data */ + unsigned char *lp = lpNew(0); + unsigned char buf[100] = "asdf"; + unsigned int size = 100; + for (size_t i = 0; i < size; i++) { + lp = lpAppend(lp, buf, i); + } + assert(lpLength(lp) == size); + + /* Pick a subset of the elements of every possible subset size */ + for (unsigned int count = 0; count <= size; count++) { + unsigned int remaining = count; + unsigned char *p = lpFirst(lp); + unsigned char *prev = NULL; + unsigned index = 0; + while (remaining > 0) { + assert(p != NULL); + p = lpNextRandom(lp, p, &index, remaining--, 0); + assert(p != NULL); + assert(p != prev); + prev = p; + p = lpNext(lp, p); + index++; + } + } + } + + TEST("lpNextRandom corner cases") { + unsigned char *lp = lpNew(0); + unsigned i = 0; + + /* Pick from empty listpack returns NULL. */ + assert(lpNextRandom(lp, NULL, &i, 2, 0) == NULL); + + /* Add some elements and find their pointers within the listpack. */ + lp = lpAppend(lp, (unsigned char *)"abc", 3); + lp = lpAppend(lp, (unsigned char *)"def", 3); + lp = lpAppend(lp, (unsigned char *)"ghi", 3); + assert(lpLength(lp) == 3); + unsigned char *p0 = lpFirst(lp); + unsigned char *p1 = lpNext(lp, p0); + unsigned char *p2 = lpNext(lp, p1); + assert(lpNext(lp, p2) == NULL); + + /* Pick zero elements returns NULL. */ + i = 0; assert(lpNextRandom(lp, lpFirst(lp), &i, 0, 0) == NULL); + + /* Pick all returns all. */ + i = 0; assert(lpNextRandom(lp, p0, &i, 3, 0) == p0 && i == 0); + i = 1; assert(lpNextRandom(lp, p1, &i, 2, 0) == p1 && i == 1); + i = 2; assert(lpNextRandom(lp, p2, &i, 1, 0) == p2 && i == 2); + + /* Pick more than one when there's only one left returns the last one. */ + i = 2; assert(lpNextRandom(lp, p2, &i, 42, 0) == p2 && i == 2); + + /* Pick all even elements returns p0 and p2. */ + i = 0; assert(lpNextRandom(lp, p0, &i, 10, 1) == p0 && i == 0); + i = 1; assert(lpNextRandom(lp, p1, &i, 10, 1) == p2 && i == 2); + + /* Don't crash even for bad index. */ + for (int j = 0; j < 100; j++) { + unsigned char *p; + switch (j % 4) { + case 0: p = p0; break; + case 1: p = p1; break; + case 2: p = p2; break; + case 3: p = NULL; break; + } + i = j % 7; + unsigned int remaining = j % 5; + p = lpNextRandom(lp, p, &i, remaining, 0); + assert(p == p0 || p == p1 || p == p2 || p == NULL); + } + } + TEST("Random pair with one element") { listpackEntry key, val; unsigned char *lp = lpNew(0); diff --git a/src/listpack.h b/src/listpack.h index 3e750af5b..ce27ea367 100644 --- a/src/listpack.h +++ b/src/listpack.h @@ -70,6 +70,7 @@ unsigned char *lpReplaceInteger(unsigned char *lp, unsigned char **p, long long unsigned char *lpDelete(unsigned char *lp, unsigned char *p, unsigned char **newp); unsigned char *lpDeleteRangeWithEntry(unsigned char *lp, unsigned char **p, unsigned long num); unsigned char *lpDeleteRange(unsigned char *lp, long index, unsigned long num); +unsigned char *lpBatchDelete(unsigned char *lp, unsigned char **ps, unsigned long count); unsigned char *lpMerge(unsigned char **first, unsigned char **second); unsigned long lpLength(unsigned char *lp); unsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf); @@ -90,6 +91,9 @@ unsigned int lpCompare(unsigned char *p, unsigned char *s, uint32_t slen); void lpRandomPair(unsigned char *lp, unsigned long total_count, listpackEntry *key, listpackEntry *val); void lpRandomPairs(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals); unsigned int lpRandomPairsUnique(unsigned char *lp, unsigned int count, listpackEntry *keys, listpackEntry *vals); +void lpRandomEntries(unsigned char *lp, unsigned int count, listpackEntry *entries); +unsigned char *lpNextRandom(unsigned char *lp, unsigned char *p, unsigned int *index, + unsigned int remaining, int even_only); int lpSafeToAdd(unsigned char* lp, size_t add); void lpRepr(unsigned char *lp); diff --git a/src/object.c b/src/object.c index 44fd5befd..d48af4128 100644 --- a/src/object.c +++ b/src/object.c @@ -247,6 +247,13 @@ robj *createIntsetObject(void) { return o; } +robj *createSetListpackObject(void) { + unsigned char *lp = lpNew(0); + robj *o = createObject(OBJ_SET, lp); + o->encoding = OBJ_ENCODING_LISTPACK; + return o; +} + robj *createHashObject(void) { unsigned char *zl = lpNew(0); robj *o = createObject(OBJ_HASH, zl); @@ -306,6 +313,7 @@ void freeSetObject(robj *o) { dictRelease((dict*) o->ptr); break; case OBJ_ENCODING_INTSET: + case OBJ_ENCODING_LISTPACK: zfree(o->ptr); break; default: @@ -441,6 +449,8 @@ void dismissSetObject(robj *o, size_t size_hint) { dismissMemory(set->ht_table[1], DICTHT_SIZE(set->ht_size_exp[1])*sizeof(dictEntry*)); } else if (o->encoding == OBJ_ENCODING_INTSET) { dismissMemory(o->ptr, intsetBlobLen((intset*)o->ptr)); + } else if (o->encoding == OBJ_ENCODING_LISTPACK) { + dismissMemory(o->ptr, lpBytes((unsigned char *)o->ptr)); } else { serverPanic("Unknown set encoding type"); } diff --git a/src/rdb.c b/src/rdb.c index 083e5cf89..bbb61aafb 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -665,6 +665,8 @@ int rdbSaveObjectType(rio *rdb, robj *o) { return rdbSaveType(rdb,RDB_TYPE_SET_INTSET); else if (o->encoding == OBJ_ENCODING_HT) return rdbSaveType(rdb,RDB_TYPE_SET); + else if (o->encoding == OBJ_ENCODING_LISTPACK) + return rdbSaveType(rdb,RDB_TYPE_SET_LISTPACK); else serverPanic("Unknown set encoding"); case OBJ_ZSET: @@ -858,6 +860,10 @@ ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid) { if ((n = rdbSaveRawString(rdb,o->ptr,l)) == -1) return -1; nwritten += n; + } else if (o->encoding == OBJ_ENCODING_LISTPACK) { + size_t l = lpBytes((unsigned char *)o->ptr); + if ((n = rdbSaveRawString(rdb, o->ptr, l)) == -1) return -1; + nwritten += n; } else { serverPanic("Unknown set encoding"); } @@ -1690,19 +1696,21 @@ static int _listZiplistEntryConvertAndValidate(unsigned char *p, unsigned int he } /* callback for to check the listpack doesn't have duplicate records */ -static int _lpPairsEntryValidation(unsigned char *p, unsigned int head_count, void *userdata) { +static int _lpEntryValidation(unsigned char *p, unsigned int head_count, void *userdata) { struct { + int pairs; long count; dict *fields; } *data = userdata; if (data->fields == NULL) { data->fields = dictCreate(&hashDictType); - dictExpand(data->fields, head_count/2); + dictExpand(data->fields, data->pairs ? head_count/2 : head_count); } - /* Even records are field names, add to dict and check that's not a dup */ - if (((data->count) & 1) == 0) { + /* If we're checking pairs, then even records are field names. Otherwise + * we're checking all elements. Add to dict and check that's not a dup */ + if (!data->pairs || ((data->count) & 1) == 0) { unsigned char *str; int64_t slen; unsigned char buf[LP_INTBUF_SIZE]; @@ -1722,21 +1730,24 @@ static int _lpPairsEntryValidation(unsigned char *p, unsigned int head_count, vo /* Validate the integrity of the listpack structure. * when `deep` is 0, only the integrity of the header is validated. - * when `deep` is 1, we scan all the entries one by one. */ -int lpPairsValidateIntegrityAndDups(unsigned char *lp, size_t size, int deep) { + * when `deep` is 1, we scan all the entries one by one. + * when `pairs` is 0, all elements need to be unique (it's a set) + * when `pairs` is 1, odd elements need to be unique (it's a key-value map) */ +int lpValidateIntegrityAndDups(unsigned char *lp, size_t size, int deep, int pairs) { if (!deep) return lpValidateIntegrity(lp, size, 0, NULL, NULL); /* Keep track of the field names to locate duplicate ones */ struct { + int pairs; long count; dict *fields; /* Initialisation at the first callback. */ - } data = {0, NULL}; + } data = {pairs, 0, NULL}; - int ret = lpValidateIntegrity(lp, size, 1, _lpPairsEntryValidation, &data); + int ret = lpValidateIntegrity(lp, size, 1, _lpEntryValidation, &data); /* make sure we have an even number of records. */ - if (data.count & 1) + if (pairs && data.count & 1) ret = 0; if (data.fields) dictRelease(data.fields); @@ -1813,6 +1824,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { } /* Load every single element of the set */ + size_t maxelelen = 0, sumelelen = 0; for (i = 0; i < len; i++) { long long llval; sds sdsele; @@ -1821,6 +1833,9 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { decrRefCount(o); return NULL; } + size_t elelen = sdslen(sdsele); + sumelelen += elelen; + if (elelen > maxelelen) maxelelen = elelen; if (o->encoding == OBJ_ENCODING_INTSET) { /* Fetch integer value from element. */ @@ -1833,6 +1848,14 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { sdsfree(sdsele); return NULL; } + } else if (setTypeSize(o) < server.set_max_listpack_entries && + maxelelen <= server.set_max_listpack_value && + lpSafeToAdd(NULL, sumelelen)) + { + /* We checked if it's safe to add one large element instead + * of many small ones. It's OK since lpSafeToAdd doesn't + * care about individual elements, only the total size. */ + setTypeConvert(o, OBJ_ENCODING_LISTPACK); } else { setTypeConvert(o,OBJ_ENCODING_HT); if (dictTryExpand(o->ptr,len) != DICT_OK) { @@ -1844,6 +1867,33 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { } } + /* This will also be called when the set was just converted + * to a listpack encoded set. */ + if (o->encoding == OBJ_ENCODING_LISTPACK) { + if (setTypeSize(o) < server.set_max_listpack_entries && + elelen <= server.set_max_listpack_value && + lpSafeToAdd(o->ptr, elelen)) + { + unsigned char *p = lpFirst(o->ptr); + if (p && lpFind(o->ptr, p, (unsigned char*)sdsele, elelen, 0)) { + rdbReportCorruptRDB("Duplicate set members detected"); + decrRefCount(o); + sdsfree(sdsele); + return NULL; + } + o->ptr = lpAppend(o->ptr, (unsigned char *)sdsele, elelen); + } else { + setTypeConvert(o, OBJ_ENCODING_HT); + if (dictTryExpand(o->ptr, len) != DICT_OK) { + rdbReportCorruptRDB("OOM in dictTryExpand %llu", + (unsigned long long)len); + sdsfree(sdsele); + decrRefCount(o); + return NULL; + } + } + } + /* This will also be called when the set was just converted * to a regular hash table encoded set. */ if (o->encoding == OBJ_ENCODING_HT) { @@ -2126,6 +2176,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { } else if (rdbtype == RDB_TYPE_HASH_ZIPMAP || rdbtype == RDB_TYPE_LIST_ZIPLIST || rdbtype == RDB_TYPE_SET_INTSET || + rdbtype == RDB_TYPE_SET_LISTPACK || rdbtype == RDB_TYPE_ZSET_ZIPLIST || rdbtype == RDB_TYPE_ZSET_LISTPACK || rdbtype == RDB_TYPE_HASH_ZIPLIST || @@ -2243,6 +2294,20 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { if (intsetLen(o->ptr) > server.set_max_intset_entries) setTypeConvert(o,OBJ_ENCODING_HT); break; + case RDB_TYPE_SET_LISTPACK: + if (deep_integrity_validation) server.stat_dump_payload_sanitizations++; + if (!lpValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation, 0)) { + rdbReportCorruptRDB("Set listpack integrity check failed."); + zfree(encoded); + o->ptr = NULL; + decrRefCount(o); + return NULL; + } + o->type = OBJ_SET; + o->encoding = OBJ_ENCODING_LISTPACK; + if (setTypeSize(o) > server.set_max_listpack_entries) + setTypeConvert(o, OBJ_ENCODING_HT); + break; case RDB_TYPE_ZSET_ZIPLIST: { unsigned char *lp = lpNew(encoded_len); @@ -2272,7 +2337,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { } case RDB_TYPE_ZSET_LISTPACK: if (deep_integrity_validation) server.stat_dump_payload_sanitizations++; - if (!lpPairsValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation)) { + if (!lpValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation, 1)) { rdbReportCorruptRDB("Zset listpack integrity check failed."); zfree(encoded); o->ptr = NULL; @@ -2318,7 +2383,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) { } case RDB_TYPE_HASH_LISTPACK: if (deep_integrity_validation) server.stat_dump_payload_sanitizations++; - if (!lpPairsValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation)) { + if (!lpValidateIntegrityAndDups(encoded, encoded_len, deep_integrity_validation, 1)) { rdbReportCorruptRDB("Hash listpack integrity check failed."); zfree(encoded); o->ptr = NULL; diff --git a/src/rdb.h b/src/rdb.h index d215334db..4350d3a28 100644 --- a/src/rdb.h +++ b/src/rdb.h @@ -38,7 +38,7 @@ /* The current RDB version. When the format changes in a way that is no longer * backward compatible this number gets incremented. */ -#define RDB_VERSION 10 +#define RDB_VERSION 11 /* Defines related to the dump file format. To store 32 bits lengths for short * keys requires a lot of space, so we check the most significant 2 bits of @@ -95,10 +95,11 @@ #define RDB_TYPE_ZSET_LISTPACK 17 #define RDB_TYPE_LIST_QUICKLIST_2 18 #define RDB_TYPE_STREAM_LISTPACKS_2 19 +#define RDB_TYPE_SET_LISTPACK 20 /* NOTE: WHEN ADDING NEW RDB TYPE, UPDATE rdbIsObjectType() BELOW */ /* Test if a type is an object type. */ -#define rdbIsObjectType(t) (((t) >= 0 && (t) <= 7) || ((t) >= 9 && (t) <= 19)) +#define rdbIsObjectType(t) (((t) >= 0 && (t) <= 7) || ((t) >= 9 && (t) <= 20)) /* Special RDB opcodes (saved/loaded with rdbSaveType/rdbLoadType). */ #define RDB_OPCODE_FUNCTION2 245 /* function library data */ diff --git a/src/redis-check-rdb.c b/src/redis-check-rdb.c index cedfd84cb..a537c6dae 100644 --- a/src/redis-check-rdb.c +++ b/src/redis-check-rdb.c @@ -97,7 +97,8 @@ char *rdb_type_string[] = { "stream", "hash-listpack", "zset-listpack", - "quicklist-v2" + "quicklist-v2", + "set-listpack", }; /* Show a few stats collected into 'rdbstate' */ diff --git a/src/server.h b/src/server.h index 4dfaaea7b..c96c1c743 100644 --- a/src/server.h +++ b/src/server.h @@ -1850,6 +1850,8 @@ struct redisServer { size_t hash_max_listpack_entries; size_t hash_max_listpack_value; size_t set_max_intset_entries; + size_t set_max_listpack_entries; + size_t set_max_listpack_value; size_t zset_max_listpack_entries; size_t zset_max_listpack_value; size_t hll_sparse_max_bytes; @@ -2331,6 +2333,7 @@ typedef struct { int encoding; int ii; /* intset iterator */ dictIterator *di; + unsigned char *lpi; /* listpack iterator */ } setTypeIterator; /* Structure to hold hash iteration abstraction. Note that iteration over @@ -2655,6 +2658,7 @@ robj *createStringObjectFromLongDouble(long double value, int humanfriendly); robj *createQuicklistObject(void); robj *createSetObject(void); robj *createIntsetObject(void); +robj *createSetListpackObject(void); robj *createHashObject(void); robj *createZsetObject(void); robj *createZsetListpackObject(void); @@ -2980,9 +2984,9 @@ int setTypeRemove(robj *subject, sds value); int setTypeIsMember(robj *subject, sds value); setTypeIterator *setTypeInitIterator(robj *subject); void setTypeReleaseIterator(setTypeIterator *si); -int setTypeNext(setTypeIterator *si, sds *sdsele, int64_t *llele); +int setTypeNext(setTypeIterator *si, char **str, size_t *len, int64_t *llele); sds setTypeNextObject(setTypeIterator *si); -int setTypeRandomElement(robj *setobj, sds *sdsele, int64_t *llele); +int setTypeRandomElement(robj *setobj, char **str, size_t *len, int64_t *llele); unsigned long setTypeRandomElements(robj *set, unsigned long count, robj *aux_set); unsigned long setTypeSize(const robj *subject); void setTypeConvert(robj *subject, int enc); diff --git a/src/t_set.c b/src/t_set.c index 8a669fe7c..8ab602791 100644 --- a/src/t_set.c +++ b/src/t_set.c @@ -29,6 +29,12 @@ #include "server.h" +/* Internal prototypes */ + +int setTypeAddAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sds); +int setTypeRemoveAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sds); +int setTypeIsMemberAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sds); + /*----------------------------------------------------------------------------- * Set Commands *----------------------------------------------------------------------------*/ @@ -42,45 +48,154 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum, robj *setTypeCreate(sds value) { if (isSdsRepresentableAsLongLong(value,NULL) == C_OK) return createIntsetObject(); - return createSetObject(); + return createSetListpackObject(); } -/* Add the specified value into a set. +/* Return the maximum number of entries to store in an intset. */ +static size_t intsetMaxEntries(void) { + size_t max_entries = server.set_max_intset_entries; + /* limit to 1G entries due to intset internals. */ + if (max_entries >= 1<<30) max_entries = 1<<30; + return max_entries; +} + +/* Converts intset to HT if it contains too many entries. */ +static void maybeConvertIntset(robj *subject) { + serverAssert(subject->encoding == OBJ_ENCODING_INTSET); + if (intsetLen(subject->ptr) > intsetMaxEntries()) + setTypeConvert(subject,OBJ_ENCODING_HT); +} + +/* When you know all set elements are integers, call this to convert the set to + * an intset. No conversion happens if the set contains too many entries for an + * intset. */ +static void maybeConvertToIntset(robj *set) { + if (set->encoding == OBJ_ENCODING_INTSET) return; /* already intset */ + if (setTypeSize(set) > intsetMaxEntries()) return; /* can't use intset */ + intset *is = intsetNew(); + char *str; + size_t len; + int64_t llval; + setTypeIterator *si = setTypeInitIterator(set); + while (setTypeNext(si, &str, &len, &llval) != -1) { + if (str) { + /* If the element is returned as a string, we may be able to convert + * it to integer. This happens for OBJ_ENCODING_HT. */ + serverAssert(string2ll(str, len, (long long *)&llval)); + } + uint8_t success = 0; + is = intsetAdd(is, llval, &success); + serverAssert(success); + } + setTypeReleaseIterator(si); + freeSetObject(set); /* frees the internals but not robj itself */ + set->ptr = is; + set->encoding = OBJ_ENCODING_INTSET; +} + +/* Add the specified sds value into a set. * * If the value was already member of the set, nothing is done and 0 is * returned, otherwise the new element is added and 1 is returned. */ int setTypeAdd(robj *subject, sds value) { - long long llval; - if (subject->encoding == OBJ_ENCODING_HT) { - dict *ht = subject->ptr; - dictEntry *de = dictAddRaw(ht,value,NULL); - if (de) { - dictSetKey(ht,de,sdsdup(value)); + return setTypeAddAux(subject, value, sdslen(value), 0, 1); +} + +/* Add member. This function is optimized for the different encodings. The + * value can be provided as an sds string (indicated by passing str_is_sds = + * 1), as string and length (str_is_sds = 0) or as an integer in which case str + * is set to NULL and llval is provided instead. + * + * Returns 1 if the value was added and 0 if it was already a member. */ +int setTypeAddAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sds) { + char tmpbuf[LONG_STR_SIZE]; + if (!str) { + if (set->encoding == OBJ_ENCODING_INTSET) { + uint8_t success = 0; + set->ptr = intsetAdd(set->ptr, llval, &success); + if (success) maybeConvertIntset(set); + return success; + } + /* Convert int to string. */ + len = ll2string(tmpbuf, sizeof tmpbuf, llval); + str = tmpbuf; + str_is_sds = 0; + } + + serverAssert(str); + if (set->encoding == OBJ_ENCODING_HT) { + /* Avoid duping the string if it is an sds string. */ + sds sdsval = str_is_sds ? (sds)str : sdsnewlen(str, len); + dict *ht = set->ptr; + dictEntry *de = dictAddRaw(ht,sdsval,NULL); + if (de && sdsval == str) { + /* String was added but we don't own this sds string. Dup it and + * replace it in the dict entry. */ + dictSetKey(ht,de,sdsdup((sds)str)); dictSetVal(ht,de,NULL); + } else if (!de && sdsval != str) { + /* String was already a member. Free our temporary sds copy. */ + sdsfree(sdsval); + } + return (de != NULL); + } else if (set->encoding == OBJ_ENCODING_LISTPACK) { + unsigned char *lp = set->ptr; + unsigned char *p = lpFirst(lp); + if (p != NULL) + p = lpFind(lp, p, (unsigned char*)str, len, 0); + if (p == NULL) { + /* Not found. */ + if (lpLength(lp) < server.set_max_listpack_entries && + len <= server.set_max_listpack_value && + lpSafeToAdd(lp, len)) + { + if (str == tmpbuf) { + /* This came in as integer so we can avoid parsing it again. + * TODO: Create and use lpFindInteger; don't go via string. */ + lp = lpAppendInteger(lp, llval); + } else { + lp = lpAppend(lp, (unsigned char*)str, len); + } + set->ptr = lp; + } else { + /* Size limit is reached. Convert to hashtable and add. */ + setTypeConvert(set, OBJ_ENCODING_HT); + serverAssert(dictAdd(set->ptr,sdsnewlen(str,len),NULL) == DICT_OK); + } return 1; } - } else if (subject->encoding == OBJ_ENCODING_INTSET) { - if (isSdsRepresentableAsLongLong(value,&llval) == C_OK) { + } else if (set->encoding == OBJ_ENCODING_INTSET) { + long long value; + if (string2ll(str, len, &value)) { uint8_t success = 0; - subject->ptr = intsetAdd(subject->ptr,llval,&success); + set->ptr = intsetAdd(set->ptr,value,&success); if (success) { - /* Convert to regular set when the intset contains - * too many entries. */ - size_t max_entries = server.set_max_intset_entries; - /* limit to 1G entries due to intset internals. */ - if (max_entries >= 1<<30) max_entries = 1<<30; - if (intsetLen(subject->ptr) > max_entries) - setTypeConvert(subject,OBJ_ENCODING_HT); + maybeConvertIntset(set); return 1; } } else { - /* Failed to get integer from object, convert to regular set. */ - setTypeConvert(subject,OBJ_ENCODING_HT); - - /* The set *was* an intset and this value is not integer - * encodable, so dictAdd should always work. */ - serverAssert(dictAdd(subject->ptr,sdsdup(value),NULL) == DICT_OK); - return 1; + size_t maxelelen = intsetLen(set->ptr) == 0 ? + 0 : max(sdigits10(intsetMax(set->ptr)), + sdigits10(intsetMin(set->ptr))); + if (intsetLen((const intset*)set->ptr) < server.set_max_listpack_entries && + len <= server.set_max_listpack_value && + maxelelen <= server.set_max_listpack_value && + lpSafeToAdd(NULL, maxelelen * intsetLen(set->ptr) + len)) + { + /* In the "safe to add" check above we assumed all elements in + * the intset are of size maxelelen. This is an upper bound. */ + setTypeConvert(set, OBJ_ENCODING_LISTPACK); + unsigned char *lp = set->ptr; + lp = lpAppend(lp, (unsigned char *)str, len); + set->ptr = lp; + return 1; + } else { + setTypeConvert(set, OBJ_ENCODING_HT); + /* The set *was* an intset and this value is not integer + * encodable, so dictAdd should always work. */ + serverAssert(dictAdd(set->ptr,sdsnewlen(str,len),NULL) == DICT_OK); + return 1; + } } } else { serverPanic("Unknown set encoding"); @@ -88,15 +203,50 @@ int setTypeAdd(robj *subject, sds value) { return 0; } +/* Deletes a value provided as an sds string from the set. Returns 1 if the + * value was deleted and 0 if it was not a member of the set. */ int setTypeRemove(robj *setobj, sds value) { - long long llval; + return setTypeRemoveAux(setobj, value, sdslen(value), 0, 1); +} + +/* Remove a member. This function is optimized for the different encodings. The + * value can be provided as an sds string (indicated by passing str_is_sds = + * 1), as string and length (str_is_sds = 0) or as an integer in which case str + * is set to NULL and llval is provided instead. + * + * Returns 1 if the value was deleted and 0 if it was not a member of the set. */ +int setTypeRemoveAux(robj *setobj, char *str, size_t len, int64_t llval, int str_is_sds) { + char tmpbuf[LONG_STR_SIZE]; + if (!str) { + if (setobj->encoding == OBJ_ENCODING_INTSET) { + int success; + setobj->ptr = intsetRemove(setobj->ptr,llval,&success); + return success; + } + len = ll2string(tmpbuf, sizeof tmpbuf, llval); + str = tmpbuf; + str_is_sds = 0; + } + if (setobj->encoding == OBJ_ENCODING_HT) { - if (dictDelete(setobj->ptr,value) == DICT_OK) { - if (htNeedsResize(setobj->ptr)) dictResize(setobj->ptr); + sds sdsval = str_is_sds ? (sds)str : sdsnewlen(str, len); + int deleted = (dictDelete(setobj->ptr, sdsval) == DICT_OK); + if (deleted && htNeedsResize(setobj->ptr)) dictResize(setobj->ptr); + if (sdsval != str) sdsfree(sdsval); /* free temp copy */ + return deleted; + } else if (setobj->encoding == OBJ_ENCODING_LISTPACK) { + unsigned char *lp = setobj->ptr; + unsigned char *p = lpFirst(lp); + if (p == NULL) return 0; + p = lpFind(lp, p, (unsigned char*)str, len, 0); + if (p != NULL) { + lp = lpDelete(lp, p, NULL); + setobj->ptr = lp; return 1; } } else if (setobj->encoding == OBJ_ENCODING_INTSET) { - if (isSdsRepresentableAsLongLong(value,&llval) == C_OK) { + long long llval; + if (string2ll(str, len, &llval)) { int success; setobj->ptr = intsetRemove(setobj->ptr,llval,&success); if (success) return 1; @@ -107,18 +257,45 @@ int setTypeRemove(robj *setobj, sds value) { return 0; } +/* Check if an sds string is a member of the set. Returns 1 if the value is a + * member of the set and 0 if it isn't. */ int setTypeIsMember(robj *subject, sds value) { - long long llval; - if (subject->encoding == OBJ_ENCODING_HT) { - return dictFind((dict*)subject->ptr,value) != NULL; - } else if (subject->encoding == OBJ_ENCODING_INTSET) { - if (isSdsRepresentableAsLongLong(value,&llval) == C_OK) { - return intsetFind((intset*)subject->ptr,llval); - } + return setTypeIsMemberAux(subject, value, sdslen(value), 0, 1); +} + +/* Membership checking optimized for the different encodings. The value can be + * provided as an sds string (indicated by passing str_is_sds = 1), as string + * and length (str_is_sds = 0) or as an integer in which case str is set to NULL + * and llval is provided instead. + * + * Returns 1 if the value is a member of the set and 0 if it isn't. */ +int setTypeIsMemberAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sds) { + char tmpbuf[LONG_STR_SIZE]; + if (!str) { + if (set->encoding == OBJ_ENCODING_INTSET) + return intsetFind(set->ptr, llval); + len = ll2string(tmpbuf, sizeof tmpbuf, llval); + str = tmpbuf; + str_is_sds = 0; + } + + if (set->encoding == OBJ_ENCODING_LISTPACK) { + unsigned char *lp = set->ptr; + unsigned char *p = lpFirst(lp); + return p && lpFind(lp, p, (unsigned char*)str, len, 0); + } else if (set->encoding == OBJ_ENCODING_INTSET) { + long long llval; + return string2ll(str, len, &llval) && intsetFind(set->ptr, llval); + } else if (set->encoding == OBJ_ENCODING_HT && str_is_sds) { + return dictFind(set->ptr, (sds)str) != NULL; + } else if (set->encoding == OBJ_ENCODING_HT) { + sds sdsval = sdsnewlen(str, len); + int result = dictFind(set->ptr, sdsval) != NULL; + sdsfree(sdsval); + return result; } else { serverPanic("Unknown set encoding"); } - return 0; } setTypeIterator *setTypeInitIterator(robj *subject) { @@ -129,6 +306,8 @@ setTypeIterator *setTypeInitIterator(robj *subject) { si->di = dictGetIterator(subject->ptr); } else if (si->encoding == OBJ_ENCODING_INTSET) { si->ii = 0; + } else if (si->encoding == OBJ_ENCODING_LISTPACK) { + si->lpi = NULL; } else { serverPanic("Unknown set encoding"); } @@ -142,28 +321,50 @@ void setTypeReleaseIterator(setTypeIterator *si) { } /* Move to the next entry in the set. Returns the object at the current - * position. + * position, as a string or as an integer. * - * Since set elements can be internally be stored as SDS strings or + * Since set elements can be internally be stored as SDS strings, char buffers or * simple arrays of integers, setTypeNext returns the encoding of the - * set object you are iterating, and will populate the appropriate pointer - * (sdsele) or (llele) accordingly. + * set object you are iterating, and will populate the appropriate pointers + * (str and len) or (llele) depending on whether the value is stored as a string + * or as an integer internally. * - * Note that both the sdsele and llele pointers should be passed and cannot + * If OBJ_ENCODING_HT is returned, then str points to an sds string and can be + * used as such. If OBJ_ENCODING_INTSET, then llele is populated and str is + * pointed to NULL. If OBJ_ENCODING_LISTPACK is returned, the value can be + * either a string or an integer. If *str is not NULL, then str and len are + * populated with the string content and length. Otherwise, llele populated with + * an integer value. + * + * Note that str, len and llele pointers should all be passed and cannot * be NULL since the function will try to defensively populate the non * used field with values which are easy to trap if misused. * - * When there are no longer elements -1 is returned. */ -int setTypeNext(setTypeIterator *si, sds *sdsele, int64_t *llele) { + * When there are no more elements -1 is returned. */ +int setTypeNext(setTypeIterator *si, char **str, size_t *len, int64_t *llele) { if (si->encoding == OBJ_ENCODING_HT) { dictEntry *de = dictNext(si->di); if (de == NULL) return -1; - *sdsele = dictGetKey(de); + *str = dictGetKey(de); + *len = sdslen(*str); *llele = -123456789; /* Not needed. Defensive. */ } else if (si->encoding == OBJ_ENCODING_INTSET) { if (!intsetGet(si->subject->ptr,si->ii++,llele)) return -1; - *sdsele = NULL; /* Not needed. Defensive. */ + *str = NULL; + } else if (si->encoding == OBJ_ENCODING_LISTPACK) { + unsigned char *lp = si->subject->ptr; + unsigned char *lpi = si->lpi; + if (lpi == NULL) { + lpi = lpFirst(lp); + } else { + lpi = lpNext(lp, lpi); + } + if (lpi == NULL) return -1; + si->lpi = lpi; + unsigned int l; + *str = (char *)lpGetValue(lpi, &l, (long long *)llele); + *len = (size_t)l; } else { serverPanic("Wrong set encoding in setTypeNext"); } @@ -179,54 +380,85 @@ int setTypeNext(setTypeIterator *si, sds *sdsele, int64_t *llele) { * an issue. */ sds setTypeNextObject(setTypeIterator *si) { int64_t intele; - sds sdsele; - int encoding; + char *str; + size_t len; - encoding = setTypeNext(si,&sdsele,&intele); - switch(encoding) { - case -1: return NULL; - case OBJ_ENCODING_INTSET: - return sdsfromlonglong(intele); - case OBJ_ENCODING_HT: - return sdsdup(sdsele); - default: - serverPanic("Unsupported encoding"); - } - return NULL; /* just to suppress warnings */ + if (setTypeNext(si, &str, &len, &intele) == -1) return NULL; + if (str != NULL) return sdsnewlen(str, len); + return sdsfromlonglong(intele); } /* Return random element from a non empty set. * The returned element can be an int64_t value if the set is encoded - * as an "intset" blob of integers, or an SDS string if the set - * is a regular set. + * as an "intset" blob of integers, or an string. * - * The caller provides both pointers to be populated with the right + * The caller provides three pointers to be populated with the right * object. The return value of the function is the object->encoding - * field of the object and is used by the caller to check if the - * int64_t pointer or the sds pointer was populated. + * field of the object and can be used by the caller to check if the + * int64_t pointer or the str and len pointers were populated, as for + * setTypeNext. If OBJ_ENCODING_HT is returned, str is pointed to a + * string which is actually an sds string and it can be used as such. * - * Note that both the sdsele and llele pointers should be passed and cannot - * be NULL since the function will try to defensively populate the non - * used field with values which are easy to trap if misused. */ -int setTypeRandomElement(robj *setobj, sds *sdsele, int64_t *llele) { + * Note that both the str, len and llele pointers should be passed and cannot + * be NULL. If str is set to NULL, the value is an integer stored in llele. */ +int setTypeRandomElement(robj *setobj, char **str, size_t *len, int64_t *llele) { if (setobj->encoding == OBJ_ENCODING_HT) { dictEntry *de = dictGetFairRandomKey(setobj->ptr); - *sdsele = dictGetKey(de); + *str = dictGetKey(de); + *len = sdslen(*str); *llele = -123456789; /* Not needed. Defensive. */ } else if (setobj->encoding == OBJ_ENCODING_INTSET) { *llele = intsetRandom(setobj->ptr); - *sdsele = NULL; /* Not needed. Defensive. */ + *str = NULL; /* Not needed. Defensive. */ + } else if (setobj->encoding == OBJ_ENCODING_LISTPACK) { + unsigned char *lp = setobj->ptr; + int r = rand() % lpLength(lp); + unsigned char *p = lpSeek(lp, r); + unsigned int l; + *str = (char *)lpGetValue(p, &l, (long long *)llele); + *len = (size_t)l; } else { serverPanic("Unknown set encoding"); } return setobj->encoding; } +/* Pops a random element and returns it as an object. */ +robj *setTypePopRandom(robj *set) { + robj *obj; + if (set->encoding == OBJ_ENCODING_LISTPACK) { + /* Find random and delete it without re-seeking the listpack. */ + unsigned int i = 0; + unsigned char *p = lpNextRandom(set->ptr, lpFirst(set->ptr), &i, 1, 0); + unsigned int len = 0; /* initialize to silence warning */ + long long llele = 0; /* initialize to silence warning */ + char *str = (char *)lpGetValue(p, &len, &llele); + if (str) + obj = createStringObject(str, len); + else + obj = createStringObjectFromLongLong(llele); + set->ptr = lpDelete(set->ptr, p, NULL); + } else { + char *str; + size_t len = 0; + int64_t llele = 0; + int encoding = setTypeRandomElement(set, &str, &len, &llele); + if (str) + obj = createStringObject(str, len); + else + obj = createStringObjectFromLongLong(llele); + setTypeRemoveAux(set, str, len, llele, encoding == OBJ_ENCODING_HT); + } + return obj; +} + unsigned long setTypeSize(const robj *subject) { if (subject->encoding == OBJ_ENCODING_HT) { return dictSize((const dict*)subject->ptr); } else if (subject->encoding == OBJ_ENCODING_INTSET) { return intsetLen((const intset*)subject->ptr); + } else if (subject->encoding == OBJ_ENCODING_LISTPACK) { + return lpLength((unsigned char *)subject->ptr); } else { serverPanic("Unknown set encoding"); } @@ -238,27 +470,44 @@ unsigned long setTypeSize(const robj *subject) { void setTypeConvert(robj *setobj, int enc) { setTypeIterator *si; serverAssertWithInfo(NULL,setobj,setobj->type == OBJ_SET && - setobj->encoding == OBJ_ENCODING_INTSET); + setobj->encoding != enc); if (enc == OBJ_ENCODING_HT) { - int64_t intele; dict *d = dictCreate(&setDictType); sds element; /* Presize the dict to avoid rehashing */ - dictExpand(d,intsetLen(setobj->ptr)); + dictExpand(d, setTypeSize(setobj)); /* To add the elements we extract integers and create redis objects */ si = setTypeInitIterator(setobj); - while (setTypeNext(si,&element,&intele) != -1) { - element = sdsfromlonglong(intele); + while ((element = setTypeNextObject(si)) != NULL) { serverAssert(dictAdd(d,element,NULL) == DICT_OK); } setTypeReleaseIterator(si); + freeSetObject(setobj); /* frees the internals but not setobj itself */ setobj->encoding = OBJ_ENCODING_HT; - zfree(setobj->ptr); setobj->ptr = d; + } else if (enc == OBJ_ENCODING_LISTPACK) { + /* Preallocate the minimum one byte per element */ + size_t estcap = setTypeSize(setobj); + unsigned char *lp = lpNew(estcap); + char *str; + size_t len; + int64_t llele; + si = setTypeInitIterator(setobj); + while (setTypeNext(si, &str, &len, &llele) != -1) { + if (str != NULL) + lp = lpAppend(lp, (unsigned char *)str, len); + else + lp = lpAppendInteger(lp, llele); + } + setTypeReleaseIterator(si); + + freeSetObject(setobj); /* frees the internals but not setobj itself */ + setobj->encoding = OBJ_ENCODING_LISTPACK; + setobj->ptr = lp; } else { serverPanic("Unsupported set conversion"); } @@ -272,8 +521,6 @@ void setTypeConvert(robj *setobj, int enc) { robj *setTypeDup(robj *o) { robj *set; setTypeIterator *si; - sds elesds; - int64_t intobj; serverAssert(o->type == OBJ_SET); @@ -285,13 +532,23 @@ robj *setTypeDup(robj *o) { memcpy(newis,is,size); set = createObject(OBJ_SET, newis); set->encoding = OBJ_ENCODING_INTSET; + } else if (o->encoding == OBJ_ENCODING_LISTPACK) { + unsigned char *lp = o->ptr; + size_t sz = lpBytes(lp); + unsigned char *new_lp = zmalloc(sz); + memcpy(new_lp, lp, sz); + set = createObject(OBJ_SET, new_lp); + set->encoding = OBJ_ENCODING_LISTPACK; } else if (o->encoding == OBJ_ENCODING_HT) { set = createSetObject(); dict *d = o->ptr; dictExpand(set->ptr, dictSize(d)); si = setTypeInitIterator(o); - while (setTypeNext(si, &elesds, &intobj) != -1) { - setTypeAdd(set, elesds); + char *str; + size_t len; + int64_t intobj; + while (setTypeNext(si, &str, &len, &intobj) != -1) { + setTypeAdd(set, (sds)str); } setTypeReleaseIterator(si); } else { @@ -509,9 +766,9 @@ void spopWithCountCommand(client *c) { addReplySetLen(c,count); /* Common iteration vars. */ - sds sdsele; robj *objele; - int encoding; + char *str; + size_t len; int64_t llele; unsigned long remaining = size-count; /* Elements left after SPOP. */ @@ -522,24 +779,49 @@ void spopWithCountCommand(client *c) { * CASE 2: The number of elements to return is small compared to the * set size. We can just extract random elements and return them to * the set. */ - if (remaining*SPOP_MOVE_STRATEGY_MUL > count) { - while(count--) { - /* Emit and remove. */ - encoding = setTypeRandomElement(set,&sdsele,&llele); - if (encoding == OBJ_ENCODING_INTSET) { - addReplyBulkLongLong(c,llele); - objele = createStringObjectFromLongLong(llele); - set->ptr = intsetRemove(set->ptr,llele,NULL); + if (remaining*SPOP_MOVE_STRATEGY_MUL > count && + set->encoding == OBJ_ENCODING_LISTPACK) + { + /* Specialized case for listpack. Traverse it only once. */ + unsigned char *lp = set->ptr; + unsigned char *p = lpFirst(lp); + unsigned int index = 0; + unsigned char **ps = zmalloc(sizeof(char *) * count); + for (unsigned long i = 0; i < count; i++) { + p = lpNextRandom(lp, p, &index, count - i, 0); + unsigned int len; + str = (char *)lpGetValue(p, &len, (long long *)&llele); + + if (str) { + addReplyBulkCBuffer(c, str, len); + objele = createStringObject(str, len); } else { - addReplyBulkCBuffer(c,sdsele,sdslen(sdsele)); - objele = createStringObject(sdsele,sdslen(sdsele)); - setTypeRemove(set,sdsele); + addReplyBulkLongLong(c, llele); + objele = createStringObjectFromLongLong(llele); } /* Replicate/AOF this command as an SREM operation */ propargv[2] = objele; alsoPropagate(c->db->id,propargv,3,PROPAGATE_AOF|PROPAGATE_REPL); decrRefCount(objele); + + /* Store pointer for later deletion and move to next. */ + ps[i] = p; + p = lpNext(lp, p); + index++; + } + lp = lpBatchDelete(lp, ps, count); + zfree(ps); + set->ptr = lp; + } else if (remaining*SPOP_MOVE_STRATEGY_MUL > count) { + while(count--) { + objele = setTypePopRandom(set); + addReplyBulk(c, objele); + + /* Replicate/AOF this command as an SREM operation */ + propargv[2] = objele; + alsoPropagate(c->db->id,propargv,3,PROPAGATE_AOF|PROPAGATE_REPL); + decrRefCount(objele); } } else { /* CASE 3: The number of elements to return is very big, approaching @@ -553,29 +835,46 @@ void spopWithCountCommand(client *c) { robj *newset = NULL; /* Create a new set with just the remaining elements. */ - while(remaining--) { - encoding = setTypeRandomElement(set,&sdsele,&llele); - if (encoding == OBJ_ENCODING_INTSET) { - sdsele = sdsfromlonglong(llele); - } else { - sdsele = sdsdup(sdsele); + if (set->encoding == OBJ_ENCODING_LISTPACK) { + /* Specialized case for listpack. Traverse it only once. */ + newset = createSetListpackObject(); + unsigned char *lp = set->ptr; + unsigned char *p = lpFirst(lp); + unsigned int index = 0; + unsigned char **ps = zmalloc(sizeof(char *) * remaining); + for (unsigned long i = 0; i < remaining; i++) { + p = lpNextRandom(lp, p, &index, remaining - i, 0); + unsigned int len; + str = (char *)lpGetValue(p, &len, (long long *)&llele); + setTypeAddAux(newset, str, len, llele, 0); + ps[i] = p; + p = lpNext(lp, p); + index++; + } + lp = lpBatchDelete(lp, ps, remaining); + zfree(ps); + set->ptr = lp; + } else { + while(remaining--) { + int encoding = setTypeRandomElement(set, &str, &len, &llele); + if (!newset) { + newset = str ? createSetListpackObject() : createIntsetObject(); + } + setTypeAddAux(newset, str, len, llele, encoding == OBJ_ENCODING_HT); + setTypeRemoveAux(set, str, len, llele, encoding == OBJ_ENCODING_HT); } - if (!newset) newset = setTypeCreate(sdsele); - setTypeAdd(newset,sdsele); - setTypeRemove(set,sdsele); - sdsfree(sdsele); } /* Transfer the old set to the client. */ setTypeIterator *si; si = setTypeInitIterator(set); - while((encoding = setTypeNext(si,&sdsele,&llele)) != -1) { - if (encoding == OBJ_ENCODING_INTSET) { + while (setTypeNext(si, &str, &len, &llele) != -1) { + if (str == NULL) { addReplyBulkLongLong(c,llele); objele = createStringObjectFromLongLong(llele); } else { - addReplyBulkCBuffer(c,sdsele,sdslen(sdsele)); - objele = createStringObject(sdsele,sdslen(sdsele)); + addReplyBulkCBuffer(c, str, len); + objele = createStringObject(str, len); } /* Replicate/AOF this command as an SREM operation */ @@ -599,9 +898,6 @@ void spopWithCountCommand(client *c) { void spopCommand(client *c) { robj *set, *ele; - sds sdsele; - int64_t llele; - int encoding; if (c->argc == 3) { spopWithCountCommand(c); @@ -616,17 +912,8 @@ void spopCommand(client *c) { if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.null[c->resp])) == NULL || checkType(c,set,OBJ_SET)) return; - /* Get a random element from the set */ - encoding = setTypeRandomElement(set,&sdsele,&llele); - - /* Remove the element from the set */ - if (encoding == OBJ_ENCODING_INTSET) { - ele = createStringObjectFromLongLong(llele); - set->ptr = intsetRemove(set->ptr,llele,NULL); - } else { - ele = createStringObject(sdsele,sdslen(sdsele)); - setTypeRemove(set,ele->ptr); - } + /* Pop a random element from the set */ + ele = setTypePopRandom(set); notifyKeyspaceEvent(NOTIFY_SET,"spop",c->argv[1],c->db->id); @@ -634,7 +921,7 @@ void spopCommand(client *c) { rewriteClientCommandVector(c,3,shared.srem,c->argv[1],ele); /* Add the element to the reply */ - addReplyBulk(c,ele); + addReplyBulk(c, ele); decrRefCount(ele); /* Delete the set if it's empty */ @@ -661,9 +948,9 @@ void srandmemberWithCountCommand(client *c) { unsigned long count, size; int uniq = 1; robj *set; - sds ele; + char *str; + size_t len; int64_t llele; - int encoding; dict *d; @@ -694,12 +981,27 @@ void srandmemberWithCountCommand(client *c) { * elements in random order. */ if (!uniq || count == 1) { addReplyArrayLen(c,count); + + if (set->encoding == OBJ_ENCODING_LISTPACK && count > 1) { + /* Specialized case for listpack, traversing it only once. */ + listpackEntry *entries = zmalloc(count * sizeof(listpackEntry)); + lpRandomEntries(set->ptr, count, entries); + for (unsigned long i = 0; i < count; i++) { + if (entries[i].sval) + addReplyBulkCBuffer(c, entries[i].sval, entries[i].slen); + else + addReplyBulkLongLong(c, entries[i].lval); + } + zfree(entries); + return; + } + while(count--) { - encoding = setTypeRandomElement(set,&ele,&llele); - if (encoding == OBJ_ENCODING_INTSET) { + setTypeRandomElement(set, &str, &len, &llele); + if (str == NULL) { addReplyBulkLongLong(c,llele); } else { - addReplyBulkCBuffer(c,ele,sdslen(ele)); + addReplyBulkCBuffer(c, str, len); } } return; @@ -712,11 +1014,11 @@ void srandmemberWithCountCommand(client *c) { setTypeIterator *si; addReplyArrayLen(c,size); si = setTypeInitIterator(set); - while ((encoding = setTypeNext(si,&ele,&llele)) != -1) { - if (encoding == OBJ_ENCODING_INTSET) { + while (setTypeNext(si, &str, &len, &llele) != -1) { + if (str == NULL) { addReplyBulkLongLong(c,llele); } else { - addReplyBulkCBuffer(c,ele,sdslen(ele)); + addReplyBulkCBuffer(c, str, len); } size--; } @@ -725,6 +1027,31 @@ void srandmemberWithCountCommand(client *c) { return; } + /* CASE 2.5 listpack only. Sampling unique elements, in non-random order. + * Listpack encoded sets are meant to be relatively small, so + * SRANDMEMBER_SUB_STRATEGY_MUL isn't necessary and we rather not make + * copies of the entries. Instead, we emit them directly to the output + * buffer. */ + if (set->encoding == OBJ_ENCODING_LISTPACK) { + unsigned char *lp = set->ptr; + unsigned char *p = lpFirst(lp); + unsigned int i = 0; + addReplyArrayLen(c, count); + while (count) { + p = lpNextRandom(lp, p, &i, count--, 0); + unsigned int len; + str = (char *)lpGetValue(p, &len, (long long *)&llele); + if (str == NULL) { + addReplyBulkLongLong(c, llele); + } else { + addReplyBulkCBuffer(c, str, len); + } + p = lpNext(lp, p); + i++; + } + return; + } + /* For CASE 3 and CASE 4 we need an auxiliary dictionary. */ d = dictCreate(&sdsReplyDictType); @@ -743,13 +1070,13 @@ void srandmemberWithCountCommand(client *c) { /* Add all the elements into the temporary dictionary. */ si = setTypeInitIterator(set); dictExpand(d, size); - while ((encoding = setTypeNext(si,&ele,&llele)) != -1) { + while (setTypeNext(si, &str, &len, &llele) != -1) { int retval = DICT_ERR; - if (encoding == OBJ_ENCODING_INTSET) { + if (str == NULL) { retval = dictAdd(d,sdsfromlonglong(llele),NULL); } else { - retval = dictAdd(d,sdsdup(ele),NULL); + retval = dictAdd(d, sdsnewlen(str, len), NULL); } serverAssert(retval == DICT_OK); } @@ -777,11 +1104,11 @@ void srandmemberWithCountCommand(client *c) { dictExpand(d, count); while (added < count) { - encoding = setTypeRandomElement(set,&ele,&llele); - if (encoding == OBJ_ENCODING_INTSET) { + setTypeRandomElement(set, &str, &len, &llele); + if (str == NULL) { sdsele = sdsfromlonglong(llele); } else { - sdsele = sdsdup(ele); + sdsele = sdsnewlen(str, len); } /* Try to add the object to the dictionary. If it already exists * free it, otherwise increment the number of objects we have @@ -810,9 +1137,9 @@ void srandmemberWithCountCommand(client *c) { /* SRANDMEMBER [] */ void srandmemberCommand(client *c) { robj *set; - sds ele; + char *str; + size_t len; int64_t llele; - int encoding; if (c->argc == 3) { srandmemberWithCountCommand(c); @@ -826,11 +1153,11 @@ void srandmemberCommand(client *c) { if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp])) == NULL || checkType(c,set,OBJ_SET)) return; - encoding = setTypeRandomElement(set,&ele,&llele); - if (encoding == OBJ_ENCODING_INTSET) { + setTypeRandomElement(set, &str, &len, &llele); + if (str == NULL) { addReplyBulkLongLong(c,llele); } else { - addReplyBulkCBuffer(c,ele,sdslen(ele)); + addReplyBulkCBuffer(c, str, len); } } @@ -866,7 +1193,8 @@ void sinterGenericCommand(client *c, robj **setkeys, robj **sets = zmalloc(sizeof(robj*)*setnum); setTypeIterator *si; robj *dstset = NULL; - sds elesds; + char *str; + size_t len; int64_t intobj; void *replylen = NULL; unsigned long j, cardinality = 0; @@ -918,7 +1246,24 @@ void sinterGenericCommand(client *c, robj **setkeys, if (dstkey) { /* If we have a target key where to store the resulting set * create this key with an empty set inside */ - dstset = createIntsetObject(); + if (sets[0]->encoding == OBJ_ENCODING_INTSET) { + /* The first set is an intset, so the result is an intset too. The + * elements are inserted in ascending order which is efficient in an + * intset. */ + dstset = createIntsetObject(); + } else if (sets[0]->encoding == OBJ_ENCODING_LISTPACK) { + /* To avoid many reallocs, we estimate that the result is a listpack + * of approximately the same size as the first set. Then we shrink + * it or possibly convert it to intset it in the end. */ + unsigned char *lp = lpNew(lpBytes(sets[0]->ptr)); + dstset = createObject(OBJ_SET, lp); + dstset->encoding = OBJ_ENCODING_LISTPACK; + } else { + /* We start off with a listpack, since it's more efficient to append + * to than an intset. Later we can convert it to intset or a + * hashtable. */ + dstset = createSetListpackObject(); + } } else if (!cardinality_only) { replylen = addReplyDeferredLen(c); } @@ -926,32 +1271,14 @@ void sinterGenericCommand(client *c, robj **setkeys, /* Iterate all the elements of the first (smallest) set, and test * the element against all the other sets, if at least one set does * not include the element it is discarded */ + int only_integers = 1; si = setTypeInitIterator(sets[0]); - while((encoding = setTypeNext(si,&elesds,&intobj)) != -1) { + while((encoding = setTypeNext(si, &str, &len, &intobj)) != -1) { for (j = 1; j < setnum; j++) { if (sets[j] == sets[0]) continue; - if (encoding == OBJ_ENCODING_INTSET) { - /* intset with intset is simple... and fast */ - if (sets[j]->encoding == OBJ_ENCODING_INTSET && - !intsetFind((intset*)sets[j]->ptr,intobj)) - { - break; - /* in order to compare an integer with an object we - * have to use the generic function, creating an object - * for this */ - } else if (sets[j]->encoding == OBJ_ENCODING_HT) { - elesds = sdsfromlonglong(intobj); - if (!setTypeIsMember(sets[j],elesds)) { - sdsfree(elesds); - break; - } - sdsfree(elesds); - } - } else if (encoding == OBJ_ENCODING_HT) { - if (!setTypeIsMember(sets[j],elesds)) { - break; - } - } + if (!setTypeIsMemberAux(sets[j], str, len, intobj, + encoding == OBJ_ENCODING_HT)) + break; } /* Only take action when all sets contain the member */ @@ -963,19 +1290,29 @@ void sinterGenericCommand(client *c, robj **setkeys, if (limit && cardinality >= limit) break; } else if (!dstkey) { - if (encoding == OBJ_ENCODING_HT) - addReplyBulkCBuffer(c,elesds,sdslen(elesds)); + if (str != NULL) + addReplyBulkCBuffer(c, str, len); else addReplyBulkLongLong(c,intobj); cardinality++; } else { - if (encoding == OBJ_ENCODING_INTSET) { - elesds = sdsfromlonglong(intobj); - setTypeAdd(dstset,elesds); - sdsfree(elesds); - } else { - setTypeAdd(dstset,elesds); + if (str && only_integers) { + /* It may be an integer although we got it as a string. */ + if (encoding == OBJ_ENCODING_HT && + string2ll(str, len, (long long *)&intobj)) + { + if (dstset->encoding == OBJ_ENCODING_LISTPACK || + dstset->encoding == OBJ_ENCODING_INTSET) + { + /* Adding it as an integer is more efficient. */ + str = NULL; + } + } else { + /* It's not an integer */ + only_integers = 0; + } } + setTypeAddAux(dstset, str, len, intobj, encoding == OBJ_ENCODING_HT); } } } @@ -987,6 +1324,12 @@ void sinterGenericCommand(client *c, robj **setkeys, /* Store the resulting set into the target, if the intersection * is not an empty set. */ if (setTypeSize(dstset) > 0) { + if (only_integers) maybeConvertToIntset(dstset); + if (dstset->encoding == OBJ_ENCODING_LISTPACK) { + /* We allocated too much memory when we created it to avoid + * frequent reallocs. Therefore, we shrink it now. */ + dstset->ptr = lpShrinkToFit(dstset->ptr); + } setKey(c,c->db,dstkey,dstset,0); addReplyLongLong(c,setTypeSize(dstset)); notifyKeyspaceEvent(NOTIFY_SET,"sinterstore", @@ -1054,7 +1397,10 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum, robj **sets = zmalloc(sizeof(robj*)*setnum); setTypeIterator *si; robj *dstset = NULL; - sds ele; + char *str; + size_t len; + int64_t llval; + int encoding; int j, cardinality = 0; int diff_algo = 1; int sameset = 0; @@ -1120,9 +1466,8 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum, if (!sets[j]) continue; /* non existing keys are like empty sets */ si = setTypeInitIterator(sets[j]); - while((ele = setTypeNextObject(si)) != NULL) { - if (setTypeAdd(dstset,ele)) cardinality++; - sdsfree(ele); + while ((encoding = setTypeNext(si, &str, &len, &llval)) != -1) { + cardinality += setTypeAddAux(dstset, str, len, llval, encoding == OBJ_ENCODING_HT); } setTypeReleaseIterator(si); } @@ -1138,18 +1483,19 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum, * This way we perform at max N*M operations, where N is the size of * the first set, and M the number of sets. */ si = setTypeInitIterator(sets[0]); - while((ele = setTypeNextObject(si)) != NULL) { + while ((encoding = setTypeNext(si, &str, &len, &llval)) != -1) { for (j = 1; j < setnum; j++) { if (!sets[j]) continue; /* no key is an empty set. */ if (sets[j] == sets[0]) break; /* same set! */ - if (setTypeIsMember(sets[j],ele)) break; + if (setTypeIsMemberAux(sets[j], str, len, llval, + encoding == OBJ_ENCODING_HT)) + break; } if (j == setnum) { /* There is no other set with this element. Add it. */ - setTypeAdd(dstset,ele); + setTypeAddAux(dstset, str, len, llval, encoding == OBJ_ENCODING_HT); cardinality++; } - sdsfree(ele); } setTypeReleaseIterator(si); } else if (op == SET_OP_DIFF && sets[0] && diff_algo == 2) { @@ -1164,13 +1510,14 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum, if (!sets[j]) continue; /* non existing keys are like empty sets */ si = setTypeInitIterator(sets[j]); - while((ele = setTypeNextObject(si)) != NULL) { + while((encoding = setTypeNext(si, &str, &len, &llval)) != -1) { if (j == 0) { - if (setTypeAdd(dstset,ele)) cardinality++; + cardinality += setTypeAddAux(dstset, str, len, llval, + encoding == OBJ_ENCODING_HT); } else { - if (setTypeRemove(dstset,ele)) cardinality--; + cardinality -= setTypeRemoveAux(dstset, str, len, llval, + encoding == OBJ_ENCODING_HT); } - sdsfree(ele); } setTypeReleaseIterator(si); @@ -1184,9 +1531,11 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum, if (!dstkey) { addReplySetLen(c,cardinality); si = setTypeInitIterator(dstset); - while((ele = setTypeNextObject(si)) != NULL) { - addReplyBulkCBuffer(c,ele,sdslen(ele)); - sdsfree(ele); + while (setTypeNext(si, &str, &len, &llval) != -1) { + if (str) + addReplyBulkCBuffer(c, str, len); + else + addReplyBulkLongLong(c, llval); } setTypeReleaseIterator(si); server.lazyfree_lazy_server_del ? freeObjAsync(NULL, dstset, -1) : diff --git a/src/t_zset.c b/src/t_zset.c index e194860db..ed1830175 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -1971,6 +1971,10 @@ typedef struct { dictIterator *di; dictEntry *de; } ht; + struct { + unsigned char *lp; + unsigned char *p; + } lp; } set; /* Sorted set iterators. */ @@ -2025,6 +2029,9 @@ void zuiInitIterator(zsetopsrc *op) { it->ht.dict = op->subject->ptr; it->ht.di = dictGetIterator(op->subject->ptr); it->ht.de = dictNext(it->ht.di); + } else if (op->encoding == OBJ_ENCODING_LISTPACK) { + it->lp.lp = op->subject->ptr; + it->lp.p = lpFirst(it->lp.lp); } else { serverPanic("Unknown set encoding"); } @@ -2061,6 +2068,8 @@ void zuiClearIterator(zsetopsrc *op) { UNUSED(it); /* skip */ } else if (op->encoding == OBJ_ENCODING_HT) { dictReleaseIterator(it->ht.di); + } else if (op->encoding == OBJ_ENCODING_LISTPACK) { + UNUSED(it); } else { serverPanic("Unknown set encoding"); } @@ -2091,14 +2100,7 @@ unsigned long zuiLength(zsetopsrc *op) { return 0; if (op->type == OBJ_SET) { - if (op->encoding == OBJ_ENCODING_INTSET) { - return intsetLen(op->subject->ptr); - } else if (op->encoding == OBJ_ENCODING_HT) { - dict *ht = op->subject->ptr; - return dictSize(ht); - } else { - serverPanic("Unknown set encoding"); - } + return setTypeSize(op->subject); } else if (op->type == OBJ_ZSET) { if (op->encoding == OBJ_ENCODING_LISTPACK) { return zzlLength(op->subject->ptr); @@ -2144,6 +2146,14 @@ int zuiNext(zsetopsrc *op, zsetopval *val) { /* Move to next element. */ it->ht.de = dictNext(it->ht.di); + } else if (op->encoding == OBJ_ENCODING_LISTPACK) { + if (it->lp.p == NULL) + return 0; + val->estr = lpGetValue(it->lp.p, &val->elen, &val->ell); + val->score = 1.0; + + /* Move to next element. */ + it->lp.p = lpNext(it->lp.lp, it->lp.p); } else { serverPanic("Unknown set encoding"); } diff --git a/tests/unit/keyspace.tcl b/tests/unit/keyspace.tcl index f5f971140..97ffef6bd 100644 --- a/tests/unit/keyspace.tcl +++ b/tests/unit/keyspace.tcl @@ -256,30 +256,27 @@ start_server {tags {"keyspace"}} { assert_equal $digest [debug_digest_value mynewlist{t}] } - test {COPY basic usage for intset set} { - r del set1{t} newset1{t} - r sadd set1{t} 1 2 3 - assert_encoding intset set1{t} - r copy set1{t} newset1{t} - set digest [debug_digest_value set1{t}] - assert_equal $digest [debug_digest_value newset1{t}] - assert_refcount 1 set1{t} - assert_refcount 1 newset1{t} - r del set1{t} - assert_equal $digest [debug_digest_value newset1{t}] - } - - test {COPY basic usage for hashtable set} { - r del set2{t} newset2{t} - r sadd set2{t} 1 2 3 a - assert_encoding hashtable set2{t} - r copy set2{t} newset2{t} - set digest [debug_digest_value set2{t}] - assert_equal $digest [debug_digest_value newset2{t}] - assert_refcount 1 set2{t} - assert_refcount 1 newset2{t} - r del set2{t} - assert_equal $digest [debug_digest_value newset2{t}] + foreach type {intset listpack hashtable} { + test {COPY basic usage for $type set} { + r del set1{t} newset1{t} + r sadd set1{t} 1 2 3 + if {$type ne "intset"} { + r sadd set1{t} a + } + if {$type eq "hashtable"} { + for {set i 4} {$i < 200} {incr i} { + r sadd set1{t} $i + } + } + assert_encoding $type set1{t} + r copy set1{t} newset1{t} + set digest [debug_digest_value set1{t}] + assert_equal $digest [debug_digest_value newset1{t}] + assert_refcount 1 set1{t} + assert_refcount 1 newset1{t} + r del set1{t} + assert_equal $digest [debug_digest_value newset1{t}] + } } test {COPY basic usage for listpack sorted set} { diff --git a/tests/unit/scan.tcl b/tests/unit/scan.tcl index 5dea76be7..45397d7a3 100644 --- a/tests/unit/scan.tcl +++ b/tests/unit/scan.tcl @@ -98,7 +98,7 @@ start_server {tags {"scan network"}} { assert_equal 1000 [llength $keys] } - foreach enc {intset hashtable} { + foreach enc {intset listpack hashtable} { test "SSCAN with encoding $enc" { # Create the Set r del set @@ -107,8 +107,9 @@ start_server {tags {"scan network"}} { } else { set prefix "ele:" } + set count [expr {$enc eq "hashtable" ? 200 : 100}] set elements {} - for {set j 0} {$j < 100} {incr j} { + for {set j 0} {$j < $count} {incr j} { lappend elements ${prefix}${j} } r sadd set {*}$elements @@ -128,7 +129,7 @@ start_server {tags {"scan network"}} { } set keys [lsort -unique $keys] - assert_equal 100 [llength $keys] + assert_equal $count [llength $keys] } } diff --git a/tests/unit/type/set.tcl b/tests/unit/type/set.tcl index 30b6dc5d7..34c2ea76d 100644 --- a/tests/unit/type/set.tcl +++ b/tests/unit/type/set.tcl @@ -2,6 +2,8 @@ start_server { tags {"set"} overrides { "set-max-intset-entries" 512 + "set-max-listpack-entries" 128 + "set-max-listpack-value" 32 } } { proc create_set {key entries} { @@ -9,12 +11,19 @@ start_server { foreach entry $entries { r sadd $key $entry } } - test {SADD, SCARD, SISMEMBER, SMISMEMBER, SMEMBERS basics - regular set} { - create_set myset {foo} - assert_encoding hashtable myset + # Values for initialing sets, per encoding. + array set initelems {listpack {foo} hashtable {foo}} + for {set i 0} {$i < 130} {incr i} { + lappend initelems(hashtable) [format "i%03d" $i] + } + + foreach type {listpack hashtable} { + test "SADD, SCARD, SISMEMBER, SMISMEMBER, SMEMBERS basics - $type" { + create_set myset $initelems($type) + assert_encoding $type myset assert_equal 1 [r sadd myset bar] assert_equal 0 [r sadd myset bar] - assert_equal 2 [r scard myset] + assert_equal [expr [llength $initelems($type)] + 1] [r scard myset] assert_equal 1 [r sismember myset foo] assert_equal 1 [r sismember myset bar] assert_equal 0 [r sismember myset bla] @@ -23,7 +32,8 @@ start_server { assert_equal {1 0} [r smismember myset foo bla] assert_equal {0 1} [r smismember myset bla foo] assert_equal {0} [r smismember myset bla] - assert_equal {bar foo} [lsort [r smembers myset]] + assert_equal "bar $initelems($type)" [lsort [r smembers myset]] + } } test {SADD, SCARD, SISMEMBER, SMISMEMBER, SMEMBERS basics - intset} { @@ -67,15 +77,33 @@ start_server { assert_error WRONGTYPE* {r sadd mylist bar} } - test "SADD a non-integer against an intset" { + test "SADD a non-integer against a small intset" { create_set myset {1 2 3} assert_encoding intset myset assert_equal 1 [r sadd myset a] + assert_encoding listpack myset + } + + test "SADD a non-integer against a large intset" { + create_set myset {0} + for {set i 1} {$i < 130} {incr i} {r sadd myset $i} + assert_encoding intset myset + assert_equal 1 [r sadd myset a] assert_encoding hashtable myset } test "SADD an integer larger than 64 bits" { create_set myset {213244124402402314402033402} + assert_encoding listpack myset + assert_equal 1 [r sismember myset 213244124402402314402033402] + assert_equal {1} [r smismember myset 213244124402402314402033402] + } + + test "SADD an integer larger than 64 bits to a large intset" { + create_set myset {0} + for {set i 1} {$i < 130} {incr i} {r sadd myset $i} + assert_encoding intset myset + r sadd myset 213244124402402314402033402 assert_encoding hashtable myset assert_equal 1 [r sismember myset 213244124402402314402033402] assert_equal {1} [r smismember myset 213244124402402314402033402] @@ -100,25 +128,32 @@ start_server { r del myintset r del myhashset r del mylargeintset + r del mysmallset for {set i 0} {$i < 100} {incr i} { r sadd myintset $i } for {set i 0} {$i < 1280} {incr i} { r sadd mylargeintset $i } + for {set i 0} {$i < 50} {incr i} { r sadd mysmallset [format "i%03d" $i] } for {set i 0} {$i < 256} {incr i} { r sadd myhashset [format "i%03d" $i] } assert_encoding intset myintset assert_encoding hashtable mylargeintset + assert_encoding listpack mysmallset assert_encoding hashtable myhashset r debug reload assert_encoding intset myintset assert_encoding hashtable mylargeintset + assert_encoding listpack mysmallset assert_encoding hashtable myhashset } {} {needs:debug} - test {SREM basics - regular set} { - create_set myset {foo bar ciao} - assert_encoding hashtable myset - assert_equal 0 [r srem myset qux] - assert_equal 1 [r srem myset foo] - assert_equal {bar ciao} [lsort [r smembers myset]] + foreach type {listpack hashtable} { + test {SREM basics - $type} { + create_set myset $initelems($type) + r sadd myset ciao + assert_encoding $type myset + assert_equal 0 [r srem myset qux] + assert_equal 1 [r srem myset ciao] + assert_equal $initelems($type) [lsort [r smembers myset]] + } } test {SREM basics - intset} { @@ -177,7 +212,18 @@ start_server { assert_equal 0 [r sintercard 1 non-existing-key limit 10] } - foreach {type} {hashtable intset} { + foreach {type} {regular intset} { + # Create sets setN{t} where N = 1..5 + if {$type eq "regular"} { + set smallenc listpack + set bigenc hashtable + } else { + set smallenc intset + set bigenc intset + } + # Sets 1, 2 and 4 are big; sets 3 and 5 are small. + array set encoding "1 $bigenc 2 $bigenc 3 $smallenc 4 $bigenc 5 $smallenc" + for {set i 1} {$i <= 5} {incr i} { r del [format "set%d{t}" $i] } @@ -198,7 +244,7 @@ start_server { # while the tests are running -- an extra element is added to every # set that determines its encoding. set large 200 - if {$type eq "hashtable"} { + if {$type eq "regular"} { set large foo } @@ -206,9 +252,9 @@ start_server { r sadd [format "set%d{t}" $i] $large } - test "Generated sets must be encoded as $type" { + test "Generated sets must be encoded correctly - $type" { for {set i 1} {$i <= 5} {incr i} { - assert_encoding $type [format "set%d{t}" $i] + assert_encoding $encoding($i) [format "set%d{t}" $i] } } @@ -225,14 +271,14 @@ start_server { test "SINTERSTORE with two sets - $type" { r sinterstore setres{t} set1{t} set2{t} - assert_encoding $type setres{t} + assert_encoding $smallenc setres{t} assert_equal [list 195 196 197 198 199 $large] [lsort [r smembers setres{t}]] } test "SINTERSTORE with two sets, after a DEBUG RELOAD - $type" { r debug reload r sinterstore setres{t} set1{t} set2{t} - assert_encoding $type setres{t} + assert_encoding $smallenc setres{t} assert_equal [list 195 196 197 198 199 $large] [lsort [r smembers setres{t}]] } {} {needs:debug} @@ -243,7 +289,7 @@ start_server { test "SUNIONSTORE with two sets - $type" { r sunionstore setres{t} set1{t} set2{t} - assert_encoding $type setres{t} + assert_encoding $bigenc setres{t} set expected [lsort -uniq "[r smembers set1{t}] [r smembers set2{t}]"] assert_equal $expected [lsort [r smembers setres{t}]] } @@ -294,6 +340,46 @@ start_server { } } + test "SINTERSTORE with two listpack sets where result is intset" { + r del setres{t} set1{t} set2{t} + r sadd set1{t} a b c 1 3 6 x y z + r sadd set2{t} e f g 1 2 3 u v w + assert_encoding listpack set1{t} + assert_encoding listpack set2{t} + r sinterstore setres{t} set1{t} set2{t} + assert_equal [list 1 3] [lsort [r smembers setres{t}]] + assert_encoding intset setres{t} + } + + test "SINTERSTORE with two hashtable sets where result is intset" { + r del setres{t} set1{t} set2{t} + r sadd set1{t} a b c 444 555 666 + r sadd set2{t} e f g 111 222 333 + set expected {} + for {set i 1} {$i < 130} {incr i} { + r sadd set1{t} $i + r sadd set2{t} $i + lappend expected $i + } + assert_encoding hashtable set1{t} + assert_encoding hashtable set2{t} + r sinterstore setres{t} set1{t} set2{t} + assert_equal [lsort $expected] [lsort [r smembers setres{t}]] + assert_encoding intset setres{t} + } + + test "SUNION hashtable and listpack" { + # This adds code coverage for adding a non-sds string to a hashtable set + # which already contains the string. + r del set1{t} set2{t} + set union {abcdefghijklmnopqrstuvwxyz1234567890 a b c 1 2 3} + create_set set1{t} $union + create_set set2{t} {a b c} + assert_encoding hashtable set1{t} + assert_encoding listpack set2{t} + assert_equal [lsort $union] [lsort [r sunion set1{t} set2{t}]] + } + test "SDIFF with first set empty" { r del set1{t} set2{t} set3{t} r sadd set2{t} 1 2 3 4 @@ -428,7 +514,7 @@ start_server { r sadd set2{t} 1 2 3 a r srem set2{t} a assert_encoding intset set1{t} - assert_encoding hashtable set2{t} + assert_encoding listpack set2{t} lsort [r sinter set1{t} set2{t}] } {1 2 3} @@ -549,7 +635,7 @@ start_server { assert_equal 0 [r exists setres{t}] } - foreach {type contents} {hashtable {a b c} intset {1 2 3}} { + foreach {type contents} {listpack {a b c} intset {1 2 3}} { test "SPOP basics - $type" { create_set myset $contents assert_encoding $type myset @@ -575,11 +661,20 @@ start_server { } } + test "SPOP integer from listpack set" { + create_set myset {a 1 2 3 4 5 6 7} + assert_encoding listpack myset + set a [r spop myset] + set b [r spop myset] + assert {[string is digit $a] || [string is digit $b]} + } + foreach {type contents} { - hashtable {a b c d e f g h i j k l m n o p q r s t u v w x y z} + listpack {a b c d e f g h i j k l m n o p q r s t u v w x y z} intset {1 10 11 12 13 14 15 16 17 18 19 2 20 21 22 23 24 25 26 3 4 5 6 7 8 9} + hashtable {ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 b c d e f g h i j k l m n o p q r s t u v w x y z} } { - test "SPOP with " { + test "SPOP with - $type" { create_set myset $contents assert_encoding $type myset assert_equal $contents [lsort [concat [r spop myset 11] [r spop myset 9] [r spop myset 0] [r spop myset 4] [r spop myset 1] [r spop myset 0] [r spop myset 1] [r spop myset 0]]] @@ -610,16 +705,20 @@ start_server { r spop nonexisting_key 100 } {} - test "SPOP new implementation: code path #1" { - set content {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20} + foreach {type content} { + intset {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20} + listpack {a 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20} + } { + test "SPOP new implementation: code path #1 $type" { create_set myset $content + assert_encoding $type myset set res [r spop myset 30] assert {[lsort $content] eq [lsort $res]} } - test "SPOP new implementation: code path #2" { - set content {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20} + test "SPOP new implementation: code path #2 $type" { create_set myset $content + assert_encoding $type myset set res [r spop myset 2] assert {[llength $res] == 2} assert {[r scard myset] == 18} @@ -627,15 +726,16 @@ start_server { assert {[lsort $union] eq [lsort $content]} } - test "SPOP new implementation: code path #3" { - set content {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20} + test "SPOP new implementation: code path #3 $type" { create_set myset $content + assert_encoding $type myset set res [r spop myset 18] assert {[llength $res] == 18} assert {[r scard myset] == 2} set union [concat [r smembers myset] $res] assert {[lsort $union] eq [lsort $content]} } + } test "SRANDMEMBER count of 0 is handled correctly" { r srandmember myset 0 @@ -659,7 +759,7 @@ start_server { r readraw 0 foreach {type contents} { - hashtable { + listpack { 1 5 10 50 125 50000 33959417 4775547 65434162 12098459 427716 483706 2726473884 72615637475 MARY PATRICIA LINDA BARBARA ELIZABETH JENNIFER MARIA @@ -674,9 +774,20 @@ start_server { 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 } + hashtable { + ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 + 1 5 10 50 125 50000 33959417 4775547 65434162 + 12098459 427716 483706 2726473884 72615637475 + MARY PATRICIA LINDA BARBARA ELIZABETH JENNIFER MARIA + SUSAN MARGARET DOROTHY LISA NANCY KAREN BETTY HELEN + SANDRA DONNA CAROL RUTH SHARON MICHELLE LAURA SARAH + KIMBERLY DEBORAH JESSICA SHIRLEY CYNTHIA ANGELA MELISSA + BRENDA AMY ANNA REBECCA VIRGINIA + } } { test "SRANDMEMBER with - $type" { create_set myset $contents + assert_encoding $type myset unset -nocomplain myset array set myset {} foreach ele [r smembers myset] { @@ -767,16 +878,22 @@ start_server { } foreach {type contents} { - hashtable { + listpack { 1 5 10 50 125 MARY PATRICIA LINDA BARBARA ELIZABETH } intset { 0 1 2 3 4 5 6 7 8 9 } + hashtable { + ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 + 1 5 10 50 125 + MARY PATRICIA LINDA BARBARA + } } { test "SRANDMEMBER histogram distribution - $type" { create_set myset $contents + assert_encoding $type myset unset -nocomplain myset array set myset {} foreach ele [r smembers myset] { @@ -809,7 +926,7 @@ start_server { r del myset3{t} myset4{t} create_set myset1{t} {1 a b} create_set myset2{t} {2 3 4} - assert_encoding hashtable myset1{t} + assert_encoding listpack myset1{t} assert_encoding intset myset2{t} } @@ -819,7 +936,7 @@ start_server { assert_equal 1 [r smove myset1{t} myset2{t} a] assert_equal {1 b} [lsort [r smembers myset1{t}]] assert_equal {2 3 4 a} [lsort [r smembers myset2{t}]] - assert_encoding hashtable myset2{t} + assert_encoding listpack myset2{t} # move an integer element should not convert the encoding setup_move @@ -855,7 +972,7 @@ start_server { assert_equal 1 [r smove myset1{t} myset3{t} a] assert_equal {1 b} [lsort [r smembers myset1{t}]] assert_equal {a} [lsort [r smembers myset3{t}]] - assert_encoding hashtable myset3{t} + assert_encoding listpack myset3{t} } test "SMOVE from intset to non existing destination set" {