From 006ab26c373239c05e66e52f98b90d086fb1d1ee Mon Sep 17 00:00:00 2001 From: Binbin Date: Mon, 22 May 2023 20:48:32 +0800 Subject: [PATCH] Optimize HRANDFIELD and ZRANDMEMBER case 3 when listpack encoded (#12205) Optimized HRANDFIELD and ZRANDMEMBER commands as in #8444, CASE 3 under listpack encoding. Boost optimization to CASE 2.5. CASE 2.5 listpack only. Sampling unique elements, in non-random order. Listpack encoded hashes / zsets are meant to be relatively small, so HRANDFIELD_SUB_STRATEGY_MUL / ZRANDMEMBER_SUB_STRATEGY_MUL isn't necessary and we rather not make copies of the entries. Instead, we emit them directly to the output buffer. Simple benchmarks shows it provides some 400% improvement in HRANDFIELD and ZRANGESTORE both in CASE 3. Unrelated changes: remove useless setTypeRandomElements and fix a typo. --- src/server.h | 1 - src/t_hash.c | 35 +++++++++++++++++++++-------------- src/t_set.c | 7 +++++-- src/t_zset.c | 37 ++++++++++++++++++++++--------------- 4 files changed, 48 insertions(+), 32 deletions(-) diff --git a/src/server.h b/src/server.h index 6fdbe5bfd..e3f9c9729 100644 --- a/src/server.h +++ b/src/server.h @@ -3092,7 +3092,6 @@ void setTypeReleaseIterator(setTypeIterator *si); int setTypeNext(setTypeIterator *si, char **str, size_t *len, int64_t *llele); sds setTypeNextObject(setTypeIterator *si); int setTypeRandomElement(robj *setobj, char **str, size_t *len, int64_t *llele); -unsigned long setTypeRandomElements(robj *set, unsigned long count, robj *aux_set); unsigned long setTypeSize(const robj *subject); void setTypeConvert(robj *subject, int enc); int setTypeConvertAndExpand(robj *setobj, int enc, unsigned long cap, int panic); diff --git a/src/t_hash.c b/src/t_hash.c index 2d50ca304..b199d8c69 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -1014,6 +1014,26 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) { return; } + /* CASE 2.5 listpack only. Sampling unique elements, in non-random order. + * Listpack encoded hashes are meant to be relatively small, so + * HRANDFIELD_SUB_STRATEGY_MUL isn't necessary and we rather not make + * copies of the entries. Instead, we emit them directly to the output + * buffer. + * + * And it is inefficient to repeatedly pick one random element from a + * listpack in CASE 4. So we use this instead. */ + if (hash->encoding == OBJ_ENCODING_LISTPACK) { + listpackEntry *keys, *vals = NULL; + keys = zmalloc(sizeof(listpackEntry)*count); + if (withvalues) + vals = zmalloc(sizeof(listpackEntry)*count); + serverAssert(lpRandomPairsUnique(hash->ptr, count, keys, vals) == count); + hrandfieldReplyWithListpack(c, count, keys, vals); + zfree(keys); + zfree(vals); + return; + } + /* CASE 3: * The number of elements inside the hash is not greater than * HRANDFIELD_SUB_STRATEGY_MUL times the number of requested elements. @@ -1024,6 +1044,7 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) { * a bit less than the number of elements in the hash, the natural approach * used into CASE 4 is highly inefficient. */ if (count*HRANDFIELD_SUB_STRATEGY_MUL > size) { + /* Hashtable encoding (generic implementation) */ dict *d = dictCreate(&sdsReplyDictType); dictExpand(d, size); hashTypeIterator *hi = hashTypeInitIterator(hash); @@ -1077,20 +1098,6 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) { * to the temporary hash, trying to eventually get enough unique elements * to reach the specified count. */ else { - if (hash->encoding == OBJ_ENCODING_LISTPACK) { - /* it is inefficient to repeatedly pick one random element from a - * listpack. so we use this instead: */ - listpackEntry *keys, *vals = NULL; - keys = zmalloc(sizeof(listpackEntry)*count); - if (withvalues) - vals = zmalloc(sizeof(listpackEntry)*count); - serverAssert(lpRandomPairsUnique(hash->ptr, count, keys, vals) == count); - hrandfieldReplyWithListpack(c, count, keys, vals); - zfree(keys); - zfree(vals); - return; - } - /* Hashtable encoding (generic implementation) */ unsigned long added = 0; listpackEntry key, value; diff --git a/src/t_set.c b/src/t_set.c index 7f620daba..fb85d220f 100644 --- a/src/t_set.c +++ b/src/t_set.c @@ -1114,7 +1114,10 @@ void srandmemberWithCountCommand(client *c) { * Listpack encoded sets are meant to be relatively small, so * SRANDMEMBER_SUB_STRATEGY_MUL isn't necessary and we rather not make * copies of the entries. Instead, we emit them directly to the output - * buffer. */ + * buffer. + * + * And it is inefficient to repeatedly pick one random element from a + * listpack in CASE 4. So we use this instead. */ if (set->encoding == OBJ_ENCODING_LISTPACK) { unsigned char *lp = set->ptr; unsigned char *p = lpFirst(lp); @@ -1337,7 +1340,7 @@ void sinterGenericCommand(client *c, robj **setkeys, } else if (sets[0]->encoding == OBJ_ENCODING_LISTPACK) { /* To avoid many reallocs, we estimate that the result is a listpack * of approximately the same size as the first set. Then we shrink - * it or possibly convert it to intset it in the end. */ + * it or possibly convert it to intset in the end. */ unsigned char *lp = lpNew(lpBytes(sets[0]->ptr)); dstset = createObject(OBJ_SET, lp); dstset->encoding = OBJ_ENCODING_LISTPACK; diff --git a/src/t_zset.c b/src/t_zset.c index 7c60a45bd..6f0306c65 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -4243,6 +4243,27 @@ void zrandmemberWithCountCommand(client *c, long l, int withscores) { return; } + /* CASE 2.5 listpack only. Sampling unique elements, in non-random order. + * Listpack encoded zsets are meant to be relatively small, so + * ZRANDMEMBER_SUB_STRATEGY_MUL isn't necessary and we rather not make + * copies of the entries. Instead, we emit them directly to the output + * buffer. + * + * And it is inefficient to repeatedly pick one random element from a + * listpack in CASE 4. So we use this instead. */ + if (zsetobj->encoding == OBJ_ENCODING_LISTPACK) { + listpackEntry *keys, *vals = NULL; + keys = zmalloc(sizeof(listpackEntry)*count); + if (withscores) + vals = zmalloc(sizeof(listpackEntry)*count); + serverAssert(lpRandomPairsUnique(zsetobj->ptr, count, keys, vals) == count); + zrandmemberReplyWithListpack(c, count, keys, vals); + zfree(keys); + zfree(vals); + zuiClearIterator(&src); + return; + } + /* CASE 3: * The number of elements inside the zset is not greater than * ZRANDMEMBER_SUB_STRATEGY_MUL times the number of requested elements. @@ -4253,6 +4274,7 @@ void zrandmemberWithCountCommand(client *c, long l, int withscores) { * a bit less than the number of elements in the set, the natural approach * used into CASE 4 is highly inefficient. */ if (count*ZRANDMEMBER_SUB_STRATEGY_MUL > size) { + /* Hashtable encoding (generic implementation) */ dict *d = dictCreate(&sdsReplyDictType); dictExpand(d, size); /* Add all the elements into the temporary dictionary. */ @@ -4296,21 +4318,6 @@ void zrandmemberWithCountCommand(client *c, long l, int withscores) { * to the temporary set, trying to eventually get enough unique elements * to reach the specified count. */ else { - if (zsetobj->encoding == OBJ_ENCODING_LISTPACK) { - /* it is inefficient to repeatedly pick one random element from a - * listpack. so we use this instead: */ - listpackEntry *keys, *vals = NULL; - keys = zmalloc(sizeof(listpackEntry)*count); - if (withscores) - vals = zmalloc(sizeof(listpackEntry)*count); - serverAssert(lpRandomPairsUnique(zsetobj->ptr, count, keys, vals) == count); - zrandmemberReplyWithListpack(c, count, keys, vals); - zfree(keys); - zfree(vals); - zuiClearIterator(&src); - return; - } - /* Hashtable encoding (generic implementation) */ unsigned long added = 0; dict *d = dictCreate(&hashDictType);