Optimize HRANDFIELD and ZRANDMEMBER case 4 when ziplist encoded (#8444)

It is inefficient to repeatedly pick a single random element from a
ziplist.
For CASE4, which is when the user requested a low number of unique
random picks from the collectoin, we used thta pattern.

Now we use a different algorithm that picks unique elements from a
ziplist, and guarentee no duplicate but doesn't provide random order
(which is only needed in the non-unique random picks case)

Unrelated changes:
* change ziplist count and indexes variables to unsigned
* solve compilation warnings about uninitialized vars in gcc 10.2

Co-authored-by: xinluton <xinluton@qq.com>
This commit is contained in:
Oran Agra 2021-02-07 16:55:11 +02:00 committed by GitHub
parent 8bfac022f8
commit 11fe272c1a
4 changed files with 129 additions and 44 deletions

View File

@ -959,6 +959,23 @@ void hscanCommand(client *c) {
scanGenericCommand(c,o,cursor); scanGenericCommand(c,o,cursor);
} }
static void harndfieldReplyWithZiplist(client *c, unsigned int count, ziplistEntry *keys, ziplistEntry *vals) {
for (unsigned long i = 0; i < count; i++) {
if (vals && c->resp > 2)
addReplyArrayLen(c,2);
if (keys[i].sval)
addReplyBulkCBuffer(c, keys[i].sval, keys[i].slen);
else
addReplyBulkLongLong(c, keys[i].lval);
if (vals) {
if (vals[i].sval)
addReplyBulkCBuffer(c, vals[i].sval, vals[i].slen);
else
addReplyBulkLongLong(c, vals[i].lval);
}
}
}
/* How many times bigger should be the hash compared to the requested size /* How many times bigger should be the hash compared to the requested size
* for us to not use the "remove elements" strategy? Read later in the * for us to not use the "remove elements" strategy? Read later in the
* implementation for more info. */ * implementation for more info. */
@ -1024,20 +1041,7 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) {
sample_count = count > limit ? limit : count; sample_count = count > limit ? limit : count;
count -= sample_count; count -= sample_count;
ziplistRandomPairs(hash->ptr, sample_count, keys, vals); ziplistRandomPairs(hash->ptr, sample_count, keys, vals);
for (unsigned long i = 0; i < sample_count; i++) { harndfieldReplyWithZiplist(c, sample_count, keys, vals);
if (withvalues && c->resp > 2)
addReplyArrayLen(c,2);
if (keys[i].sval)
addReplyBulkCBuffer(c, keys[i].sval, keys[i].slen);
else
addReplyBulkLongLong(c, keys[i].lval);
if (withvalues) {
if (vals[i].sval)
addReplyBulkCBuffer(c, vals[i].sval, vals[i].slen);
else
addReplyBulkLongLong(c, vals[i].lval);
}
}
} }
zfree(keys); zfree(keys);
zfree(vals); zfree(vals);
@ -1130,6 +1134,21 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) {
* to the temporary hash, trying to eventually get enough unique elements * to the temporary hash, trying to eventually get enough unique elements
* to reach the specified count. */ * to reach the specified count. */
else { else {
if (hash->encoding == OBJ_ENCODING_ZIPLIST) {
/* it is inefficient to repeatedly pick one random element from a
* ziplist. so we use this instead: */
ziplistEntry *keys, *vals = NULL;
keys = zmalloc(sizeof(ziplistEntry)*count);
if (withvalues)
vals = zmalloc(sizeof(ziplistEntry)*count);
serverAssert(ziplistRandomPairsUnique(hash->ptr, count, keys, vals) == count);
harndfieldReplyWithZiplist(c, count, keys, vals);
zfree(keys);
zfree(vals);
return;
}
/* Hashtable encoding (generic implementation) */
unsigned long added = 0; unsigned long added = 0;
ziplistEntry key, value; ziplistEntry key, value;
dict *d = dictCreate(&hashDictType, NULL); dict *d = dictCreate(&hashDictType, NULL);

View File

@ -3956,6 +3956,23 @@ void bzpopmaxCommand(client *c) {
blockingGenericZpopCommand(c,ZSET_MAX); blockingGenericZpopCommand(c,ZSET_MAX);
} }
static void zarndmemberReplyWithZiplist(client *c, unsigned int count, ziplistEntry *keys, ziplistEntry *vals) {
for (unsigned long i = 0; i < count; i++) {
if (vals && c->resp > 2)
addReplyArrayLen(c,2);
if (keys[i].sval)
addReplyBulkCBuffer(c, keys[i].sval, keys[i].slen);
else
addReplyBulkLongLong(c, keys[i].lval);
if (vals) {
if (vals[i].sval) {
addReplyDouble(c, zzlStrtod(vals[i].sval,vals[i].slen));
} else
addReplyDouble(c, vals[i].lval);
}
}
}
/* How many times bigger should be the zset compared to the requested size /* How many times bigger should be the zset compared to the requested size
* for us to not use the "remove elements" strategy? Read later in the * for us to not use the "remove elements" strategy? Read later in the
* implementation for more info. */ * implementation for more info. */
@ -4020,20 +4037,7 @@ void zrandmemberWithCountCommand(client *c, long l, int withscores) {
sample_count = count > limit ? limit : count; sample_count = count > limit ? limit : count;
count -= sample_count; count -= sample_count;
ziplistRandomPairs(zsetobj->ptr, sample_count, keys, vals); ziplistRandomPairs(zsetobj->ptr, sample_count, keys, vals);
for (unsigned long i = 0; i < sample_count; i++) { zarndmemberReplyWithZiplist(c, sample_count, keys, vals);
if (withscores && c->resp > 2)
addReplyArrayLen(c,2);
if (keys[i].sval)
addReplyBulkCBuffer(c, keys[i].sval, keys[i].slen);
else
addReplyBulkLongLong(c, keys[i].lval);
if (withscores) {
if (vals[i].sval) {
addReplyDouble(c, zzlStrtod(vals[i].sval,vals[i].slen));
} else
addReplyDouble(c, vals[i].lval);
}
}
} }
zfree(keys); zfree(keys);
zfree(vals); zfree(vals);
@ -4122,6 +4126,21 @@ void zrandmemberWithCountCommand(client *c, long l, int withscores) {
* to the temporary set, trying to eventually get enough unique elements * to the temporary set, trying to eventually get enough unique elements
* to reach the specified count. */ * to reach the specified count. */
else { else {
if (zsetobj->encoding == OBJ_ENCODING_ZIPLIST) {
/* it is inefficient to repeatedly pick one random element from a
* ziplist. so we use this instead: */
ziplistEntry *keys, *vals = NULL;
keys = zmalloc(sizeof(ziplistEntry)*count);
if (withscores)
vals = zmalloc(sizeof(ziplistEntry)*count);
serverAssert(ziplistRandomPairsUnique(zsetobj->ptr, count, keys, vals) == count);
zarndmemberReplyWithZiplist(c, count, keys, vals);
zfree(keys);
zfree(vals);
return;
}
/* Hashtable encoding (generic implementation) */
unsigned long added = 0; unsigned long added = 0;
dict *d = dictCreate(&hashDictType, NULL); dict *d = dictCreate(&hashDictType, NULL);

View File

@ -1523,8 +1523,8 @@ void ziplistRandomPair(unsigned char *zl, unsigned long total_count, ziplistEntr
} }
/* int compare for qsort */ /* int compare for qsort */
int intCompare(const void *a, const void *b) { int uintCompare(const void *a, const void *b) {
return (*(int *) a - *(int *) b); return (*(unsigned int *) a - *(unsigned int *) b);
} }
/* Helper method to store a string into from val or lval into dest */ /* Helper method to store a string into from val or lval into dest */
@ -1534,41 +1534,42 @@ static inline void ziplistSaveValue(unsigned char *val, unsigned int len, long l
dest->lval = lval; dest->lval = lval;
} }
/* Randomly select unique count of key value pairs and store into 'keys' and /* Randomly select count of key value pairs and store into 'keys' and
* 'vals' args. The order of the picked entries is random. * 'vals' args. The order of the picked entries is random, and the selections
* are non-unique (repetitions are possible).
* The 'vals' arg can be NULL in which case we skip these. */ * The 'vals' arg can be NULL in which case we skip these. */
void ziplistRandomPairs(unsigned char *zl, int count, ziplistEntry *keys, ziplistEntry *vals) { void ziplistRandomPairs(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals) {
unsigned char *p, *key, *value; unsigned char *p, *key, *value;
unsigned int klen, vlen; unsigned int klen = 0, vlen = 0;
long long klval, vlval; long long klval = 0, vlval = 0;
/* Notice: the index member must be first due to the use in intCompare */ /* Notice: the index member must be first due to the use in uintCompare */
typedef struct { typedef struct {
int index; unsigned int index;
int order; unsigned int order;
} rand_pick; } rand_pick;
rand_pick *picks = zmalloc(sizeof(rand_pick)*count); rand_pick *picks = zmalloc(sizeof(rand_pick)*count);
unsigned long total_size = ziplistLen(zl)/2; unsigned int total_size = ziplistLen(zl)/2;
/* Avoid div by zero on corrupt ziplist */ /* Avoid div by zero on corrupt ziplist */
assert(total_size); assert(total_size);
/* create a pool of random indexes (some may be duplicate). */ /* create a pool of random indexes (some may be duplicate). */
for (int i = 0; i < count; i++) { for (unsigned int i = 0; i < count; i++) {
picks[i].index = (rand() % total_size) * 2; /* Generate even indexes */ picks[i].index = (rand() % total_size) * 2; /* Generate even indexes */
/* keep track of the order we picked them */ /* keep track of the order we picked them */
picks[i].order = i; picks[i].order = i;
} }
/* sort by indexes. */ /* sort by indexes. */
qsort(picks, count, sizeof(rand_pick), intCompare); qsort(picks, count, sizeof(rand_pick), uintCompare);
/* fetch the elements form the ziplist into a output array respecting the original order. */ /* fetch the elements form the ziplist into a output array respecting the original order. */
int zipindex = 0, pickindex = 0; unsigned int zipindex = 0, pickindex = 0;
p = ziplistIndex(zl, 0); p = ziplistIndex(zl, 0);
while (ziplistGet(p, &key, &klen, &klval) && pickindex < count) { while (ziplistGet(p, &key, &klen, &klval) && pickindex < count) {
p = ziplistNext(zl, p); p = ziplistNext(zl, p);
ziplistGet(p, &value, &vlen, &vlval); assert(ziplistGet(p, &value, &vlen, &vlval));
while (pickindex < count && zipindex == picks[pickindex].index) { while (pickindex < count && zipindex == picks[pickindex].index) {
int storeorder = picks[pickindex].order; int storeorder = picks[pickindex].order;
ziplistSaveValue(key, klen, klval, &keys[storeorder]); ziplistSaveValue(key, klen, klval, &keys[storeorder]);
@ -1583,6 +1584,51 @@ void ziplistRandomPairs(unsigned char *zl, int count, ziplistEntry *keys, ziplis
zfree(picks); zfree(picks);
} }
/* Randomly select count of key value pairs and store into 'keys' and
* 'vals' args. The selections are unique (no repetitions), and the order of
* the picked entries is NOT-random.
* The 'vals' arg can be NULL in which case we skip these.
* The return value is the number of items picked which can be lower than the
* requested count if the ziplist doesn't hold enough pairs. */
unsigned int ziplistRandomPairsUnique(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals) {
unsigned char *p, *key;
unsigned int klen = 0;
long long klval = 0;
unsigned int total_size = ziplistLen(zl)/2;
unsigned int index = 0;
if (count > total_size)
count = total_size;
/* To only iterate once, every time we try to pick a member, the probability
* we pick it is the quotient of the count left we want to pick and the
* count still we haven't visited in the dict, this way, we could make every
* member be equally picked.*/
p = ziplistIndex(zl, 0);
unsigned int picked = 0, remaining = count;
while (picked < count && p) {
double randomDouble = ((double)rand()) / RAND_MAX;
double threshold = ((double)remaining) / (total_size - index);
if(randomDouble < threshold){
assert(ziplistGet(p, &key, &klen, &klval));
ziplistSaveValue(key, klen, klval, &keys[picked]);
p = ziplistNext(zl, p);
assert(p);
if (vals) {
assert(ziplistGet(p, &key, &klen, &klval));
ziplistSaveValue(key, klen, klval, &vals[picked]);
}
remaining--;
picked++;
} else {
p = ziplistNext(zl, p);
assert(p);
}
p = ziplistNext(zl, p);
index++;
}
return picked;
}
#ifdef REDIS_TEST #ifdef REDIS_TEST
#include <sys/time.h> #include <sys/time.h>
#include "adlist.h" #include "adlist.h"

View File

@ -62,7 +62,8 @@ typedef int (*ziplistValidateEntryCB)(unsigned char* p, void* userdata);
int ziplistValidateIntegrity(unsigned char *zl, size_t size, int deep, int ziplistValidateIntegrity(unsigned char *zl, size_t size, int deep,
ziplistValidateEntryCB entry_cb, void *cb_userdata); ziplistValidateEntryCB entry_cb, void *cb_userdata);
void ziplistRandomPair(unsigned char *zl, unsigned long total_count, ziplistEntry *key, ziplistEntry *val); void ziplistRandomPair(unsigned char *zl, unsigned long total_count, ziplistEntry *key, ziplistEntry *val);
void ziplistRandomPairs(unsigned char *zl, int count, ziplistEntry *keys, ziplistEntry *vals); void ziplistRandomPairs(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals);
unsigned int ziplistRandomPairsUnique(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals);
#ifdef REDIS_TEST #ifdef REDIS_TEST
int ziplistTest(int argc, char *argv[]); int ziplistTest(int argc, char *argv[]);