Optimize HRANDFIELD and ZRANDMEMBER case 4 when ziplist encoded (#8444)

It is inefficient to repeatedly pick a single random element from a
ziplist.
For CASE4, which is when the user requested a low number of unique
random picks from the collectoin, we used thta pattern.

Now we use a different algorithm that picks unique elements from a
ziplist, and guarentee no duplicate but doesn't provide random order
(which is only needed in the non-unique random picks case)

Unrelated changes:
* change ziplist count and indexes variables to unsigned
* solve compilation warnings about uninitialized vars in gcc 10.2

Co-authored-by: xinluton <xinluton@qq.com>
This commit is contained in:
Oran Agra 2021-02-07 16:55:11 +02:00 committed by GitHub
parent 02ab14cc2e
commit 62b1f32062
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 129 additions and 44 deletions

View File

@ -959,6 +959,23 @@ void hscanCommand(client *c) {
scanGenericCommand(c,o,cursor);
}
static void harndfieldReplyWithZiplist(client *c, unsigned int count, ziplistEntry *keys, ziplistEntry *vals) {
for (unsigned long i = 0; i < count; i++) {
if (vals && c->resp > 2)
addReplyArrayLen(c,2);
if (keys[i].sval)
addReplyBulkCBuffer(c, keys[i].sval, keys[i].slen);
else
addReplyBulkLongLong(c, keys[i].lval);
if (vals) {
if (vals[i].sval)
addReplyBulkCBuffer(c, vals[i].sval, vals[i].slen);
else
addReplyBulkLongLong(c, vals[i].lval);
}
}
}
/* How many times bigger should be the hash compared to the requested size
* for us to not use the "remove elements" strategy? Read later in the
* implementation for more info. */
@ -1024,20 +1041,7 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) {
sample_count = count > limit ? limit : count;
count -= sample_count;
ziplistRandomPairs(hash->ptr, sample_count, keys, vals);
for (unsigned long i = 0; i < sample_count; i++) {
if (withvalues && c->resp > 2)
addReplyArrayLen(c,2);
if (keys[i].sval)
addReplyBulkCBuffer(c, keys[i].sval, keys[i].slen);
else
addReplyBulkLongLong(c, keys[i].lval);
if (withvalues) {
if (vals[i].sval)
addReplyBulkCBuffer(c, vals[i].sval, vals[i].slen);
else
addReplyBulkLongLong(c, vals[i].lval);
}
}
harndfieldReplyWithZiplist(c, sample_count, keys, vals);
}
zfree(keys);
zfree(vals);
@ -1130,6 +1134,21 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) {
* to the temporary hash, trying to eventually get enough unique elements
* to reach the specified count. */
else {
if (hash->encoding == OBJ_ENCODING_ZIPLIST) {
/* it is inefficient to repeatedly pick one random element from a
* ziplist. so we use this instead: */
ziplistEntry *keys, *vals = NULL;
keys = zmalloc(sizeof(ziplistEntry)*count);
if (withvalues)
vals = zmalloc(sizeof(ziplistEntry)*count);
serverAssert(ziplistRandomPairsUnique(hash->ptr, count, keys, vals) == count);
harndfieldReplyWithZiplist(c, count, keys, vals);
zfree(keys);
zfree(vals);
return;
}
/* Hashtable encoding (generic implementation) */
unsigned long added = 0;
ziplistEntry key, value;
dict *d = dictCreate(&hashDictType, NULL);

View File

@ -3956,6 +3956,23 @@ void bzpopmaxCommand(client *c) {
blockingGenericZpopCommand(c,ZSET_MAX);
}
static void zarndmemberReplyWithZiplist(client *c, unsigned int count, ziplistEntry *keys, ziplistEntry *vals) {
for (unsigned long i = 0; i < count; i++) {
if (vals && c->resp > 2)
addReplyArrayLen(c,2);
if (keys[i].sval)
addReplyBulkCBuffer(c, keys[i].sval, keys[i].slen);
else
addReplyBulkLongLong(c, keys[i].lval);
if (vals) {
if (vals[i].sval) {
addReplyDouble(c, zzlStrtod(vals[i].sval,vals[i].slen));
} else
addReplyDouble(c, vals[i].lval);
}
}
}
/* How many times bigger should be the zset compared to the requested size
* for us to not use the "remove elements" strategy? Read later in the
* implementation for more info. */
@ -4020,20 +4037,7 @@ void zrandmemberWithCountCommand(client *c, long l, int withscores) {
sample_count = count > limit ? limit : count;
count -= sample_count;
ziplistRandomPairs(zsetobj->ptr, sample_count, keys, vals);
for (unsigned long i = 0; i < sample_count; i++) {
if (withscores && c->resp > 2)
addReplyArrayLen(c,2);
if (keys[i].sval)
addReplyBulkCBuffer(c, keys[i].sval, keys[i].slen);
else
addReplyBulkLongLong(c, keys[i].lval);
if (withscores) {
if (vals[i].sval) {
addReplyDouble(c, zzlStrtod(vals[i].sval,vals[i].slen));
} else
addReplyDouble(c, vals[i].lval);
}
}
zarndmemberReplyWithZiplist(c, sample_count, keys, vals);
}
zfree(keys);
zfree(vals);
@ -4122,6 +4126,21 @@ void zrandmemberWithCountCommand(client *c, long l, int withscores) {
* to the temporary set, trying to eventually get enough unique elements
* to reach the specified count. */
else {
if (zsetobj->encoding == OBJ_ENCODING_ZIPLIST) {
/* it is inefficient to repeatedly pick one random element from a
* ziplist. so we use this instead: */
ziplistEntry *keys, *vals = NULL;
keys = zmalloc(sizeof(ziplistEntry)*count);
if (withscores)
vals = zmalloc(sizeof(ziplistEntry)*count);
serverAssert(ziplistRandomPairsUnique(zsetobj->ptr, count, keys, vals) == count);
zarndmemberReplyWithZiplist(c, count, keys, vals);
zfree(keys);
zfree(vals);
return;
}
/* Hashtable encoding (generic implementation) */
unsigned long added = 0;
dict *d = dictCreate(&hashDictType, NULL);

View File

@ -1523,8 +1523,8 @@ void ziplistRandomPair(unsigned char *zl, unsigned long total_count, ziplistEntr
}
/* int compare for qsort */
int intCompare(const void *a, const void *b) {
return (*(int *) a - *(int *) b);
int uintCompare(const void *a, const void *b) {
return (*(unsigned int *) a - *(unsigned int *) b);
}
/* Helper method to store a string into from val or lval into dest */
@ -1534,41 +1534,42 @@ static inline void ziplistSaveValue(unsigned char *val, unsigned int len, long l
dest->lval = lval;
}
/* Randomly select unique count of key value pairs and store into 'keys' and
* 'vals' args. The order of the picked entries is random.
/* Randomly select count of key value pairs and store into 'keys' and
* 'vals' args. The order of the picked entries is random, and the selections
* are non-unique (repetitions are possible).
* The 'vals' arg can be NULL in which case we skip these. */
void ziplistRandomPairs(unsigned char *zl, int count, ziplistEntry *keys, ziplistEntry *vals) {
void ziplistRandomPairs(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals) {
unsigned char *p, *key, *value;
unsigned int klen, vlen;
long long klval, vlval;
unsigned int klen = 0, vlen = 0;
long long klval = 0, vlval = 0;
/* Notice: the index member must be first due to the use in intCompare */
/* Notice: the index member must be first due to the use in uintCompare */
typedef struct {
int index;
int order;
unsigned int index;
unsigned int order;
} rand_pick;
rand_pick *picks = zmalloc(sizeof(rand_pick)*count);
unsigned long total_size = ziplistLen(zl)/2;
unsigned int total_size = ziplistLen(zl)/2;
/* Avoid div by zero on corrupt ziplist */
assert(total_size);
/* create a pool of random indexes (some may be duplicate). */
for (int i = 0; i < count; i++) {
for (unsigned int i = 0; i < count; i++) {
picks[i].index = (rand() % total_size) * 2; /* Generate even indexes */
/* keep track of the order we picked them */
picks[i].order = i;
}
/* sort by indexes. */
qsort(picks, count, sizeof(rand_pick), intCompare);
qsort(picks, count, sizeof(rand_pick), uintCompare);
/* fetch the elements form the ziplist into a output array respecting the original order. */
int zipindex = 0, pickindex = 0;
unsigned int zipindex = 0, pickindex = 0;
p = ziplistIndex(zl, 0);
while (ziplistGet(p, &key, &klen, &klval) && pickindex < count) {
p = ziplistNext(zl, p);
ziplistGet(p, &value, &vlen, &vlval);
assert(ziplistGet(p, &value, &vlen, &vlval));
while (pickindex < count && zipindex == picks[pickindex].index) {
int storeorder = picks[pickindex].order;
ziplistSaveValue(key, klen, klval, &keys[storeorder]);
@ -1583,6 +1584,51 @@ void ziplistRandomPairs(unsigned char *zl, int count, ziplistEntry *keys, ziplis
zfree(picks);
}
/* Randomly select count of key value pairs and store into 'keys' and
* 'vals' args. The selections are unique (no repetitions), and the order of
* the picked entries is NOT-random.
* The 'vals' arg can be NULL in which case we skip these.
* The return value is the number of items picked which can be lower than the
* requested count if the ziplist doesn't hold enough pairs. */
unsigned int ziplistRandomPairsUnique(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals) {
unsigned char *p, *key;
unsigned int klen = 0;
long long klval = 0;
unsigned int total_size = ziplistLen(zl)/2;
unsigned int index = 0;
if (count > total_size)
count = total_size;
/* To only iterate once, every time we try to pick a member, the probability
* we pick it is the quotient of the count left we want to pick and the
* count still we haven't visited in the dict, this way, we could make every
* member be equally picked.*/
p = ziplistIndex(zl, 0);
unsigned int picked = 0, remaining = count;
while (picked < count && p) {
double randomDouble = ((double)rand()) / RAND_MAX;
double threshold = ((double)remaining) / (total_size - index);
if(randomDouble < threshold){
assert(ziplistGet(p, &key, &klen, &klval));
ziplistSaveValue(key, klen, klval, &keys[picked]);
p = ziplistNext(zl, p);
assert(p);
if (vals) {
assert(ziplistGet(p, &key, &klen, &klval));
ziplistSaveValue(key, klen, klval, &vals[picked]);
}
remaining--;
picked++;
} else {
p = ziplistNext(zl, p);
assert(p);
}
p = ziplistNext(zl, p);
index++;
}
return picked;
}
#ifdef REDIS_TEST
#include <sys/time.h>
#include "adlist.h"

View File

@ -62,7 +62,8 @@ typedef int (*ziplistValidateEntryCB)(unsigned char* p, void* userdata);
int ziplistValidateIntegrity(unsigned char *zl, size_t size, int deep,
ziplistValidateEntryCB entry_cb, void *cb_userdata);
void ziplistRandomPair(unsigned char *zl, unsigned long total_count, ziplistEntry *key, ziplistEntry *val);
void ziplistRandomPairs(unsigned char *zl, int count, ziplistEntry *keys, ziplistEntry *vals);
void ziplistRandomPairs(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals);
unsigned int ziplistRandomPairsUnique(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals);
#ifdef REDIS_TEST
int ziplistTest(int argc, char *argv[]);