Optimize HRANDFIELD and ZRANDMEMBER case 4 when ziplist encoded (#8444)
It is inefficient to repeatedly pick a single random element from a ziplist. For CASE4, which is when the user requested a low number of unique random picks from the collectoin, we used thta pattern. Now we use a different algorithm that picks unique elements from a ziplist, and guarentee no duplicate but doesn't provide random order (which is only needed in the non-unique random picks case) Unrelated changes: * change ziplist count and indexes variables to unsigned * solve compilation warnings about uninitialized vars in gcc 10.2 Co-authored-by: xinluton <xinluton@qq.com>
This commit is contained in:
parent
02ab14cc2e
commit
62b1f32062
47
src/t_hash.c
47
src/t_hash.c
@ -959,6 +959,23 @@ void hscanCommand(client *c) {
|
||||
scanGenericCommand(c,o,cursor);
|
||||
}
|
||||
|
||||
static void harndfieldReplyWithZiplist(client *c, unsigned int count, ziplistEntry *keys, ziplistEntry *vals) {
|
||||
for (unsigned long i = 0; i < count; i++) {
|
||||
if (vals && c->resp > 2)
|
||||
addReplyArrayLen(c,2);
|
||||
if (keys[i].sval)
|
||||
addReplyBulkCBuffer(c, keys[i].sval, keys[i].slen);
|
||||
else
|
||||
addReplyBulkLongLong(c, keys[i].lval);
|
||||
if (vals) {
|
||||
if (vals[i].sval)
|
||||
addReplyBulkCBuffer(c, vals[i].sval, vals[i].slen);
|
||||
else
|
||||
addReplyBulkLongLong(c, vals[i].lval);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* How many times bigger should be the hash compared to the requested size
|
||||
* for us to not use the "remove elements" strategy? Read later in the
|
||||
* implementation for more info. */
|
||||
@ -1024,20 +1041,7 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) {
|
||||
sample_count = count > limit ? limit : count;
|
||||
count -= sample_count;
|
||||
ziplistRandomPairs(hash->ptr, sample_count, keys, vals);
|
||||
for (unsigned long i = 0; i < sample_count; i++) {
|
||||
if (withvalues && c->resp > 2)
|
||||
addReplyArrayLen(c,2);
|
||||
if (keys[i].sval)
|
||||
addReplyBulkCBuffer(c, keys[i].sval, keys[i].slen);
|
||||
else
|
||||
addReplyBulkLongLong(c, keys[i].lval);
|
||||
if (withvalues) {
|
||||
if (vals[i].sval)
|
||||
addReplyBulkCBuffer(c, vals[i].sval, vals[i].slen);
|
||||
else
|
||||
addReplyBulkLongLong(c, vals[i].lval);
|
||||
}
|
||||
}
|
||||
harndfieldReplyWithZiplist(c, sample_count, keys, vals);
|
||||
}
|
||||
zfree(keys);
|
||||
zfree(vals);
|
||||
@ -1130,6 +1134,21 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) {
|
||||
* to the temporary hash, trying to eventually get enough unique elements
|
||||
* to reach the specified count. */
|
||||
else {
|
||||
if (hash->encoding == OBJ_ENCODING_ZIPLIST) {
|
||||
/* it is inefficient to repeatedly pick one random element from a
|
||||
* ziplist. so we use this instead: */
|
||||
ziplistEntry *keys, *vals = NULL;
|
||||
keys = zmalloc(sizeof(ziplistEntry)*count);
|
||||
if (withvalues)
|
||||
vals = zmalloc(sizeof(ziplistEntry)*count);
|
||||
serverAssert(ziplistRandomPairsUnique(hash->ptr, count, keys, vals) == count);
|
||||
harndfieldReplyWithZiplist(c, count, keys, vals);
|
||||
zfree(keys);
|
||||
zfree(vals);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Hashtable encoding (generic implementation) */
|
||||
unsigned long added = 0;
|
||||
ziplistEntry key, value;
|
||||
dict *d = dictCreate(&hashDictType, NULL);
|
||||
|
47
src/t_zset.c
47
src/t_zset.c
@ -3956,6 +3956,23 @@ void bzpopmaxCommand(client *c) {
|
||||
blockingGenericZpopCommand(c,ZSET_MAX);
|
||||
}
|
||||
|
||||
static void zarndmemberReplyWithZiplist(client *c, unsigned int count, ziplistEntry *keys, ziplistEntry *vals) {
|
||||
for (unsigned long i = 0; i < count; i++) {
|
||||
if (vals && c->resp > 2)
|
||||
addReplyArrayLen(c,2);
|
||||
if (keys[i].sval)
|
||||
addReplyBulkCBuffer(c, keys[i].sval, keys[i].slen);
|
||||
else
|
||||
addReplyBulkLongLong(c, keys[i].lval);
|
||||
if (vals) {
|
||||
if (vals[i].sval) {
|
||||
addReplyDouble(c, zzlStrtod(vals[i].sval,vals[i].slen));
|
||||
} else
|
||||
addReplyDouble(c, vals[i].lval);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* How many times bigger should be the zset compared to the requested size
|
||||
* for us to not use the "remove elements" strategy? Read later in the
|
||||
* implementation for more info. */
|
||||
@ -4020,20 +4037,7 @@ void zrandmemberWithCountCommand(client *c, long l, int withscores) {
|
||||
sample_count = count > limit ? limit : count;
|
||||
count -= sample_count;
|
||||
ziplistRandomPairs(zsetobj->ptr, sample_count, keys, vals);
|
||||
for (unsigned long i = 0; i < sample_count; i++) {
|
||||
if (withscores && c->resp > 2)
|
||||
addReplyArrayLen(c,2);
|
||||
if (keys[i].sval)
|
||||
addReplyBulkCBuffer(c, keys[i].sval, keys[i].slen);
|
||||
else
|
||||
addReplyBulkLongLong(c, keys[i].lval);
|
||||
if (withscores) {
|
||||
if (vals[i].sval) {
|
||||
addReplyDouble(c, zzlStrtod(vals[i].sval,vals[i].slen));
|
||||
} else
|
||||
addReplyDouble(c, vals[i].lval);
|
||||
}
|
||||
}
|
||||
zarndmemberReplyWithZiplist(c, sample_count, keys, vals);
|
||||
}
|
||||
zfree(keys);
|
||||
zfree(vals);
|
||||
@ -4122,6 +4126,21 @@ void zrandmemberWithCountCommand(client *c, long l, int withscores) {
|
||||
* to the temporary set, trying to eventually get enough unique elements
|
||||
* to reach the specified count. */
|
||||
else {
|
||||
if (zsetobj->encoding == OBJ_ENCODING_ZIPLIST) {
|
||||
/* it is inefficient to repeatedly pick one random element from a
|
||||
* ziplist. so we use this instead: */
|
||||
ziplistEntry *keys, *vals = NULL;
|
||||
keys = zmalloc(sizeof(ziplistEntry)*count);
|
||||
if (withscores)
|
||||
vals = zmalloc(sizeof(ziplistEntry)*count);
|
||||
serverAssert(ziplistRandomPairsUnique(zsetobj->ptr, count, keys, vals) == count);
|
||||
zarndmemberReplyWithZiplist(c, count, keys, vals);
|
||||
zfree(keys);
|
||||
zfree(vals);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Hashtable encoding (generic implementation) */
|
||||
unsigned long added = 0;
|
||||
dict *d = dictCreate(&hashDictType, NULL);
|
||||
|
||||
|
@ -1523,8 +1523,8 @@ void ziplistRandomPair(unsigned char *zl, unsigned long total_count, ziplistEntr
|
||||
}
|
||||
|
||||
/* int compare for qsort */
|
||||
int intCompare(const void *a, const void *b) {
|
||||
return (*(int *) a - *(int *) b);
|
||||
int uintCompare(const void *a, const void *b) {
|
||||
return (*(unsigned int *) a - *(unsigned int *) b);
|
||||
}
|
||||
|
||||
/* Helper method to store a string into from val or lval into dest */
|
||||
@ -1534,41 +1534,42 @@ static inline void ziplistSaveValue(unsigned char *val, unsigned int len, long l
|
||||
dest->lval = lval;
|
||||
}
|
||||
|
||||
/* Randomly select unique count of key value pairs and store into 'keys' and
|
||||
* 'vals' args. The order of the picked entries is random.
|
||||
/* Randomly select count of key value pairs and store into 'keys' and
|
||||
* 'vals' args. The order of the picked entries is random, and the selections
|
||||
* are non-unique (repetitions are possible).
|
||||
* The 'vals' arg can be NULL in which case we skip these. */
|
||||
void ziplistRandomPairs(unsigned char *zl, int count, ziplistEntry *keys, ziplistEntry *vals) {
|
||||
void ziplistRandomPairs(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals) {
|
||||
unsigned char *p, *key, *value;
|
||||
unsigned int klen, vlen;
|
||||
long long klval, vlval;
|
||||
unsigned int klen = 0, vlen = 0;
|
||||
long long klval = 0, vlval = 0;
|
||||
|
||||
/* Notice: the index member must be first due to the use in intCompare */
|
||||
/* Notice: the index member must be first due to the use in uintCompare */
|
||||
typedef struct {
|
||||
int index;
|
||||
int order;
|
||||
unsigned int index;
|
||||
unsigned int order;
|
||||
} rand_pick;
|
||||
rand_pick *picks = zmalloc(sizeof(rand_pick)*count);
|
||||
unsigned long total_size = ziplistLen(zl)/2;
|
||||
unsigned int total_size = ziplistLen(zl)/2;
|
||||
|
||||
/* Avoid div by zero on corrupt ziplist */
|
||||
assert(total_size);
|
||||
|
||||
/* create a pool of random indexes (some may be duplicate). */
|
||||
for (int i = 0; i < count; i++) {
|
||||
for (unsigned int i = 0; i < count; i++) {
|
||||
picks[i].index = (rand() % total_size) * 2; /* Generate even indexes */
|
||||
/* keep track of the order we picked them */
|
||||
picks[i].order = i;
|
||||
}
|
||||
|
||||
/* sort by indexes. */
|
||||
qsort(picks, count, sizeof(rand_pick), intCompare);
|
||||
qsort(picks, count, sizeof(rand_pick), uintCompare);
|
||||
|
||||
/* fetch the elements form the ziplist into a output array respecting the original order. */
|
||||
int zipindex = 0, pickindex = 0;
|
||||
unsigned int zipindex = 0, pickindex = 0;
|
||||
p = ziplistIndex(zl, 0);
|
||||
while (ziplistGet(p, &key, &klen, &klval) && pickindex < count) {
|
||||
p = ziplistNext(zl, p);
|
||||
ziplistGet(p, &value, &vlen, &vlval);
|
||||
assert(ziplistGet(p, &value, &vlen, &vlval));
|
||||
while (pickindex < count && zipindex == picks[pickindex].index) {
|
||||
int storeorder = picks[pickindex].order;
|
||||
ziplistSaveValue(key, klen, klval, &keys[storeorder]);
|
||||
@ -1583,6 +1584,51 @@ void ziplistRandomPairs(unsigned char *zl, int count, ziplistEntry *keys, ziplis
|
||||
zfree(picks);
|
||||
}
|
||||
|
||||
/* Randomly select count of key value pairs and store into 'keys' and
|
||||
* 'vals' args. The selections are unique (no repetitions), and the order of
|
||||
* the picked entries is NOT-random.
|
||||
* The 'vals' arg can be NULL in which case we skip these.
|
||||
* The return value is the number of items picked which can be lower than the
|
||||
* requested count if the ziplist doesn't hold enough pairs. */
|
||||
unsigned int ziplistRandomPairsUnique(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals) {
|
||||
unsigned char *p, *key;
|
||||
unsigned int klen = 0;
|
||||
long long klval = 0;
|
||||
unsigned int total_size = ziplistLen(zl)/2;
|
||||
unsigned int index = 0;
|
||||
if (count > total_size)
|
||||
count = total_size;
|
||||
|
||||
/* To only iterate once, every time we try to pick a member, the probability
|
||||
* we pick it is the quotient of the count left we want to pick and the
|
||||
* count still we haven't visited in the dict, this way, we could make every
|
||||
* member be equally picked.*/
|
||||
p = ziplistIndex(zl, 0);
|
||||
unsigned int picked = 0, remaining = count;
|
||||
while (picked < count && p) {
|
||||
double randomDouble = ((double)rand()) / RAND_MAX;
|
||||
double threshold = ((double)remaining) / (total_size - index);
|
||||
if(randomDouble < threshold){
|
||||
assert(ziplistGet(p, &key, &klen, &klval));
|
||||
ziplistSaveValue(key, klen, klval, &keys[picked]);
|
||||
p = ziplistNext(zl, p);
|
||||
assert(p);
|
||||
if (vals) {
|
||||
assert(ziplistGet(p, &key, &klen, &klval));
|
||||
ziplistSaveValue(key, klen, klval, &vals[picked]);
|
||||
}
|
||||
remaining--;
|
||||
picked++;
|
||||
} else {
|
||||
p = ziplistNext(zl, p);
|
||||
assert(p);
|
||||
}
|
||||
p = ziplistNext(zl, p);
|
||||
index++;
|
||||
}
|
||||
return picked;
|
||||
}
|
||||
|
||||
#ifdef REDIS_TEST
|
||||
#include <sys/time.h>
|
||||
#include "adlist.h"
|
||||
|
@ -62,7 +62,8 @@ typedef int (*ziplistValidateEntryCB)(unsigned char* p, void* userdata);
|
||||
int ziplistValidateIntegrity(unsigned char *zl, size_t size, int deep,
|
||||
ziplistValidateEntryCB entry_cb, void *cb_userdata);
|
||||
void ziplistRandomPair(unsigned char *zl, unsigned long total_count, ziplistEntry *key, ziplistEntry *val);
|
||||
void ziplistRandomPairs(unsigned char *zl, int count, ziplistEntry *keys, ziplistEntry *vals);
|
||||
void ziplistRandomPairs(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals);
|
||||
unsigned int ziplistRandomPairsUnique(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals);
|
||||
|
||||
#ifdef REDIS_TEST
|
||||
int ziplistTest(int argc, char *argv[]);
|
||||
|
Loading…
x
Reference in New Issue
Block a user