From 62b1f32062b8f688179a8262959a5b80d0ad4de7 Mon Sep 17 00:00:00 2001
From: Oran Agra <oran@redislabs.com>
Date: Sun, 7 Feb 2021 16:55:11 +0200
Subject: [PATCH] Optimize HRANDFIELD and ZRANDMEMBER case 4 when ziplist
 encoded (#8444)

It is inefficient to repeatedly pick a single random element from a
ziplist.
For CASE4, which is when the user requested a low number of unique
random picks from the collectoin, we used thta pattern.

Now we use a different algorithm that picks unique elements from a
ziplist, and guarentee no duplicate but doesn't provide random order
(which is only needed in the non-unique random picks case)

Unrelated changes:
* change ziplist count and indexes variables to unsigned
* solve compilation warnings about uninitialized vars in gcc 10.2

Co-authored-by: xinluton <xinluton@qq.com>
---
 src/t_hash.c  | 47 +++++++++++++++++++++----------
 src/t_zset.c  | 47 +++++++++++++++++++++----------
 src/ziplist.c | 76 +++++++++++++++++++++++++++++++++++++++++----------
 src/ziplist.h |  3 +-
 4 files changed, 129 insertions(+), 44 deletions(-)

diff --git a/src/t_hash.c b/src/t_hash.c
index 98ba69df7..082e0f129 100644
--- a/src/t_hash.c
+++ b/src/t_hash.c
@@ -959,6 +959,23 @@ void hscanCommand(client *c) {
     scanGenericCommand(c,o,cursor);
 }
 
+static void harndfieldReplyWithZiplist(client *c, unsigned int count, ziplistEntry *keys, ziplistEntry *vals) {
+    for (unsigned long i = 0; i < count; i++) {
+        if (vals && c->resp > 2)
+            addReplyArrayLen(c,2);
+        if (keys[i].sval)
+            addReplyBulkCBuffer(c, keys[i].sval, keys[i].slen);
+        else
+            addReplyBulkLongLong(c, keys[i].lval);
+        if (vals) {
+            if (vals[i].sval)
+                addReplyBulkCBuffer(c, vals[i].sval, vals[i].slen);
+            else
+                addReplyBulkLongLong(c, vals[i].lval);
+        }
+    }
+}
+
 /* How many times bigger should be the hash compared to the requested size
  * for us to not use the "remove elements" strategy? Read later in the
  * implementation for more info. */
@@ -1024,20 +1041,7 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) {
                 sample_count = count > limit ? limit : count;
                 count -= sample_count;
                 ziplistRandomPairs(hash->ptr, sample_count, keys, vals);
-                for (unsigned long i = 0; i < sample_count; i++) {
-                    if (withvalues && c->resp > 2)
-                        addReplyArrayLen(c,2);
-                    if (keys[i].sval)
-                        addReplyBulkCBuffer(c, keys[i].sval, keys[i].slen);
-                    else
-                        addReplyBulkLongLong(c, keys[i].lval);
-                    if (withvalues) {
-                        if (vals[i].sval)
-                            addReplyBulkCBuffer(c, vals[i].sval, vals[i].slen);
-                        else
-                            addReplyBulkLongLong(c, vals[i].lval);
-                    }
-                }
+                harndfieldReplyWithZiplist(c, sample_count, keys, vals);
             }
             zfree(keys);
             zfree(vals);
@@ -1130,6 +1134,21 @@ void hrandfieldWithCountCommand(client *c, long l, int withvalues) {
      * to the temporary hash, trying to eventually get enough unique elements
      * to reach the specified count. */
     else {
+        if (hash->encoding == OBJ_ENCODING_ZIPLIST) {
+            /* it is inefficient to repeatedly pick one random element from a
+             * ziplist. so we use this instead: */
+            ziplistEntry *keys, *vals = NULL;
+            keys = zmalloc(sizeof(ziplistEntry)*count);
+            if (withvalues)
+                vals = zmalloc(sizeof(ziplistEntry)*count);
+            serverAssert(ziplistRandomPairsUnique(hash->ptr, count, keys, vals) == count);
+            harndfieldReplyWithZiplist(c, count, keys, vals);
+            zfree(keys);
+            zfree(vals);
+            return;
+        }
+
+        /* Hashtable encoding (generic implementation) */
         unsigned long added = 0;
         ziplistEntry key, value;
         dict *d = dictCreate(&hashDictType, NULL);
diff --git a/src/t_zset.c b/src/t_zset.c
index c14a37263..869cf6c9a 100644
--- a/src/t_zset.c
+++ b/src/t_zset.c
@@ -3956,6 +3956,23 @@ void bzpopmaxCommand(client *c) {
     blockingGenericZpopCommand(c,ZSET_MAX);
 }
 
+static void zarndmemberReplyWithZiplist(client *c, unsigned int count, ziplistEntry *keys, ziplistEntry *vals) {
+    for (unsigned long i = 0; i < count; i++) {
+        if (vals && c->resp > 2)
+            addReplyArrayLen(c,2);
+        if (keys[i].sval)
+            addReplyBulkCBuffer(c, keys[i].sval, keys[i].slen);
+        else
+            addReplyBulkLongLong(c, keys[i].lval);
+        if (vals) {
+            if (vals[i].sval) {
+                addReplyDouble(c, zzlStrtod(vals[i].sval,vals[i].slen));
+            } else
+                addReplyDouble(c, vals[i].lval);
+        }
+    }
+}
+
 /* How many times bigger should be the zset compared to the requested size
  * for us to not use the "remove elements" strategy? Read later in the
  * implementation for more info. */
@@ -4020,20 +4037,7 @@ void zrandmemberWithCountCommand(client *c, long l, int withscores) {
                 sample_count = count > limit ? limit : count;
                 count -= sample_count;
                 ziplistRandomPairs(zsetobj->ptr, sample_count, keys, vals);
-                for (unsigned long i = 0; i < sample_count; i++) {
-                    if (withscores && c->resp > 2)
-                        addReplyArrayLen(c,2);
-                    if (keys[i].sval)
-                        addReplyBulkCBuffer(c, keys[i].sval, keys[i].slen);
-                    else
-                        addReplyBulkLongLong(c, keys[i].lval);
-                    if (withscores) {
-                        if (vals[i].sval) {
-                            addReplyDouble(c, zzlStrtod(vals[i].sval,vals[i].slen));
-                        } else
-                            addReplyDouble(c, vals[i].lval);
-                    }
-                }
+                zarndmemberReplyWithZiplist(c, sample_count, keys, vals);
             }
             zfree(keys);
             zfree(vals);
@@ -4122,6 +4126,21 @@ void zrandmemberWithCountCommand(client *c, long l, int withscores) {
      * to the temporary set, trying to eventually get enough unique elements
      * to reach the specified count. */
     else {
+        if (zsetobj->encoding == OBJ_ENCODING_ZIPLIST) {
+            /* it is inefficient to repeatedly pick one random element from a
+             * ziplist. so we use this instead: */
+            ziplistEntry *keys, *vals = NULL;
+            keys = zmalloc(sizeof(ziplistEntry)*count);
+            if (withscores)
+                vals = zmalloc(sizeof(ziplistEntry)*count);
+            serverAssert(ziplistRandomPairsUnique(zsetobj->ptr, count, keys, vals) == count);
+            zarndmemberReplyWithZiplist(c, count, keys, vals);
+            zfree(keys);
+            zfree(vals);
+            return;
+        }
+
+        /* Hashtable encoding (generic implementation) */
         unsigned long added = 0;
         dict *d = dictCreate(&hashDictType, NULL);
 
diff --git a/src/ziplist.c b/src/ziplist.c
index 248166ac2..2c0074d5d 100644
--- a/src/ziplist.c
+++ b/src/ziplist.c
@@ -1523,8 +1523,8 @@ void ziplistRandomPair(unsigned char *zl, unsigned long total_count, ziplistEntr
 }
 
 /* int compare for qsort */
-int intCompare(const void *a, const void *b) {
-    return (*(int *) a - *(int *) b);
+int uintCompare(const void *a, const void *b) {
+    return (*(unsigned int *) a - *(unsigned int *) b);
 }
 
 /* Helper method to store a string into from val or lval into dest */
@@ -1534,41 +1534,42 @@ static inline void ziplistSaveValue(unsigned char *val, unsigned int len, long l
     dest->lval = lval;
 }
 
-/* Randomly select unique count of key value pairs and store into 'keys' and
- * 'vals' args. The order of the picked entries is random.
+/* Randomly select count of key value pairs and store into 'keys' and
+ * 'vals' args. The order of the picked entries is random, and the selections
+ * are non-unique (repetitions are possible).
  * The 'vals' arg can be NULL in which case we skip these. */
-void ziplistRandomPairs(unsigned char *zl, int count, ziplistEntry *keys, ziplistEntry *vals) {
+void ziplistRandomPairs(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals) {
     unsigned char *p, *key, *value;
-    unsigned int klen, vlen;
-    long long klval, vlval;
+    unsigned int klen = 0, vlen = 0;
+    long long klval = 0, vlval = 0;
 
-    /* Notice: the index member must be first due to the use in intCompare */
+    /* Notice: the index member must be first due to the use in uintCompare */
     typedef struct {
-        int index;
-        int order;
+        unsigned int index;
+        unsigned int order;
     } rand_pick;
     rand_pick *picks = zmalloc(sizeof(rand_pick)*count);
-    unsigned long total_size = ziplistLen(zl)/2;
+    unsigned int total_size = ziplistLen(zl)/2;
 
     /* Avoid div by zero on corrupt ziplist */
     assert(total_size);
 
     /* create a pool of random indexes (some may be duplicate). */
-    for (int i = 0; i < count; i++) {
+    for (unsigned int i = 0; i < count; i++) {
         picks[i].index = (rand() % total_size) * 2; /* Generate even indexes */
         /* keep track of the order we picked them */
         picks[i].order = i;
     }
 
     /* sort by indexes. */
-    qsort(picks, count, sizeof(rand_pick), intCompare);
+    qsort(picks, count, sizeof(rand_pick), uintCompare);
 
     /* fetch the elements form the ziplist into a output array respecting the original order. */
-    int zipindex = 0, pickindex = 0;
+    unsigned int zipindex = 0, pickindex = 0;
     p = ziplistIndex(zl, 0);
     while (ziplistGet(p, &key, &klen, &klval) && pickindex < count) {
         p = ziplistNext(zl, p);
-        ziplistGet(p, &value, &vlen, &vlval);
+        assert(ziplistGet(p, &value, &vlen, &vlval));
         while (pickindex < count && zipindex == picks[pickindex].index) {
             int storeorder = picks[pickindex].order;
             ziplistSaveValue(key, klen, klval, &keys[storeorder]);
@@ -1583,6 +1584,51 @@ void ziplistRandomPairs(unsigned char *zl, int count, ziplistEntry *keys, ziplis
     zfree(picks);
 }
 
+/* Randomly select count of key value pairs and store into 'keys' and
+ * 'vals' args. The selections are unique (no repetitions), and the order of
+ * the picked entries is NOT-random.
+ * The 'vals' arg can be NULL in which case we skip these.
+ * The return value is the number of items picked which can be lower than the
+ * requested count if the ziplist doesn't hold enough pairs. */
+unsigned int ziplistRandomPairsUnique(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals) {
+    unsigned char *p, *key;
+    unsigned int klen = 0;
+    long long klval = 0;
+    unsigned int total_size = ziplistLen(zl)/2;
+    unsigned int index = 0;
+    if (count > total_size)
+        count = total_size;
+
+    /* To only iterate once, every time we try to pick a member, the probability
+     * we pick it is the quotient of the count left we want to pick and the
+     * count still we haven't visited in the dict, this way, we could make every
+     * member be equally picked.*/
+    p = ziplistIndex(zl, 0);
+    unsigned int picked = 0, remaining = count;
+    while (picked < count && p) {
+        double randomDouble = ((double)rand()) / RAND_MAX;
+        double threshold = ((double)remaining) / (total_size - index);
+        if(randomDouble < threshold){
+            assert(ziplistGet(p, &key, &klen, &klval));
+            ziplistSaveValue(key, klen, klval, &keys[picked]);
+            p = ziplistNext(zl, p);
+            assert(p);
+            if (vals) {
+                assert(ziplistGet(p, &key, &klen, &klval));
+                ziplistSaveValue(key, klen, klval, &vals[picked]);
+            }
+            remaining--;
+            picked++;
+        } else {
+            p = ziplistNext(zl, p);
+            assert(p);
+        }
+        p = ziplistNext(zl, p);
+        index++;
+    }
+    return picked;
+}
+
 #ifdef REDIS_TEST
 #include <sys/time.h>
 #include "adlist.h"
diff --git a/src/ziplist.h b/src/ziplist.h
index 5fb8fd46a..fff334cbd 100644
--- a/src/ziplist.h
+++ b/src/ziplist.h
@@ -62,7 +62,8 @@ typedef int (*ziplistValidateEntryCB)(unsigned char* p, void* userdata);
 int ziplistValidateIntegrity(unsigned char *zl, size_t size, int deep,
                              ziplistValidateEntryCB entry_cb, void *cb_userdata);
 void ziplistRandomPair(unsigned char *zl, unsigned long total_count, ziplistEntry *key, ziplistEntry *val);
-void ziplistRandomPairs(unsigned char *zl, int count, ziplistEntry *keys, ziplistEntry *vals);
+void ziplistRandomPairs(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals);
+unsigned int ziplistRandomPairsUnique(unsigned char *zl, unsigned int count, ziplistEntry *keys, ziplistEntry *vals);
 
 #ifdef REDIS_TEST
 int ziplistTest(int argc, char *argv[]);