Replace dict with new hashtable for sets datatype (#1176)

The new `hashtable` provides faster lookups and uses less memory than `dict`. A TCL test case "SRANDMEMBER with a dict containing long chain" is deleted because it's covered by a hashtable unit test "test_random_entry_with_long_chain", which is already present. This change also moves some logic from dismissMemory (server.c) to zmadvise_dontneed (zmalloc.c), so the hashtable implementation, which needs the dismiss functionality, doesn't need to depend on object.c and server.h. This PR follows #1186. --------- Signed-off-by: Rain Valentine <rsg000@gmail.com> Signed-off-by: Viktor Söderqvist <viktor.soderqvist@est.tech> Co-authored-by: Viktor Söderqvist <viktor.soderqvist@est.tech>
2024-12-14 11:53:48 -08:00 · 2024-12-14 11:53:48 -08:00 · 88942c8e61
commit 88942c8e61
parent 0e96bb311e
17 changed files with 326 additions and 371 deletions
--- a/src/db.c
+++ b/src/db.c
@ -978,7 +978,7 @@ void keysScanCallback(void *privdata, void *entry) {

 /* This callback is used by scanGenericCommand in order to collect elements
 * returned by the dictionary iterator into a list. */
-void scanCallback(void *privdata, const dictEntry *de) {
+void dictScanCallback(void *privdata, const dictEntry *de) {
    scanData *data = (scanData *)privdata;
    list *keys = data->keys;
    robj *o = data->o;
@ -998,9 +998,7 @@ void scanCallback(void *privdata, const dictEntry *de) {
        }
    }

-    if (o->type == OBJ_SET) {
-        key = keysds;
-    } else if (o->type == OBJ_HASH) {
+    if (o->type == OBJ_HASH) {
        key = keysds;
        if (!data->only_keys) {
            val = dictGetVal(de);
@ -1013,13 +1011,33 @@ void scanCallback(void *privdata, const dictEntry *de) {
            val = sdsnewlen(buf, len);
        }
    } else {
-        serverPanic("Type not handled in SCAN callback.");
+        serverPanic("Type not handled in dict SCAN callback.");
    }

    listAddNodeTail(keys, key);
    if (val) listAddNodeTail(keys, val);
 }

+void hashtableScanCallback(void *privdata, void *entry) { /* SCAN callback for hashtable-encoded sets: appends matching members to data->keys. */
+    scanData *data = (scanData *)privdata;
+    robj *o = data->o;
+    list *keys = data->keys;
+    data->sampled++; /* Counted before pattern filtering, so filtered-out entries count as sampled too. */
+
+    /* Currently only implemented for SET scan; any other object type or encoding trips the assert. */
+    serverAssert(o && o->type == OBJ_SET && o->encoding == OBJ_ENCODING_HASHTABLE);
+    sds key = (sds)entry; /* Specific for OBJ_SET: the entry pointer is used directly as the member's sds. */
+
+    /* Filter element if it does not match the pattern. */
+    if (data->pattern) {
+        if (!stringmatchlen(data->pattern, sdslen(data->pattern), key, sdslen(key), 0)) {
+            return;
+        }
+    }
+
+    listAddNodeTail(keys, key); /* The list stores the entry pointer itself; no copy is made here. */
+}
+
 /* Try to parse a SCAN cursor stored at object 'o':
 * if the cursor is valid, store it as unsigned integer into *cursor and
 * returns C_OK. Otherwise return C_ERR and send an error to the
@ -1083,7 +1101,6 @@ void scanGenericCommand(client *c, robj *o, unsigned long long cursor) {
    sds typename = NULL;
    long long type = LLONG_MAX;
    int patlen = 0, use_pattern = 0, only_keys = 0;
-    dict *ht;

    /* Object must be NULL (to iterate keys names), or the type of the object
     * must be Set, Sorted Set, or Hash. */
@ -1152,34 +1169,35 @@ void scanGenericCommand(client *c, robj *o, unsigned long long cursor) {
     * just return everything inside the object in a single call, setting the
     * cursor to zero to signal the end of the iteration. */

-    /* Handle the case of a hash table. */
-    ht = NULL;
+    /* Handle the case of kvstore, dict or hashtable. */
+    dict *dict_table = NULL;
+    hashtable *hashtable_table = NULL;
+    int shallow_copied_list_items = 0;
    if (o == NULL) {
-        ht = NULL;
-    } else if (o->type == OBJ_SET && o->encoding == OBJ_ENCODING_HT) {
-        ht = o->ptr;
+        shallow_copied_list_items = 1;
+    } else if (o->type == OBJ_SET && o->encoding == OBJ_ENCODING_HASHTABLE) {
+        hashtable_table = o->ptr;
+        shallow_copied_list_items = 1;
    } else if (o->type == OBJ_HASH && o->encoding == OBJ_ENCODING_HT) {
-        ht = o->ptr;
+        dict_table = o->ptr;
+        shallow_copied_list_items = 1;
    } else if (o->type == OBJ_ZSET && o->encoding == OBJ_ENCODING_SKIPLIST) {
        zset *zs = o->ptr;
-        ht = zs->dict;
+        dict_table = zs->dict;
+        /* scanning ZSET allocates temporary strings even though it's a dict */
+        shallow_copied_list_items = 0;
    }

    list *keys = listCreate();
-    /* Set a free callback for the contents of the collected keys list.
-     * For the main keyspace dict, and when we scan a key that's dict encoded
-     * (we have 'ht'), we don't need to define free method because the strings
-     * in the list are just a shallow copy from the pointer in the dictEntry.
-     * When scanning a key with other encodings (e.g. listpack), we need to
-     * free the temporary strings we add to that list.
-     * The exception to the above is ZSET, where we do allocate temporary
-     * strings even when scanning a dict. */
-    if (o && (!ht || o->type == OBJ_ZSET)) {
+    /* Set a free callback for the contents of the collected keys list if they
+     * are deep copied temporary strings. We must not free them if they are just
+     * a shallow copy - a pointer to the actual data in the data structure */
+    if (!shallow_copied_list_items) {
        listSetFreeMethod(keys, (void (*)(void *))sdsfree);
    }

-    /* For main dictionary scan or data structure using hashtable. */
-    if (!o || ht) {
+    /* For main hash table scan or scannable data structure. */
+    if (!o || dict_table || hashtable_table) {
        /* We set the max number of iterations to ten times the specified
         * COUNT, so if the hash table is in a pathological state (very
         * sparsely populated) we avoid to block too much time at the cost
@ -1188,7 +1206,7 @@ void scanGenericCommand(client *c, robj *o, unsigned long long cursor) {

        /* We pass scanData which have three pointers to the callback:
         * 1. data.keys: the list to which it will add new elements;
-         * 2. data.o: the object containing the dictionary so that
+         * 2. data.o: the object containing the hash table so that
         * it is possible to fetch more data in a type-dependent way;
         * 3. data.type: the specified type scan in the db, LLONG_MAX means
         * type matching is no needed;
@ -1219,8 +1237,10 @@ void scanGenericCommand(client *c, robj *o, unsigned long long cursor) {
             * If cursor is empty, we should try exploring next non-empty slot. */
            if (o == NULL) {
                cursor = kvstoreScan(c->db->keys, cursor, onlydidx, keysScanCallback, NULL, &data);
+            } else if (dict_table) {
+                cursor = dictScan(dict_table, cursor, dictScanCallback, &data);
            } else {
-                cursor = dictScan(ht, cursor, scanCallback, &data);
+                cursor = hashtableScan(hashtable_table, cursor, hashtableScanCallback, &data);
            }
        } while (cursor && maxiterations-- && data.sampled < count);
    } else if (o->type == OBJ_SET) {
--- a/src/debug.c
+++ b/src/debug.c
@ -916,30 +916,35 @@ void debugCommand(client *c) {
        addReplyVerbatim(c, stats, sdslen(stats), "txt");
        sdsfree(stats);
    } else if (!strcasecmp(c->argv[1]->ptr, "htstats-key") && c->argc >= 3) {
-        robj *o;
-        dict *ht = NULL;
        int full = 0;
-
        if (c->argc >= 4 && !strcasecmp(c->argv[3]->ptr, "full")) full = 1;

-        if ((o = objectCommandLookupOrReply(c, c->argv[2], shared.nokeyerr)) == NULL) return;
+        robj *o = objectCommandLookupOrReply(c, c->argv[2], shared.nokeyerr);
+        if (o == NULL) return;

-        /* Get the hash table reference from the object, if possible. */
+        /* Get the dict or hashtable reference from the object, if possible. */
+        dict *d = NULL;
+        hashtable *ht = NULL;
        switch (o->encoding) {
        case OBJ_ENCODING_SKIPLIST: {
            zset *zs = o->ptr;
-            ht = zs->dict;
+            d = zs->dict;
        } break;
-        case OBJ_ENCODING_HT: ht = o->ptr; break;
+        case OBJ_ENCODING_HT: d = o->ptr; break;
+        case OBJ_ENCODING_HASHTABLE: ht = o->ptr; break;
        }

-        if (ht == NULL) {
+        if (d != NULL) {
+            char buf[4096];
+            dictGetStats(buf, sizeof(buf), d, full);
+            addReplyVerbatim(c, buf, strlen(buf), "txt");
+        } else if (ht != NULL) {
+            char buf[4096];
+            hashtableGetStats(buf, sizeof(buf), ht, full);
+            addReplyVerbatim(c, buf, strlen(buf), "txt");
+        } else {
            addReplyError(c, "The value stored at the specified key is not "
                             "represented using an hash table");
-        } else {
-            char buf[4096];
-            dictGetStats(buf, sizeof(buf), ht, full);
-            addReplyVerbatim(c, buf, strlen(buf), "txt");
        }
    } else if (!strcasecmp(c->argv[1]->ptr, "change-repl-id") && c->argc == 2) {
        serverLog(LL_NOTICE, "Changing replication IDs after receiving DEBUG change-repl-id");
--- a/src/defrag.c
+++ b/src/defrag.c
@ -34,6 +34,7 @@
 */

 #include "server.h"
+#include "hashtable.h"
 #include "script.h"
 #include <stddef.h>

@ -379,6 +380,20 @@ static void activeDefragSdsDict(dict *d, int val_type) {
    } while (cursor != 0);
 }

+void activeDefragSdsHashtableCallback(void *privdata, void *entry_ref) { /* Scan callback: defrags one sds entry through a reference to its slot. */
+    UNUSED(privdata);
+    sds *sds_ref = (sds *)entry_ref; /* entry_ref is treated as a pointer to the stored sds; the scans in this file pass HASHTABLE_SCAN_EMIT_REF. */
+    sds new_sds = activeDefragSds(*sds_ref);
+    if (new_sds != NULL) *sds_ref = new_sds; /* Overwrite the slot only when activeDefragSds() returned a non-NULL pointer. */
+}
+
+void activeDefragSdsHashtable(hashtable *ht) { /* Runs the sds defrag callback over ht, looping until the scan cursor returns to 0. */
+    unsigned long cursor = 0;
+    do { /* Each call resumes from the cursor returned by the previous one. */
+        cursor = hashtableScanDefrag(ht, cursor, activeDefragSdsHashtableCallback, NULL, activeDefragAlloc, HASHTABLE_SCAN_EMIT_REF);
+    } while (cursor != 0);
+}
+
 /* Defrag a list of ptr, sds or robj string values */
 static void activeDefragQuickListNode(quicklist *ql, quicklistNode **node_ref) {
    quicklistNode *newnode, *node = *node_ref;
@ -497,11 +512,9 @@ static void scanCallbackCountScanned(void *privdata, const dictEntry *de) {
 }

 static void scanLaterSet(robj *ob, unsigned long *cursor) {
-    if (ob->type != OBJ_SET || ob->encoding != OBJ_ENCODING_HT) return;
-    dict *d = ob->ptr;
-    dictDefragFunctions defragfns = {.defragAlloc = activeDefragAlloc,
-                                     .defragKey = (dictDefragAllocFunction *)activeDefragSds};
-    *cursor = dictScanDefrag(d, *cursor, scanCallbackCountScanned, &defragfns, NULL);
+    if (ob->type != OBJ_SET || ob->encoding != OBJ_ENCODING_HASHTABLE) return;
+    hashtable *ht = ob->ptr;
+    *cursor = hashtableScanDefrag(ht, *cursor, activeDefragSdsHashtableCallback, NULL, activeDefragAlloc, HASHTABLE_SCAN_EMIT_REF);
 }

 static void scanLaterHash(robj *ob, unsigned long *cursor) {
@ -560,15 +573,16 @@ static void defragHash(robj *ob) {
 }

 static void defragSet(robj *ob) {
-    dict *d, *newd;
-    serverAssert(ob->type == OBJ_SET && ob->encoding == OBJ_ENCODING_HT);
-    d = ob->ptr;
-    if (dictSize(d) > server.active_defrag_max_scan_fields)
+    serverAssert(ob->type == OBJ_SET && ob->encoding == OBJ_ENCODING_HASHTABLE);
+    hashtable *ht = ob->ptr;
+    if (hashtableSize(ht) > server.active_defrag_max_scan_fields) {
        defragLater(ob);
-    else
-        activeDefragSdsDict(d, DEFRAG_SDS_DICT_NO_VAL);
-    /* defrag the dict struct and tables */
-    if ((newd = dictDefragTables(ob->ptr))) ob->ptr = newd;
+    } else {
+        activeDefragSdsHashtable(ht);
+    }
+    /* defrag the hashtable struct and tables */
+    hashtable *newHashtable = hashtableDefragTables(ht, activeDefragAlloc);
+    if (newHashtable) ob->ptr = newHashtable;
 }

 /* Defrag callback for radix tree iterator, called for each node,
@ -766,7 +780,7 @@ static void defragKey(defragKeysCtx *ctx, robj **elemref) {
            serverPanic("Unknown list encoding");
        }
    } else if (ob->type == OBJ_SET) {
-        if (ob->encoding == OBJ_ENCODING_HT) {
+        if (ob->encoding == OBJ_ENCODING_HASHTABLE) {
            defragSet(ob);
        } else if (ob->encoding == OBJ_ENCODING_INTSET || ob->encoding == OBJ_ENCODING_LISTPACK) {
            void *newptr, *ptr = ob->ptr;
--- a/src/hashtable.c
+++ b/src/hashtable.c
@ -1023,7 +1023,7 @@ void *hashtableMetadata(hashtable *ht) {
 }

 /* Returns the number of entries stored. */
-size_t hashtableSize(hashtable *ht) {
+size_t hashtableSize(const hashtable *ht) {
    return ht->used[0] + ht->used[1];
 }

@ -1180,6 +1180,14 @@ hashtable *hashtableDefragTables(hashtable *ht, void *(*defragfn)(void *)) {
    return ht1;
 }

+/* Used for releasing memory to the OS to avoid unnecessary CoW. Called when we've
+ * forked and the memory won't be used again. Passes both tables (index 0 and 1) to zmadvise_dontneed() with a size hint. */
+void dismissHashtable(hashtable *ht) {
+    for (int i = 0; i < 2; i++) { /* NOTE(review): the hint multiplies by sizeof(bucket *); confirm whether tables[i] holds bucket pointers or inline buckets - in the latter case the hint underestimates the table's byte size. */
+        zmadvise_dontneed(ht->tables[i], numBuckets(ht->bucket_exp[i]) * sizeof(bucket *));
+    }
+}
+
 /* Returns 1 if an entry was found matching the key. Also points *found to it,
 * if found is provided. Returns 0 if no matching entry was found. */
 int hashtableFind(hashtable *ht, const void *key, void **found) {
--- a/src/hashtable.h
+++ b/src/hashtable.h
@ -108,7 +108,7 @@ void hashtableRelease(hashtable *ht);
 void hashtableEmpty(hashtable *ht, void(callback)(hashtable *));
 hashtableType *hashtableGetType(hashtable *ht);
 void *hashtableMetadata(hashtable *ht);
-size_t hashtableSize(hashtable *ht);
+size_t hashtableSize(const hashtable *ht);
 size_t hashtableBuckets(hashtable *ht);
 size_t hashtableChainedBuckets(hashtable *ht, int table);
 size_t hashtableMemUsage(hashtable *ht);
@ -123,6 +123,7 @@ int hashtableTryExpand(hashtable *ht, size_t size);
 int hashtableExpandIfNeeded(hashtable *ht);
 int hashtableShrinkIfNeeded(hashtable *ht);
 hashtable *hashtableDefragTables(hashtable *ht, void *(*defragfn)(void *));
+void dismissHashtable(hashtable *ht);

 /* Entries */
 int hashtableFind(hashtable *ht, const void *key, void **found);
--- a/src/lazyfree.c
+++ b/src/lazyfree.c
@ -116,9 +116,9 @@ size_t lazyfreeGetFreeEffort(robj *key, robj *obj, int dbid) {
    if (obj->type == OBJ_LIST && obj->encoding == OBJ_ENCODING_QUICKLIST) {
        quicklist *ql = obj->ptr;
        return ql->len;
-    } else if (obj->type == OBJ_SET && obj->encoding == OBJ_ENCODING_HT) {
-        dict *ht = obj->ptr;
-        return dictSize(ht);
+    } else if (obj->type == OBJ_SET && obj->encoding == OBJ_ENCODING_HASHTABLE) {
+        hashtable *ht = obj->ptr;
+        return hashtableSize(ht);
    } else if (obj->type == OBJ_ZSET && obj->encoding == OBJ_ENCODING_SKIPLIST) {
        zset *zs = obj->ptr;
        return zs->zsl->length;
--- a/src/module.c
+++ b/src/module.c
@ -11017,20 +11017,20 @@ typedef struct {
    ValkeyModuleScanKeyCB fn;
 } ScanKeyCBData;

-static void moduleScanKeyCallback(void *privdata, const dictEntry *de) {
+static void moduleScanKeyDictCallback(void *privdata, const dictEntry *de) {
    ScanKeyCBData *data = privdata;
    sds key = dictGetKey(de);
    robj *o = data->key->value;
    robj *field = createStringObject(key, sdslen(key));
    robj *value = NULL;
-    if (o->type == OBJ_SET) {
-        value = NULL;
-    } else if (o->type == OBJ_HASH) {
+    if (o->type == OBJ_HASH) {
        sds val = dictGetVal(de);
        value = createStringObject(val, sdslen(val));
    } else if (o->type == OBJ_ZSET) {
        double *val = (double *)dictGetVal(de);
        value = createStringObjectFromLongDouble(*val, 0);
+    } else {
+        serverPanic("unexpected object type");
    }

    data->fn(data->key, field, value, data->user_data);
@ -11038,6 +11038,17 @@ static void moduleScanKeyCallback(void *privdata, const dictEntry *de) {
    if (value) decrRefCount(value);
 }

+static void moduleScanKeyHashtableCallback(void *privdata, void *entry) { /* hashtableScan callback for set keys: forwards one member to the module's scan function. */
+    ScanKeyCBData *data = privdata;
+    robj *o = data->key->value;
+    serverAssert(o->type == OBJ_SET); /* This callback only handles sets. */
+    sds key = entry;
+    robj *field = createStringObject(key, sdslen(key)); /* Temporary string object built from the member, released below. */
+
+    data->fn(data->key, field, NULL, data->user_data); /* NULL is passed in the value position for set members. */
+    decrRefCount(field);
+}
+
 /* Scan api that allows a module to scan the elements in a hash, set or sorted set key
 *
 * Callback for scan implementation.
@ -11091,14 +11102,15 @@ int VM_ScanKey(ValkeyModuleKey *key, ValkeyModuleScanCursor *cursor, ValkeyModul
        errno = EINVAL;
        return 0;
    }
-    dict *ht = NULL;
+    dict *d = NULL;
+    hashtable *ht = NULL;
    robj *o = key->value;
    if (o->type == OBJ_SET) {
-        if (o->encoding == OBJ_ENCODING_HT) ht = o->ptr;
+        if (o->encoding == OBJ_ENCODING_HASHTABLE) ht = o->ptr;
    } else if (o->type == OBJ_HASH) {
-        if (o->encoding == OBJ_ENCODING_HT) ht = o->ptr;
+        if (o->encoding == OBJ_ENCODING_HT) d = o->ptr;
    } else if (o->type == OBJ_ZSET) {
-        if (o->encoding == OBJ_ENCODING_SKIPLIST) ht = ((zset *)o->ptr)->dict;
+        if (o->encoding == OBJ_ENCODING_SKIPLIST) d = ((zset *)o->ptr)->dict;
    } else {
        errno = EINVAL;
        return 0;
@ -11108,9 +11120,16 @@ int VM_ScanKey(ValkeyModuleKey *key, ValkeyModuleScanCursor *cursor, ValkeyModul
        return 0;
    }
    int ret = 1;
-    if (ht) {
+    if (d) {
        ScanKeyCBData data = {key, privdata, fn};
-        cursor->cursor = dictScan(ht, cursor->cursor, moduleScanKeyCallback, &data);
+        cursor->cursor = dictScan(d, cursor->cursor, moduleScanKeyDictCallback, &data);
+        if (cursor->cursor == 0) {
+            cursor->done = 1;
+            ret = 0;
+        }
+    } else if (ht) {
+        ScanKeyCBData data = {key, privdata, fn};
+        cursor->cursor = hashtableScan(ht, cursor->cursor, moduleScanKeyHashtableCallback, &data);
        if (cursor->cursor == 0) {
            cursor->done = 1;
            ret = 0;
--- a/src/object.c
+++ b/src/object.c
@ -429,9 +429,9 @@ robj *createListListpackObject(void) {
 }

 robj *createSetObject(void) {
-    dict *d = dictCreate(&setDictType);
-    robj *o = createObject(OBJ_SET, d);
-    o->encoding = OBJ_ENCODING_HT;
+    hashtable *ht = hashtableCreate(&setHashtableType);
+    robj *o = createObject(OBJ_SET, ht);
+    o->encoding = OBJ_ENCODING_HASHTABLE;
    return o;
 }

@ -506,7 +506,7 @@ void freeListObject(robj *o) {

 void freeSetObject(robj *o) {
    switch (o->encoding) {
-    case OBJ_ENCODING_HT: dictRelease((dict *)o->ptr); break;
+    case OBJ_ENCODING_HASHTABLE: hashtableRelease((hashtable *)o->ptr); break;
    case OBJ_ENCODING_INTSET:
    case OBJ_ENCODING_LISTPACK: zfree(o->ptr); break;
    default: serverPanic("Unknown set encoding type");
@ -622,23 +622,23 @@ void dismissListObject(robj *o, size_t size_hint) {

 /* See dismissObject() */
 void dismissSetObject(robj *o, size_t size_hint) {
-    if (o->encoding == OBJ_ENCODING_HT) {
-        dict *set = o->ptr;
-        serverAssert(dictSize(set) != 0);
+    if (o->encoding == OBJ_ENCODING_HASHTABLE) {
+        hashtable *ht = o->ptr;
+        serverAssert(hashtableSize(ht) != 0);
        /* We iterate all nodes only when average member size is bigger than a
         * page size, and there's a high chance we'll actually dismiss something. */
-        if (size_hint / dictSize(set) >= server.page_size) {
-            dictEntry *de;
-            dictIterator *di = dictGetIterator(set);
-            while ((de = dictNext(di)) != NULL) {
-                dismissSds(dictGetKey(de));
+        if (size_hint / hashtableSize(ht) >= server.page_size) {
+            hashtableIterator iter;
+            hashtableInitIterator(&iter, ht);
+            void *next;
+            while (hashtableNext(&iter, &next)) {
+                sds item = next;
+                dismissSds(item);
            }
-            dictReleaseIterator(di);
+            hashtableResetIterator(&iter);
        }

-        /* Dismiss hash table memory. */
-        dismissMemory(set->ht_table[0], DICTHT_SIZE(set->ht_size_exp[0]) * sizeof(dictEntry *));
-        dismissMemory(set->ht_table[1], DICTHT_SIZE(set->ht_size_exp[1]) * sizeof(dictEntry *));
+        dismissHashtable(ht);
    } else if (o->encoding == OBJ_ENCODING_INTSET) {
        dismissMemory(o->ptr, intsetBlobLen((intset *)o->ptr));
    } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
@ -728,7 +728,7 @@ void dismissStreamObject(robj *o, size_t size_hint) {
 * modifies any keys due to write traffic, it'll cause CoW which consume
 * physical memory. In the child process, after serializing the key and value,
 * the data is definitely not accessed again, so to avoid unnecessary CoW, we
- * try to release their memory back to OS. see dismissMemory().
+ * try to release their memory back to OS. see zmadvise_dontneed().
 *
 * Because of the cost of iterating all node/field/member/entry of complex data
 * types, we iterate and dismiss them only when approximate average we estimate
@ -1109,6 +1109,7 @@ char *strEncoding(int encoding) {
    case OBJ_ENCODING_RAW: return "raw";
    case OBJ_ENCODING_INT: return "int";
    case OBJ_ENCODING_HT: return "hashtable";
+    case OBJ_ENCODING_HASHTABLE: return "hashtable";
    case OBJ_ENCODING_QUICKLIST: return "quicklist";
    case OBJ_ENCODING_LISTPACK: return "listpack";
    case OBJ_ENCODING_INTSET: return "intset";
@ -1160,17 +1161,20 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
            serverPanic("Unknown list encoding");
        }
    } else if (o->type == OBJ_SET) {
-        if (o->encoding == OBJ_ENCODING_HT) {
-            d = o->ptr;
-            di = dictGetIterator(d);
-            asize = sizeof(*o) + sizeof(dict) + (sizeof(struct dictEntry *) * dictBuckets(d));
-            while ((de = dictNext(di)) != NULL && samples < sample_size) {
-                ele = dictGetKey(de);
-                elesize += dictEntryMemUsage(de) + sdsAllocSize(ele);
+        if (o->encoding == OBJ_ENCODING_HASHTABLE) {
+            hashtable *ht = o->ptr;
+            asize = sizeof(*o) + hashtableMemUsage(ht);
+
+            hashtableIterator iter;
+            hashtableInitIterator(&iter, ht);
+            void *next;
+            while (hashtableNext(&iter, &next) && samples < sample_size) {
+                sds element = next;
+                elesize += sdsAllocSize(element);
                samples++;
            }
-            dictReleaseIterator(di);
-            if (samples) asize += (double)elesize / samples * dictSize(d);
+            hashtableResetIterator(&iter);
+            if (samples) asize += (double)elesize / samples * hashtableSize(ht);
        } else if (o->encoding == OBJ_ENCODING_INTSET) {
            asize = sizeof(*o) + zmalloc_size(o->ptr);
        } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
--- a/src/rdb.c
+++ b/src/rdb.c
@ -692,7 +692,7 @@ int rdbSaveObjectType(rio *rdb, robj *o) {
    case OBJ_SET:
        if (o->encoding == OBJ_ENCODING_INTSET)
            return rdbSaveType(rdb, RDB_TYPE_SET_INTSET);
-        else if (o->encoding == OBJ_ENCODING_HT)
+        else if (o->encoding == OBJ_ENCODING_HASHTABLE)
            return rdbSaveType(rdb, RDB_TYPE_SET);
        else if (o->encoding == OBJ_ENCODING_LISTPACK)
            return rdbSaveType(rdb, RDB_TYPE_SET_LISTPACK);
@ -876,26 +876,26 @@ ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid) {
        }
    } else if (o->type == OBJ_SET) {
        /* Save a set value */
-        if (o->encoding == OBJ_ENCODING_HT) {
-            dict *set = o->ptr;
-            dictIterator *di = dictGetIterator(set);
-            dictEntry *de;
+        if (o->encoding == OBJ_ENCODING_HASHTABLE) {
+            hashtable *set = o->ptr;

-            if ((n = rdbSaveLen(rdb, dictSize(set))) == -1) {
-                dictReleaseIterator(di);
+            if ((n = rdbSaveLen(rdb, hashtableSize(set))) == -1) {
                return -1;
            }
            nwritten += n;

-            while ((de = dictNext(di)) != NULL) {
-                sds ele = dictGetKey(de);
+            hashtableIterator iterator;
+            hashtableInitIterator(&iterator, set);
+            void *next;
+            while (hashtableNext(&iterator, &next)) {
+                sds ele = next;
                if ((n = rdbSaveRawString(rdb, (unsigned char *)ele, sdslen(ele))) == -1) {
-                    dictReleaseIterator(di);
+                    hashtableResetIterator(&iterator);
                    return -1;
                }
                nwritten += n;
            }
-            dictReleaseIterator(di);
+            hashtableResetIterator(&iterator);
        } else if (o->encoding == OBJ_ENCODING_INTSET) {
            size_t l = intsetBlobLen((intset *)o->ptr);

@ -1909,8 +1909,8 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
            o = createSetObject();
            /* It's faster to expand the dict to the right size asap in order
             * to avoid rehashing */
-            if (len > DICT_HT_INITIAL_SIZE && dictTryExpand(o->ptr, len) != DICT_OK) {
-                rdbReportCorruptRDB("OOM in dictTryExpand %llu", (unsigned long long)len);
+            if (!hashtableTryExpand(o->ptr, len)) {
+                rdbReportCorruptRDB("OOM in hashtableTryExpand %llu", (unsigned long long)len);
                decrRefCount(o);
                return NULL;
            }
@ -1949,8 +1949,8 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
                     * of many small ones. It's OK since lpSafeToAdd doesn't
                     * care about individual elements, only the total size. */
                    setTypeConvert(o, OBJ_ENCODING_LISTPACK);
-                } else if (setTypeConvertAndExpand(o, OBJ_ENCODING_HT, len, 0) != C_OK) {
-                    rdbReportCorruptRDB("OOM in dictTryExpand %llu", (unsigned long long)len);
+                } else if (setTypeConvertAndExpand(o, OBJ_ENCODING_HASHTABLE, len, 0) != C_OK) {
+                    rdbReportCorruptRDB("OOM in hashtableTryExpand %llu", (unsigned long long)len);
                    sdsfree(sdsele);
                    decrRefCount(o);
                    return NULL;
@ -1970,8 +1970,8 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
                        return NULL;
                    }
                    o->ptr = lpAppend(o->ptr, (unsigned char *)sdsele, elelen);
-                } else if (setTypeConvertAndExpand(o, OBJ_ENCODING_HT, len, 0) != C_OK) {
-                    rdbReportCorruptRDB("OOM in dictTryExpand %llu", (unsigned long long)len);
+                } else if (setTypeConvertAndExpand(o, OBJ_ENCODING_HASHTABLE, len, 0) != C_OK) {
+                    rdbReportCorruptRDB("OOM in hashtableTryExpand %llu", (unsigned long long)len);
                    sdsfree(sdsele);
                    decrRefCount(o);
                    return NULL;
@ -1980,8 +1980,8 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {

            /* This will also be called when the set was just converted
             * to a regular hash table encoded set. */
-            if (o->encoding == OBJ_ENCODING_HT) {
-                if (dictAdd((dict *)o->ptr, sdsele, NULL) != DICT_OK) {
+            if (o->encoding == OBJ_ENCODING_HASHTABLE) {
+                if (!hashtableAdd((hashtable *)o->ptr, sdsele)) {
                    rdbReportCorruptRDB("Duplicate set members detected");
                    decrRefCount(o);
                    sdsfree(sdsele);
@ -2356,7 +2356,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
            }
            o->type = OBJ_SET;
            o->encoding = OBJ_ENCODING_INTSET;
-            if (intsetLen(o->ptr) > server.set_max_intset_entries) setTypeConvert(o, OBJ_ENCODING_HT);
+            if (intsetLen(o->ptr) > server.set_max_intset_entries) setTypeConvert(o, OBJ_ENCODING_HASHTABLE);
            break;
        case RDB_TYPE_SET_LISTPACK:
            if (deep_integrity_validation) server.stat_dump_payload_sanitizations++;
@ -2376,7 +2376,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
                decrRefCount(o);
                goto emptykey;
            }
-            if (setTypeSize(o) > server.set_max_listpack_entries) setTypeConvert(o, OBJ_ENCODING_HT);
+            if (setTypeSize(o) > server.set_max_listpack_entries) setTypeConvert(o, OBJ_ENCODING_HASHTABLE);
            break;
        case RDB_TYPE_ZSET_ZIPLIST: {
            unsigned char *lp = lpNew(encoded_len);
--- a/src/server.c
+++ b/src/server.c
@ -372,6 +372,7 @@ void dictDictDestructor(void *val) {
    dictRelease((dict *)val);
 }

+/* Returns 1 when keys match */
 int dictSdsKeyCompare(const void *key1, const void *key2) {
    int l1, l2;
    l1 = sdslen((sds)key1);
@ -380,6 +381,12 @@ int dictSdsKeyCompare(const void *key1, const void *key2) {
    return memcmp(key1, key2, l1) == 0;
 }

+/* Returns 0 when keys match (equal length and sdscmp() == 0) and 1 otherwise. Note: this is the inverse of dictSdsKeyCompare(), which returns 1 on a match. */
+int hashtableSdsKeyCompare(const void *key1, const void *key2) {
+    const sds sds1 = (const sds)key1, sds2 = (const sds)key2;
+    return sdslen(sds1) != sdslen(sds2) || sdscmp(sds1, sds2); /* Lengths are compared first; sdscmp() is only evaluated when they are equal. */
+}
+
 size_t dictSdsEmbedKey(unsigned char *buf, size_t buf_len, const void *key, uint8_t *key_offset) {
    return sdscopytobuffer(buf, buf_len, (sds)key, key_offset);
 }
@ -542,17 +549,11 @@ dictType objectKeyHeapPointerValueDictType = {
    NULL                  /* allow to expand */
 };

-/* Set dictionary type. Keys are SDS strings, values are not used. */
-dictType setDictType = {
-    dictSdsHash,       /* hash function */
-    NULL,              /* key dup */
-    dictSdsKeyCompare, /* key compare */
-    dictSdsDestructor, /* key destructor */
-    NULL,              /* val destructor */
-    NULL,              /* allow to expand */
-    .no_value = 1,     /* no values in this dict */
-    .keys_are_odd = 1  /* an SDS string is always an odd pointer */
-};
+/* Set hashtable type. Items are SDS strings, hashed with dictSdsHash, compared with hashtableSdsKeyCompare (0 means equal) and destroyed via dictSdsDestructor. */
+hashtableType setHashtableType = {
+    .hashFunction = dictSdsHash,
+    .keyCompare = hashtableSdsKeyCompare,
+    .entryDestructor = dictSdsDestructor};

 /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
 dictType zsetDictType = {
@ -572,11 +573,6 @@ const void *hashtableObjectGetKey(const void *entry) {
    return objectGetKey(entry);
 }

-int hashtableSdsKeyCompare(const void *key1, const void *key2) {
-    const sds sds1 = (const sds)key1, sds2 = (const sds)key2;
-    return sdslen(sds1) != sdslen(sds2) || sdscmp(sds1, sds2);
-}
-
 int hashtableObjKeyCompare(const void *key1, const void *key2) {
    const robj *o1 = key1, *o2 = key2;
    return hashtableSdsKeyCompare(o1->ptr, o2->ptr);
@ -645,6 +641,11 @@ dictType sdsReplyDictType = {
    NULL               /* allow to expand */
 };

+/* Hashtable type without destructor (no entryDestructor is set). NOTE(review): pairs dictSdsCaseHash (case-insensitive, judging by its name) with hashtableSdsKeyCompare, which uses sdscmp() - confirm the intended case sensitivity. */
+hashtableType sdsReplyHashtableType = {
+    .hashFunction = dictSdsCaseHash,
+    .keyCompare = hashtableSdsKeyCompare};
+
 /* Keylist hash table type has unencoded Objects as keys and
 * lists as values. It's used for blocking operations (BLPOP) and to
 * map swapped keys to a list of clients waiting for this keys to be loaded. */
@ -6521,27 +6522,7 @@ void sendChildInfo(childInfoType info_type, size_t keys, char *pname) {
    sendChildInfoGeneric(info_type, keys, -1, pname);
 }

-/* Try to release pages back to the OS directly (bypassing the allocator),
- * in an effort to decrease CoW during fork. For small allocations, we can't
- * release any full page, so in an effort to avoid getting the size of the
- * allocation from the allocator (malloc_size) when we already know it's small,
- * we check the size_hint. If the size is not already known, passing a size_hint
- * of 0 will lead the checking the real size of the allocation.
- * Also please note that the size may be not accurate, so in order to make this
- * solution effective, the judgement for releasing memory pages should not be
- * too strict. */
-void dismissMemory(void *ptr, size_t size_hint) {
-    if (ptr == NULL) return;
-
-    /* madvise(MADV_DONTNEED) can not release pages if the size of memory
-     * is too small, we try to release only for the memory which the size
-     * is more than half of page size. */
-    if (size_hint && size_hint <= server.page_size / 2) return;
-
-    zmadvise_dontneed(ptr);
-}
-
-/* Dismiss big chunks of memory inside a client structure, see dismissMemory() */
+/* Dismiss big chunks of memory inside a client structure, see zmadvise_dontneed() */
 void dismissClientMemory(client *c) {
    /* Dismiss client query buffer and static reply buffer. */
    dismissMemory(c->buf, c->buf_usable_size);
@ -6572,7 +6553,7 @@ void dismissClientMemory(client *c) {
 /* In the child process, we don't need some buffers anymore, and these are
 * likely to change in the parent when there's heavy write traffic.
 * We dismiss them right away, to avoid CoW.
- * see dismissMemory(). */
+ * see zmadvise_dontneed(). */
 void dismissMemoryInChild(void) {
    /* madvise(MADV_DONTNEED) may not work if Transparent Huge Pages is enabled. */
    if (server.thp_enabled) return;
--- a/src/server.h
+++ b/src/server.h
@ -83,6 +83,8 @@ typedef long long ustime_t; /* microsecond time type. */
 #include "connection.h" /* Connection abstraction */
 #include "memory_prefetch.h"

+#define dismissMemory zmadvise_dontneed
+
 #define VALKEYMODULE_CORE 1
 typedef struct serverObject robj;
 #include "valkeymodule.h" /* Modules API defines. */
@ -873,6 +875,7 @@ struct ValkeyModuleDigest {
 #define OBJ_ENCODING_QUICKLIST 9  /* Encoded as linked list of listpacks */
 #define OBJ_ENCODING_STREAM 10    /* Encoded as a radix tree of listpacks */
 #define OBJ_ENCODING_LISTPACK 11  /* Encoded as a listpack */
+#define OBJ_ENCODING_HASHTABLE 12 /* Encoded as a hashtable */

 #define LRU_BITS 24
 #define LRU_CLOCK_MAX ((1 << LRU_BITS) - 1) /* Max value of obj->lru */
@ -2634,7 +2637,7 @@ typedef struct {
    robj *subject;
    int encoding;
    int ii; /* intset iterator */
-    dictIterator *di;
+    hashtableIterator *hashtable_iterator;
    unsigned char *lpi; /* listpack iterator */
 } setTypeIterator;

@ -2665,7 +2668,7 @@ extern struct valkeyServer server;
 extern struct sharedObjectsStruct shared;
 extern dictType objectKeyPointerValueDictType;
 extern dictType objectKeyHeapPointerValueDictType;
-extern dictType setDictType;
+extern hashtableType setHashtableType;
 extern dictType BenchmarkDictType;
 extern dictType zsetDictType;
 extern hashtableType kvstoreKeysHashtableType;
@ -2680,6 +2683,7 @@ extern dictType objToDictDictType;
 extern hashtableType kvstoreChannelHashtableType;
 extern dictType modulesDictType;
 extern dictType sdsReplyDictType;
+extern hashtableType sdsReplyHashtableType;
 extern dictType keylistDictType;
 extern dict *modules;

@ -3374,7 +3378,6 @@ void rejectCommandFormat(client *c, const char *fmt, ...);
 void *activeDefragAlloc(void *ptr);
 robj *activeDefragStringOb(robj *ob);
 void dismissSds(sds s);
-void dismissMemory(void *ptr, size_t size_hint);
 void dismissMemoryInChild(void);

 #define RESTART_SERVER_NONE 0
--- a/src/t_set.c
+++ b/src/t_set.c
@ -28,6 +28,7 @@
 */

 #include "server.h"
+#include "hashtable.h"
 #include "intset.h" /* Compact integer set structure */

 /*-----------------------------------------------------------------------------
@ -50,7 +51,7 @@ robj *setTypeCreate(sds value, size_t size_hint) {
    /* We may oversize the set by using the hint if the hint is not accurate,
     * but we will assume this is acceptable to maximize performance. */
    robj *o = createSetObject();
-    dictExpand(o->ptr, size_hint);
+    hashtableExpand(o->ptr, size_hint);
    return o;
 }

@ -59,7 +60,7 @@ robj *setTypeCreate(sds value, size_t size_hint) {
 void setTypeMaybeConvert(robj *set, size_t size_hint) {
    if ((set->encoding == OBJ_ENCODING_LISTPACK && size_hint > server.set_max_listpack_entries) ||
        (set->encoding == OBJ_ENCODING_INTSET && size_hint > server.set_max_intset_entries)) {
-        setTypeConvertAndExpand(set, OBJ_ENCODING_HT, size_hint, 1);
+        setTypeConvertAndExpand(set, OBJ_ENCODING_HASHTABLE, size_hint, 1);
    }
 }

@ -74,7 +75,7 @@ static size_t intsetMaxEntries(void) {
 /* Converts intset to HT if it contains too many entries. */
 static void maybeConvertIntset(robj *subject) {
    serverAssert(subject->encoding == OBJ_ENCODING_INTSET);
-    if (intsetLen(subject->ptr) > intsetMaxEntries()) setTypeConvert(subject, OBJ_ENCODING_HT);
+    if (intsetLen(subject->ptr) > intsetMaxEntries()) setTypeConvert(subject, OBJ_ENCODING_HASHTABLE);
 }

 /* When you know all set elements are integers, call this to convert the set to
@ -91,7 +92,7 @@ static void maybeConvertToIntset(robj *set) {
    while (setTypeNext(si, &str, &len, &llval) != -1) {
        if (str) {
            /* If the element is returned as a string, we may be able to convert
-             * it to integer. This happens for OBJ_ENCODING_HT. */
+             * it to integer. This happens for OBJ_ENCODING_HASHTABLE. */
            serverAssert(string2ll(str, len, (long long *)&llval));
        }
        uint8_t success = 0;
@ -134,20 +135,21 @@ int setTypeAddAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sd
    }

    serverAssert(str);
-    if (set->encoding == OBJ_ENCODING_HT) {
+    if (set->encoding == OBJ_ENCODING_HASHTABLE) {
        /* Avoid duping the string if it is an sds string. */
        sds sdsval = str_is_sds ? (sds)str : sdsnewlen(str, len);
-        dict *ht = set->ptr;
-        void *position = dictFindPositionForInsert(ht, sdsval, NULL);
-        if (position) {
+        hashtable *ht = set->ptr;
+        hashtablePosition position;
+        if (hashtableFindPositionForInsert(ht, sdsval, &position, NULL)) {
            /* Key doesn't already exist in the set. Add it but dup the key. */
            if (sdsval == str) sdsval = sdsdup(sdsval);
-            dictInsertAtPosition(ht, sdsval, position);
+            hashtableInsertAtPosition(ht, sdsval, &position);
+            return 1;
        } else if (sdsval != str) {
            /* String is already a member. Free our temporary sds copy. */
            sdsfree(sdsval);
+            return 0;
        }
-        return (position != NULL);
    } else if (set->encoding == OBJ_ENCODING_LISTPACK) {
        unsigned char *lp = set->ptr;
        unsigned char *p = lpFirst(lp);
@ -166,8 +168,8 @@ int setTypeAddAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sd
                set->ptr = lp;
            } else {
                /* Size limit is reached. Convert to hashtable and add. */
-                setTypeConvertAndExpand(set, OBJ_ENCODING_HT, lpLength(lp) + 1, 1);
-                serverAssert(dictAdd(set->ptr, sdsnewlen(str, len), NULL) == DICT_OK);
+                setTypeConvertAndExpand(set, OBJ_ENCODING_HASHTABLE, lpLength(lp) + 1, 1);
+                serverAssert(hashtableAdd(set->ptr, sdsnewlen(str, len)));
            }
            return 1;
        }
@ -204,10 +206,10 @@ int setTypeAddAux(robj *set, char *str, size_t len, int64_t llval, int str_is_sd
                set->ptr = lp;
                return 1;
            } else {
-                setTypeConvertAndExpand(set, OBJ_ENCODING_HT, intsetLen(set->ptr) + 1, 1);
+                setTypeConvertAndExpand(set, OBJ_ENCODING_HASHTABLE, intsetLen(set->ptr) + 1, 1);
                /* The set *was* an intset and this value is not integer
-                 * encodable, so dictAdd should always work. */
-                serverAssert(dictAdd(set->ptr, sdsnewlen(str, len), NULL) == DICT_OK);
+                 * encodable, so hashtableAdd should always work. */
+                serverAssert(hashtableAdd(set->ptr, sdsnewlen(str, len)));
                return 1;
            }
        }
@ -242,9 +244,9 @@ int setTypeRemoveAux(robj *setobj, char *str, size_t len, int64_t llval, int str
        str_is_sds = 0;
    }

-    if (setobj->encoding == OBJ_ENCODING_HT) {
+    if (setobj->encoding == OBJ_ENCODING_HASHTABLE) {
        sds sdsval = str_is_sds ? (sds)str : sdsnewlen(str, len);
-        int deleted = (dictDelete(setobj->ptr, sdsval) == DICT_OK);
+        int deleted = hashtableDelete(setobj->ptr, sdsval);
        if (sdsval != str) sdsfree(sdsval); /* free temp copy */
        return deleted;
    } else if (setobj->encoding == OBJ_ENCODING_LISTPACK) {
@ -298,11 +300,11 @@ int setTypeIsMemberAux(robj *set, char *str, size_t len, int64_t llval, int str_
    } else if (set->encoding == OBJ_ENCODING_INTSET) {
        long long llval;
        return string2ll(str, len, &llval) && intsetFind(set->ptr, llval);
-    } else if (set->encoding == OBJ_ENCODING_HT && str_is_sds) {
-        return dictFind(set->ptr, (sds)str) != NULL;
-    } else if (set->encoding == OBJ_ENCODING_HT) {
+    } else if (set->encoding == OBJ_ENCODING_HASHTABLE && str_is_sds) {
+        return hashtableFind(set->ptr, (sds)str, NULL);
+    } else if (set->encoding == OBJ_ENCODING_HASHTABLE) {
        sds sdsval = sdsnewlen(str, len);
-        int result = dictFind(set->ptr, sdsval) != NULL;
+        int result = hashtableFind(set->ptr, sdsval, NULL);
        sdsfree(sdsval);
        return result;
    } else {
@ -314,8 +316,8 @@ setTypeIterator *setTypeInitIterator(robj *subject) {
    setTypeIterator *si = zmalloc(sizeof(setTypeIterator));
    si->subject = subject;
    si->encoding = subject->encoding;
-    if (si->encoding == OBJ_ENCODING_HT) {
-        si->di = dictGetIterator(subject->ptr);
+    if (si->encoding == OBJ_ENCODING_HASHTABLE) {
+        si->hashtable_iterator = hashtableCreateIterator(subject->ptr);
    } else if (si->encoding == OBJ_ENCODING_INTSET) {
        si->ii = 0;
    } else if (si->encoding == OBJ_ENCODING_LISTPACK) {
@ -327,7 +329,7 @@ setTypeIterator *setTypeInitIterator(robj *subject) {
 }

 void setTypeReleaseIterator(setTypeIterator *si) {
-    if (si->encoding == OBJ_ENCODING_HT) dictReleaseIterator(si->di);
+    if (si->encoding == OBJ_ENCODING_HASHTABLE) hashtableReleaseIterator(si->hashtable_iterator);
    zfree(si);
 }

@ -340,7 +342,7 @@ void setTypeReleaseIterator(setTypeIterator *si) {
 * (str and len) or (llele) depending on whether the value is stored as a string
 * or as an integer internally.
 *
- * If OBJ_ENCODING_HT is returned, then str points to an sds string and can be
+ * If OBJ_ENCODING_HASHTABLE is returned, then str points to an sds string and can be
 * used as such. If OBJ_ENCODING_INTSET, then llele is populated and str is
 * pointed to NULL. If OBJ_ENCODING_LISTPACK is returned, the value can be
 * either a string or an integer. If *str is not NULL, then str and len are
@ -353,10 +355,10 @@ void setTypeReleaseIterator(setTypeIterator *si) {
 *
 * When there are no more elements -1 is returned. */
 int setTypeNext(setTypeIterator *si, char **str, size_t *len, int64_t *llele) {
-    if (si->encoding == OBJ_ENCODING_HT) {
-        dictEntry *de = dictNext(si->di);
-        if (de == NULL) return -1;
-        *str = dictGetKey(de);
+    if (si->encoding == OBJ_ENCODING_HASHTABLE) {
+        void *next;
+        if (!hashtableNext(si->hashtable_iterator, &next)) return -1;
+        *str = next;
        *len = sdslen(*str);
        *llele = -123456789; /* Not needed. Defensive. */
    } else if (si->encoding == OBJ_ENCODING_INTSET) {
@ -406,15 +408,16 @@ sds setTypeNextObject(setTypeIterator *si) {
 * object. The return value of the function is the object->encoding
 * field of the object and can be used by the caller to check if the
 * int64_t pointer or the str and len pointers were populated, as for
- * setTypeNext. If OBJ_ENCODING_HT is returned, str is pointed to a
+ * setTypeNext. If OBJ_ENCODING_HASHTABLE is returned, str is pointed to a
 * string which is actually an sds string and it can be used as such.
 *
 * Note that both the str, len and llele pointers should be passed and cannot
 * be NULL. If str is set to NULL, the value is an integer stored in llele. */
 int setTypeRandomElement(robj *setobj, char **str, size_t *len, int64_t *llele) {
-    if (setobj->encoding == OBJ_ENCODING_HT) {
-        dictEntry *de = dictGetFairRandomKey(setobj->ptr);
-        *str = dictGetKey(de);
+    if (setobj->encoding == OBJ_ENCODING_HASHTABLE) {
+        void *entry = NULL;
+        hashtableFairRandomEntry(setobj->ptr, &entry);
+        *str = entry;
        *len = sdslen(*str);
        *llele = -123456789; /* Not needed. Defensive. */
    } else if (setobj->encoding == OBJ_ENCODING_INTSET) {
@ -457,14 +460,14 @@ robj *setTypePopRandom(robj *set) {
            obj = createStringObject(str, len);
        else
            obj = createStringObjectFromLongLong(llele);
-        setTypeRemoveAux(set, str, len, llele, encoding == OBJ_ENCODING_HT);
+        setTypeRemoveAux(set, str, len, llele, encoding == OBJ_ENCODING_HASHTABLE);
    }
    return obj;
 }

 unsigned long setTypeSize(const robj *subject) {
-    if (subject->encoding == OBJ_ENCODING_HT) {
-        return dictSize((const dict *)subject->ptr);
+    if (subject->encoding == OBJ_ENCODING_HASHTABLE) {
+        return hashtableSize((const hashtable *)subject->ptr);
    } else if (subject->encoding == OBJ_ENCODING_INTSET) {
        return intsetLen((const intset *)subject->ptr);
    } else if (subject->encoding == OBJ_ENCODING_LISTPACK) {
@ -474,7 +477,7 @@ unsigned long setTypeSize(const robj *subject) {
    }
 }

-/* Convert the set to specified encoding. The resulting dict (when converting
+/* Convert the set to specified encoding. The resulting hashtable (when converting
 * to a hash table) is presized to hold the number of elements in the original
 * set. */
 void setTypeConvert(robj *setobj, int enc) {
@ -489,28 +492,28 @@ int setTypeConvertAndExpand(robj *setobj, int enc, unsigned long cap, int panic)
    setTypeIterator *si;
    serverAssertWithInfo(NULL, setobj, setobj->type == OBJ_SET && setobj->encoding != enc);

-    if (enc == OBJ_ENCODING_HT) {
-        dict *d = dictCreate(&setDictType);
+    if (enc == OBJ_ENCODING_HASHTABLE) {
+        hashtable *ht = hashtableCreate(&setHashtableType);
        sds element;

-        /* Presize the dict to avoid rehashing */
+        /* Presize the hashtable to avoid rehashing */
        if (panic) {
-            dictExpand(d, cap);
-        } else if (dictTryExpand(d, cap) != DICT_OK) {
-            dictRelease(d);
+            hashtableExpand(ht, cap);
+        } else if (!hashtableTryExpand(ht, cap)) {
+            hashtableRelease(ht);
            return C_ERR;
        }

        /* To add the elements we extract integers and create Objects */
        si = setTypeInitIterator(setobj);
        while ((element = setTypeNextObject(si)) != NULL) {
-            serverAssert(dictAdd(d, element, NULL) == DICT_OK);
+            serverAssert(hashtableAdd(ht, element));
        }
        setTypeReleaseIterator(si);

        freeSetObject(setobj); /* frees the internals but not setobj itself */
-        setobj->encoding = OBJ_ENCODING_HT;
-        setobj->ptr = d;
+        setobj->encoding = OBJ_ENCODING_HASHTABLE;
+        setobj->ptr = ht;
    } else if (enc == OBJ_ENCODING_LISTPACK) {
        /* Preallocate the minimum two bytes per element (enc/value + backlen) */
        size_t estcap = cap * 2;
@ -568,10 +571,10 @@ robj *setTypeDup(robj *o) {
        memcpy(new_lp, lp, sz);
        set = createObject(OBJ_SET, new_lp);
        set->encoding = OBJ_ENCODING_LISTPACK;
-    } else if (o->encoding == OBJ_ENCODING_HT) {
+    } else if (o->encoding == OBJ_ENCODING_HASHTABLE) {
        set = createSetObject();
-        dict *d = o->ptr;
-        dictExpand(set->ptr, dictSize(d));
+        hashtable *ht = o->ptr;
+        hashtableExpand(set->ptr, hashtableSize(ht));
        si = setTypeInitIterator(o);
        char *str;
        size_t len;
@ -891,8 +894,8 @@ void spopWithCountCommand(client *c) {
                if (!newset) {
                    newset = str ? createSetListpackObject() : createIntsetObject();
                }
-                setTypeAddAux(newset, str, len, llele, encoding == OBJ_ENCODING_HT);
-                setTypeRemoveAux(set, str, len, llele, encoding == OBJ_ENCODING_HT);
+                setTypeAddAux(newset, str, len, llele, encoding == OBJ_ENCODING_HASHTABLE);
+                setTypeRemoveAux(set, str, len, llele, encoding == OBJ_ENCODING_HASHTABLE);
            }
        }

@ -1001,8 +1004,6 @@ void srandmemberWithCountCommand(client *c) {
    size_t len;
    int64_t llele;

-    dict *d;
-
    if (getRangeLongFromObjectOrReply(c, c->argv[2], -LONG_MAX, LONG_MAX, &l, NULL) != C_OK) return;
    if (l >= 0) {
        count = (unsigned long)l;
@ -1111,8 +1112,8 @@ void srandmemberWithCountCommand(client *c) {
        return;
    }

-    /* For CASE 3 and CASE 4 we need an auxiliary dictionary. */
-    d = dictCreate(&sdsReplyDictType);
+    /* For CASE 3 and CASE 4 we need an auxiliary hashtable. */
+    hashtable *ht = hashtableCreate(&sdsReplyHashtableType);

    /* CASE 3:
     * The number of elements inside the set is not greater than
@ -1126,29 +1127,25 @@ void srandmemberWithCountCommand(client *c) {
    if (count * SRANDMEMBER_SUB_STRATEGY_MUL > size) {
        setTypeIterator *si;

-        /* Add all the elements into the temporary dictionary. */
+        /* Add all the elements into the temporary hashtable. */
        si = setTypeInitIterator(set);
-        dictExpand(d, size);
+        hashtableExpand(ht, size);
        while (setTypeNext(si, &str, &len, &llele) != -1) {
-            int retval = DICT_ERR;
-
            if (str == NULL) {
-                retval = dictAdd(d, sdsfromlonglong(llele), NULL);
+                serverAssert(hashtableAdd(ht, (void *)sdsfromlonglong(llele)));
            } else {
-                retval = dictAdd(d, sdsnewlen(str, len), NULL);
+                serverAssert(hashtableAdd(ht, (void *)sdsnewlen(str, len)));
            }
-            serverAssert(retval == DICT_OK);
        }
        setTypeReleaseIterator(si);
-        serverAssert(dictSize(d) == size);
+        serverAssert(hashtableSize(ht) == size);

        /* Remove random elements to reach the right count. */
        while (size > count) {
-            dictEntry *de;
-            de = dictGetFairRandomKey(d);
-            dictUnlink(d, dictGetKey(de));
-            sdsfree(dictGetKey(de));
-            dictFreeUnlinkedEntry(d, de);
+            void *element;
+            hashtableFairRandomEntry(ht, &element);
+            hashtableDelete(ht, element);
+            sdsfree((sds)element);
            size--;
        }
    }
@ -1161,7 +1158,7 @@ void srandmemberWithCountCommand(client *c) {
        unsigned long added = 0;
        sds sdsele;

-        dictExpand(d, count);
+        hashtableExpand(ht, count);
        while (added < count) {
            setTypeRandomElement(set, &str, &len, &llele);
            if (str == NULL) {
@ -1172,7 +1169,7 @@ void srandmemberWithCountCommand(client *c) {
            /* Try to add the object to the dictionary. If it already exists
             * free it, otherwise increment the number of objects we have
             * in the result dictionary. */
-            if (dictAdd(d, sdsele, NULL) == DICT_OK)
+            if (hashtableAdd(ht, sdsele))
                added++;
            else
                sdsfree(sdsele);
@ -1181,14 +1178,15 @@ void srandmemberWithCountCommand(client *c) {

    /* CASE 3 & 4: send the result to the user. */
    {
-        dictIterator *di;
-        dictEntry *de;
+        hashtableIterator iter;
+        hashtableInitIterator(&iter, ht);

        addReplyArrayLen(c, count);
-        di = dictGetIterator(d);
-        while ((de = dictNext(di)) != NULL) addReplyBulkSds(c, dictGetKey(de));
-        dictReleaseIterator(di);
-        dictRelease(d);
+        serverAssert(count == hashtableSize(ht));
+        void *element;
+        while (hashtableNext(&iter, &element)) addReplyBulkSds(c, (sds)element);
+        hashtableResetIterator(&iter);
+        hashtableRelease(ht);
    }
 }

@ -1336,7 +1334,7 @@ void sinterGenericCommand(client *c,
    while ((encoding = setTypeNext(si, &str, &len, &intobj)) != -1) {
        for (j = 1; j < setnum; j++) {
            if (sets[j] == sets[0]) continue;
-            if (!setTypeIsMemberAux(sets[j], str, len, intobj, encoding == OBJ_ENCODING_HT)) break;
+            if (!setTypeIsMemberAux(sets[j], str, len, intobj, encoding == OBJ_ENCODING_HASHTABLE)) break;
        }

        /* Only take action when all sets contain the member */
@ -1355,7 +1353,7 @@ void sinterGenericCommand(client *c,
            } else {
                if (str && only_integers) {
                    /* It may be an integer although we got it as a string. */
-                    if (encoding == OBJ_ENCODING_HT && string2ll(str, len, (long long *)&intobj)) {
+                    if (encoding == OBJ_ENCODING_HASHTABLE && string2ll(str, len, (long long *)&intobj)) {
                        if (dstset->encoding == OBJ_ENCODING_LISTPACK || dstset->encoding == OBJ_ENCODING_INTSET) {
                            /* Adding it as an integer is more efficient. */
                            str = NULL;
@ -1365,7 +1363,7 @@ void sinterGenericCommand(client *c,
                        only_integers = 0;
                    }
                }
-                setTypeAddAux(dstset, str, len, intobj, encoding == OBJ_ENCODING_HT);
+                setTypeAddAux(dstset, str, len, intobj, encoding == OBJ_ENCODING_HASHTABLE);
            }
        }
    }
@ -1467,7 +1465,7 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum, robj *dstke
        /* For a SET's encoding, according to the factory method setTypeCreate(), currently have 3 types:
         * 1. OBJ_ENCODING_INTSET
         * 2. OBJ_ENCODING_LISTPACK
-         * 3. OBJ_ENCODING_HT
+         * 3. OBJ_ENCODING_HASHTABLE
         * 'dstset_encoding' is used to determine which kind of encoding to use when initialize 'dstset'.
         *
         * If all sets are all OBJ_ENCODING_INTSET encoding or 'dstkey' is not null, keep 'dstset'
@ -1478,8 +1476,8 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum, robj *dstke
         * the hashtable is more efficient when find and compare than the listpack. The corresponding
         * time complexity are O(1) vs O(n). */
        if (!dstkey && dstset_encoding == OBJ_ENCODING_INTSET &&
-            (setobj->encoding == OBJ_ENCODING_LISTPACK || setobj->encoding == OBJ_ENCODING_HT)) {
-            dstset_encoding = OBJ_ENCODING_HT;
+            (setobj->encoding == OBJ_ENCODING_LISTPACK || setobj->encoding == OBJ_ENCODING_HASHTABLE)) {
+            dstset_encoding = OBJ_ENCODING_HASHTABLE;
        }
        sets[j] = setobj;
        if (j > 0 && sets[0] == sets[j]) {
@ -1536,7 +1534,7 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum, robj *dstke

            si = setTypeInitIterator(sets[j]);
            while ((encoding = setTypeNext(si, &str, &len, &llval)) != -1) {
-                cardinality += setTypeAddAux(dstset, str, len, llval, encoding == OBJ_ENCODING_HT);
+                cardinality += setTypeAddAux(dstset, str, len, llval, encoding == OBJ_ENCODING_HASHTABLE);
            }
            setTypeReleaseIterator(si);
        }
@ -1556,11 +1554,11 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum, robj *dstke
            for (j = 1; j < setnum; j++) {
                if (!sets[j]) continue;        /* no key is an empty set. */
                if (sets[j] == sets[0]) break; /* same set! */
-                if (setTypeIsMemberAux(sets[j], str, len, llval, encoding == OBJ_ENCODING_HT)) break;
+                if (setTypeIsMemberAux(sets[j], str, len, llval, encoding == OBJ_ENCODING_HASHTABLE)) break;
            }
            if (j == setnum) {
                /* There is no other set with this element. Add it. */
-                cardinality += setTypeAddAux(dstset, str, len, llval, encoding == OBJ_ENCODING_HT);
+                cardinality += setTypeAddAux(dstset, str, len, llval, encoding == OBJ_ENCODING_HASHTABLE);
            }
        }
        setTypeReleaseIterator(si);
@ -1578,9 +1576,9 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum, robj *dstke
            si = setTypeInitIterator(sets[j]);
            while ((encoding = setTypeNext(si, &str, &len, &llval)) != -1) {
                if (j == 0) {
-                    cardinality += setTypeAddAux(dstset, str, len, llval, encoding == OBJ_ENCODING_HT);
+                    cardinality += setTypeAddAux(dstset, str, len, llval, encoding == OBJ_ENCODING_HASHTABLE);
                } else {
-                    cardinality -= setTypeRemoveAux(dstset, str, len, llval, encoding == OBJ_ENCODING_HT);
+                    cardinality -= setTypeRemoveAux(dstset, str, len, llval, encoding == OBJ_ENCODING_HASHTABLE);
                }
            }
            setTypeReleaseIterator(si);
--- a/src/t_zset.c
+++ b/src/t_zset.c
@ -2069,9 +2069,7 @@ typedef struct {
                int ii;
            } is;
            struct {
-                dict *dict;
-                dictIterator *di;
-                dictEntry *de;
+                hashtableIterator *iter;
            } ht;
            struct {
                unsigned char *lp;
@ -2126,10 +2124,8 @@ void zuiInitIterator(zsetopsrc *op) {
        if (op->encoding == OBJ_ENCODING_INTSET) {
            it->is.is = op->subject->ptr;
            it->is.ii = 0;
-        } else if (op->encoding == OBJ_ENCODING_HT) {
-            it->ht.dict = op->subject->ptr;
-            it->ht.di = dictGetIterator(op->subject->ptr);
-            it->ht.de = dictNext(it->ht.di);
+        } else if (op->encoding == OBJ_ENCODING_HASHTABLE) {
+            it->ht.iter = hashtableCreateIterator(op->subject->ptr);
        } else if (op->encoding == OBJ_ENCODING_LISTPACK) {
            it->lp.lp = op->subject->ptr;
            it->lp.p = lpFirst(it->lp.lp);
@ -2166,8 +2162,8 @@ void zuiClearIterator(zsetopsrc *op) {
        iterset *it = &op->iter.set;
        if (op->encoding == OBJ_ENCODING_INTSET) {
            UNUSED(it); /* skip */
-        } else if (op->encoding == OBJ_ENCODING_HT) {
-            dictReleaseIterator(it->ht.di);
+        } else if (op->encoding == OBJ_ENCODING_HASHTABLE) {
+            hashtableReleaseIterator(it->ht.iter);
        } else if (op->encoding == OBJ_ENCODING_LISTPACK) {
            UNUSED(it);
        } else {
@ -2235,13 +2231,11 @@ int zuiNext(zsetopsrc *op, zsetopval *val) {

            /* Move to next element. */
            it->is.ii++;
-        } else if (op->encoding == OBJ_ENCODING_HT) {
-            if (it->ht.de == NULL) return 0;
-            val->ele = dictGetKey(it->ht.de);
+        } else if (op->encoding == OBJ_ENCODING_HASHTABLE) {
+            void *next;
+            if (!hashtableNext(it->ht.iter, &next)) return 0;
+            val->ele = next;
            val->score = 1.0;
-
-            /* Move to next element. */
-            it->ht.de = dictNext(it->ht.di);
        } else if (op->encoding == OBJ_ENCODING_LISTPACK) {
            if (it->lp.p == NULL) return 0;
            val->estr = lpGetValue(it->lp.p, &val->elen, &val->ell);
--- a/src/zmalloc.c
+++ b/src/zmalloc.c
@ -451,15 +451,25 @@ void zmalloc_set_oom_handler(void (*oom_handler)(size_t)) {
    zmalloc_oom_handler = oom_handler;
 }

-/* Use 'MADV_DONTNEED' to release memory to operating system quickly.
- * We do that in a fork child process to avoid CoW when the parent modifies
- * these shared pages. */
-void zmadvise_dontneed(void *ptr) {
+/* Try to release pages back to the OS directly using 'MADV_DONTNEED' (bypassing
+ * the allocator) in a fork child process to avoid CoW when the parent modifies
+ * those shared pages. For small allocations, we can't release any full page,
+ * so in an effort to avoid getting the size of the allocation from the
+ * allocator (malloc_size) when we already know it's small, we check the
+ * size_hint and skip hints of at most half a page. If the size is not already
+ * known, passing a size_hint of 0 leads to checking the real allocation size.
+ * Also please note that the size may be not accurate, so in order to make this
+ * solution effective, the judgement for releasing memory pages should not be
+ * too strict. */
+void zmadvise_dontneed(void *ptr, size_t size_hint) {
 #if defined(USE_JEMALLOC) && defined(__linux__)
+    if (ptr == NULL) return;
+
    static size_t page_size = 0;
    if (page_size == 0) page_size = sysconf(_SC_PAGESIZE);
    size_t page_size_mask = page_size - 1;

+    if (size_hint && size_hint <= page_size / 2) return;
    size_t real_size = zmalloc_size(ptr);
    if (real_size < page_size) return;

@ -473,6 +483,7 @@ void zmadvise_dontneed(void *ptr) {
    }
 #else
    (void)(ptr);
+    (void)(size_hint);
 #endif
 }

--- a/src/zmalloc.h
+++ b/src/zmalloc.h
@ -139,7 +139,7 @@ size_t zmalloc_get_smap_bytes_by_field(char *field, long pid);
 size_t zmalloc_get_memory_size(void);
 void zlibc_free(void *ptr);
 void zlibc_trim(void);
-void zmadvise_dontneed(void *ptr);
+void zmadvise_dontneed(void *ptr, size_t size_hint);

 #ifndef HAVE_MALLOC_SIZE
 size_t zmalloc_size(void *ptr);
--- a/tests/unit/info.tcl
+++ b/tests/unit/info.tcl
@ -515,10 +515,10 @@ start_server {tags {"info" "external:skip"}} {
        set info_mem [r info memory]
        set mem_stats [r memory stats]
        assert_equal [getInfoProperty $info_mem mem_overhead_db_hashtable_rehashing] {0}
-        # overhead.db.hashtable.lut = memory overhead of hashset including hashset struct and tables
-        set hashset_overhead [dict get $mem_stats overhead.db.hashtable.lut]
-        if {$hashset_overhead < 140} {
-            # 32-bit version (hashset struct + 1 bucket of 64 bytes)
+        # overhead.db.hashtable.lut = memory overhead of hashtable including hashtable struct and tables
+        set hashtable_overhead [dict get $mem_stats overhead.db.hashtable.lut]
+        if {$hashtable_overhead < 140} {
+            # 32-bit version (hashtable struct + 1 bucket of 64 bytes)
            set bits 32
        } else {
            set bits 64
--- a/tests/unit/type/set.tcl
+++ b/tests/unit/type/set.tcl
@ -33,6 +33,7 @@ start_server {
        assert_equal {0 1} [r smismember myset bla foo]
        assert_equal {0} [r smismember myset bla]
        assert_equal "bar $initelems($type)" [lsort [r smembers myset]]
+        r memory usage myset
    }
    }

@ -51,6 +52,7 @@ start_server {
        assert_equal {0 1} [r smismember myset 18 16]
        assert_equal {0} [r smismember myset 18]
        assert_equal {16 17} [lsort [r smembers myset]]
+        r memory usage myset
    }

    test {SMISMEMBER SMEMBERS SCARD against non set} {
@ -1029,111 +1031,6 @@ foreach type {single multiple single_multiple} {
        r srem $myset {*}$members
    }

-    proc verify_rehashing_completed_key {myset table_size keys} {
-        set htstats [r debug HTSTATS-KEY $myset]
-        assert {![string match {*rehashing target*} $htstats]}
-        return {[string match {*table size: $table_size*number of elements: $keys*} $htstats]}
-    }
-
-    test "SRANDMEMBER with a dict containing long chain" {
-        set origin_save [config_get_set save ""]
-        set origin_max_lp [config_get_set set-max-listpack-entries 0]
-        set origin_save_delay [config_get_set rdb-key-save-delay 2147483647]
-
-        # 1) Create a hash set with 100000 members.
-        set members {}
-        for {set i 0} {$i < 100000} {incr i} {
-            lappend members [format "m:%d" $i]
-        }
-        create_set myset $members
-
-        # 2) Wait for the hash set rehashing to finish.
-        while {[is_rehashing myset]} {
-            r srandmember myset 100
-        }
-
-        # 3) Turn off the rehashing of this set, and remove the members to 500.
-        r bgsave
-        rem_hash_set_top_N myset [expr {[r scard myset] - 500}]
-        assert_equal [r scard myset] 500
-
-        # 4) Kill RDB child process to restart rehashing.
-        set pid1 [get_child_pid 0]
-        catch {exec kill -9 $pid1}
-        waitForBgsave r
-
-        # 5) Let the set hash to start rehashing
-        r spop myset 1
-        assert [is_rehashing myset]
-
-        # 6) Verify that when rdb saving is in progress, rehashing will still be performed (because
-        # the ratio is extreme) by waiting for it to finish during an active bgsave.
-        r bgsave
-
-        while {[is_rehashing myset]} {
-            r srandmember myset 1
-        }
-        if {$::verbose} {
-            puts [r debug HTSTATS-KEY myset full]
-        }
-
-        set pid1 [get_child_pid 0]
-        catch {exec kill -9 $pid1}
-        waitForBgsave r
-
-        # 7) Check that eventually, SRANDMEMBER returns all elements.
-        array set allmyset {}
-        foreach ele [r smembers myset] {
-            set allmyset($ele) 1
-        }
-        unset -nocomplain auxset
-        set iterations 1000
-        while {$iterations != 0} {
-            incr iterations -1
-            set res [r srandmember myset -10]
-            foreach ele $res {
-                set auxset($ele) 1
-            }
-            if {[lsort [array names allmyset]] eq
-                [lsort [array names auxset]]} {
-                break;
-            }
-        }
-        assert {$iterations != 0}
-
-        # 8) Remove the members to 30 in order to calculate the value of Chi-Square Distribution,
-        #    otherwise we would need more iterations.
-        rem_hash_set_top_N myset [expr {[r scard myset] - 30}]
-        assert_equal [r scard myset] 30
-        
-        # Hash set rehashing would be completed while removing members from the `myset`
-        # We also check the size and members in the hash table.
-        verify_rehashing_completed_key myset 64 30
-
-        # Now that we have a hash set with only one long chain bucket.
-        set htstats [r debug HTSTATS-KEY myset full]
-        assert {[regexp {different slots: ([0-9]+)} $htstats - different_slots]}
-        assert {[regexp {max chain length: ([0-9]+)} $htstats - max_chain_length]}
-        assert {$different_slots == 1 && $max_chain_length == 30}
-
-        # 9) Use positive count (PATH 4) to get 10 elements (out of 30) each time.
-        unset -nocomplain allkey
-        set iterations 1000
-        while {$iterations != 0} {
-            incr iterations -1
-            set res [r srandmember myset 10]
-            foreach ele $res {
-                lappend allkey $ele
-            }
-        }
-        # validate even distribution of random sampling (df = 29, 73 means 0.00001 probability)
-        assert_lessthan [chi_square_value $allkey] 73
-
-        r config set save $origin_save
-        r config set set-max-listpack-entries $origin_max_lp
-        r config set rdb-key-save-delay $origin_save_delay
-    } {OK} {needs:debug slow}
-
    proc setup_move {} {
        r del myset3{t} myset4{t}
        create_set myset1{t} {1 a b}