Hashtable implementation including unit tests

A cache-line aware hash table with a user-defined key-value entry type, supporting incremental rehashing, scan, iterator, random sampling, incremental lookup and more... Signed-off-by: Viktor Söderqvist <viktor.soderqvist@est.tech>
2024-11-18 10:29:49 +01:00 · 2024-11-18 10:29:49 +01:00 · c8ee5c2c46
commit c8ee5c2c46
parent b4c2a1804a
6 changed files with 3194 additions and 1 deletions
--- a/cmake/Modules/SourceFiles.cmake
+++ b/cmake/Modules/SourceFiles.cmake
@ -10,6 +10,7 @@ set(VALKEY_SERVER_SRCS
    ${CMAKE_SOURCE_DIR}/src/ae.c
    ${CMAKE_SOURCE_DIR}/src/anet.c
    ${CMAKE_SOURCE_DIR}/src/dict.c
+    ${CMAKE_SOURCE_DIR}/src/hashtable.c
    ${CMAKE_SOURCE_DIR}/src/kvstore.c
    ${CMAKE_SOURCE_DIR}/src/sds.c
    ${CMAKE_SOURCE_DIR}/src/zmalloc.c
--- a/src/Makefile
+++ b/src/Makefile
@ -411,7 +411,7 @@ endif
 ENGINE_NAME=valkey
 SERVER_NAME=$(ENGINE_NAME)-server$(PROG_SUFFIX)
 ENGINE_SENTINEL_NAME=$(ENGINE_NAME)-sentinel$(PROG_SUFFIX)
-ENGINE_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o memory_prefetch.o io_threads.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o allocator_defrag.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o rdma.o
+ENGINE_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o hashtable.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o memory_prefetch.o io_threads.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o allocator_defrag.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o rdma.o
 ENGINE_CLI_NAME=$(ENGINE_NAME)-cli$(PROG_SUFFIX)
 ENGINE_CLI_OBJ=anet.o adlist.o dict.o valkey-cli.o zmalloc.o release.o ae.o serverassert.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o
 ENGINE_BENCHMARK_NAME=$(ENGINE_NAME)-benchmark$(PROG_SUFFIX)
--- a/src/hashtable.c
+++ b/src/hashtable.c
--- a/src/hashtable.h
+++ b/src/hashtable.h
@ -0,0 +1,167 @@
+#ifndef HASHTABLE_H
+#define HASHTABLE_H
+
+/* Hash table implementation.
+ *
+ * This is a cache-friendly hash table implementation. For details about the
+ * implementation and documentation of functions, see comments in hashtable.c.
+ *
+ * The entries in a hashtable are of a user-defined type, but an entry needs to
+ * contain a key. It can represent a key-value entry, or it can be just a key,
+ * if set semantics are desired.
+ *
+ * Terminology:
+ *
+ * hashtable
+ *         An instance of the data structure.
+ *
+ * entry
+ *         An entry in the hashtable. This may be of the same type as the key,
+ *         or a struct containing a key and other fields.
+ *
+ * key
+ *         The part of the entry used for looking the entry up in the hashtable.
+ *         May be the entire entry or a struct field within the entry.
+ *
+ * type
+ *         A struct containing callbacks, such as hash function, key comparison
+ *         function and how to get the key in an entry.
+ */
+
+#include "fmacros.h"
+#include <stddef.h>
+#include <stdint.h>
+#include <unistd.h>
+
+/* --- Opaque types --- */
+
+typedef struct hashtable hashtable;
+typedef struct hashtableStats hashtableStats;
+
+/* Opaque types that can be stack allocated. They are declared as arrays of
+ * uint64_t only to reserve storage of a known size; callers pass them by
+ * address to the functions declared below and are not meant to access the
+ * individual words.
+ * NOTE(review): the real layouts are presumably private to hashtable.c and
+ * must fit within these sizes -- confirm. */
+typedef uint64_t hashtableIterator[5];
+typedef uint64_t hashtablePosition[2];
+typedef uint64_t hashtableIncrementalFindState[5];
+
+/* --- Non-opaque types --- */
+
+/* The hashtableType is a set of callbacks for a hashtable. All callbacks are
+ * optional. With all callbacks omitted, the hashtable is effectively a set of
+ * pointer-sized integers. A type is given to hashtableCreate() and can be
+ * retrieved from a table with hashtableGetType(). */
+typedef struct {
+    /* If the type of an entry is not the same as the type of a key used for
+     * lookup, this callback needs to return the key within an entry. */
+    const void *(*entryGetKey)(const void *entry);
+    /* Hash function. Defaults to hashing the bits in the pointer, effectively
+     * treating the pointer as an integer. It receives a key, not an entry. */
+    uint64_t (*hashFunction)(const void *key);
+    /* Compare function, returns 0 if the keys are equal. Defaults to just
+     * comparing the pointers for equality. */
+    int (*keyCompare)(const void *key1, const void *key2);
+    /* Callback to free an entry when it's overwritten or deleted.
+     * Optional.
+     * NOTE(review): presumably not invoked for entries handed back to the
+     * caller by the pop functions -- confirm in hashtable.c. */
+    void (*entryDestructor)(void *entry);
+    /* Callback to control when resizing should be allowed.
+     * NOTE(review): the meaning of the arguments and of the return value is
+     * not stated here; presumably non-zero allows the resize, moreMem is the
+     * additional memory it needs and usedRatio the current fill ratio --
+     * confirm in hashtable.c. */
+    int (*resizeAllowed)(size_t moreMem, double usedRatio);
+    /* Invoked at the start of rehashing. */
+    void (*rehashingStarted)(hashtable *ht);
+    /* Invoked at the end of rehashing. */
+    void (*rehashingCompleted)(hashtable *ht);
+    /* Track memory usage using this callback. It is called with a positive
+     * number when the hashtable allocates some memory and with a negative number
+     * when freeing. */
+    void (*trackMemUsage)(hashtable *ht, ssize_t delta);
+    /* Allow a hashtable to carry extra caller-defined metadata. The extra memory
+     * is initialized to 0. The callback returns the number of bytes wanted.
+     * NOTE(review): the memory is presumably the one returned by
+     * hashtableMetadata() -- confirm. */
+    size_t (*getMetadataSize)(void);
+    /* Flag to disable incremental rehashing. This is the only member that is
+     * not a callback; leaving it zero (e.g. with a {0} initializer) does not
+     * disable incremental rehashing. */
+    unsigned instant_rehashing : 1;
+} hashtableType;
+
+typedef enum { /* Values accepted by hashtableSetResizePolicy(). */
+    HASHTABLE_RESIZE_ALLOW = 0, /* The zero value. */
+    HASHTABLE_RESIZE_AVOID,     /* NOTE(review): presumably resizes only when
+                                 * strictly necessary -- confirm in
+                                 * hashtable.c. */
+    HASHTABLE_RESIZE_FORBID,    /* NOTE(review): presumably never resizes --
+                                 * confirm in hashtable.c. */
+} hashtableResizePolicy;
+
+typedef void (*hashtableScanFunction)(void *privdata, void *entry); /* Callback type taken by hashtableScan() and
+                                                                     * hashtableScanDefrag(); 'privdata' is the
+                                                                     * pointer given to those functions. */
+
+/* Constants */
+#define HASHTABLE_BUCKET_SIZE 64 /* bytes, the most common cache line size */
+
+/* Scan flags, for the 'flags' argument of hashtableScanDefrag().
+ * NOTE(review): EMIT_REF presumably makes the scan callback receive a
+ * reference to the entry rather than the entry itself -- confirm in
+ * hashtable.c. */
+#define HASHTABLE_SCAN_EMIT_REF (1 << 0)
+
+/* --- Prototypes --- */
+
+/* Hash function (global seed) */
+void hashtableSetHashFunctionSeed(const uint8_t *seed);
+uint8_t *hashtableGetHashFunctionSeed(void);
+uint64_t hashtableGenHashFunction(const char *buf, size_t len);
+uint64_t hashtableGenCaseHashFunction(const char *buf, size_t len);
+
+/* Global resize policy */
+void hashtableSetResizePolicy(hashtableResizePolicy policy);
+
+/* Hashtable instance */
+hashtable *hashtableCreate(hashtableType *type);
+void hashtableRelease(hashtable *ht);
+void hashtableEmpty(hashtable *ht, void(callback)(hashtable *));
+hashtableType *hashtableGetType(hashtable *ht);
+void *hashtableMetadata(hashtable *ht);
+size_t hashtableSize(hashtable *ht);
+size_t hashtableBuckets(hashtable *ht);
+size_t hashtableChainedBuckets(hashtable *ht, int table);
+size_t hashtableMemUsage(hashtable *ht);
+void hashtablePauseAutoShrink(hashtable *ht);
+void hashtableResumeAutoShrink(hashtable *ht);
+int hashtableIsRehashing(hashtable *ht);
+int hashtableIsRehashingPaused(hashtable *ht);
+void hashtableRehashingInfo(hashtable *ht, size_t *from_size, size_t *to_size);
+int hashtableRehashMicroseconds(hashtable *ht, uint64_t us);
+int hashtableExpand(hashtable *ht, size_t size);
+int hashtableTryExpand(hashtable *ht, size_t size);
+int hashtableExpandIfNeeded(hashtable *ht);
+int hashtableShrinkIfNeeded(hashtable *ht);
+hashtable *hashtableDefragTables(hashtable *ht, void *(*defragfn)(void *));
+
+/* Entries
+ *
+ * The functions are documented in hashtable.c.
+ * NOTE(review): the unit tests rely on the following return conventions,
+ * confirm them against hashtable.c: hashtableAdd, hashtableDelete,
+ * hashtablePop and hashtableReplaceReallocatedEntry return non-zero on
+ * success; hashtableFind and hashtableIncrementalFindGetResult return 1 when
+ * the key is found; hashtableFindPositionForInsert returns 1 when the key is
+ * not yet present; hashtableTwoPhasePopFindRef returns a non-NULL reference
+ * when the key is found; hashtableIncrementalFindStep returns 0 when there is
+ * nothing more to do.
+ * NOTE(review): hashtableFindPositionForInsert is the only lookup taking a
+ * non-const key; check whether that is intentional. */
+int hashtableFind(hashtable *ht, const void *key, void **found);
+void **hashtableFindRef(hashtable *ht, const void *key);
+int hashtableAdd(hashtable *ht, void *entry);
+int hashtableAddOrFind(hashtable *ht, void *entry, void **existing);
+int hashtableFindPositionForInsert(hashtable *ht, void *key, hashtablePosition *position, void **existing);
+void hashtableInsertAtPosition(hashtable *ht, void *entry, hashtablePosition *position);
+int hashtablePop(hashtable *ht, const void *key, void **popped);
+int hashtableDelete(hashtable *ht, const void *key);
+void **hashtableTwoPhasePopFindRef(hashtable *ht, const void *key, hashtablePosition *position);
+void hashtableTwoPhasePopDelete(hashtable *ht, hashtablePosition *position);
+int hashtableReplaceReallocatedEntry(hashtable *ht, const void *old_entry, void *new_entry);
+void hashtableIncrementalFindInit(hashtableIncrementalFindState *state, hashtable *ht, const void *key);
+int hashtableIncrementalFindStep(hashtableIncrementalFindState *state);
+int hashtableIncrementalFindGetResult(hashtableIncrementalFindState *state, void **found);
+
+/* Iteration & scan */
+size_t hashtableScan(hashtable *ht, size_t cursor, hashtableScanFunction fn, void *privdata);
+size_t hashtableScanDefrag(hashtable *ht, size_t cursor, hashtableScanFunction fn, void *privdata, void *(*defragfn)(void *), int flags);
+void hashtableInitIterator(hashtableIterator *iter, hashtable *ht);
+void hashtableInitSafeIterator(hashtableIterator *iter, hashtable *ht);
+void hashtableResetIterator(hashtableIterator *iter);
+hashtableIterator *hashtableCreateIterator(hashtable *ht);
+hashtableIterator *hashtableCreateSafeIterator(hashtable *ht);
+void hashtableReleaseIterator(hashtableIterator *iter);
+int hashtableNext(hashtableIterator *iter, void **elemptr);
+
+/* Random entries */
+int hashtableRandomEntry(hashtable *ht, void **found);
+int hashtableFairRandomEntry(hashtable *ht, void **found);
+unsigned hashtableSampleEntries(hashtable *ht, void **dst, unsigned count);
+
+/* Debug & stats */
+
+void hashtableFreeStats(hashtableStats *stats);
+void hashtableCombineStats(hashtableStats *from, hashtableStats *into);
+hashtableStats *hashtableGetStatsHt(hashtable *ht, int htidx, int full);
+size_t hashtableGetStatsMsg(char *buf, size_t bufsize, hashtableStats *stats, int full);
+void hashtableGetStats(char *buf, size_t bufsize, hashtable *ht, int full);
+
+#endif /* HASHTABLE_H */
--- a/src/unit/test_files.h
+++ b/src/unit/test_files.h
@ -19,6 +19,22 @@ int test_dictDisableResizeReduceTo3(int argc, char **argv, int flags);
 int test_dictDeleteOneKeyTriggerResizeAgain(int argc, char **argv, int flags);
 int test_dictBenchmark(int argc, char **argv, int flags);
 int test_endianconv(int argc, char *argv[], int flags);
+int test_cursor(int argc, char **argv, int flags);
+int test_set_hash_function_seed(int argc, char **argv, int flags);
+int test_add_find_delete(int argc, char **argv, int flags);
+int test_add_find_delete_avoid_resize(int argc, char **argv, int flags);
+int test_instant_rehashing(int argc, char **argv, int flags);
+int test_bucket_chain_length(int argc, char **argv, int flags);
+int test_two_phase_insert_and_pop(int argc, char **argv, int flags);
+int test_replace_reallocated_entry(int argc, char **argv, int flags);
+int test_incremental_find(int argc, char **argv, int flags);
+int test_scan(int argc, char **argv, int flags);
+int test_iterator(int argc, char **argv, int flags);
+int test_safe_iterator(int argc, char **argv, int flags);
+int test_compact_bucket_chain(int argc, char **argv, int flags);
+int test_random_entry(int argc, char **argv, int flags);
+int test_random_entry_with_long_chain(int argc, char **argv, int flags);
+int test_all_memory_freed(int argc, char **argv, int flags);
 int test_intsetValueEncodings(int argc, char **argv, int flags);
 int test_intsetBasicAdding(int argc, char **argv, int flags);
 int test_intsetLargeNumberRandomAdd(int argc, char **argv, int flags);
@ -215,6 +231,7 @@ unitTest __test_crc64_c[] = {{"test_crc64", test_crc64}, {NULL, NULL}};
 unitTest __test_crc64combine_c[] = {{"test_crc64combine", test_crc64combine}, {NULL, NULL}};
 unitTest __test_dict_c[] = {{"test_dictCreate", test_dictCreate}, {"test_dictAdd16Keys", test_dictAdd16Keys}, {"test_dictDisableResize", test_dictDisableResize}, {"test_dictAddOneKeyTriggerResize", test_dictAddOneKeyTriggerResize}, {"test_dictDeleteKeys", test_dictDeleteKeys}, {"test_dictDeleteOneKeyTriggerResize", test_dictDeleteOneKeyTriggerResize}, {"test_dictEmptyDirAdd128Keys", test_dictEmptyDirAdd128Keys}, {"test_dictDisableResizeReduceTo3", test_dictDisableResizeReduceTo3}, {"test_dictDeleteOneKeyTriggerResizeAgain", test_dictDeleteOneKeyTriggerResizeAgain}, {"test_dictBenchmark", test_dictBenchmark}, {NULL, NULL}};
 unitTest __test_endianconv_c[] = {{"test_endianconv", test_endianconv}, {NULL, NULL}};
+unitTest __test_hashtable_c[] = {{"test_cursor", test_cursor}, {"test_set_hash_function_seed", test_set_hash_function_seed}, {"test_add_find_delete", test_add_find_delete}, {"test_add_find_delete_avoid_resize", test_add_find_delete_avoid_resize}, {"test_instant_rehashing", test_instant_rehashing}, {"test_bucket_chain_length", test_bucket_chain_length}, {"test_two_phase_insert_and_pop", test_two_phase_insert_and_pop}, {"test_replace_reallocated_entry", test_replace_reallocated_entry}, {"test_incremental_find", test_incremental_find}, {"test_scan", test_scan}, {"test_iterator", test_iterator}, {"test_safe_iterator", test_safe_iterator}, {"test_compact_bucket_chain", test_compact_bucket_chain}, {"test_random_entry", test_random_entry}, {"test_random_entry_with_long_chain", test_random_entry_with_long_chain}, {"test_all_memory_freed", test_all_memory_freed}, {NULL, NULL}};
 unitTest __test_intset_c[] = {{"test_intsetValueEncodings", test_intsetValueEncodings}, {"test_intsetBasicAdding", test_intsetBasicAdding}, {"test_intsetLargeNumberRandomAdd", test_intsetLargeNumberRandomAdd}, {"test_intsetUpgradeFromint16Toint32", test_intsetUpgradeFromint16Toint32}, {"test_intsetUpgradeFromint16Toint64", test_intsetUpgradeFromint16Toint64}, {"test_intsetUpgradeFromint32Toint64", test_intsetUpgradeFromint32Toint64}, {"test_intsetStressLookups", test_intsetStressLookups}, {"test_intsetStressAddDelete", test_intsetStressAddDelete}, {NULL, NULL}};
 unitTest __test_kvstore_c[] = {{"test_kvstoreAdd16Keys", test_kvstoreAdd16Keys}, {"test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyDict", test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyDict}, {"test_kvstoreIteratorRemoveAllKeysDeleteEmptyDict", test_kvstoreIteratorRemoveAllKeysDeleteEmptyDict}, {"test_kvstoreDictIteratorRemoveAllKeysNoDeleteEmptyDict", test_kvstoreDictIteratorRemoveAllKeysNoDeleteEmptyDict}, {"test_kvstoreDictIteratorRemoveAllKeysDeleteEmptyDict", test_kvstoreDictIteratorRemoveAllKeysDeleteEmptyDict}, {NULL, NULL}};
 unitTest __test_listpack_c[] = {{"test_listpackCreateIntList", test_listpackCreateIntList}, {"test_listpackCreateList", test_listpackCreateList}, {"test_listpackLpPrepend", test_listpackLpPrepend}, {"test_listpackLpPrependInteger", test_listpackLpPrependInteger}, {"test_listpackGetELementAtIndex", test_listpackGetELementAtIndex}, {"test_listpackPop", test_listpackPop}, {"test_listpackGetELementAtIndex2", test_listpackGetELementAtIndex2}, {"test_listpackIterate0toEnd", test_listpackIterate0toEnd}, {"test_listpackIterate1toEnd", test_listpackIterate1toEnd}, {"test_listpackIterate2toEnd", test_listpackIterate2toEnd}, {"test_listpackIterateBackToFront", test_listpackIterateBackToFront}, {"test_listpackIterateBackToFrontWithDelete", test_listpackIterateBackToFrontWithDelete}, {"test_listpackDeleteWhenNumIsMinusOne", test_listpackDeleteWhenNumIsMinusOne}, {"test_listpackDeleteWithNegativeIndex", test_listpackDeleteWithNegativeIndex}, {"test_listpackDeleteInclusiveRange0_0", test_listpackDeleteInclusiveRange0_0}, {"test_listpackDeleteInclusiveRange0_1", test_listpackDeleteInclusiveRange0_1}, {"test_listpackDeleteInclusiveRange1_2", test_listpackDeleteInclusiveRange1_2}, {"test_listpackDeleteWitStartIndexOutOfRange", test_listpackDeleteWitStartIndexOutOfRange}, {"test_listpackDeleteWitNumOverflow", test_listpackDeleteWitNumOverflow}, {"test_listpackBatchDelete", test_listpackBatchDelete}, {"test_listpackDeleteFooWhileIterating", test_listpackDeleteFooWhileIterating}, {"test_listpackReplaceWithSameSize", test_listpackReplaceWithSameSize}, {"test_listpackReplaceWithDifferentSize", test_listpackReplaceWithDifferentSize}, {"test_listpackRegressionGt255Bytes", test_listpackRegressionGt255Bytes}, {"test_listpackCreateLongListAndCheckIndices", test_listpackCreateLongListAndCheckIndices}, {"test_listpackCompareStrsWithLpEntries", test_listpackCompareStrsWithLpEntries}, {"test_listpackLpMergeEmptyLps", test_listpackLpMergeEmptyLps}, {"test_listpackLpMergeLp1Larger", 
test_listpackLpMergeLp1Larger}, {"test_listpackLpMergeLp2Larger", test_listpackLpMergeLp2Larger}, {"test_listpackLpNextRandom", test_listpackLpNextRandom}, {"test_listpackLpNextRandomCC", test_listpackLpNextRandomCC}, {"test_listpackRandomPairWithOneElement", test_listpackRandomPairWithOneElement}, {"test_listpackRandomPairWithManyElements", test_listpackRandomPairWithManyElements}, {"test_listpackRandomPairsWithOneElement", test_listpackRandomPairsWithOneElement}, {"test_listpackRandomPairsWithManyElements", test_listpackRandomPairsWithManyElements}, {"test_listpackRandomPairsUniqueWithOneElement", test_listpackRandomPairsUniqueWithOneElement}, {"test_listpackRandomPairsUniqueWithManyElements", test_listpackRandomPairsUniqueWithManyElements}, {"test_listpackPushVariousEncodings", test_listpackPushVariousEncodings}, {"test_listpackLpFind", test_listpackLpFind}, {"test_listpackLpValidateIntegrity", test_listpackLpValidateIntegrity}, {"test_listpackNumberOfElementsExceedsLP_HDR_NUMELE_UNKNOWN", test_listpackNumberOfElementsExceedsLP_HDR_NUMELE_UNKNOWN}, {"test_listpackStressWithRandom", test_listpackStressWithRandom}, {"test_listpackSTressWithVariableSize", test_listpackSTressWithVariableSize}, {"test_listpackBenchmarkInit", test_listpackBenchmarkInit}, {"test_listpackBenchmarkLpAppend", test_listpackBenchmarkLpAppend}, {"test_listpackBenchmarkLpFindString", test_listpackBenchmarkLpFindString}, {"test_listpackBenchmarkLpFindNumber", test_listpackBenchmarkLpFindNumber}, {"test_listpackBenchmarkLpSeek", test_listpackBenchmarkLpSeek}, {"test_listpackBenchmarkLpValidateIntegrity", test_listpackBenchmarkLpValidateIntegrity}, {"test_listpackBenchmarkLpCompareWithString", test_listpackBenchmarkLpCompareWithString}, {"test_listpackBenchmarkLpCompareWithNumber", test_listpackBenchmarkLpCompareWithNumber}, {"test_listpackBenchmarkFree", test_listpackBenchmarkFree}, {NULL, NULL}};
@ -237,6 +254,7 @@ struct unitTestSuite {
    {"test_crc64combine.c", __test_crc64combine_c},
    {"test_dict.c", __test_dict_c},
    {"test_endianconv.c", __test_endianconv_c},
+    {"test_hashtable.c", __test_hashtable_c},
    {"test_intset.c", __test_intset_c},
    {"test_kvstore.c", __test_kvstore_c},
    {"test_listpack.c", __test_listpack_c},
--- a/src/unit/test_hashtable.c
+++ b/src/unit/test_hashtable.c
@ -0,0 +1,869 @@
+#include "../hashtable.h"
+#include "test_help.h"
+#include "../mt19937-64.h"
+#include "../zmalloc.h"
+#include "../monotonic.h"
+
+#include <stdio.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+/* Global variable to test the memory tracking callback. It is updated only by
+ * trackmemusage() and compared with hashtableMemUsage() in the tests. Having
+ * static storage duration, it starts out as zero. */
+static size_t mem_usage;
+
+/* From util.c: getRandomBytes, used by randomSeed() to obtain a random seed.
+ * The prototype is repeated here instead of including a header for it. */
+void getRandomBytes(unsigned char *p, size_t len);
+
+/* Seeds the two pseudo-random generators available to the tests, random()
+ * and the 64-bit Mersenne Twister, from the same random bytes obtained with
+ * getRandomBytes(). Note that the hash function seed of the hashtable is not
+ * set here. */
+static void randomSeed(void) {
+    unsigned long long entropy;
+    getRandomBytes((void *)&entropy, sizeof(entropy));
+    srandom((unsigned)entropy);
+    init_genrand64(entropy);
+}
+
+/* An entry holding a string key and a string value in one allocation. The key
+ * is stored first in data[], immediately followed by the value; see getkey()
+ * and getval(). */
+typedef struct {
+    unsigned int keysize; /* Sizes, including null-terminator */
+    unsigned int valsize;
+    char data[]; /* key and value */
+} keyval;
+
+/* Allocates a keyval entry containing copies of the strings 'key' and 'val',
+ * both including their terminators. The entry is a single malloc() block and
+ * is released with free() (see freekeyval()).
+ *
+ * Never returns NULL: if the sizes don't fit the unsigned int size fields or
+ * the allocation fails, a message is printed and the test program is aborted,
+ * instead of writing through a NULL pointer or storing truncated sizes. */
+static keyval *create_keyval(const char *key, const char *val) {
+    size_t keysize = strlen(key) + 1;
+    size_t valsize = strlen(val) + 1;
+    keyval *e = NULL;
+    if (keysize <= UINT_MAX && valsize <= UINT_MAX) {
+        e = malloc(sizeof(*e) + keysize + valsize);
+    }
+    if (e == NULL) {
+        fprintf(stderr, "create_keyval: unable to allocate entry\n");
+        abort();
+    }
+    e->keysize = (unsigned int)keysize;
+    e->valsize = (unsigned int)valsize;
+    /* Layout of data[]: key bytes first, value bytes right after. */
+    memcpy(e->data, key, keysize);
+    memcpy(e->data + keysize, val, valsize);
+    return e;
+}
+
+/* entryGetKey callback: the key is the string stored at the very start of the
+ * entry's data[] area. */
+static const void *getkey(const void *entry) {
+    return ((const keyval *)entry)->data;
+}
+
+/* Returns the value string of an entry. It is stored in data[] right after
+ * the key, i.e. keysize bytes in (keysize counts the key's terminator). */
+static const void *getval(const void *entry) {
+    const keyval *kv = entry;
+    const char *value = &kv->data[kv->keysize];
+    return value;
+}
+
+/* hashFunction callback: hashes the characters of the NUL-terminated key
+ * string, terminator excluded, using hashtableGenHashFunction(). */
+static uint64_t hashfunc(const void *key) {
+    const char *str = key;
+    size_t len = strlen(str);
+    return hashtableGenHashFunction(str, len);
+}
+
+/* keyCompare callback: the keys are NUL-terminated strings, so the result of
+ * strcmp() is returned as is; it is 0 exactly when the keys are equal. */
+static int keycmp(const void *key1, const void *key2) {
+    const char *lhs = key1;
+    const char *rhs = key2;
+    return strcmp(lhs, rhs);
+}
+/* entryDestructor callback: releases an entry with free(), matching the
+ * malloc() in create_keyval(). Note that the parameter name shadows the
+ * 'keyval' typedef within this function. */
+static void freekeyval(void *keyval) {
+    free(keyval);
+}
+
+/* trackMemUsage callback: accumulates the reported deltas in mem_usage. A
+ * negative delta is converted to size_t for the addition; the unsigned
+ * wrap-around makes the sum decrease by the intended amount. */
+static void trackmemusage(hashtable *ht, ssize_t delta) {
+    UNUSED(ht);
+    mem_usage = mem_usage + (size_t)delta;
+}
+
+/* Hashtable type used for some of the tests: keyval entries looked up by
+ * their string key, freed with free() and with memory tracking enabled. The
+ * members not listed here (resizeAllowed, the rehashing callbacks,
+ * getMetadataSize and instant_rehashing) are zero-initialized. */
+static hashtableType keyval_type = {
+    .entryGetKey = getkey,
+    .hashFunction = hashfunc,
+    .keyCompare = keycmp,
+    .entryDestructor = freekeyval,
+    .trackMemUsage = trackmemusage,
+};
+
+/* Progress callback handed to hashtableEmpty() by the tests. All it does is
+ * count its invocations in empty_callback_call_counter. */
+static long empty_callback_call_counter;
+void emptyCallback(hashtable *ht) {
+    UNUSED(ht);
+    empty_callback_call_counter += 1;
+}
+
+/* Prototypes for debugging. These functions are not declared in hashtable.h,
+ * so they are declared by hand here.
+ * NOTE(review): presumably they are defined with external linkage in
+ * hashtable.c -- confirm. */
+void hashtableDump(hashtable *ht);
+void hashtableHistogram(hashtable *ht);
+int hashtableLongestBucketChain(hashtable *ht);
+size_t nextCursor(size_t v, size_t mask);
+/* Checks nextCursor() against hand-computed values. The expected results
+ * correspond to adding one to the cursor in bit-reversed order within the
+ * 16-bit mask, wrapping around to 0 after the last position. */
+int test_cursor(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+    TEST_ASSERT(nextCursor(0x0000, 0xffff) == 0x8000);
+    TEST_ASSERT(nextCursor(0x8000, 0xffff) == 0x4000);
+    TEST_ASSERT(nextCursor(0x4001, 0xffff) == 0xc001);
+    TEST_ASSERT(nextCursor(0xffff, 0xffff) == 0x0000);
+    return 0;
+}
+/* Despite its name, this test only calls randomSeed(), which seeds the
+ * pseudo-random generators. It makes no assertions and does not call
+ * hashtableSetHashFunctionSeed(). */
+int test_set_hash_function_seed(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+    randomSeed();
+    return 0;
+}
+/* Shared body of the add/find/delete tests. Adds 'count' keyval entries (keys
+ * are the decimal numbers 0..count-1), looks all of them up, removes the first
+ * half using a mix of hashtablePop() and hashtableDelete(), empties the table
+ * and releases it, checking the memory tracking callback along the way.
+ * Returns 0 on success. Requires mem_usage to be 0 on entry. */
+static int add_find_delete_test_helper(int flags) {
+    int count = (flags & UNIT_TEST_ACCURATE) ? 1000000 : 200;
+    TEST_ASSERT(mem_usage == 0);
+    hashtable *ht = hashtableCreate(&keyval_type);
+    int j;
+
+    /* Add. The value of key j is the decimal number count - j + 42. */
+    for (j = 0; j < count; j++) {
+        char key[32], val[32];
+        snprintf(key, sizeof(key), "%d", j);
+        snprintf(val, sizeof(val), "%d", count - j + 42);
+        keyval *e = create_keyval(key, val);
+        TEST_ASSERT(hashtableAdd(ht, e));
+    }
+    TEST_ASSERT(hashtableMemUsage(ht) == mem_usage);
+
+    if (count < 1000) {
+        hashtableHistogram(ht);
+        printf("Mem usage: %zu\n", hashtableMemUsage(ht));
+    }
+
+    /* Find. Every key must be present and map to the value it was added
+     * with. */
+    for (j = 0; j < count; j++) {
+        char key[32], val[32];
+        snprintf(key, sizeof(key), "%d", j);
+        snprintf(val, sizeof(val), "%d", count - j + 42);
+        void *found;
+        TEST_ASSERT(hashtableFind(ht, key, &found));
+        keyval *e = found;
+        TEST_ASSERT(!strcmp(val, getval(e)));
+    }
+
+    /* Delete half of them */
+    for (j = 0; j < count / 2; j++) {
+        char key[32];
+        snprintf(key, sizeof(key), "%d", j);
+        if (j % 3 == 0) {
+            /* Test hashtablePop. The popped entry is handed back to the
+             * caller, so it is freed here. */
+            char val[32];
+            snprintf(val, sizeof(val), "%d", count - j + 42);
+            void *popped;
+            TEST_ASSERT(hashtablePop(ht, key, &popped));
+            keyval *e = popped;
+            TEST_ASSERT(!strcmp(val, getval(e)));
+            free(e);
+        } else {
+            TEST_ASSERT(hashtableDelete(ht, key));
+        }
+    }
+    TEST_ASSERT(hashtableMemUsage(ht) == mem_usage);
+
+    /* Empty, i.e. delete remaining entries, with progress callback. */
+    empty_callback_call_counter = 0;
+    hashtableEmpty(ht, emptyCallback);
+    TEST_ASSERT(empty_callback_call_counter > 0);
+
+    /* Release memory. After this, the memory tracking callback must have
+     * been told about the release of everything it was told was allocated. */
+    hashtableRelease(ht);
+    TEST_ASSERT(mem_usage == 0);
+    return 0;
+}
+/* Runs the add/find/delete scenario without changing the resize policy and
+ * then checks that zmalloc reports no memory in use. */
+int test_add_find_delete(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    TEST_ASSERT(add_find_delete_test_helper(flags) == 0);
+    TEST_ASSERT(zmalloc_used_memory() == 0);
+    return 0;
+}
+
+/* Runs the add/find/delete scenario while the global resize policy is
+ * HASHTABLE_RESIZE_AVOID, then checks that zmalloc reports no memory in use.
+ *
+ * The policy is global state, so it is set back to HASHTABLE_RESIZE_ALLOW
+ * before the helper's result is asserted; a failure in the helper must not
+ * leave the policy changed for the tests that run afterwards. */
+int test_add_find_delete_avoid_resize(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    hashtableSetResizePolicy(HASHTABLE_RESIZE_AVOID);
+    int helper_result = add_find_delete_test_helper(flags);
+    hashtableSetResizePolicy(HASHTABLE_RESIZE_ALLOW);
+    TEST_ASSERT(helper_result == 0);
+    TEST_ASSERT(zmalloc_used_memory() == 0);
+    return 0;
+}
+
+/* Verifies that a table whose type sets instant_rehashing never reports an
+ * ongoing rehash, neither after an add nor after a delete. */
+int test_instant_rehashing(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    const long count = 200;
+
+    /* No callbacks are set, so the entries are the long values themselves,
+     * stored as pointer-sized values. */
+    hashtableType type = {.instant_rehashing = 1};
+    hashtable *ht = hashtableCreate(&type);
+
+    /* Insert phase: rehashing must be over by the time each add returns. */
+    for (long j = 0; j < count; j++) {
+        TEST_ASSERT(hashtableAdd(ht, (void *)j));
+        TEST_ASSERT(!hashtableIsRehashing(ht));
+    }
+
+    /* Removal phase: same requirement for each delete. */
+    for (long j = 0; j < count; j++) {
+        TEST_ASSERT(hashtableDelete(ht, (void *)j));
+        TEST_ASSERT(!hashtableIsRehashing(ht));
+    }
+
+    hashtableRelease(ht);
+    return 0;
+}
+
+/* Checks that the longest bucket chain stays short (below 10), both when the
+ * table is not rehashing and right after a rehash has started, for a table
+ * filled with consecutive integers.
+ *
+ * The number of extra entries added while waiting for the rehashing state to
+ * change is bounded by 'count'. The bound is checked inside the loops, so
+ * that a table whose rehashing state never changes fails the test instead of
+ * growing without limit. */
+int test_bucket_chain_length(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    unsigned long count = 1000000;
+
+    /* A set of longs, i.e. pointer-sized integer values. */
+    hashtableType type = {0};
+    hashtable *ht = hashtableCreate(&type);
+    unsigned long j;
+    for (j = 0; j < count; j++) {
+        TEST_ASSERT(hashtableAdd(ht, (void *)j));
+    }
+    /* If it's rehashing, add a few more until rehashing is complete. */
+    while (hashtableIsRehashing(ht)) {
+        j++;
+        TEST_ASSERT(j < count * 2);
+        TEST_ASSERT(hashtableAdd(ht, (void *)j));
+    }
+    int max_chainlen_not_rehashing = hashtableLongestBucketChain(ht);
+    TEST_ASSERT(max_chainlen_not_rehashing < 10);
+
+    /* Add more until rehashing starts again. */
+    while (!hashtableIsRehashing(ht)) {
+        j++;
+        TEST_ASSERT(j < count * 2);
+        TEST_ASSERT(hashtableAdd(ht, (void *)j));
+    }
+    int max_chainlen_rehashing = hashtableLongestBucketChain(ht);
+    TEST_ASSERT(max_chainlen_rehashing < 10);
+
+    hashtableRelease(ht);
+    return 0;
+}
+
+/* Tests the two-phase insert (hashtableFindPositionForInsert followed by
+ * hashtableInsertAtPosition) and the two-phase pop
+ * (hashtableTwoPhasePopFindRef followed by hashtableTwoPhasePopDelete).
+ * 'flags' selects the number of entries, so it is not marked as unused. */
+int test_two_phase_insert_and_pop(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+
+    int count = (flags & UNIT_TEST_ACCURATE) ? 1000000 : 200;
+    hashtable *ht = hashtableCreate(&keyval_type);
+    int j;
+
+    /* hashtableFindPositionForInsert + hashtableInsertAtPosition. The entry
+     * is created only after a position has been found for its key. */
+    for (j = 0; j < count; j++) {
+        char key[32], val[32];
+        snprintf(key, sizeof(key), "%d", j);
+        snprintf(val, sizeof(val), "%d", count - j + 42);
+        hashtablePosition position;
+        int ret = hashtableFindPositionForInsert(ht, key, &position, NULL);
+        TEST_ASSERT(ret == 1);
+        keyval *e = create_keyval(key, val);
+        hashtableInsertAtPosition(ht, e, &position);
+    }
+
+    if (count < 1000) {
+        hashtableHistogram(ht);
+    }
+
+    /* Check that all entries were inserted. */
+    for (j = 0; j < count; j++) {
+        char key[32], val[32];
+        snprintf(key, sizeof(key), "%d", j);
+        snprintf(val, sizeof(val), "%d", count - j + 42);
+        void *found;
+        TEST_ASSERT(hashtableFind(ht, key, &found));
+        keyval *e = found;
+        TEST_ASSERT(!strcmp(val, getval(e)));
+    }
+
+    /* Test two-phase pop. The size must not change in the find phase and
+     * must drop by one in the delete phase.
+     * NOTE(review): if hashtableTwoPhasePopDelete() does not call the entry
+     * destructor (as is the case for hashtablePop(), whose result the other
+     * tests free themselves), the popped entries are leaked here and a
+     * free(e) is needed after the delete -- confirm in hashtable.c. */
+    for (j = 0; j < count; j++) {
+        char key[32], val[32];
+        snprintf(key, sizeof(key), "%d", j);
+        snprintf(val, sizeof(val), "%d", count - j + 42);
+        hashtablePosition position;
+        size_t size_before_find = hashtableSize(ht);
+        void **ref = hashtableTwoPhasePopFindRef(ht, key, &position);
+        TEST_ASSERT(ref != NULL);
+        keyval *e = *ref;
+        TEST_ASSERT(!strcmp(val, getval(e)));
+        TEST_ASSERT(hashtableSize(ht) == size_before_find);
+        hashtableTwoPhasePopDelete(ht, &position);
+        TEST_ASSERT(hashtableSize(ht) == size_before_find - 1);
+    }
+    TEST_ASSERT(hashtableSize(ht) == 0);
+
+    hashtableRelease(ht);
+    return 0;
+}
+/* Tests hashtableReplaceReallocatedEntry(): every entry is replaced by a newly
+ * allocated entry with the same key and a new value, after which lookups must
+ * return the new values. */
+int test_replace_reallocated_entry(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    int count = 100, j;
+    hashtable *ht = hashtableCreate(&keyval_type);
+
+    /* Add */
+    for (j = 0; j < count; j++) {
+        char key[32], val[32];
+        snprintf(key, sizeof(key), "%d", j);
+        snprintf(val, sizeof(val), "%d", count - j + 42);
+        keyval *e = create_keyval(key, val);
+        TEST_ASSERT(hashtableAdd(ht, e));
+    }
+
+    /* Find and replace. The old value is verified first, then 'val' is reused
+     * for the new value j + 1234. */
+    for (j = 0; j < count; j++) {
+        char key[32], val[32];
+        snprintf(key, sizeof(key), "%d", j);
+        snprintf(val, sizeof(val), "%d", count - j + 42);
+        void *found;
+        TEST_ASSERT(hashtableFind(ht, key, &found));
+        keyval *old = found;
+        TEST_ASSERT(strcmp(getkey(old), key) == 0);
+        TEST_ASSERT(strcmp(getval(old), val) == 0);
+        snprintf(val, sizeof(val), "%d", j + 1234);
+        keyval *new = create_keyval(key, val);
+        /* If we free 'old' before the call to hashtableReplaceReallocatedEntry,
+         * we get a use-after-free warning, so instead we just overwrite it with
+         * junk. The purpose is to verify that the function doesn't use the
+         * memory it points to. Only data[] is overwritten, terminators
+         * included; the size fields are left intact. */
+        memset(old->data, 'x', old->keysize + old->valsize);
+        TEST_ASSERT(hashtableReplaceReallocatedEntry(ht, old, new));
+        free(old);
+    }
+
+    /* Check */
+    for (j = 0; j < count; j++) {
+        char key[32], val[32];
+        snprintf(key, sizeof(key), "%d", j);
+        snprintf(val, sizeof(val), "%d", j + 1234);
+        void *found;
+        TEST_ASSERT(hashtableFind(ht, key, &found));
+        keyval *e = found;
+        TEST_ASSERT(!strcmp(val, getval(e)));
+    }
+
+    hashtableRelease(ht);
+    TEST_ASSERT(zmalloc_used_memory() == 0);
+    return 0;
+}
+
+/* Looks up every element of a large table, first one by one with
+ * hashtableFind() and then in batches of 1, 2, 4, ... 64 keys using the
+ * incremental find API, and prints the time taken by each variant. Only the
+ * lookup results are asserted, not the timings. */
+int test_incremental_find(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    enum { max_batch_size = 64 };
+    size_t count = 2000000;
+
+    /* The elements are used only for their addresses. The array is allocated
+     * on the heap, because 2 MB is too much to put on the stack safely, and
+     * with plain calloc() rather than zmalloc so that it doesn't show up in
+     * zmalloc_used_memory(). */
+    uint8_t *element_array = calloc(count, sizeof(*element_array));
+    TEST_ASSERT(element_array != NULL);
+
+    /* A set of uint8_t pointers */
+    hashtableType type = {0};
+    hashtable *ht = hashtableCreate(&type);
+
+    /* Populate */
+    for (size_t j = 0; j < count; j++) {
+        TEST_ASSERT(hashtableAdd(ht, element_array + j));
+    }
+
+    monotime timer;
+    monotonicInit();
+
+    /* Compare to looking up one by one. */
+    elapsedStart(&timer);
+    for (size_t i = 0; i < count; i++) {
+        uint8_t *key = &element_array[i];
+        void *found;
+        TEST_ASSERT(hashtableFind(ht, key, &found) == 1);
+        TEST_ASSERT(found == key);
+    }
+    uint64_t us2 = elapsedUs(timer);
+    TEST_PRINT_INFO("Lookup %zu elements one by one took %lu microseconds.",
+                    count, (unsigned long)us2);
+
+    /* Lookup elements in batches. */
+    for (size_t batch_size = 1; batch_size <= max_batch_size; batch_size *= 2) {
+        elapsedStart(&timer);
+        for (size_t batch = 0; batch < count / batch_size; batch++) {
+            /* Init batches. Only the first batch_size states are used. */
+            hashtableIncrementalFindState states[max_batch_size];
+            for (size_t i = 0; i < batch_size; i++) {
+                void *key = &element_array[batch * batch_size + i];
+                hashtableIncrementalFindInit(&states[i], ht, key);
+            }
+            /* Work on batches in round-robin order until all are done. A
+             * step returning 0 means that lookup has nothing more to do. */
+            size_t num_left;
+            do {
+                num_left = batch_size;
+                for (size_t i = 0; i < batch_size; i++) {
+                    if (hashtableIncrementalFindStep(&states[i]) == 0) {
+                        num_left--;
+                    }
+                }
+            } while (num_left > 0);
+
+            /* Fetch results. */
+            for (size_t i = 0; i < batch_size; i++) {
+                void *found;
+                TEST_ASSERT(hashtableIncrementalFindGetResult(&states[i], &found) == 1);
+                TEST_ASSERT(found == &element_array[batch * batch_size + i]);
+            }
+        }
+        uint64_t us1 = elapsedUs(timer);
+        TEST_PRINT_INFO("Lookup %zu elements in batches of %zu took %lu microseconds.",
+                        count, batch_size, (unsigned long)us1);
+    }
+
+    hashtableRelease(ht);
+    free(element_array);
+    return 0;
+}
+
+/* Private data handed to scanfn through the privdata argument of
+ * hashtableScan. test_scan allocates it with room for one entry_seen byte per
+ * entry in the table. */
+typedef struct {
+    /* Incremented by scanfn on every call. */
+    long count;
+    /* Flexible array member indexed by the entry's integer value; scanfn
+     * increments the element belonging to the entry it is called with. */
+    uint8_t entry_seen[];
+} scandata;
+
+/* Scan callback. The entry is not dereferenced; its pointer value is used as
+ * an index. Bumps both the total number of calls and the per-entry counter in
+ * the scandata passed as privdata. */
+void scanfn(void *privdata, void *entry) {
+    scandata *state = privdata;
+    unsigned long index = (unsigned long)entry;
+
+    state->count += 1;
+    state->entry_seen[index] += 1;
+}
+
+/* Populates tables of increasing size with the integers 0..count-1 (stored
+ * directly in the entry pointers), scans each table from cursor 0 until the
+ * cursor comes back to 0 and checks what the scan callback has seen. The
+ * number of entries depends on UNIT_TEST_LARGE_MEMORY and the number of rounds
+ * on UNIT_TEST_ACCURATE. Returns 0 when the end of the test is reached. */
+int test_scan(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+
+    long num_entries = (flags & UNIT_TEST_LARGE_MEMORY) ? 1000000 : 200000;
+    int num_rounds = (flags & UNIT_TEST_ACCURATE) ? 20 : 5;
+
+    /* A set of longs, i.e. pointer-sized values. */
+    hashtableType type = {0};
+    long j;
+
+    for (int round = 0; round < num_rounds; round++) {
+        /* First round count = num_entries, then some more. */
+        long count = num_entries * (1 + 2 * (double)round / num_rounds);
+
+        /* Seed, to make sure each round is different. */
+        randomSeed();
+
+        /* Populate */
+        hashtable *ht = hashtableCreate(&type);
+        for (j = 0; j < count; j++) {
+            TEST_ASSERT(hashtableAdd(ht, (void *)j));
+        }
+
+        /* Scan. The scandata is followed by one zero-initialized counter byte
+         * per entry. */
+        scandata *data = calloc(1, sizeof(scandata) + count);
+        TEST_ASSERT(data != NULL);
+        long max_entries_per_cycle = 0;
+        unsigned num_cycles = 0;
+        long scanned_count = 0;
+        size_t cursor = 0;
+        do {
+            /* Reset the per-call counter so that it holds the number of
+             * entries emitted by this call only. */
+            data->count = 0;
+            cursor = hashtableScan(ht, cursor, scanfn, data);
+            if (data->count > max_entries_per_cycle) {
+                max_entries_per_cycle = data->count;
+            }
+            scanned_count += data->count;
+            num_cycles++;
+        } while (cursor != 0);
+
+        /* Verify that the scan emitted 'count' entries in total and that
+         * every entry was emitted at least once and at most twice. */
+        TEST_ASSERT(scanned_count == count);
+        for (j = 0; j < count; j++) {
+            TEST_ASSERT(data->entry_seen[j] >= 1);
+            TEST_ASSERT(data->entry_seen[j] <= 2);
+        }
+
+        /* Print some information for curious readers. */
+        TEST_PRINT_INFO("Scanned %ld; max emitted per call: %ld; avg emitted per call: %.2lf",
+                        count, max_entries_per_cycle, (double)count / num_cycles);
+
+        /* Cleanup */
+        hashtableRelease(ht);
+        free(data);
+    }
+    return 0;
+}
+
+/* Hashtable entry with a controllable hash, see mock_hash_entry_get_hash. */
+typedef struct {
+    /* Payload; also used as the hash when 'hash' is 0. */
+    uint64_t value;
+    /* Explicit hash to report for this entry, or 0 to use 'value' instead. */
+    uint64_t hash;
+} mock_hash_entry;
+
+/* Allocates a mock_hash_entry with malloc and fills in the given value and
+ * hash. Never returns NULL: the process is aborted if the allocation fails,
+ * instead of writing through a null pointer. */
+static mock_hash_entry *mock_hash_entry_create(uint64_t value, uint64_t hash) {
+    mock_hash_entry *entry = malloc(sizeof *entry);
+    if (entry == NULL) {
+        abort();
+    }
+    entry->value = value;
+    entry->hash = hash;
+    return entry;
+}
+
+/* Hash function for mock_hash_entry entries. Returns 0 for a NULL entry,
+ * otherwise the entry's 'hash' field, falling back to its 'value' field when
+ * 'hash' is 0. */
+static uint64_t mock_hash_entry_get_hash(const void *entry) {
+    const mock_hash_entry *mock = entry;
+    if (mock == NULL) {
+        return 0UL;
+    }
+    if (mock->hash == 0) {
+        return mock->value;
+    }
+    return mock->hash;
+}
+
+/* Fills a table with pointers into a byte array, walks it with a plain
+ * iterator and uses the bytes as counters to check that every entry is
+ * returned exactly once. Returns 0 when the end of the test is reached. */
+int test_iterator(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    size_t count = 2000000;
+    /* Heap allocated: two megabytes is too much for a variable-length array
+     * on the stack. calloc returns zero-filled memory. */
+    uint8_t *entry_array = calloc(count, sizeof(*entry_array));
+    TEST_ASSERT(entry_array != NULL);
+
+    /* A set of uint8_t pointers */
+    hashtableType type = {0};
+    hashtable *ht = hashtableCreate(&type);
+
+    /* Populate */
+    for (size_t j = 0; j < count; j++) {
+        TEST_ASSERT(hashtableAdd(ht, entry_array + j));
+    }
+
+    /* Iterate */
+    size_t num_returned = 0;
+    hashtableIterator iter;
+    void *next;
+    hashtableInitIterator(&iter, ht);
+    while (hashtableNext(&iter, &next)) {
+        uint8_t *entry = next;
+        num_returned++;
+        TEST_ASSERT(entry >= entry_array && entry < entry_array + count);
+        /* increment entry at this position as a counter */
+        (*entry)++;
+    }
+    hashtableResetIterator(&iter);
+
+    /* Check that all entries were returned exactly once. A mismatch is
+     * printed and then fails the test through TEST_ASSERT; returning 0 here
+     * would be indistinguishable from the normal end of the test. */
+    TEST_ASSERT(num_returned == count);
+    for (size_t j = 0; j < count; j++) {
+        if (entry_array[j] != 1) {
+            printf("Entry %zu returned %d times\n", j, entry_array[j]);
+        }
+        TEST_ASSERT(entry_array[j] == 1);
+    }
+
+    hashtableRelease(ht);
+    free(entry_array);
+    return 0;
+}
+
+/* Walks a table with a safe iterator while deleting some of the returned
+ * entries and adding new ones. The entries are pointers into a byte array
+ * whose bytes serve as per-entry return counters: the first 'count' bytes
+ * belong to the entries present from the start, the following 'count' bytes
+ * to the entries added during the iteration. Returns 0 when the end of the
+ * test is reached. */
+int test_safe_iterator(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    size_t count = 1000;
+    uint8_t entry_counts[count * 2];
+    memset(entry_counts, 0, sizeof entry_counts);
+
+    /* A set of pointers into the uint8_t array. */
+    hashtableType type = {0};
+    hashtable *ht = hashtableCreate(&type);
+
+    /* Populate */
+    for (size_t j = 0; j < count; j++) {
+        TEST_ASSERT(hashtableAdd(ht, entry_counts + j));
+    }
+
+    /* Iterate */
+    size_t num_returned = 0;
+    hashtableIterator iter;
+    void *next;
+    hashtableInitSafeIterator(&iter, ht);
+    while (hashtableNext(&iter, &next)) {
+        uint8_t *entry = next;
+        size_t index = entry - entry_counts;
+        num_returned++;
+        TEST_ASSERT(entry >= entry_counts && entry < entry_counts + count * 2);
+        /* increment entry at this position as a counter */
+        (*entry)++;
+        /* Delete every fourth entry right after it has been returned. */
+        if (index % 4 == 0) {
+            TEST_ASSERT(hashtableDelete(ht, entry));
+        }
+        /* Add new item each time we see one of the original items */
+        if (index < count) {
+            TEST_ASSERT(hashtableAdd(ht, entry + count));
+        }
+    }
+    hashtableResetIterator(&iter);
+
+    /* Check that all entries present during the whole iteration were returned
+     * exactly once. (Some are deleted after being returned.) A mismatch is
+     * printed and then fails the test through TEST_ASSERT; returning 0 here
+     * would be indistinguishable from the normal end of the test. */
+    TEST_ASSERT(num_returned >= count);
+    for (size_t j = 0; j < count; j++) {
+        if (entry_counts[j] != 1) {
+            printf("Entry %zu returned %d times\n", j, entry_counts[j]);
+        }
+        TEST_ASSERT(entry_counts[j] == 1);
+    }
+    /* Check that entries inserted during the iteration were returned at most
+     * once. */
+    unsigned long num_optional_returned = 0;
+    for (size_t j = count; j < count * 2; j++) {
+        TEST_ASSERT(entry_counts[j] <= 1);
+        num_optional_returned += entry_counts[j];
+    }
+    printf("Safe iterator returned %lu of the %zu entries inserted while iterating.\n", num_optional_returned, count);
+
+    hashtableRelease(ht);
+    return 0;
+}
+
+/* Puts 30 entries into a table that consists of a single bucket chain,
+ * deletes every second entry through a safe iterator and checks that the
+ * number of chained buckets stays the same during the iteration and is lower
+ * after the iterator has been reset. */
+int test_compact_bucket_chain(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    /* Avoid resizing, so that the table keeps a single bucket chain. */
+    hashtableSetResizePolicy(HASHTABLE_RESIZE_AVOID);
+    const unsigned long num_entries = 30;
+
+    hashtableType type = {0};
+    hashtable *ht = hashtableCreate(&type);
+
+    /* Populate with the integers 0..29 stored in the entry pointers. */
+    for (unsigned long j = 0; j < num_entries; j++) {
+        TEST_ASSERT(hashtableAdd(ht, (void *)j));
+    }
+    TEST_ASSERT(hashtableBuckets(ht) == 1);
+    printf("Populated a single bucket chain, avoiding resize.\n");
+    hashtableHistogram(ht);
+
+    /* Delete half of the entries while iterating. */
+    size_t num_chained_buckets = hashtableChainedBuckets(ht, 0);
+    size_t visited = 0;
+    void *entry;
+    hashtableIterator iter;
+    hashtableInitSafeIterator(&iter, ht);
+    while (hashtableNext(&iter, &entry)) {
+        /* The bucket chain is not compacted while the iterator is still
+         * returning entries from it, so its length must be unchanged. */
+        TEST_ASSERT(hashtableChainedBuckets(ht, 0) == num_chained_buckets);
+        visited += 1;
+        int is_even_visit = (visited % 2 == 0);
+        if (is_even_visit) {
+            TEST_ASSERT(hashtableDelete(ht, entry));
+        }
+        if (visited == num_entries) {
+            printf("Last iteration. Half of them have been deleted.\n");
+            hashtableHistogram(ht);
+        }
+    }
+    hashtableResetIterator(&iter);
+
+    /* Now the chain is expected to be compacted: holes filled and empty
+     * child buckets freed, which shows as a shorter chain. */
+    printf("When the iterator leaves the bucket chain, compaction should happen.\n");
+    hashtableHistogram(ht);
+    TEST_ASSERT(hashtableChainedBuckets(ht, 0) < num_chained_buckets);
+
+    /* Clean up and restore the resize policy. */
+    hashtableRelease(ht);
+    hashtableSetResizePolicy(HASHTABLE_RESIZE_ALLOW);
+    TEST_ASSERT(zmalloc_used_memory() == 0);
+    return 0;
+}
+
+/* Fairness test for hashtableFairRandomEntry. A random entry is picked
+ * num_rounds times from a table of 'count' entries and the number of times
+ * each entry was picked is compared to the expected binomial distribution.
+ * The sizes depend on UNIT_TEST_LARGE_MEMORY and UNIT_TEST_ACCURATE. */
+int test_random_entry(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+    randomSeed();
+
+    size_t count = 400;
+    if (flags & UNIT_TEST_LARGE_MEMORY) {
+        count = 7000;
+    }
+    long num_rounds = 10000;
+    if (flags & UNIT_TEST_ACCURATE) {
+        num_rounds = 1000000;
+    }
+
+    /* The entries are pointers to the elements of this array, which at the
+     * same time hold the number of times each entry has been picked. */
+    hashtableType type = {0};
+    hashtable *ht = hashtableCreate(&type);
+
+    /* Populate */
+    unsigned times_picked[count];
+    memset(times_picked, 0, sizeof(times_picked));
+    for (size_t j = 0; j < count; j++) {
+        TEST_ASSERT(hashtableAdd(ht, times_picked + j));
+    }
+
+    /* Pick entries, and count how many times each entry is picked. */
+    for (long round = 0; round < num_rounds; round++) {
+        /* A void* variable is used for the output to avoid a cast that
+         * violates strict aliasing. */
+        void *entry;
+        TEST_ASSERT(hashtableFairRandomEntry(ht, &entry));
+        unsigned *picked = entry;
+        TEST_ASSERT(picked >= times_picked && picked < times_picked + count);
+        /* The picked element is its own counter. */
+        (*picked)++;
+    }
+    hashtableRelease(ht);
+
+    /* Fairness measurement
+     * --------------------
+     *
+     * Selecting a single random entry: For any entry in the hash table, let
+     * X=1 if we selected the entry (success) and X=0 otherwise. With m
+     * entries, our entry is selected with probability p = 1/m, the expected
+     * value is E(X) = 1/m, E(X^2) = 1/m and the variance:
+     *
+     *     Var(X) = E(X^2) - (E(X))^2 = 1/m - 1/(m^2) = (1/m) * (1 - 1/m).
+     *
+     * Repeating the selection of a random entry: Let's repeat the experiment
+     * n times and let Y be the number of times our entry was selected. This
+     * is a binomial distribution.
+     *
+     *     Y = X_1 + X_2 + ... + X_n
+     *     E(Y) = n/m
+     *
+     * The variance of a sum of independent random variables is the sum of the
+     * variances, so Y has variance np(1-p).
+     *
+     *     Var(Y) = npq = np(1 - p) = (n/m) * (1 - 1/m) = n * (m - 1) / (m * m)
+     */
+    double m = (double)count, n = (double)num_rounds;
+    double expected = n / m;                 /* E(Y) */
+    double variance = n * (m - 1) / (m * m); /* Var(Y) */
+    double std_dev = sqrt(variance);
+
+    /* With large n, the distribution approaches a normal distribution and we
+     * can use p68 = within 1 std dev, p95 = within 2 std dev, p99.7 = within 3
+     * std dev. Each counter below is the number of entries whose pick count
+     * is within that many standard deviations from the expected value. */
+    long p68 = 0, p95 = 0, p99 = 0, p4dev = 0, p5dev = 0;
+    for (size_t j = 0; j < count; j++) {
+        double abs_dev = fabs(expected - times_picked[j]);
+        if (abs_dev <= std_dev) p68++;
+        if (abs_dev <= std_dev * 2) p95++;
+        if (abs_dev <= std_dev * 3) p99++;
+        if (abs_dev <= std_dev * 4) p4dev++;
+        if (abs_dev <= std_dev * 5) p5dev++;
+    }
+    printf("Random entry fairness test\n");
+    printf("  Pick one of %zu entries, %ld times.\n", count, num_rounds);
+    printf("  Expecting each entry to be picked %.2lf times, std dev %.3lf.\n", expected, std_dev);
+    printf("  Within 1 std dev (p68) = %.2lf%%\n", 100 * p68 / m);
+    printf("  Within 2 std dev (p95) = %.2lf%%\n", 100 * p95 / m);
+    printf("  Within 3 std dev (p99) = %.2lf%%\n", 100 * p99 / m);
+    printf("  Within 4 std dev       = %.2lf%%\n", 100 * p4dev / m);
+    printf("  Within 5 std dev       = %.2lf%%\n", 100 * p5dev / m);
+
+    /* Conclusion? The number of trials (n) relative to the probabilities (p and
+     * 1 - p) must be sufficiently large (n * p >= 5 and n * (1 - p) >= 5) to
+     * approximate a binomial distribution with a normal distribution. */
+    int enough_trials = (n / m >= 5 && n * (1 - 1 / m) >= 5);
+    if (!enough_trials) {
+        printf("To uncertain numbers to draw any conclusions about fairness.\n");
+        return 0;
+    }
+    TEST_ASSERT_MESSAGE("Too unfair randomness", 100 * p99 / m >= 60.0);
+    return 0;
+}
+
+/* Fairness test for hashtableFairRandomEntry on a table where 64 of the 512
+ * entries share one hash value and the remaining 448 have random hashes. The
+ * measured probability of picking one of the 64 entries is compared to the
+ * fair probability 64 / 512. UNIT_TEST_ACCURATE selects a higher measurement
+ * precision and thereby more samples. */
+int test_random_entry_with_long_chain(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+
+    /* We use an estimator of true probability.
+     * We determine how many samples to take based on how precise of a
+     * measurement we want to take, and how certain we want to be that the
+     * measurement is correct.
+     * https://en.wikipedia.org/wiki/Checking_whether_a_coin_is_fair#Estimator_of_true_probability
+     */
+
+    /* In a thousand runs the worst deviation seen was 0.018 +/- 0.01.
+     * This means the true deviation was at least 0.008 or 0.8%.
+     * Accept a deviation of 5% to be on the safe side so we don't get
+     * a flaky test case. */
+    const double acceptable_probability_deviation = 0.05;
+
+    const size_t num_chained_entries = 64;
+    const size_t num_random_entries = 448;
+    const double p_fair = (double)num_chained_entries / (num_chained_entries + num_random_entries);
+
+    /* Precision of our measurement */
+    const double precision = (flags & UNIT_TEST_ACCURATE) ? 0.001 : 0.01;
+
+    /* This is confidence level for our measurement as the Z value of a normal
+     * distribution. 5 sigma corresponds to 0.00002% probability that our
+     * measurement is farther than 'precision' from the truth. This value is
+     * used in particle physics. */
+    const double z = 5;
+
+    /* Number of samples needed for the chosen precision and confidence. */
+    const double n = p_fair * (1 - p_fair) * z * z / (precision * precision);
+    const size_t num_samples = (size_t)n + 1;
+
+    /* NOTE(review): the entries are mock_hash_entry objects allocated with
+     * malloc, while the destructor is freekeyval, which is defined outside
+     * this part of the file - confirm that the two are compatible. */
+    hashtableType type = {
+        .hashFunction = mock_hash_entry_get_hash,
+        .entryDestructor = freekeyval,
+    };
+
+    hashtable *ht = hashtableCreate(&type);
+    hashtableExpand(ht, num_random_entries + num_chained_entries);
+    /* The shared hash must not be 0, since mock_hash_entry_get_hash treats a
+     * zero 'hash' field as "use the value instead". */
+    uint64_t chain_hash = (uint64_t)genrand64_int64();
+    if (chain_hash == 0) chain_hash++;
+
+    /* add random entries, none of them hashing to chain_hash */
+    for (size_t i = 0; i < num_random_entries; i++) {
+        uint64_t random_hash = (uint64_t)genrand64_int64();
+        if (random_hash == chain_hash) random_hash++;
+        TEST_ASSERT(hashtableAdd(ht, mock_hash_entry_create(random_hash, 0)));
+    }
+
+    /* create long chain */
+    for (size_t i = 0; i < num_chained_entries; i++) {
+        TEST_ASSERT(hashtableAdd(ht, mock_hash_entry_create(i, chain_hash)));
+    }
+
+    TEST_ASSERT(!hashtableIsRehashing(ht));
+
+    printf("Created a table with a long bucket chain.\n");
+    hashtableHistogram(ht);
+
+    printf("Taking %zu random samples\n", num_samples);
+    size_t count_chain_entry_picked = 0;
+    for (size_t i = 0; i < num_samples; i++) {
+        void *entry;
+        TEST_ASSERT(hashtableFairRandomEntry(ht, &entry));
+        mock_hash_entry *mock_entry = entry;
+        /* Only the chained entries carry chain_hash in their 'hash' field;
+         * the random entries were created with 'hash' set to 0. */
+        if (mock_entry->hash == chain_hash) {
+            count_chain_entry_picked++;
+        }
+    }
+    const double measured_probability = (double)count_chain_entry_picked / num_samples;
+    const double deviation = fabs(measured_probability - p_fair);
+    printf("Measured probability: %.1f%%\n", measured_probability * 100);
+    printf("Expected probability: %.1f%%\n", p_fair * 100);
+    printf("Measured probability deviated %1.1f%% +/- %1.1f%% from expected probability\n",
+           deviation * 100, precision * 100);
+    TEST_ASSERT(deviation <= precision + acceptable_probability_deviation);
+
+    hashtableRelease(ht);
+    return 0;
+}
+
+/* Asserts that zmalloc_used_memory() reports zero bytes in use. None of the
+ * arguments are used. */
+int test_all_memory_freed(int argc, char **argv, int flags) {
+    UNUSED(flags);
+    UNUSED(argv);
+    UNUSED(argc);
+
+    TEST_ASSERT(zmalloc_used_memory() == 0);
+    return 0;
+}