Hashtable implementation including unit tests

A cache-line aware hash table with a user-defined key-value entry type,
supporting incremental rehashing, scan, iterator, random sampling,
incremental lookup and more...

Signed-off-by: Viktor Söderqvist <viktor.soderqvist@est.tech>
This commit is contained in:
Viktor Söderqvist 2024-11-18 10:29:49 +01:00
parent b4c2a1804a
commit c8ee5c2c46
6 changed files with 3194 additions and 1 deletions

View File

@ -10,6 +10,7 @@ set(VALKEY_SERVER_SRCS
${CMAKE_SOURCE_DIR}/src/ae.c
${CMAKE_SOURCE_DIR}/src/anet.c
${CMAKE_SOURCE_DIR}/src/dict.c
${CMAKE_SOURCE_DIR}/src/hashtable.c
${CMAKE_SOURCE_DIR}/src/kvstore.c
${CMAKE_SOURCE_DIR}/src/sds.c
${CMAKE_SOURCE_DIR}/src/zmalloc.c

View File

@ -411,7 +411,7 @@ endif
ENGINE_NAME=valkey
SERVER_NAME=$(ENGINE_NAME)-server$(PROG_SUFFIX)
ENGINE_SENTINEL_NAME=$(ENGINE_NAME)-sentinel$(PROG_SUFFIX)
ENGINE_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o memory_prefetch.o io_threads.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o allocator_defrag.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o rdma.o
ENGINE_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o hashtable.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o memory_prefetch.o io_threads.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o allocator_defrag.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o rdma.o
ENGINE_CLI_NAME=$(ENGINE_NAME)-cli$(PROG_SUFFIX)
ENGINE_CLI_OBJ=anet.o adlist.o dict.o valkey-cli.o zmalloc.o release.o ae.o serverassert.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o
ENGINE_BENCHMARK_NAME=$(ENGINE_NAME)-benchmark$(PROG_SUFFIX)

2138
src/hashtable.c Normal file

File diff suppressed because it is too large Load Diff

167
src/hashtable.h Normal file
View File

@ -0,0 +1,167 @@
#ifndef HASHTABLE_H
#define HASHTABLE_H
/* Hash table implementation.
*
* This is a cache-friendly hash table implementation. For details about the
* implementation and documentation of functions, see comments in hashtable.c.
*
* The entries in a hashtable are of a user-defined type, but an entry needs to
* contain a key. It can represent a key-value entry, or it can be just a key,
* if set semantics are desired.
*
* Terminology:
*
* hashtable
* An instance of the data structure.
*
* entry
* An entry in the hashtable. This may be of the same type as the key,
* or a struct containing a key and other fields.
* key
* The part of the entry used for looking the entry up in the hashtable.
* May be the entire entry or a struct field within the entry.
*
* type
* A struct containing callbacks, such as hash function, key comparison
* function and how to get the key in an entry.
*/
#include "fmacros.h"
#include <stddef.h>
#include <stdint.h>
#include <unistd.h>
/* --- Opaque types --- */
typedef struct hashtable hashtable;
typedef struct hashtableStats hashtableStats;
/* Opaque types that can be stack allocated. */
typedef uint64_t hashtableIterator[5];
typedef uint64_t hashtablePosition[2];
typedef uint64_t hashtableIncrementalFindState[5];
/* --- Non-opaque types --- */
/* The hashtableType is a set of callbacks for a hashtable. All callbacks are
* optional. With all callbacks omitted, the hashtable is effectively a set of
* pointer-sized integers. A pointer to this struct is passed to
* hashtableCreate(). */
typedef struct {
/* If the type of an entry is not the same as the type of a key used for
* lookup, this callback needs to return the key within an entry. */
const void *(*entryGetKey)(const void *entry);
/* Hash function. Defaults to hashing the bits in the pointer, effectively
* treating the pointer as an integer. */
uint64_t (*hashFunction)(const void *key);
/* Compare function, returns 0 if the keys are equal. Defaults to just
* comparing the pointers for equality. */
int (*keyCompare)(const void *key1, const void *key2);
/* Callback to free an entry when it's overwritten or deleted.
* Optional. */
void (*entryDestructor)(void *entry);
/* Callback to control when resizing should be allowed.
* NOTE(review): the meaning of the arguments and of the return value is
* defined in hashtable.c; presumably moreMem is the extra memory the resize
* would need and usedRatio the current fill ratio -- confirm there. */
int (*resizeAllowed)(size_t moreMem, double usedRatio);
/* Invoked at the start of rehashing. */
void (*rehashingStarted)(hashtable *ht);
/* Invoked at the end of rehashing. */
void (*rehashingCompleted)(hashtable *ht);
/* Track memory usage using this callback. It is called with a positive
* number when the hashtable allocates some memory and with a negative number
* when freeing. */
void (*trackMemUsage)(hashtable *ht, ssize_t delta);
/* Allow a hashtable to carry extra caller-defined metadata. The extra memory
* is initialized to 0. The callback returns the number of metadata bytes. */
size_t (*getMetadataSize)(void);
/* Flag to disable incremental rehashing */
unsigned instant_rehashing : 1;
} hashtableType;
/* Global resize policy, set with hashtableSetResizePolicy().
* NOTE(review): the precise effect of each level is implemented in
* hashtable.c. Judging by the names, ALLOW is the unrestricted default, AVOID
* makes resizing less eager and FORBID disables it -- confirm there. */
typedef enum {
HASHTABLE_RESIZE_ALLOW = 0,
HASHTABLE_RESIZE_AVOID,
HASHTABLE_RESIZE_FORBID,
} hashtableResizePolicy;
typedef void (*hashtableScanFunction)(void *privdata, void *entry);
/* Constants */
#define HASHTABLE_BUCKET_SIZE 64 /* bytes, the most common cache line size */
/* Scan flags */
#define HASHTABLE_SCAN_EMIT_REF (1 << 0)
/* --- Prototypes --- */
/* Hash function (global seed) */
void hashtableSetHashFunctionSeed(const uint8_t *seed);
uint8_t *hashtableGetHashFunctionSeed(void);
uint64_t hashtableGenHashFunction(const char *buf, size_t len);
uint64_t hashtableGenCaseHashFunction(const char *buf, size_t len);
/* Global resize policy */
void hashtableSetResizePolicy(hashtableResizePolicy policy);
/* Hashtable instance */
hashtable *hashtableCreate(hashtableType *type);
void hashtableRelease(hashtable *ht);
void hashtableEmpty(hashtable *ht, void(callback)(hashtable *));
hashtableType *hashtableGetType(hashtable *ht);
void *hashtableMetadata(hashtable *ht);
size_t hashtableSize(hashtable *ht);
size_t hashtableBuckets(hashtable *ht);
size_t hashtableChainedBuckets(hashtable *ht, int table);
size_t hashtableMemUsage(hashtable *ht);
void hashtablePauseAutoShrink(hashtable *ht);
void hashtableResumeAutoShrink(hashtable *ht);
int hashtableIsRehashing(hashtable *ht);
int hashtableIsRehashingPaused(hashtable *ht);
void hashtableRehashingInfo(hashtable *ht, size_t *from_size, size_t *to_size);
int hashtableRehashMicroseconds(hashtable *ht, uint64_t us);
int hashtableExpand(hashtable *ht, size_t size);
int hashtableTryExpand(hashtable *ht, size_t size);
int hashtableExpandIfNeeded(hashtable *ht);
int hashtableShrinkIfNeeded(hashtable *ht);
hashtable *hashtableDefragTables(hashtable *ht, void *(*defragfn)(void *));
/* Entries */
int hashtableFind(hashtable *ht, const void *key, void **found);
void **hashtableFindRef(hashtable *ht, const void *key);
int hashtableAdd(hashtable *ht, void *entry);
int hashtableAddOrFind(hashtable *ht, void *entry, void **existing);
int hashtableFindPositionForInsert(hashtable *ht, void *key, hashtablePosition *position, void **existing);
void hashtableInsertAtPosition(hashtable *ht, void *entry, hashtablePosition *position);
int hashtablePop(hashtable *ht, const void *key, void **popped);
int hashtableDelete(hashtable *ht, const void *key);
void **hashtableTwoPhasePopFindRef(hashtable *ht, const void *key, hashtablePosition *position);
void hashtableTwoPhasePopDelete(hashtable *ht, hashtablePosition *position);
int hashtableReplaceReallocatedEntry(hashtable *ht, const void *old_entry, void *new_entry);
void hashtableIncrementalFindInit(hashtableIncrementalFindState *state, hashtable *ht, const void *key);
int hashtableIncrementalFindStep(hashtableIncrementalFindState *state);
int hashtableIncrementalFindGetResult(hashtableIncrementalFindState *state, void **found);
/* Iteration & scan */
size_t hashtableScan(hashtable *ht, size_t cursor, hashtableScanFunction fn, void *privdata);
size_t hashtableScanDefrag(hashtable *ht, size_t cursor, hashtableScanFunction fn, void *privdata, void *(*defragfn)(void *), int flags);
void hashtableInitIterator(hashtableIterator *iter, hashtable *ht);
void hashtableInitSafeIterator(hashtableIterator *iter, hashtable *ht);
void hashtableResetIterator(hashtableIterator *iter);
hashtableIterator *hashtableCreateIterator(hashtable *ht);
hashtableIterator *hashtableCreateSafeIterator(hashtable *ht);
void hashtableReleaseIterator(hashtableIterator *iter);
int hashtableNext(hashtableIterator *iter, void **elemptr);
/* Random entries */
int hashtableRandomEntry(hashtable *ht, void **found);
int hashtableFairRandomEntry(hashtable *ht, void **found);
unsigned hashtableSampleEntries(hashtable *ht, void **dst, unsigned count);
/* Debug & stats */
void hashtableFreeStats(hashtableStats *stats);
void hashtableCombineStats(hashtableStats *from, hashtableStats *into);
hashtableStats *hashtableGetStatsHt(hashtable *ht, int htidx, int full);
size_t hashtableGetStatsMsg(char *buf, size_t bufsize, hashtableStats *stats, int full);
void hashtableGetStats(char *buf, size_t bufsize, hashtable *ht, int full);
#endif /* HASHTABLE_H */

View File

@ -19,6 +19,22 @@ int test_dictDisableResizeReduceTo3(int argc, char **argv, int flags);
int test_dictDeleteOneKeyTriggerResizeAgain(int argc, char **argv, int flags);
int test_dictBenchmark(int argc, char **argv, int flags);
int test_endianconv(int argc, char *argv[], int flags);
int test_cursor(int argc, char **argv, int flags);
int test_set_hash_function_seed(int argc, char **argv, int flags);
int test_add_find_delete(int argc, char **argv, int flags);
int test_add_find_delete_avoid_resize(int argc, char **argv, int flags);
int test_instant_rehashing(int argc, char **argv, int flags);
int test_bucket_chain_length(int argc, char **argv, int flags);
int test_two_phase_insert_and_pop(int argc, char **argv, int flags);
int test_replace_reallocated_entry(int argc, char **argv, int flags);
int test_incremental_find(int argc, char **argv, int flags);
int test_scan(int argc, char **argv, int flags);
int test_iterator(int argc, char **argv, int flags);
int test_safe_iterator(int argc, char **argv, int flags);
int test_compact_bucket_chain(int argc, char **argv, int flags);
int test_random_entry(int argc, char **argv, int flags);
int test_random_entry_with_long_chain(int argc, char **argv, int flags);
int test_all_memory_freed(int argc, char **argv, int flags);
int test_intsetValueEncodings(int argc, char **argv, int flags);
int test_intsetBasicAdding(int argc, char **argv, int flags);
int test_intsetLargeNumberRandomAdd(int argc, char **argv, int flags);
@ -215,6 +231,7 @@ unitTest __test_crc64_c[] = {{"test_crc64", test_crc64}, {NULL, NULL}};
unitTest __test_crc64combine_c[] = {{"test_crc64combine", test_crc64combine}, {NULL, NULL}};
unitTest __test_dict_c[] = {{"test_dictCreate", test_dictCreate}, {"test_dictAdd16Keys", test_dictAdd16Keys}, {"test_dictDisableResize", test_dictDisableResize}, {"test_dictAddOneKeyTriggerResize", test_dictAddOneKeyTriggerResize}, {"test_dictDeleteKeys", test_dictDeleteKeys}, {"test_dictDeleteOneKeyTriggerResize", test_dictDeleteOneKeyTriggerResize}, {"test_dictEmptyDirAdd128Keys", test_dictEmptyDirAdd128Keys}, {"test_dictDisableResizeReduceTo3", test_dictDisableResizeReduceTo3}, {"test_dictDeleteOneKeyTriggerResizeAgain", test_dictDeleteOneKeyTriggerResizeAgain}, {"test_dictBenchmark", test_dictBenchmark}, {NULL, NULL}};
unitTest __test_endianconv_c[] = {{"test_endianconv", test_endianconv}, {NULL, NULL}};
unitTest __test_hashtable_c[] = {{"test_cursor", test_cursor}, {"test_set_hash_function_seed", test_set_hash_function_seed}, {"test_add_find_delete", test_add_find_delete}, {"test_add_find_delete_avoid_resize", test_add_find_delete_avoid_resize}, {"test_instant_rehashing", test_instant_rehashing}, {"test_bucket_chain_length", test_bucket_chain_length}, {"test_two_phase_insert_and_pop", test_two_phase_insert_and_pop}, {"test_replace_reallocated_entry", test_replace_reallocated_entry}, {"test_incremental_find", test_incremental_find}, {"test_scan", test_scan}, {"test_iterator", test_iterator}, {"test_safe_iterator", test_safe_iterator}, {"test_compact_bucket_chain", test_compact_bucket_chain}, {"test_random_entry", test_random_entry}, {"test_random_entry_with_long_chain", test_random_entry_with_long_chain}, {"test_all_memory_freed", test_all_memory_freed}, {NULL, NULL}};
unitTest __test_intset_c[] = {{"test_intsetValueEncodings", test_intsetValueEncodings}, {"test_intsetBasicAdding", test_intsetBasicAdding}, {"test_intsetLargeNumberRandomAdd", test_intsetLargeNumberRandomAdd}, {"test_intsetUpgradeFromint16Toint32", test_intsetUpgradeFromint16Toint32}, {"test_intsetUpgradeFromint16Toint64", test_intsetUpgradeFromint16Toint64}, {"test_intsetUpgradeFromint32Toint64", test_intsetUpgradeFromint32Toint64}, {"test_intsetStressLookups", test_intsetStressLookups}, {"test_intsetStressAddDelete", test_intsetStressAddDelete}, {NULL, NULL}};
unitTest __test_kvstore_c[] = {{"test_kvstoreAdd16Keys", test_kvstoreAdd16Keys}, {"test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyDict", test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyDict}, {"test_kvstoreIteratorRemoveAllKeysDeleteEmptyDict", test_kvstoreIteratorRemoveAllKeysDeleteEmptyDict}, {"test_kvstoreDictIteratorRemoveAllKeysNoDeleteEmptyDict", test_kvstoreDictIteratorRemoveAllKeysNoDeleteEmptyDict}, {"test_kvstoreDictIteratorRemoveAllKeysDeleteEmptyDict", test_kvstoreDictIteratorRemoveAllKeysDeleteEmptyDict}, {NULL, NULL}};
unitTest __test_listpack_c[] = {{"test_listpackCreateIntList", test_listpackCreateIntList}, {"test_listpackCreateList", test_listpackCreateList}, {"test_listpackLpPrepend", test_listpackLpPrepend}, {"test_listpackLpPrependInteger", test_listpackLpPrependInteger}, {"test_listpackGetELementAtIndex", test_listpackGetELementAtIndex}, {"test_listpackPop", test_listpackPop}, {"test_listpackGetELementAtIndex2", test_listpackGetELementAtIndex2}, {"test_listpackIterate0toEnd", test_listpackIterate0toEnd}, {"test_listpackIterate1toEnd", test_listpackIterate1toEnd}, {"test_listpackIterate2toEnd", test_listpackIterate2toEnd}, {"test_listpackIterateBackToFront", test_listpackIterateBackToFront}, {"test_listpackIterateBackToFrontWithDelete", test_listpackIterateBackToFrontWithDelete}, {"test_listpackDeleteWhenNumIsMinusOne", test_listpackDeleteWhenNumIsMinusOne}, {"test_listpackDeleteWithNegativeIndex", test_listpackDeleteWithNegativeIndex}, {"test_listpackDeleteInclusiveRange0_0", test_listpackDeleteInclusiveRange0_0}, {"test_listpackDeleteInclusiveRange0_1", test_listpackDeleteInclusiveRange0_1}, {"test_listpackDeleteInclusiveRange1_2", test_listpackDeleteInclusiveRange1_2}, {"test_listpackDeleteWitStartIndexOutOfRange", test_listpackDeleteWitStartIndexOutOfRange}, {"test_listpackDeleteWitNumOverflow", test_listpackDeleteWitNumOverflow}, {"test_listpackBatchDelete", test_listpackBatchDelete}, {"test_listpackDeleteFooWhileIterating", test_listpackDeleteFooWhileIterating}, {"test_listpackReplaceWithSameSize", test_listpackReplaceWithSameSize}, {"test_listpackReplaceWithDifferentSize", test_listpackReplaceWithDifferentSize}, {"test_listpackRegressionGt255Bytes", test_listpackRegressionGt255Bytes}, {"test_listpackCreateLongListAndCheckIndices", test_listpackCreateLongListAndCheckIndices}, {"test_listpackCompareStrsWithLpEntries", test_listpackCompareStrsWithLpEntries}, {"test_listpackLpMergeEmptyLps", test_listpackLpMergeEmptyLps}, {"test_listpackLpMergeLp1Larger", 
test_listpackLpMergeLp1Larger}, {"test_listpackLpMergeLp2Larger", test_listpackLpMergeLp2Larger}, {"test_listpackLpNextRandom", test_listpackLpNextRandom}, {"test_listpackLpNextRandomCC", test_listpackLpNextRandomCC}, {"test_listpackRandomPairWithOneElement", test_listpackRandomPairWithOneElement}, {"test_listpackRandomPairWithManyElements", test_listpackRandomPairWithManyElements}, {"test_listpackRandomPairsWithOneElement", test_listpackRandomPairsWithOneElement}, {"test_listpackRandomPairsWithManyElements", test_listpackRandomPairsWithManyElements}, {"test_listpackRandomPairsUniqueWithOneElement", test_listpackRandomPairsUniqueWithOneElement}, {"test_listpackRandomPairsUniqueWithManyElements", test_listpackRandomPairsUniqueWithManyElements}, {"test_listpackPushVariousEncodings", test_listpackPushVariousEncodings}, {"test_listpackLpFind", test_listpackLpFind}, {"test_listpackLpValidateIntegrity", test_listpackLpValidateIntegrity}, {"test_listpackNumberOfElementsExceedsLP_HDR_NUMELE_UNKNOWN", test_listpackNumberOfElementsExceedsLP_HDR_NUMELE_UNKNOWN}, {"test_listpackStressWithRandom", test_listpackStressWithRandom}, {"test_listpackSTressWithVariableSize", test_listpackSTressWithVariableSize}, {"test_listpackBenchmarkInit", test_listpackBenchmarkInit}, {"test_listpackBenchmarkLpAppend", test_listpackBenchmarkLpAppend}, {"test_listpackBenchmarkLpFindString", test_listpackBenchmarkLpFindString}, {"test_listpackBenchmarkLpFindNumber", test_listpackBenchmarkLpFindNumber}, {"test_listpackBenchmarkLpSeek", test_listpackBenchmarkLpSeek}, {"test_listpackBenchmarkLpValidateIntegrity", test_listpackBenchmarkLpValidateIntegrity}, {"test_listpackBenchmarkLpCompareWithString", test_listpackBenchmarkLpCompareWithString}, {"test_listpackBenchmarkLpCompareWithNumber", test_listpackBenchmarkLpCompareWithNumber}, {"test_listpackBenchmarkFree", test_listpackBenchmarkFree}, {NULL, NULL}};
@ -237,6 +254,7 @@ struct unitTestSuite {
{"test_crc64combine.c", __test_crc64combine_c},
{"test_dict.c", __test_dict_c},
{"test_endianconv.c", __test_endianconv_c},
{"test_hashtable.c", __test_hashtable_c},
{"test_intset.c", __test_intset_c},
{"test_kvstore.c", __test_kvstore_c},
{"test_listpack.c", __test_listpack_c},

869
src/unit/test_hashtable.c Normal file
View File

@ -0,0 +1,869 @@
#include "../hashtable.h"
#include "test_help.h"
#include "../mt19937-64.h"
#include "../zmalloc.h"
#include "../monotonic.h"
#include <stdio.h>
#include <limits.h>
#include <string.h>
#include <math.h>
/* Global variable to test the memory tracking callback. */
static size_t mem_usage;
/* From util.c: getRandomBytes to seed hash function. */
void getRandomBytes(unsigned char *p, size_t len);
/* Fetch sizeof(unsigned long long) random bytes and use them to seed both the
 * 64-bit Mersenne Twister (init_genrand64) and libc's random() generator
 * (srandom, which receives the value truncated to unsigned). */
static void randomSeed(void) {
    unsigned long long entropy;
    getRandomBytes((unsigned char *)&entropy, sizeof(entropy));
    init_genrand64(entropy);
    srandom((unsigned)entropy);
}
/* Test entry: one heap allocation holding a NUL-terminated key immediately
 * followed by a NUL-terminated value. */
typedef struct {
    unsigned int keysize; /* Bytes used by the key, terminator included */
    unsigned int valsize; /* Bytes used by the value, terminator included */
    char data[];          /* Key bytes, then value bytes */
} keyval;

/* Allocates a keyval containing copies of 'key' and 'val'. The caller owns the
 * returned object and releases it with free(). */
static keyval *create_keyval(const char *key, const char *val) {
    size_t key_bytes = strlen(key) + 1;
    size_t val_bytes = strlen(val) + 1;
    keyval *kv = malloc(sizeof(keyval) + key_bytes + val_bytes);
    char *dst = kv->data;
    memcpy(dst, key, key_bytes);
    memcpy(dst + key_bytes, val, val_bytes);
    kv->keysize = key_bytes;
    kv->valsize = val_bytes;
    return kv;
}

/* Returns a pointer to the key string stored at the start of the entry's data. */
static const void *getkey(const void *entry) {
    return ((const keyval *)entry)->data;
}

/* Returns a pointer to the value string, which starts right after the key. */
static const void *getval(const void *entry) {
    const keyval *kv = entry;
    return &kv->data[kv->keysize];
}
/* hashFunction callback: hashes a NUL-terminated string key with
 * hashtableGenHashFunction(), passing strlen(key) as the length. */
static uint64_t hashfunc(const void *key) {
    const char *str = key;
    size_t len = strlen(str);
    return hashtableGenHashFunction(str, len);
}
/* keyCompare callback: compares two NUL-terminated string keys with strcmp(),
 * so the result is 0 exactly when the keys are equal. */
static int keycmp(const void *key1, const void *key2) {
    const char *lhs = key1;
    const char *rhs = key2;
    return strcmp(lhs, rhs);
}
/* entryDestructor callback: releases an entry with free(). */
static void freekeyval(void *entry) {
    free(entry);
}
/* trackMemUsage callback: folds every reported delta into the file-level
 * mem_usage counter. mem_usage is a size_t, so a negative delta is applied
 * through the usual unsigned wrap-around of the addition. */
static void trackmemusage(hashtable *ht, ssize_t delta) {
    UNUSED(ht);
    mem_usage = mem_usage + delta;
}
/* Hashtable type used for some of the tests. Entries are keyval objects that
* are looked up by their string key (getkey/hashfunc/keycmp), released with
* freekeyval, and whose table memory deltas are accumulated in mem_usage via
* trackmemusage. */
static hashtableType keyval_type = {
.entryGetKey = getkey,
.hashFunction = hashfunc,
.keyCompare = keycmp,
.entryDestructor = freekeyval,
.trackMemUsage = trackmemusage,
};
/* Number of times emptyCallback() has run; tests reset it before calling
 * hashtableEmpty() and inspect it afterwards. */
static long empty_callback_call_counter;

/* Progress callback handed to hashtableEmpty(): only counts its invocations. */
void emptyCallback(hashtable *ht) {
    UNUSED(ht);
    empty_callback_call_counter += 1;
}
/* Prototypes for debugging */
void hashtableDump(hashtable *ht);
void hashtableHistogram(hashtable *ht);
int hashtableLongestBucketChain(hashtable *ht);
size_t nextCursor(size_t v, size_t mask);
/* Checks nextCursor() against a few fixed cursor/mask pairs. The expected
* values correspond to incrementing the cursor in reversed bit order within
* the mask: the highest masked bit is bumped first and carries propagate
* towards the lowest bit, wrapping to 0 after the all-ones cursor. */
int test_cursor(int argc, char **argv, int flags) {
UNUSED(argc);
UNUSED(argv);
UNUSED(flags);
TEST_ASSERT(nextCursor(0x0000, 0xffff) == 0x8000);
TEST_ASSERT(nextCursor(0x8000, 0xffff) == 0x4000);
/* Bits outside the carry chain (here bit 0) are left untouched. */
TEST_ASSERT(nextCursor(0x4001, 0xffff) == 0xc001);
TEST_ASSERT(nextCursor(0xffff, 0xffff) == 0x0000);
return 0;
}
/* Seeds the random generators via randomSeed() and always reports success.
* NOTE(review): despite the name, nothing in this function calls
* hashtableSetHashFunctionSeed(); confirm whether that is intended. */
int test_set_hash_function_seed(int argc, char **argv, int flags) {
UNUSED(argc);
UNUSED(argv);
UNUSED(flags);
randomSeed();
return 0;
}
/* Shared body for the add/find/delete tests. Adds 'count' keyval entries
* (1000000 with UNIT_TEST_ACCURATE, otherwise 200), looks each one up, removes
* the first half using a mix of hashtablePop() and hashtableDelete(), empties
* the rest and releases the table. Along the way it checks that the table's
* reported memory usage matches the mem_usage counter maintained by the
* trackMemUsage callback. Returns 0 on success; a failing TEST_ASSERT
* presumably returns early with a non-zero value. */
static int add_find_delete_test_helper(int flags) {
int count = (flags & UNIT_TEST_ACCURATE) ? 1000000 : 200;
/* No table exists yet, so nothing must be accounted for. */
TEST_ASSERT(mem_usage == 0);
hashtable *ht = hashtableCreate(&keyval_type);
int j;
/* Add: key is the decimal index, value is derived from it. */
for (j = 0; j < count; j++) {
char key[32], val[32];
snprintf(key, sizeof(key), "%d", j);
snprintf(val, sizeof(val), "%d", count - j + 42);
keyval *e = create_keyval(key, val);
TEST_ASSERT(hashtableAdd(ht, e));
}
TEST_ASSERT(hashtableMemUsage(ht) == mem_usage);
/* Only print diagnostics for the small (non-accurate) run. */
if (count < 1000) {
hashtableHistogram(ht);
printf("Mem usage: %zu\n", hashtableMemUsage(ht));
}
/* Find: every key must be present with the value stored above. */
for (j = 0; j < count; j++) {
char key[32], val[32];
snprintf(key, sizeof(key), "%d", j);
snprintf(val, sizeof(val), "%d", count - j + 42);
void *found;
TEST_ASSERT(hashtableFind(ht, key, &found));
keyval *e = found;
TEST_ASSERT(!strcmp(val, getval(e)));
}
/* Delete half of them */
for (j = 0; j < count / 2; j++) {
char key[32];
snprintf(key, sizeof(key), "%d", j);
if (j % 3 == 0) {
/* Test hashtablePop: the popped entry is handed back to us, so we
* verify it and free it ourselves. */
char val[32];
snprintf(val, sizeof(val), "%d", count - j + 42);
void *popped;
TEST_ASSERT(hashtablePop(ht, key, &popped));
keyval *e = popped;
TEST_ASSERT(!strcmp(val, getval(e)));
free(e);
} else {
TEST_ASSERT(hashtableDelete(ht, key));
}
}
TEST_ASSERT(hashtableMemUsage(ht) == mem_usage);
/* Empty, i.e. delete remaining entries, with progress callback. */
empty_callback_call_counter = 0;
hashtableEmpty(ht, emptyCallback);
TEST_ASSERT(empty_callback_call_counter > 0);
/* Release memory */
hashtableRelease(ht);
/* Everything the table reported as allocated must have been reported freed. */
TEST_ASSERT(mem_usage == 0);
return 0;
}
/* Runs the add/find/delete helper under the current resize policy and then
* checks that zmalloc reports no memory in use. */
int test_add_find_delete(int argc, char **argv, int flags) {
UNUSED(argc);
UNUSED(argv);
TEST_ASSERT(add_find_delete_test_helper(flags) == 0);
TEST_ASSERT(zmalloc_used_memory() == 0);
return 0;
}
/* Runs the add/find/delete helper with the global resize policy set to
 * HASHTABLE_RESIZE_AVOID, then checks that zmalloc reports no memory in use.
 *
 * The policy is process-global, so it is restored to HASHTABLE_RESIZE_ALLOW
 * before the helper's result is asserted; otherwise a failing helper would
 * make TEST_ASSERT return early and leave the AVOID policy active for every
 * test that runs afterwards. */
int test_add_find_delete_avoid_resize(int argc, char **argv, int flags) {
    UNUSED(argc);
    UNUSED(argv);
    hashtableSetResizePolicy(HASHTABLE_RESIZE_AVOID);
    int helper_result = add_find_delete_test_helper(flags);
    hashtableSetResizePolicy(HASHTABLE_RESIZE_ALLOW);
    TEST_ASSERT(helper_result == 0);
    TEST_ASSERT(zmalloc_used_memory() == 0);
    return 0;
}
/* With the instant_rehashing flag set in the type, the table must never be
* observed in the middle of a rehash: hashtableIsRehashing() is checked after
* every single add and every single delete. */
int test_instant_rehashing(int argc, char **argv, int flags) {
UNUSED(argc);
UNUSED(argv);
UNUSED(flags);
long count = 200;
/* A set of longs, i.e. pointer-sized values. No callbacks are set, so the
* entry pointer itself acts as the key. */
hashtableType type = {.instant_rehashing = 1};
hashtable *ht = hashtableCreate(&type);
long j;
/* Populate and check that rehashing is never ongoing. */
for (j = 0; j < count; j++) {
TEST_ASSERT(hashtableAdd(ht, (void *)j));
TEST_ASSERT(!hashtableIsRehashing(ht));
}
/* Delete and check that rehashing is never ongoing. */
for (j = 0; j < count; j++) {
TEST_ASSERT(hashtableDelete(ht, (void *)j));
TEST_ASSERT(!hashtableIsRehashing(ht));
}
hashtableRelease(ht);
return 0;
}
/* Fills a table with a million integer entries and checks that the longest
* bucket chain stays below 10 both when the table is not rehashing and right
* after a rehash has started. */
int test_bucket_chain_length(int argc, char **argv, int flags) {
UNUSED(argc);
UNUSED(argv);
UNUSED(flags);
unsigned long count = 1000000;
/* A set of longs, i.e. pointer-sized integer values. */
hashtableType type = {0};
hashtable *ht = hashtableCreate(&type);
unsigned long j;
for (j = 0; j < count; j++) {
TEST_ASSERT(hashtableAdd(ht, (void *)j));
}
/* If it's rehashing, add a few more until rehashing is complete. */
while (hashtableIsRehashing(ht)) {
j++;
TEST_ASSERT(hashtableAdd(ht, (void *)j));
}
/* Sanity bound: finishing the rehash must not have needed to double the
* number of inserted values. */
TEST_ASSERT(j < count * 2);
int max_chainlen_not_rehashing = hashtableLongestBucketChain(ht);
TEST_ASSERT(max_chainlen_not_rehashing < 10);
/* Add more until rehashing starts again. */
while (!hashtableIsRehashing(ht)) {
j++;
TEST_ASSERT(hashtableAdd(ht, (void *)j));
}
TEST_ASSERT(j < count * 2);
int max_chainlen_rehashing = hashtableLongestBucketChain(ht);
TEST_ASSERT(max_chainlen_rehashing < 10);
hashtableRelease(ht);
return 0;
}
/* Exercises the two-phase insert API (hashtableFindPositionForInsert followed
 * by hashtableInsertAtPosition) and the two-phase pop API
 * (hashtableTwoPhasePopFindRef followed by hashtableTwoPhasePopDelete).
 *
 * Uses 1000000 entries with UNIT_TEST_ACCURATE and 200 otherwise. Returns 0 on
 * success. */
int test_two_phase_insert_and_pop(int argc, char **argv, int flags) {
    UNUSED(argc);
    UNUSED(argv);
    int count = (flags & UNIT_TEST_ACCURATE) ? 1000000 : 200;
    hashtable *ht = hashtableCreate(&keyval_type);
    int j;

    /* hashtableFindPositionForInsert + hashtableInsertAtPosition */
    for (j = 0; j < count; j++) {
        char key[32], val[32];
        snprintf(key, sizeof(key), "%d", j);
        snprintf(val, sizeof(val), "%d", count - j + 42);
        hashtablePosition position;
        /* The key is new, so a free position must be reported. */
        int ret = hashtableFindPositionForInsert(ht, key, &position, NULL);
        TEST_ASSERT(ret == 1);
        keyval *e = create_keyval(key, val);
        hashtableInsertAtPosition(ht, e, &position);
    }

    if (count < 1000) {
        hashtableHistogram(ht);
    }

    /* Check that all entries were inserted. */
    for (j = 0; j < count; j++) {
        char key[32], val[32];
        snprintf(key, sizeof(key), "%d", j);
        snprintf(val, sizeof(val), "%d", count - j + 42);
        void *found;
        TEST_ASSERT(hashtableFind(ht, key, &found));
        keyval *e = found;
        TEST_ASSERT(!strcmp(val, getval(e)));
    }

    /* Test two-phase pop. */
    for (j = 0; j < count; j++) {
        char key[32], val[32];
        snprintf(key, sizeof(key), "%d", j);
        snprintf(val, sizeof(val), "%d", count - j + 42);
        hashtablePosition position;
        size_t size_before_find = hashtableSize(ht);
        void **ref = hashtableTwoPhasePopFindRef(ht, key, &position);
        TEST_ASSERT(ref != NULL);
        keyval *e = *ref;
        TEST_ASSERT(!strcmp(val, getval(e)));
        /* Finding the reference alone must not change the size... */
        TEST_ASSERT(hashtableSize(ht) == size_before_find);
        hashtableTwoPhasePopDelete(ht, &position);
        /* ...only the second phase removes the entry from the table. */
        TEST_ASSERT(hashtableSize(ht) == size_before_find - 1);
        /* The entry was popped rather than deleted, so it is ours to free
         * (just like an entry returned by hashtablePop()). */
        free(e);
    }
    TEST_ASSERT(hashtableSize(ht) == 0);

    hashtableRelease(ht);
    return 0;
}
/* Verifies hashtableReplaceReallocatedEntry(): every entry is swapped for a
* freshly allocated one carrying the same key and a new value, after the old
* entry's contents have been scribbled over. Afterwards all keys must resolve
* to the new values and zmalloc must report no memory in use. */
int test_replace_reallocated_entry(int argc, char **argv, int flags) {
UNUSED(argc);
UNUSED(argv);
UNUSED(flags);
int count = 100, j;
hashtable *ht = hashtableCreate(&keyval_type);
/* Add */
for (j = 0; j < count; j++) {
char key[32], val[32];
snprintf(key, sizeof(key), "%d", j);
snprintf(val, sizeof(val), "%d", count - j + 42);
keyval *e = create_keyval(key, val);
TEST_ASSERT(hashtableAdd(ht, e));
}
/* Find and replace */
for (j = 0; j < count; j++) {
char key[32], val[32];
snprintf(key, sizeof(key), "%d", j);
snprintf(val, sizeof(val), "%d", count - j + 42);
void *found;
TEST_ASSERT(hashtableFind(ht, key, &found));
keyval *old = found;
TEST_ASSERT(strcmp(getkey(old), key) == 0);
TEST_ASSERT(strcmp(getval(old), val) == 0);
/* Build the replacement: same key, different value. */
snprintf(val, sizeof(val), "%d", j + 1234);
keyval *new = create_keyval(key, val);
/* If we free 'old' before the call to hashtableReplaceReallocatedEntry,
* we get a use-after-free warning, so instead we just overwrite it with
* junk. The purpose is to verify that the function doesn't use the
* memory it points to. */
memset(old->data, 'x', old->keysize + old->valsize);
TEST_ASSERT(hashtableReplaceReallocatedEntry(ht, old, new));
/* The table now references 'new'; the old allocation is ours to free. */
free(old);
}
/* Check */
for (j = 0; j < count; j++) {
char key[32], val[32];
snprintf(key, sizeof(key), "%d", j);
snprintf(val, sizeof(val), "%d", j + 1234);
void *found;
TEST_ASSERT(hashtableFind(ht, key, &found));
keyval *e = found;
TEST_ASSERT(!strcmp(val, getval(e)));
}
hashtableRelease(ht);
TEST_ASSERT(zmalloc_used_memory() == 0);
return 0;
}
/* Compares plain hashtableFind() lookups with batched incremental lookups
 * (hashtableIncrementalFindInit / Step / GetResult) over two million entries,
 * verifying every result and printing the elapsed time for each batch size
 * from 1 to 64.
 *
 * The entries are the addresses of the bytes in 'element_array'. The array is
 * heap-allocated because a two-megabyte automatic array can exhaust small
 * stacks. Returns 0 on success. */
int test_incremental_find(int argc, char **argv, int flags) {
    UNUSED(argc);
    UNUSED(argv);
    UNUSED(flags);
    size_t count = 2000000;
    /* calloc zero-fills, matching the contents the test expects. */
    uint8_t *element_array = calloc(count, sizeof(*element_array));
    TEST_ASSERT(element_array != NULL);

    /* A set of uint8_t pointers */
    hashtableType type = {0};
    hashtable *ht = hashtableCreate(&type);

    /* Populate */
    for (size_t j = 0; j < count; j++) {
        TEST_ASSERT(hashtableAdd(ht, element_array + j));
    }

    monotime timer;
    monotonicInit();

    /* Compare to looking up one by one. */
    elapsedStart(&timer);
    for (size_t i = 0; i < count; i++) {
        uint8_t *key = &element_array[i];
        void *found;
        TEST_ASSERT(hashtableFind(ht, key, &found) == 1);
        TEST_ASSERT(found == key);
    }
    uint64_t us2 = elapsedUs(timer);
    TEST_PRINT_INFO("Lookup %zu elements one by one took %lu microseconds.",
                    count, (unsigned long)us2);

    /* Lookup elements in batches. */
    for (size_t batch_size = 1; batch_size <= 64; batch_size *= 2) {
        elapsedStart(&timer);
        for (size_t batch = 0; batch < count / batch_size; batch++) {
            /* Init batches. */
            hashtableIncrementalFindState states[batch_size];
            for (size_t i = 0; i < batch_size; i++) {
                void *key = &element_array[batch * batch_size + i];
                hashtableIncrementalFindInit(&states[i], ht, key);
            }
            /* Work on batches in round-robin order until all are done. A step
             * returning 0 means that lookup has nothing left to do. */
            size_t num_left;
            do {
                num_left = batch_size;
                for (size_t i = 0; i < batch_size; i++) {
                    if (hashtableIncrementalFindStep(&states[i]) == 0) {
                        num_left--;
                    }
                }
            } while (num_left > 0);

            /* Fetch results. */
            for (size_t i = 0; i < batch_size; i++) {
                void *found;
                TEST_ASSERT(hashtableIncrementalFindGetResult(&states[i], &found) == 1);
                TEST_ASSERT(found == &element_array[batch * batch_size + i]);
            }
        }
        uint64_t us1 = elapsedUs(timer);
        TEST_PRINT_INFO("Lookup %zu elements in batches of %zu took %lu microseconds.",
                        count, batch_size, (unsigned long)us1);
    }

    hashtableRelease(ht);
    free(element_array);
    return 0;
}
/* Private data handed to scanfn() through hashtableScan(). */
typedef struct {
    long count;           /* Entries emitted since the caller last reset it */
    uint8_t entry_seen[]; /* Per-entry hit counters, indexed by entry value */
} scandata;

/* Scan callback: the entry is an integer stored in a pointer; it is used as an
 * index to bump that entry's hit counter, and the emitted total is bumped. */
void scanfn(void *privdata, void *entry) {
    scandata *sd = privdata;
    unsigned long index = (unsigned long)entry;
    sd->count += 1;
    sd->entry_seen[index] += 1;
}
/* Scans tables of integer entries from cursor 0 until the cursor returns to 0
* and checks that the scan emitted every entry. Runs 5 rounds (20 with
* UNIT_TEST_ACCURATE) with a growing number of entries, starting from 200000
* (1000000 with UNIT_TEST_LARGE_MEMORY). */
int test_scan(int argc, char **argv, int flags) {
UNUSED(argc);
UNUSED(argv);
UNUSED(flags);
long num_entries = (flags & UNIT_TEST_LARGE_MEMORY) ? 1000000 : 200000;
int num_rounds = (flags & UNIT_TEST_ACCURATE) ? 20 : 5;
/* A set of longs, i.e. pointer-sized values. */
hashtableType type = {0};
long j;
for (int round = 0; round < num_rounds; round++) {
/* First round count = num_entries, then some more. */
long count = num_entries * (1 + 2 * (double)round / num_rounds);
/* Seed, to make sure each round is different. */
randomSeed();
/* Populate */
hashtable *ht = hashtableCreate(&type);
for (j = 0; j < count; j++) {
TEST_ASSERT(hashtableAdd(ht, (void *)j));
}
/* Scan. 'data' carries one zero-initialized hit counter per entry. */
scandata *data = calloc(1, sizeof(scandata) + count);
long max_entries_per_cycle = 0;
unsigned num_cycles = 0;
long scanned_count = 0;
size_t cursor = 0;
do {
/* data->count is reset around each call so that it holds the number
* of entries emitted by this single hashtableScan() call. */
data->count = 0;
cursor = hashtableScan(ht, cursor, scanfn, data);
if (data->count > max_entries_per_cycle) {
max_entries_per_cycle = data->count;
}
scanned_count += data->count;
data->count = 0;
num_cycles++;
} while (cursor != 0);
/* Verify that every entry was returned. The total equals the number of
* entries and each entry was seen at least once, which together imply
* that each one was returned exactly once; the upper bound below is
* therefore only a loose extra check. */
TEST_ASSERT(scanned_count == count);
for (j = 0; j < count; j++) {
TEST_ASSERT(data->entry_seen[j] >= 1);
TEST_ASSERT(data->entry_seen[j] <= 2);
}
/* Print some information for curious readers. */
TEST_PRINT_INFO("Scanned %ld; max emitted per call: %ld; avg emitted per call: %.2lf",
count, max_entries_per_cycle, (double)count / num_cycles);
/* Cleanup */
hashtableRelease(ht);
free(data);
}
return 0;
}
typedef struct {
uint64_t value;
uint64_t hash;
} mock_hash_entry;
static mock_hash_entry *mock_hash_entry_create(uint64_t value, uint64_t hash) {
mock_hash_entry *entry = malloc(sizeof(mock_hash_entry));
entry->value = value;
entry->hash = hash;
return entry;
}
/* Hash function for tables holding mock_hash_entry pointers.
 *
 * Returns 0 for a NULL entry. Otherwise returns the entry's explicit hash when
 * it is non-zero, and the entry's value when the explicit hash is 0. */
static uint64_t mock_hash_entry_get_hash(const void *entry) {
    const mock_hash_entry *mock = entry;

    if (mock == NULL) {
        return 0UL;
    }
    if (mock->hash != 0) {
        return mock->hash;
    }
    return mock->value;
}
/* Checks that a plain (non-safe) iterator returns every entry exactly once.
 *
 * The entries are pointers into a byte array, so each byte doubles as the
 * counter of how many times its own address was returned by the iterator.
 *
 * Returns 0 on success and non-zero if any entry was not returned exactly
 * once. */
int test_iterator(int argc, char **argv, int flags) {
    UNUSED(argc);
    UNUSED(argv);
    UNUSED(flags);

    size_t count = 2000000;
    /* Zeroed counters, allocated on the heap: 2 MB is too much to put on the
     * stack as a variable length array. */
    uint8_t *entry_array = calloc(count, sizeof *entry_array);
    TEST_ASSERT(entry_array != NULL);

    /* A set of uint8_t pointers */
    hashtableType type = {0};
    hashtable *ht = hashtableCreate(&type);

    /* Populate */
    for (size_t j = 0; j < count; j++) {
        TEST_ASSERT(hashtableAdd(ht, entry_array + j));
    }

    /* Iterate */
    size_t num_returned = 0;
    hashtableIterator iter;
    void *next;
    hashtableInitIterator(&iter, ht);
    while (hashtableNext(&iter, &next)) {
        uint8_t *entry = next;
        num_returned++;
        TEST_ASSERT(entry >= entry_array && entry < entry_array + count);
        /* increment entry at this position as a counter */
        (*entry)++;
    }
    hashtableResetIterator(&iter);

    /* Check that all entries were returned exactly once. */
    TEST_ASSERT(num_returned == count);
    int result = 0;
    for (size_t j = 0; j < count; j++) {
        if (entry_array[j] != 1) {
            printf("Entry %zu returned %d times\n", j, entry_array[j]);
            /* Report the mismatch as a failure rather than a pass. */
            result = 1;
            break;
        }
    }

    /* Cleanup, also on the mismatch path. */
    hashtableRelease(ht);
    free(entry_array);
    return result;
}
/* Checks that a safe iterator tolerates deletes and inserts while iterating.
 *
 * The entries are pointers into a byte array of 2 * count counters. The first
 * half is inserted up front. While iterating, every fourth original entry is
 * deleted after it has been returned, and for each original entry a new entry
 * from the second half is inserted.
 *
 * Expectations: every original entry is returned exactly once, and each entry
 * inserted during the iteration is returned at most once.
 *
 * Returns 0 on success and non-zero on failure. */
int test_safe_iterator(int argc, char **argv, int flags) {
    UNUSED(argc);
    UNUSED(argv);
    UNUSED(flags);

    size_t count = 1000;
    uint8_t entry_counts[count * 2];
    memset(entry_counts, 0, sizeof entry_counts);

    /* A set of pointers into the uint8_t array. */
    hashtableType type = {0};
    hashtable *ht = hashtableCreate(&type);

    /* Populate */
    for (size_t j = 0; j < count; j++) {
        TEST_ASSERT(hashtableAdd(ht, entry_counts + j));
    }

    /* Iterate */
    size_t num_returned = 0;
    hashtableIterator iter;
    void *next;
    hashtableInitSafeIterator(&iter, ht);
    while (hashtableNext(&iter, &next)) {
        uint8_t *entry = next;
        size_t index = entry - entry_counts;
        num_returned++;
        TEST_ASSERT(entry >= entry_counts && entry < entry_counts + count * 2);
        /* increment entry at this position as a counter */
        (*entry)++;
        if (index % 4 == 0) {
            TEST_ASSERT(hashtableDelete(ht, entry));
        }
        /* Add new item each time we see one of the original items */
        if (index < count) {
            TEST_ASSERT(hashtableAdd(ht, entry + count));
        }
    }
    hashtableResetIterator(&iter);

    /* Check that all entries present during the whole iteration were returned
     * exactly once. (Some are deleted after being returned.) */
    TEST_ASSERT(num_returned >= count);
    for (size_t j = 0; j < count; j++) {
        if (entry_counts[j] != 1) {
            printf("Entry %zu returned %d times\n", j, entry_counts[j]);
            /* Report the mismatch as a failure rather than a pass, and do not
             * leave the table behind. */
            hashtableRelease(ht);
            return 1;
        }
    }

    /* Check that entries inserted during the iteration were returned at most
     * once. */
    unsigned long num_optional_returned = 0;
    for (size_t j = count; j < count * 2; j++) {
        TEST_ASSERT(entry_counts[j] <= 1);
        num_optional_returned += entry_counts[j];
    }
    printf("Safe iterator returned %lu of the %zu entries inserted while iterating.\n", num_optional_returned, count);

    hashtableRelease(ht);
    return 0;
}
/* Checks that a bucket chain is compacted once a safe iterator has left it.
 *
 * With resizing set to HASHTABLE_RESIZE_AVOID, 30 entries are added and the
 * test asserts that they all sit in a single bucket. Every second entry
 * returned by a safe iterator is deleted; while the iterator is running the
 * chain length must stay unchanged, and after the iterator is reset the chain
 * must be shorter. The resize policy is set back to HASHTABLE_RESIZE_ALLOW
 * before the final memory check.
 *
 * Returns 0 on success. */
int test_compact_bucket_chain(int argc, char **argv, int flags) {
    UNUSED(argc);
    UNUSED(argv);
    UNUSED(flags);

    /* Create a table with only one bucket chain. */
    hashtableSetResizePolicy(HASHTABLE_RESIZE_AVOID);
    hashtableType type = {0};
    hashtable *ht = hashtableCreate(&type);

    /* Populate */
    const unsigned long total = 30;
    for (unsigned long j = 0; j < total; j++) {
        TEST_ASSERT(hashtableAdd(ht, (void *)j));
    }
    TEST_ASSERT(hashtableBuckets(ht) == 1);
    printf("Populated a single bucket chain, avoiding resize.\n");
    hashtableHistogram(ht);

    /* Delete half of the entries while iterating. */
    size_t num_chained_buckets = hashtableChainedBuckets(ht, 0);
    size_t visited = 0;
    void *entry;
    hashtableIterator iter;
    hashtableInitSafeIterator(&iter, ht);
    while (hashtableNext(&iter, &entry)) {
        /* As long as the iterator is still returning entries from the same
         * bucket chain, the bucket chain is not compacted, so it still has the
         * same number of buckets. */
        TEST_ASSERT(hashtableChainedBuckets(ht, 0) == num_chained_buckets);

        visited += 1;
        int even_visit = (visited % 2 == 0);
        if (even_visit) {
            TEST_ASSERT(hashtableDelete(ht, entry));
        }
        if (visited == total) {
            printf("Last iteration. Half of them have been deleted.\n");
            hashtableHistogram(ht);
        }
    }
    hashtableResetIterator(&iter);

    /* Verify that the bucket chain has been compacted by filling the holes and
     * freeing empty child buckets. */
    printf("When the iterator leaves the bucket chain, compaction should happen.\n");
    hashtableHistogram(ht);
    TEST_ASSERT(hashtableChainedBuckets(ht, 0) < num_chained_buckets);

    /* Tear down and restore the global resize policy. */
    hashtableRelease(ht);
    hashtableSetResizePolicy(HASHTABLE_RESIZE_ALLOW);
    TEST_ASSERT(zmalloc_used_memory() == 0);
    return 0;
}
/* Measures how evenly hashtableFairRandomEntry() picks among the entries.
 *
 * The entries are pointers into an array of counters, so every pick can be
 * recorded by incrementing the counter the returned pointer refers to. After
 * all picks, the counters are compared with the binomial expectation and the
 * share of entries within 1..5 standard deviations is printed.
 *
 * flags: UNIT_TEST_LARGE_MEMORY raises the number of entries and
 *        UNIT_TEST_ACCURATE raises the number of picks.
 * Returns 0 on success. */
int test_random_entry(int argc, char **argv, int flags) {
    UNUSED(argc);
    UNUSED(argv);
    UNUSED(flags);
    randomSeed();

    size_t count = (flags & UNIT_TEST_LARGE_MEMORY) ? 7000 : 400;
    long num_rounds = (flags & UNIT_TEST_ACCURATE) ? 1000000 : 10000;

    /* A set of ints */
    hashtableType type = {0};
    hashtable *ht = hashtableCreate(&type);

    /* Populate: one zeroed pick counter per entry. */
    unsigned times_picked[count];
    memset(times_picked, 0, sizeof(times_picked));
    for (size_t j = 0; j < count; j++) {
        TEST_ASSERT(hashtableAdd(ht, times_picked + j));
    }

    /* Pick entries, and count how many times each entry is picked. */
    long rounds_done = 0;
    while (rounds_done < num_rounds) {
        /* Using void* variable to avoid a cast that violates strict aliasing */
        void *entry;
        TEST_ASSERT(hashtableFairRandomEntry(ht, &entry));
        unsigned *picked = entry;
        TEST_ASSERT(picked >= times_picked && picked < times_picked + count);
        /* The entry is its own counter. */
        *picked += 1;
        rounds_done++;
    }
    hashtableRelease(ht);

    /* Fairness measurement
     * --------------------
     *
     * Selecting a single random entry: For any entry in the hash table, let
     * X=1 if we selected the entry (success) and X=0 otherwise. With m
     * entries, our entry is selected with probability p = 1/m, the expected
     * value is E(X) = 1/m, E(X^2) = 1/m and the variance:
     *
     *     Var(X) = E(X^2) - (E(X))^2 = 1/m - 1/(m^2) = (1/m) * (1 - 1/m).
     *
     * Repeating the selection of a random entry: Let's repeat the experiment
     * n times and let Y be the number of times our entry was selected. This
     * is a binomial distribution.
     *
     *     Y = X_1 + X_2 + ... + X_n
     *     E(Y) = n/m
     *
     * The variance of a sum of independent random variables is the sum of the
     * variances, so Y has variance np(1 - p).
     *
     *     Var(Y) = npq = np(1 - p) = (n/m) * (1 - 1/m) = n * (m - 1) / (m * m)
     */
    double m = (double)count, n = (double)num_rounds;
    double expected = n / m;                         /* E(Y) */
    double std_dev = sqrt(n * (m - 1) / (m * m));    /* sqrt(Var(Y)) */

    /* With large n, the distribution approaches a normal distribution and we
     * can use p68 = within 1 std dev, p95 = within 2 std dev, p99.7 = within 3
     * std dev. */
    long p68 = 0, p95 = 0, p99 = 0, p4dev = 0, p5dev = 0;
    for (size_t j = 0; j < count; j++) {
        double dev = expected - times_picked[j];
        /* Distance from the expectation, regardless of direction. */
        double distance = (dev < 0) ? -dev : dev;
        p68 += (distance <= std_dev);
        p95 += (distance <= std_dev * 2);
        p99 += (distance <= std_dev * 3);
        p4dev += (distance <= std_dev * 4);
        p5dev += (distance <= std_dev * 5);
    }

    printf("Random entry fairness test\n");
    printf("  Pick one of %zu entries, %ld times.\n", count, num_rounds);
    printf("  Expecting each entry to be picked %.2lf times, std dev %.3lf.\n", expected, std_dev);
    printf("  Within 1 std dev (p68) = %.2lf%%\n", 100 * p68 / m);
    printf("  Within 2 std dev (p95) = %.2lf%%\n", 100 * p95 / m);
    printf("  Within 3 std dev (p99) = %.2lf%%\n", 100 * p99 / m);
    printf("  Within 4 std dev       = %.2lf%%\n", 100 * p4dev / m);
    printf("  Within 5 std dev       = %.2lf%%\n", 100 * p5dev / m);

    /* Conclusion? The number of trials (n) relative to the probabilities (p and
     * 1 - p) must be sufficiently large (n * p >= 5 and n * (1 - p) >= 5) to
     * approximate a binomial distribution with a normal distribution. */
    int enough_trials = (n / m >= 5 && n * (1 - 1 / m) >= 5);
    if (!enough_trials) {
        printf("To uncertain numbers to draw any conclusions about fairness.\n");
        return 0;
    }
    TEST_ASSERT_MESSAGE("Too unfair randomness", 100 * p99 / m >= 60.0);
    return 0;
}
/* Checks that hashtableFairRandomEntry() stays fair when one bucket chain is
 * much longer than the others.
 *
 * The table holds 448 entries with random hashes plus 64 entries that all
 * report the same hash. The share of samples that land on one of the 64
 * same-hash entries is compared with the fair share 64 / (64 + 448).
 *
 * flags: UNIT_TEST_ACCURATE tightens the measurement precision, which raises
 *        the number of samples taken.
 * Returns 0 on success. */
int test_random_entry_with_long_chain(int argc, char **argv, int flags) {
    UNUSED(argc);
    UNUSED(argv);

    /* We use an estimator of true probability.
     * We determine how many samples to take based on how precise of a
     * measurement we want to take, and how certain we want to be that the
     * measurement is correct.
     * https://en.wikipedia.org/wiki/Checking_whether_a_coin_is_fair#Estimator_of_true_probability
     */

    /* In a thousand runs the worst deviation seen was 0.018 +/- 0.01.
     * This means the true deviation was at least 0.008 or 0.8%.
     * Accept a deviation of 5% to be on the safe side so we don't get
     * a flaky test case. */
    const double acceptable_probability_deviation = 0.05;

    const size_t num_chained_entries = 64;
    const size_t num_random_entries = 448;
    const double p_fair = (double)num_chained_entries / (num_chained_entries + num_random_entries);

    /* Precision of our measurement */
    const double precision = (flags & UNIT_TEST_ACCURATE) ? 0.001 : 0.01;

    /* This is confidence level for our measurement as the Z value of a normal
     * distribution. 5 sigma corresponds to 0.00002% probability that our
     * measurement is farther than 'precision' from the truth. This value is
     * used in particle physics. */
    const double z = 5;

    const double n = p_fair * (1 - p_fair) * z * z / (precision * precision);
    const size_t num_samples = (size_t)n + 1;

    hashtableType type = {
        .hashFunction = mock_hash_entry_get_hash,
        .entryDestructor = freekeyval,
    };

    hashtable *ht = hashtableCreate(&type);
    hashtableExpand(ht, num_random_entries + num_chained_entries);

    /* The shared hash must be non-zero: a zero 'hash' field means "use the
     * value as hash" and is what marks the random entries below. */
    uint64_t chain_hash = (uint64_t)genrand64_int64();
    if (chain_hash == 0) chain_hash++;

    /* add random entries */
    for (size_t i = 0; i < num_random_entries; i++) {
        uint64_t random_hash = (uint64_t)genrand64_int64();
        if (random_hash == chain_hash) random_hash++;
        mock_hash_entry *random_entry = mock_hash_entry_create(random_hash, 0);
        /* p_fair assumes every entry really is in the table. */
        TEST_ASSERT(hashtableAdd(ht, random_entry));
    }

    /* create long chain */
    for (size_t i = 0; i < num_chained_entries; i++) {
        mock_hash_entry *chained_entry = mock_hash_entry_create(i, chain_hash);
        TEST_ASSERT(hashtableAdd(ht, chained_entry));
    }

    TEST_ASSERT(!hashtableIsRehashing(ht));

    printf("Created a table with a long bucket chain.\n");
    hashtableHistogram(ht);

    printf("Taking %zu random samples\n", num_samples);
    size_t count_chain_entry_picked = 0;
    for (size_t i = 0; i < num_samples; i++) {
        void *entry;
        TEST_ASSERT(hashtableFairRandomEntry(ht, &entry));
        mock_hash_entry *mock_entry = entry;
        /* Only the chained entries carry chain_hash in their 'hash' field. */
        if (mock_entry->hash == chain_hash) {
            count_chain_entry_picked++;
        }
    }
    const double measured_probability = (double)count_chain_entry_picked / num_samples;
    const double deviation = fabs(measured_probability - p_fair);
    printf("Measured probability: %.1f%%\n", measured_probability * 100);
    printf("Expected probability: %.1f%%\n", p_fair * 100);
    printf("Measured probability deviated %1.1f%% +/- %1.1f%% from expected probability\n",
           deviation * 100, precision * 100);
    TEST_ASSERT(deviation <= precision + acceptable_probability_deviation);

    hashtableRelease(ht);
    return 0;
}
/* Final sanity check: asserts that zmalloc_used_memory() reports zero, i.e.
 * that nothing tracked by zmalloc is still allocated when this test runs.
 * None of the arguments are used. Returns 0 on success. */
int test_all_memory_freed(int argc, char **argv, int flags) {
    UNUSED(flags);
    UNUSED(argv);
    UNUSED(argc);

    TEST_ASSERT(zmalloc_used_memory() == 0);

    return 0;
}