
This PR builds upon the [previous entry prefetching optimization](https://github.com/valkey-io/valkey/pull/1501) to further enhance performance by implementing value prefetching for hashtable iterators.

## Implementation

- Modified `hashtableInitIterator` to accept a new flags parameter, allowing control over iterator behavior.
- Implemented conditional value prefetching within `hashtableNext` based on the new `HASHTABLE_ITER_PREFETCH_VALUES` flag. When the flag is set, `hashtableNext` now calls `prefetchBucketValues` at the start of each new bucket, preemptively loading the values of filled entries into the CPU cache.

The actual prefetching of values is performed by type-specific callback functions implemented in `server.c`:
- For `robj` entries, the `hashtableObjectPrefetchValue` callback is used to prefetch the value if it is not embedded.

A minimal usage sketch of the new iterator flag is shown after this section. This implementation is focused on main database iterations at this stage. Applying it to hashtables that hold other object types should not be problematic, but the performance benefit for those cases will need to be proven through testing and benchmarking.

## Performance

### Setup
- 64-core Graviton3 Amazon EC2 instance.
- 50 million keys with different value sizes.
- Valkey server running on a RAM file system.
- CRC checksum and compression off.

### Action
- `save` command.

### Results
The durations of the `save` command below were taken from the output of `info all`.
```
+--------------------+------------------+------------------+
| Prefetching        | Value size (byte)| Time (seconds)   |
+--------------------+------------------+------------------+
| No                 | 100              | 20.112279        |
| Yes                | 100              | 12.758519        |
| No                 | 40               | 16.945366        |
| Yes                | 40               | 10.902022        |
| No                 | 20               | 9.817000         |
| Yes                | 20               | 9.626821         |
| No                 | 10               | 9.71510          |
| Yes                | 10               | 9.510565         |
+--------------------+------------------+------------------+
```
The results largely align with our expectations, showing significant improvements for larger values (100 bytes and 40 bytes) that are stored outside the robj. For smaller values (20 bytes and 10 bytes) that are embedded within the robj, we see almost no improvement, which is as expected. However, the small improvement observed even for these embedded values is somewhat surprising: since we do not actively prefetch embedded values, this minor gain was not anticipated.
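To make the new iterator API concrete, here is a minimal sketch of how a caller might opt into value prefetching. Only the identifiers named above (`hashtableInitIterator`, `hashtableNext`, `HASHTABLE_ITER_PREFETCH_VALUES`) come from this change; the iterator type and the cleanup call are assumed for illustration and may differ from the actual `hashtable.h`.

```c
/* Minimal sketch (not copied from the PR): iterate a hashtable with value
 * prefetching enabled via the new flags argument. */
#include "hashtable.h"

static void iterateWithValuePrefetch(hashtable *ht) {
    hashtableIterator iter; /* iterator type assumed for this sketch */
    /* New flags parameter: ask hashtableNext to call prefetchBucketValues
     * at the start of each bucket, so the values of filled entries are
     * pulled into the CPU cache ahead of use. */
    hashtableInitIterator(&iter, ht, HASHTABLE_ITER_PREFETCH_VALUES);
    void *entry;
    while (hashtableNext(&iter, &entry)) {
        /* By the time an entry is returned, its value should already be
         * in (or on its way into) the cache. */
        /* ... process entry ... */
    }
    hashtableResetIterator(&iter); /* cleanup call assumed for this sketch */
}
```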
`perf record` on the `save` command **without** value prefetching:
```
--99.98%--rdbSaveDb
          |
          |--91.38%--rdbSaveKeyValuePair
          |          |
          |          |--42.72%--rdbSaveRawString
          |          |          |
          |          |          |--26.69%--rdbWriteRaw
          |          |          |          |
          |          |          |           --25.75%--rioFileWrite.lto_priv.0
          |          |          |
          |          |           --15.41%--rdbSaveLen
          |          |                     |
          |          |                     |--7.58%--rdbWriteRaw
          |          |                     |         |
          |          |                     |          --7.08%--rioFileWrite.lto_priv.0
          |          |                     |                   |
          |          |                     |                    --6.54%--_IO_fwrite
          |          |                     |
          |          |                      --7.42%--rdbWriteRaw.constprop.1
          |          |                               |
          |          |                                --7.18%--rioFileWrite.lto_priv.0
          |          |                                         |
          |          |                                          --6.73%--_IO_fwrite
          |          |
          |          |--40.44%--rdbSaveStringObject
          |          |
          |           --7.62%--rdbSaveObjectType
          |                    |
          |                     --7.39%--rdbWriteRaw.constprop.1
          |                              |
          |                               --7.04%--rioFileWrite.lto_priv.0
          |                                        |
          |                                         --6.59%--_IO_fwrite
          |
           --7.33%--hashtableNext.constprop.1
                    |
                     --6.28%--prefetchNextBucketEntries.lto_priv.0
```

`perf record` on the `save` command **with** value prefetching:
```
rdbSaveRio
|
 --99.93%--rdbSaveDb
           |
           |--79.81%--rdbSaveKeyValuePair
           |          |
           |          |--66.79%--rdbSaveRawString
           |          |          |
           |          |          |--42.31%--rdbWriteRaw
           |          |          |          |
           |          |          |           --40.74%--rioFileWrite.lto_priv.0
           |          |          |
           |          |           --23.37%--rdbSaveLen
           |          |                     |
           |          |                     |--11.78%--rdbWriteRaw
           |          |                     |          |
           |          |                     |           --11.03%--rioFileWrite.lto_priv.0
           |          |                     |                     |
           |          |                     |                      --10.30%--_IO_fwrite
           |          |                     |
           |          |                      --10.98%--rdbWriteRaw.constprop.1
           |          |                                |
           |          |                                 --10.44%--rioFileWrite.lto_priv.0
           |          |                                           |
           |          |                                            --9.74%--_IO_fwrite
           |          |
           |          |--11.33%--rdbSaveObjectType
           |          |          |
           |          |           --10.96%--rdbWriteRaw.constprop.1
           |          |                     |
           |          |                      --10.51%--rioFileWrite.lto_priv.0
           |          |                                |
           |          |                                 --9.75%--_IO_fwrite
           |          |
           |           --0.77%--rdbSaveStringObject
           |
            --18.39%--hashtableNext
                      |
                      |--10.04%--hashtableObjectPrefetchValue
                      |
                       --6.06%--prefetchNextBucketEntries
```

### Conclusions
The prefetching strategy appears to be working as intended, shifting the performance bottleneck from data access to I/O operations. The significant reduction in `rdbSaveStringObject` time suggests that string objects (which are the values) are being accessed more efficiently.

Signed-off-by: NadavGigi <nadavgigi102@gmail.com>
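For the `hashtableObjectPrefetchValue` frames visible in the second profile, a callback of this kind could look roughly like the sketch below. It is illustrative only: the embedded-value check and the prefetch intrinsic are assumptions made for the sketch, not the `server.c` implementation.

```c
/* Illustrative sketch of a type-specific prefetch callback for robj
 * entries; the real hashtableObjectPrefetchValue lives in server.c. */
#include "server.h" /* robj, OBJ_ENCODING_EMBSTR */

static void objectPrefetchValueSketch(const void *entry) {
    const robj *o = entry;
    /* Embedded string values share the robj allocation, so prefetching the
     * robj itself already covers them; only non-embedded values are touched.
     * The encoding check and __builtin_prefetch are assumptions here. */
    if (o->encoding != OBJ_ENCODING_EMBSTR && o->ptr != NULL) {
        __builtin_prefetch(o->ptr);
    }
}
```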
`kvstore.h` (90 lines, 4.6 KiB, C):
```c
#ifndef KVSTORE_H
#define KVSTORE_H

#include "hashtable.h"
#include "adlist.h"

typedef struct _kvstore kvstore;
typedef struct _kvstoreIterator kvstoreIterator;
typedef struct _kvstoreHashtableIterator kvstoreHashtableIterator;

typedef int(kvstoreScanShouldSkipHashtable)(hashtable *d);
typedef int(kvstoreExpandShouldSkipHashtableIndex)(int didx);

#define KVSTORE_ALLOCATE_HASHTABLES_ON_DEMAND (1 << 0)
#define KVSTORE_FREE_EMPTY_HASHTABLES (1 << 1)
kvstore *kvstoreCreate(hashtableType *type, int num_hashtables_bits, int flags);
void kvstoreEmpty(kvstore *kvs, void(callback)(hashtable *));
void kvstoreRelease(kvstore *kvs);
unsigned long long kvstoreSize(kvstore *kvs);
unsigned long kvstoreBuckets(kvstore *kvs);
size_t kvstoreMemUsage(kvstore *kvs);
unsigned long long kvstoreScan(kvstore *kvs,
                               unsigned long long cursor,
                               int onlydidx,
                               hashtableScanFunction scan_cb,
                               kvstoreScanShouldSkipHashtable *skip_cb,
                               void *privdata);
int kvstoreExpand(kvstore *kvs, uint64_t newsize, int try_expand, kvstoreExpandShouldSkipHashtableIndex *skip_cb);
int kvstoreGetFairRandomHashtableIndex(kvstore *kvs);
void kvstoreGetStats(kvstore *kvs, char *buf, size_t bufsize, int full);

int kvstoreFindHashtableIndexByKeyIndex(kvstore *kvs, unsigned long target);
int kvstoreGetFirstNonEmptyHashtableIndex(kvstore *kvs);
int kvstoreGetNextNonEmptyHashtableIndex(kvstore *kvs, int didx);
int kvstoreNumNonEmptyHashtables(kvstore *kvs);
int kvstoreNumAllocatedHashtables(kvstore *kvs);
int kvstoreNumHashtables(kvstore *kvs);
uint64_t kvstoreGetHash(kvstore *kvs, const void *key);

void kvstoreHashtableRehashingStarted(hashtable *d);
void kvstoreHashtableRehashingCompleted(hashtable *d);
void kvstoreHashtableTrackMemUsage(hashtable *s, ssize_t delta);
size_t kvstoreHashtableMetadataSize(void);

/* kvstore iterator specific functions */
kvstoreIterator *kvstoreIteratorInit(kvstore *kvs, uint8_t flags);
void kvstoreIteratorRelease(kvstoreIterator *kvs_it);
int kvstoreIteratorGetCurrentHashtableIndex(kvstoreIterator *kvs_it);
int kvstoreIteratorNext(kvstoreIterator *kvs_it, void **next);

/* Rehashing */
void kvstoreTryResizeHashtables(kvstore *kvs, int limit);
uint64_t kvstoreIncrementallyRehash(kvstore *kvs, uint64_t threshold_us);
size_t kvstoreOverheadHashtableLut(kvstore *kvs);
size_t kvstoreOverheadHashtableRehashing(kvstore *kvs);
unsigned long kvstoreHashtableRehashingCount(kvstore *kvs);

/* Specific hashtable access by hashtable-index */
unsigned long kvstoreHashtableSize(kvstore *kvs, int didx);
kvstoreHashtableIterator *kvstoreGetHashtableIterator(kvstore *kvs, int didx, uint8_t flags);
void kvstoreReleaseHashtableIterator(kvstoreHashtableIterator *kvs_id);
int kvstoreHashtableIteratorNext(kvstoreHashtableIterator *kvs_di, void **next);
int kvstoreHashtableRandomEntry(kvstore *kvs, int didx, void **found);
int kvstoreHashtableFairRandomEntry(kvstore *kvs, int didx, void **found);
unsigned int kvstoreHashtableSampleEntries(kvstore *kvs, int didx, void **dst, unsigned int count);
int kvstoreHashtableExpand(kvstore *kvs, int didx, unsigned long size);
unsigned long kvstoreHashtableScanDefrag(kvstore *kvs,
                                         int didx,
                                         unsigned long v,
                                         hashtableScanFunction fn,
                                         void *privdata,
                                         void *(*defragfn)(void *),
                                         int flags);
unsigned long kvstoreHashtableDefragTables(kvstore *kvs, unsigned long cursor, void *(*defragfn)(void *));
int kvstoreHashtableFind(kvstore *kvs, int didx, void *key, void **found);
void **kvstoreHashtableFindRef(kvstore *kvs, int didx, const void *key);
int kvstoreHashtableAddOrFind(kvstore *kvs, int didx, void *key, void **existing);
int kvstoreHashtableAdd(kvstore *kvs, int didx, void *entry);

int kvstoreHashtableFindPositionForInsert(kvstore *kvs, int didx, void *key, hashtablePosition *position, void **existing);
void kvstoreHashtableInsertAtPosition(kvstore *kvs, int didx, void *entry, void *position);

void **kvstoreHashtableTwoPhasePopFindRef(kvstore *kvs, int didx, const void *key, void *position);
void kvstoreHashtableTwoPhasePopDelete(kvstore *kvs, int didx, void *position);
int kvstoreHashtablePop(kvstore *kvs, int didx, const void *key, void **popped);
int kvstoreHashtableDelete(kvstore *kvs, int didx, const void *key);
hashtable *kvstoreGetHashtable(kvstore *kvs, int didx);

#endif /* KVSTORE_H */
```
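As a usage illustration for the declarations above, the kvstore-level iterator could be driven with prefetching enabled as in the sketch below. Only the flag name comes from this PR; `kvstoreForEachEntry` is a hypothetical helper, and everything it calls is declared in `kvstore.h` above.

```c
#include "kvstore.h" /* also pulls in hashtable.h, where HASHTABLE_ITER_PREFETCH_VALUES lives */

/* Sketch: walk every entry across all hashtables of a kvstore, asking the
 * underlying hashtable iterators to prefetch values bucket by bucket. */
static void kvstoreForEachEntry(kvstore *kvs, void (*fn)(void *entry)) {
    kvstoreIterator *it = kvstoreIteratorInit(kvs, HASHTABLE_ITER_PREFETCH_VALUES);
    void *entry;
    while (kvstoreIteratorNext(it, &entry)) {
        fn(entry);
    }
    kvstoreIteratorRelease(it);
}
```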