Embed key into dict entry (#541)

This PR incorporates changes related to key embedding described in the
https://github.com/redis/redis/issues/12216
With this change the `key` pointer is removed and the key is embedded
within the `dictEntry`. 1 byte is used for additional bookkeeping.
Overall the saving would be 7 bytes on average.

Key changes:

New dict entry type introduced, which is now used as an entry for the
main dictionary:

```c
typedef struct {
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v;
    struct dictEntry *next;  /* Next entry in the same hash bucket. */
    uint8_t key_header_size; /* offset into key_buf where the key is located at. */
    unsigned char key_buf[]; /* buffer with embedded key. */
} embeddedDictEntry;
```

One new function has been added to the dictType:

```c
size_t (*embedKey)(unsigned char *buf, size_t buf_len, const void *key, unsigned char *header_size);
```


The change is opt-in per dict type, hence sets, hashes and other types that
use the dictionary are not impacted.
With this change the main dictionary now owns the key data, so the copy on
insert in dbAdd is no longer needed.

### Benchmarking results

TLDR; Around 9-10% reduction in overall memory usage for a
scenario with keys of 16 bytes and values of 8 bytes and 16 bytes. The
throughput per second varies but is similar or greater in most of the
runs with the changes compared against unstable (ae2d421).

---------

Signed-off-by: Harkrishn Patro <harkrisp@amazon.com>
Signed-off-by: Madelyn Olson <madelyneolson@gmail.com>
Co-authored-by: Madelyn Olson <madelyneolson@gmail.com>
This commit is contained in:
Harkrishn Patro 2024-07-02 15:45:37 -07:00 committed by GitHub
parent 1ea49e5845
commit 8faf2788a2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 240 additions and 86 deletions

View File

@ -190,7 +190,11 @@ robj *lookupKeyWriteOrReply(client *c, robj *key, robj *reply) {
return o;
}
/* Add the key to the DB. It's up to the caller to increment the reference
/* Add the key to the DB.
*
* The kvstore stores its own copy of `key`; the caller remains responsible for freeing the `key` it passed in.
*
* It's up to the caller to increment the reference
* counter of the value if needed.
*
* If the update_if_existing argument is false, the program is aborted
@ -204,7 +208,6 @@ static void dbAddInternal(serverDb *db, robj *key, robj *val, int update_if_exis
return;
}
serverAssertWithInfo(NULL, key, de != NULL);
kvstoreDictSetKey(db->keys, slot, de, sdsdup(key->ptr));
initObjectLRUOrLFU(val);
kvstoreDictSetVal(db->keys, slot, de, val);
signalKeyAsReady(db, key, val->type);
@ -240,15 +243,16 @@ int getKeySlot(sds key) {
/* This is a special version of dbAdd() that is used only when loading
* keys from the RDB file: the key is passed as an SDS string that is
* retained by the function (and not freed by the caller).
* copied by the function and freed by the caller.
*
* Moreover this function will not abort if the key is already busy, to
* give more control to the caller, nor will signal the key as ready
* since it is not useful in this context.
*
* The function returns 1 if the key was added to the database, taking
* ownership of the SDS string, otherwise 0 is returned, and is up to the
* caller to free the SDS string. */
* The function returns 1 if the key was added to the database, otherwise 0 is returned.
*
* The kvstore stores its own copy of `key`; the caller remains responsible for freeing the `key` it passed in.
*/
int dbAddRDBLoad(serverDb *db, sds key, robj *val) {
int slot = getKeySlot(key);
dictEntry *de = kvstoreDictAddRaw(db->keys, slot, key, NULL);

View File

@ -864,7 +864,7 @@ void debugCommand(client *c) {
sds sizes = sdsempty();
sizes = sdscatprintf(sizes, "bits:%d ", (sizeof(void *) == 8) ? 64 : 32);
sizes = sdscatprintf(sizes, "robj:%d ", (int)sizeof(robj));
sizes = sdscatprintf(sizes, "dictentry:%d ", (int)dictEntryMemUsage());
sizes = sdscatprintf(sizes, "dictentry:%d ", (int)dictEntryMemUsage(NULL));
sizes = sdscatprintf(sizes, "sdshdr5:%d ", (int)sizeof(struct sdshdr5));
sizes = sdscatprintf(sizes, "sdshdr8:%d ", (int)sizeof(struct sdshdr8));
sizes = sdscatprintf(sizes, "sdshdr16:%d ", (int)sizeof(struct sdshdr16));

View File

@ -41,6 +41,7 @@
/* Context handed to the defrag scan and to the dict entry defrag callbacks. */
typedef struct defragCtx {
void *privdata; /* The entry defrag callbacks read this as the serverDb being processed. */
int slot; /* Dict index passed to the kvstoreDict* calls made by the callbacks. */
void *aux; /* Scratch pointer; the entry callbacks stash the matching db->expires dictEntry here. */
} defragCtx;
typedef struct defragPubSubCtx {
@ -75,6 +76,36 @@ void *activeDefragAlloc(void *ptr) {
return newptr;
}
/* This method captures the expiry db dict entry which refers to data stored in keys db dict entry. */
void defragEntryStartCbForKeys(void *ctx, void *oldptr) {
defragCtx *defragctx = (defragCtx *)ctx;
serverDb *db = defragctx->privdata;
sds oldsds = (sds)dictGetKey((dictEntry *)oldptr);
int slot = defragctx->slot;
if (kvstoreDictSize(db->expires, slot)) {
dictEntry *expire_de = kvstoreDictFind(db->expires, slot, oldsds);
defragctx->aux = expire_de;
}
}
/* This method updates the key of expiry db dict entry. The key might be no longer valid
* as it could have been cleaned up during the defrag-realloc of the main dictionary. */
void defragEntryFinishCbForKeys(void *ctx, void *newptr) {
defragCtx *defragctx = (defragCtx *)ctx;
dictEntry *expire_de = (dictEntry *)defragctx->aux;
/* Item doesn't have TTL associated to it. */
if (!expire_de) return;
/* No reallocation happened. */
if (!newptr) {
expire_de = NULL;
return;
}
serverDb *db = defragctx->privdata;
sds newsds = (sds)dictGetKey((dictEntry *)newptr);
int slot = defragctx->slot;
kvstoreDictSetKey(db->expires, slot, expire_de, newsds);
}
/*Defrag helper for sds strings
*
* returns NULL in case the allocation wasn't moved.
@ -650,25 +681,10 @@ void defragModule(serverDb *db, dictEntry *kde) {
/* for each key we scan in the main dict, this function will attempt to defrag
* all the various pointers it has. */
void defragKey(defragCtx *ctx, dictEntry *de) {
sds keysds = dictGetKey(de);
robj *newob, *ob;
unsigned char *newzl;
sds newsds;
serverDb *db = ctx->privdata;
int slot = ctx->slot;
/* Try to defrag the key name. */
newsds = activeDefragSds(keysds);
if (newsds) {
kvstoreDictSetKey(db->keys, slot, de, newsds);
if (kvstoreDictSize(db->expires, slot)) {
/* We can't search in db->expires for that key after we've released
* the pointer it holds, since it won't be able to do the string
* compare, but we can find the entry using key hash and pointer. */
uint64_t hash = kvstoreGetHash(db->expires, newsds);
dictEntry *expire_de = kvstoreDictFindEntryByPtrAndHash(db->expires, slot, keysds, hash);
if (expire_de) kvstoreDictSetKey(db->expires, slot, expire_de, newsds);
}
}
robj *newob, *ob;
unsigned char *newzl;
/* Try to defrag robj and / or string value. */
ob = dictGetVal(de);
@ -984,7 +1000,9 @@ void activeDefragCycle(void) {
endtime = start + timelimit;
latencyStartMonitor(latency);
dictDefragFunctions defragfns = {.defragAlloc = activeDefragAlloc};
dictDefragFunctions defragfns = {.defragAlloc = activeDefragAlloc,
.defragEntryStartCb = defragEntryStartCbForKeys,
.defragEntryFinishCb = defragEntryFinishCbForKeys};
do {
/* if we're not continuing a scan from the last call or loop, start a new one */
if (!defrag_stage && !defrag_cursor && (slot < 0)) {

View File

@ -35,6 +35,7 @@
#include "fmacros.h"
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
@ -48,6 +49,10 @@
#include "serverassert.h"
#include "monotonic.h"
#ifndef static_assert
#define static_assert(expr, lit) _Static_assert(expr, lit)
#endif
#define UNUSED(V) ((void)V)
/* Using dictSetResizeEnabled() we make possible to disable
@ -76,6 +81,33 @@ struct dictEntry {
struct dictEntry *next; /* Next entry in the same hash bucket. */
};
/* Dict entry variant that carries the key bytes inside the entry allocation
 * itself instead of referencing them through a separate key pointer. */
typedef struct {
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v;
    struct dictEntry *next;  /* Following entry chained in the same hash bucket. */
    uint8_t key_header_size; /* Index into key_buf at which the key itself begins. */
    unsigned char key_buf[]; /* Flexible buffer holding the embedded key. */
} embeddedDictEntry;

/* Compile-time checks of the `embeddedDictEntry` layout: the value union comes
 * first, then the `next` pointer, then the one byte `key_header_size`, and
 * `key_buf` starts right behind it without any padding. */
static_assert(offsetof(embeddedDictEntry, v) == 0, "unexpected field offset");
static_assert(offsetof(embeddedDictEntry, next) == sizeof(double), "unexpected field offset");
static_assert(offsetof(embeddedDictEntry, key_header_size) == sizeof(double) + sizeof(void *),
              "unexpected field offset");
static_assert(offsetof(embeddedDictEntry, key_buf) == sizeof(double) + sizeof(void *) + sizeof(uint8_t),
              "unexpected field offset");

/* Number of bytes an embedded dict entry occupies before its key buffer, i.e.
 * the smallest possible size of such an entry. */
static inline size_t compactSizeEmbeddedDictEntry(void) {
    const size_t fixed_part = offsetof(embeddedDictEntry, key_buf);
    return fixed_part;
}
typedef struct {
void *key;
dictEntry *next;
@ -91,6 +123,19 @@ static dictEntry *dictGetNext(const dictEntry *de);
static dictEntry **dictGetNextRef(dictEntry *de);
static void dictSetNext(dictEntry *de, dictEntry *next);
/* -------------------------- Utility functions -------------------------------- */
/* Sanity-checks the combination of members set on a dict type: a type using
 * embedded entries must supply `embedKey` and must not set `keyDup` or
 * `keyDestructor`; any other type must not supply `embedKey`. */
static inline void validateDictType(dictType *type) {
    if (!type->embedded_entry) {
        assert(!type->embedKey);
        return;
    }
    assert(type->embedKey);
    assert(!type->keyDup);
    assert(!type->keyDestructor);
}
/* -------------------------- hash functions -------------------------------- */
static uint8_t dict_hash_function_seed[16];
@ -126,6 +171,8 @@ uint64_t dictGenCaseHashFunction(const unsigned char *buf, size_t len) {
#define ENTRY_PTR_MASK 7 /* 111 */
#define ENTRY_PTR_NORMAL 0 /* 000 */
#define ENTRY_PTR_NO_VALUE 2 /* 010 */
#define ENTRY_PTR_EMBEDDED 4 /* 100 */
/* ENTRY_PTR_IS_KEY xx1 */
/* Returns 1 if the entry pointer is a pointer to a key, rather than to an
* allocated entry. Returns 0 otherwise. */
@ -145,12 +192,9 @@ static inline int entryIsNoValue(const dictEntry *de) {
return ((uintptr_t)(void *)de & ENTRY_PTR_MASK) == ENTRY_PTR_NO_VALUE;
}
/* Creates an entry without a value field. */
static inline dictEntry *createEntryNoValue(void *key, dictEntry *next) {
dictEntryNoValue *entry = zmalloc(sizeof(*entry));
entry->key = key;
entry->next = next;
return (dictEntry *)(void *)((uintptr_t)(void *)entry | ENTRY_PTR_NO_VALUE);
static inline int entryIsEmbedded(const dictEntry *de) {
return ((uintptr_t)(void *)de & ENTRY_PTR_MASK) == ENTRY_PTR_EMBEDDED;
}
static inline dictEntry *encodeMaskedPtr(const void *ptr, unsigned int bits) {
@ -163,15 +207,40 @@ static inline void *decodeMaskedPtr(const dictEntry *de) {
return (void *)((uintptr_t)(void *)de & ~ENTRY_PTR_MASK);
}
/* Allocates an entry that only has a key and a next pointer (no value field)
 * and returns it as a dictEntry pointer tagged with ENTRY_PTR_NO_VALUE. */
static inline dictEntry *createEntryNoValue(void *key, dictEntry *next) {
    dictEntryNoValue *novalue = zmalloc(sizeof(dictEntryNoValue));
    novalue->next = next;
    novalue->key = key;
    return encodeMaskedPtr(novalue, ENTRY_PTR_NO_VALUE);
}
/* Allocates an entry with `key` embedded in it, links it in front of `next`
 * and returns it as a dictEntry pointer tagged with ENTRY_PTR_EMBEDDED.
 * The dict type's `embedKey` callback is called twice: once without a buffer
 * to obtain the number of bytes to reserve, and once to write the key into
 * the entry's key buffer and report the key header size. */
static inline dictEntry *createEmbeddedEntry(void *key, dictEntry *next, dictType *dt) {
    size_t embed_len = dt->embedKey(NULL, 0, key, NULL);
    size_t alloc_size = compactSizeEmbeddedDictEntry() + embed_len;
    embeddedDictEntry *embedded = zmalloc(alloc_size);
    embedded->next = next;
    dt->embedKey(embedded->key_buf, embed_len, key, &embedded->key_header_size);
    return encodeMaskedPtr(embedded, ENTRY_PTR_EMBEDDED);
}
/* Returns the address of the key stored inside an embedded entry: the key
 * starts `key_header_size` bytes into the entry's key buffer. */
static inline void *getEmbeddedKey(const dictEntry *de) {
    embeddedDictEntry *embedded = decodeMaskedPtr(de);
    return embedded->key_buf + embedded->key_header_size;
}
/* Strips the tag bits from `de` and returns the underlying entry without
 * value. Only valid when the entry is known to be of that kind; check with
 * entryIsNoValue first. */
static inline dictEntryNoValue *decodeEntryNoValue(const dictEntry *de) {
    dictEntryNoValue *novalue = decodeMaskedPtr(de);
    return novalue;
}
/* Strips the tag bits from `de` and returns the underlying embedded entry.
 * Only valid when the entry is known to be embedded; check with
 * entryIsEmbedded first. */
static inline embeddedDictEntry *decodeEmbeddedEntry(const dictEntry *de) {
    embeddedDictEntry *embedded = decodeMaskedPtr(de);
    return embedded;
}
/* Returns 1 if the entry has a value field and 0 otherwise. */
static inline int entryHasValue(const dictEntry *de) {
return entryIsNormal(de);
return entryIsNormal(de) || entryIsEmbedded(de);
}
/* ----------------------------- API implementation ------------------------- */
@ -185,6 +254,7 @@ static void _dictReset(dict *d, int htidx) {
/* Create a new hash table */
dict *dictCreate(dictType *type) {
validateDictType(type);
size_t metasize = type->dictMetadataBytes ? type->dictMetadataBytes(NULL) : 0;
dict *d = zmalloc(sizeof(*d) + metasize);
if (metasize > 0) {
@ -473,6 +543,10 @@ int dictAdd(dict *d, void *key, void *val) {
* with the existing entry if existing is not NULL.
*
* If key was added, the hash entry is returned to be manipulated by the caller.
*
* The dict handles `key` based on `dictType` during initialization:
* - If `dictType.embedded_entry` is 1, it clones the `key`.
* - Otherwise, it assumes ownership of the `key`.
*/
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing) {
/* Get the position for the new key or NULL if the key already exists. */
@ -511,6 +585,8 @@ dictEntry *dictInsertAtPosition(dict *d, void *key, void *position) {
/* Allocate an entry without value. */
entry = createEntryNoValue(key, *bucket);
}
} else if (d->type->embedded_entry) {
entry = createEmbeddedEntry(key, *bucket, d->type);
} else {
/* Allocate the memory and store the new entry.
* Insert the element in top, with the assumption that in a database
@ -658,6 +734,7 @@ void dictFreeUnlinkedEntry(dict *d, dictEntry *he) {
if (he == NULL) return;
dictFreeKey(d, he);
dictFreeVal(d, he);
/* Clear the dictEntry */
if (!entryIsKey(he)) zfree(decodeMaskedPtr(he));
}
@ -804,7 +881,11 @@ void dictSetKey(dict *d, dictEntry *de, void *key) {
void dictSetVal(dict *d, dictEntry *de, void *val) {
UNUSED(d);
assert(entryHasValue(de));
de->v.val = val;
if (entryIsEmbedded(de)) {
decodeEmbeddedEntry(de)->v.val = val;
} else {
de->v.val = val;
}
}
void dictSetSignedIntegerVal(dictEntry *de, int64_t val) {
@ -840,11 +921,15 @@ double dictIncrDoubleVal(dictEntry *de, double val) {
/* Returns the key of an entry, whatever its representation: the pointer
 * itself when the entry is a bare key, the stored key pointer for entries
 * without value and normal entries, or the embedded key bytes. */
void *dictGetKey(const dictEntry *de) {
    void *key;
    if (entryIsKey(de)) {
        key = (void *)de;
    } else if (entryIsNoValue(de)) {
        key = decodeEntryNoValue(de)->key;
    } else if (entryIsEmbedded(de)) {
        key = getEmbeddedKey(de);
    } else {
        key = de->key;
    }
    return key;
}
/* Returns the value pointer of an entry. The entry must be of a kind that has
 * a value field (asserted). */
void *dictGetVal(const dictEntry *de) {
    assert(entryHasValue(de));
    return entryIsEmbedded(de) ? decodeEmbeddedEntry(de)->v.val : de->v.val;
}
@ -874,6 +959,7 @@ double *dictGetDoubleValPtr(dictEntry *de) {
/* Returns the entry following `de` in its bucket chain, or NULL when `de` is
 * a bare key, which has no next field. */
static dictEntry *dictGetNext(const dictEntry *de) {
    dictEntry *following;
    if (entryIsKey(de)) {
        following = NULL; /* there's no next */
    } else if (entryIsNoValue(de)) {
        following = decodeEntryNoValue(de)->next;
    } else if (entryIsEmbedded(de)) {
        following = decodeEmbeddedEntry(de)->next;
    } else {
        following = de->next;
    }
    return following;
}
@ -882,14 +968,16 @@ static dictEntry *dictGetNext(const dictEntry *de) {
/* Returns the address of the 'next' field of `de`, or NULL when `de` is a
 * bare key, which has no such field. */
static dictEntry **dictGetNextRef(dictEntry *de) {
    dictEntry **ref;
    if (entryIsKey(de)) {
        ref = NULL;
    } else if (entryIsNoValue(de)) {
        ref = &decodeEntryNoValue(de)->next;
    } else if (entryIsEmbedded(de)) {
        ref = &decodeEmbeddedEntry(de)->next;
    } else {
        ref = &de->next;
    }
    return ref;
}
static void dictSetNext(dictEntry *de, dictEntry *next) {
assert(!entryIsKey(de));
if (entryIsNoValue(de)) {
dictEntryNoValue *entry = decodeEntryNoValue(de);
entry->next = next;
decodeEntryNoValue(de)->next = next;
} else if (entryIsEmbedded(de)) {
decodeEmbeddedEntry(de)->next = next;
} else {
de->next = next;
}
@ -901,8 +989,20 @@ size_t dictMemUsage(const dict *d) {
return dictSize(d) * sizeof(dictEntry) + dictBuckets(d) * sizeof(dictEntry *);
}
size_t dictEntryMemUsage(void) {
return sizeof(dictEntry);
/* Returns the memory usage in bytes of a dict entry, based on its type.
 *
 * - `de` is NULL or a normal entry: the size of a regular dictEntry.
 * - `de` is a bare key (no entry allocated): 0.
 * - `de` is an entry without value: the size of dictEntryNoValue.
 * - `de` is an embedded entry: the allocation size of the entry, which
 *   includes the embedded key buffer.
 *
 * Any other pointer tag is a programming error and trips an assertion. */
size_t dictEntryMemUsage(dictEntry *de) {
    if (de == NULL || entryIsNormal(de)) return sizeof(dictEntry);
    if (entryIsKey(de)) return 0;
    if (entryIsNoValue(de)) return sizeof(dictEntryNoValue);
    if (entryIsEmbedded(de)) return zmalloc_size(decodeEmbeddedEntry(de));

    /* A bare string literal is a non-null pointer and would make the assertion
     * always pass; `0 && "..."` fails while keeping the message in the text. */
    assert(0 && "Entry type not supported");
    return 0;
}
/* A fingerprint is a 64 bit number that represents the state of the dictionary
@ -1172,7 +1272,7 @@ end:
/* Reallocate the dictEntry, key and value allocations in a bucket using the
* provided allocation functions in order to defrag them. */
static void dictDefragBucket(dictEntry **bucketref, dictDefragFunctions *defragfns) {
static void dictDefragBucket(dictEntry **bucketref, dictDefragFunctions *defragfns, void *privdata) {
dictDefragAllocFunction *defragalloc = defragfns->defragAlloc;
dictDefragAllocFunction *defragkey = defragfns->defragKey;
dictDefragAllocFunction *defragval = defragfns->defragVal;
@ -1190,6 +1290,17 @@ static void dictDefragBucket(dictEntry **bucketref, dictDefragFunctions *defragf
entry = newentry;
}
if (newkey) entry->key = newkey;
} else if (entryIsEmbedded(de)) {
defragfns->defragEntryStartCb(privdata, de);
embeddedDictEntry *entry = decodeEmbeddedEntry(de), *newentry;
if ((newentry = defragalloc(entry))) {
newde = encodeMaskedPtr(newentry, ENTRY_PTR_EMBEDDED);
entry = newentry;
defragfns->defragEntryFinishCb(privdata, newde);
} else {
defragfns->defragEntryFinishCb(privdata, NULL);
}
if (newval) entry->v.val = newval;
} else {
assert(entryIsNormal(de));
newde = defragalloc(de);
@ -1353,7 +1464,7 @@ dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctio
/* Emit entries at cursor */
if (defragfns) {
dictDefragBucket(&d->ht_table[htidx0][v & m0], defragfns);
dictDefragBucket(&d->ht_table[htidx0][v & m0], defragfns, privdata);
}
de = d->ht_table[htidx0][v & m0];
while (de) {
@ -1386,7 +1497,7 @@ dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctio
/* Emit entries at cursor */
if (defragfns) {
dictDefragBucket(&d->ht_table[htidx0][v & m0], defragfns);
dictDefragBucket(&d->ht_table[htidx0][v & m0], defragfns, privdata);
}
de = d->ht_table[htidx0][v & m0];
while (de) {
@ -1400,7 +1511,7 @@ dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctio
do {
/* Emit entries at cursor */
if (defragfns) {
dictDefragBucket(&d->ht_table[htidx1][v & m1], defragfns);
dictDefragBucket(&d->ht_table[htidx1][v & m1], defragfns, privdata);
}
de = d->ht_table[htidx1][v & m1];
while (de) {
@ -1573,29 +1684,6 @@ uint64_t dictGetHash(dict *d, const void *key) {
return dictHashKey(d, key);
}
/* Finds the dictEntry using pointer and pre-calculated hash.
* oldkey is a dead pointer and should not be accessed.
* the hash value should be provided using dictGetHash.
* no string / key comparison is performed.
* return value is a pointer to the dictEntry if found, or NULL if not found. */
dictEntry *dictFindEntryByPtrAndHash(dict *d, const void *oldptr, uint64_t hash) {
dictEntry *he;
unsigned long idx, table;
if (dictSize(d) == 0) return NULL; /* dict is empty */
for (table = 0; table <= 1; table++) {
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
if (table == 0 && (long)idx < d->rehashidx) continue;
he = d->ht_table[table][idx];
while (he) {
if (oldptr == dictGetKey(he)) return he;
he = dictGetNext(he);
}
if (!dictIsRehashing(d)) return NULL;
}
return NULL;
}
/* Provides the old and new ht size for a given dictionary during rehashing. This method
* should only be invoked during initialization/rehashing. */
void dictRehashingInfo(dict *d, unsigned long long *from_size, unsigned long long *to_size) {

View File

@ -66,6 +66,10 @@ typedef struct dictType {
/* Allow a dict to carry extra caller-defined metadata. The
* extra memory is initialized to 0 when a dict is allocated. */
size_t (*dictMetadataBytes)(dict *d);
/* Method for copying a given key into a buffer of buf_len. Also used for
* computing the length of the key + header when buf is NULL. */
size_t (*embedKey)(unsigned char *buf, size_t buf_len, const void *key, unsigned char *header_size);
/* Data */
void *userdata;
@ -80,8 +84,9 @@ typedef struct dictType {
* enables one more optimization: to store a key without an allocated
* dictEntry. */
unsigned int keys_are_odd : 1;
/* TODO: Add a 'keys_are_even' flag and use a similar optimization if that
* flag is set. */
/* If embedded_entry flag is set, it indicates that a copy of the key is created and the key is embedded
* as part of the dict entry. */
unsigned int embedded_entry : 1;
} dictType;
#define DICTHT_SIZE(exp) ((exp) == -1 ? 0 : (unsigned long)1 << (exp))
@ -127,10 +132,13 @@ typedef struct dictStats {
typedef void(dictScanFunction)(void *privdata, const dictEntry *de);
typedef void *(dictDefragAllocFunction)(void *ptr);
typedef void(dictDefragEntryCb)(void *privdata, void *ptr);
typedef struct {
dictDefragAllocFunction *defragAlloc; /* Used for entries etc. */
dictDefragAllocFunction *defragKey; /* Defrag-realloc keys (optional) */
dictDefragAllocFunction *defragVal; /* Defrag-realloc values (optional) */
dictDefragAllocFunction *defragAlloc; /* Used for entries etc. */
dictDefragAllocFunction *defragKey; /* Defrag-realloc keys (optional) */
dictDefragAllocFunction *defragVal; /* Defrag-realloc values (optional) */
dictDefragEntryCb *defragEntryStartCb; /* Callback invoked prior to the start of defrag of dictEntry. */
dictDefragEntryCb *defragEntryFinishCb; /* Callback invoked after the defrag of dictEntry is tried. */
} dictDefragFunctions;
/* This is the initial size of every hash table */
@ -212,7 +220,7 @@ uint64_t dictGetUnsignedIntegerVal(const dictEntry *de);
double dictGetDoubleVal(const dictEntry *de);
double *dictGetDoubleValPtr(dictEntry *de);
size_t dictMemUsage(const dict *d);
size_t dictEntryMemUsage(void);
size_t dictEntryMemUsage(dictEntry *de);
dictIterator *dictGetIterator(dict *d);
dictIterator *dictGetSafeIterator(dict *d);
void dictInitIterator(dictIterator *iter, dict *d);
@ -236,7 +244,6 @@ unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *pri
unsigned long
dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctions *defragfns, void *privdata);
uint64_t dictGetHash(dict *d, const void *key);
dictEntry *dictFindEntryByPtrAndHash(dict *d, const void *oldptr, uint64_t hash);
void dictRehashingInfo(dict *d, unsigned long long *from_size, unsigned long long *to_size);
size_t dictGetStatsMsg(char *buf, size_t bufsize, dictStats *stats, int full);

View File

@ -241,7 +241,12 @@ static size_t kvstoreDictMetadataSize(dict *d) {
/* Create an array of dictionaries
* num_dicts_bits is the log2 of the amount of dictionaries needed (e.g. 0 for 1 dict,
* 3 for 8 dicts, etc.) */
* 3 for 8 dicts, etc.)
*
* The kvstore handles `key` based on `dictType` during initialization:
* - If `dictType.embedded_entry` is 1, it clones the `key`.
* - Otherwise, it assumes ownership of the `key`.
*/
kvstore *kvstoreCreate(dictType *type, int num_dicts_bits, int flags) {
/* We can't support more than 2^16 dicts because we want to save 48 bits
* for the dict cursor, see kvstoreScan */
@ -340,7 +345,7 @@ size_t kvstoreMemUsage(kvstore *kvs) {
size_t mem = sizeof(*kvs);
unsigned long long keys_count = kvstoreSize(kvs);
mem += keys_count * dictEntryMemUsage() + kvstoreBuckets(kvs) * sizeof(dictEntry *) +
mem += keys_count * dictEntryMemUsage(NULL) + kvstoreBuckets(kvs) * sizeof(dictEntry *) +
kvs->allocated_dicts * (sizeof(dict) + kvstoreDictMetadataSize(NULL));
/* Values are dict* shared with kvs->dicts */
@ -717,12 +722,6 @@ dictEntry *kvstoreDictGetFairRandomKey(kvstore *kvs, int didx) {
return dictGetFairRandomKey(d);
}
dictEntry *kvstoreDictFindEntryByPtrAndHash(kvstore *kvs, int didx, const void *oldptr, uint64_t hash) {
dict *d = kvstoreGetDict(kvs, didx);
if (!d) return NULL;
return dictFindEntryByPtrAndHash(d, oldptr, hash);
}
unsigned int kvstoreDictGetSomeKeys(kvstore *kvs, int didx, dictEntry **des, unsigned int count) {
dict *d = kvstoreGetDict(kvs, didx);
if (!d) return 0;
@ -776,6 +775,17 @@ dictEntry *kvstoreDictFind(kvstore *kvs, int didx, void *key) {
return dictFind(d, key);
}
/*
* The kvstore handles `key` based on `dictType` during initialization:
* - If `dictType.embedded_entry` is 1, it clones the `key`.
* - Otherwise, it assumes ownership of the `key`.
* The caller must ensure the `key` is properly freed.
*
* kvstore current usage:
*
* 1. keyspace (db.keys) kvstore - creates a copy of the key.
* 2. expiry (db.expires), pubsub_channels and pubsubshard_channels kvstore - takes ownership of the key.
*/
dictEntry *kvstoreDictAddRaw(kvstore *kvs, int didx, void *key, dictEntry **existing) {
dict *d = createDictIfNeeded(kvs, didx);
dictEntry *ret = dictAddRaw(d, key, existing);

View File

@ -58,7 +58,6 @@ void kvstoreReleaseDictIterator(kvstoreDictIterator *kvs_id);
dictEntry *kvstoreDictIteratorNext(kvstoreDictIterator *kvs_di);
dictEntry *kvstoreDictGetRandomKey(kvstore *kvs, int didx);
dictEntry *kvstoreDictGetFairRandomKey(kvstore *kvs, int didx);
dictEntry *kvstoreDictFindEntryByPtrAndHash(kvstore *kvs, int didx, const void *oldptr, uint64_t hash);
unsigned int kvstoreDictGetSomeKeys(kvstore *kvs, int didx, dictEntry **des, unsigned int count);
int kvstoreDictExpand(kvstore *kvs, int didx, unsigned long size);
unsigned long kvstoreDictScanDefrag(kvstore *kvs,

View File

@ -1010,7 +1010,7 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
asize = sizeof(*o) + sizeof(dict) + (sizeof(struct dictEntry *) * dictBuckets(d));
while ((de = dictNext(di)) != NULL && samples < sample_size) {
ele = dictGetKey(de);
elesize += dictEntryMemUsage() + sdsZmallocSize(ele);
elesize += dictEntryMemUsage(de) + sdsZmallocSize(ele);
samples++;
}
dictReleaseIterator(di);
@ -1033,7 +1033,7 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
(sizeof(struct dictEntry *) * dictBuckets(d)) + zmalloc_size(zsl->header);
while (znode != NULL && samples < sample_size) {
elesize += sdsZmallocSize(znode->ele);
elesize += dictEntryMemUsage() + zmalloc_size(znode);
elesize += dictEntryMemUsage(NULL) + zmalloc_size(znode);
samples++;
znode = znode->level[0].forward;
}
@ -1052,7 +1052,7 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
ele = dictGetKey(de);
ele2 = dictGetVal(de);
elesize += sdsZmallocSize(ele) + sdsZmallocSize(ele2);
elesize += dictEntryMemUsage();
elesize += dictEntryMemUsage(de);
samples++;
}
dictReleaseIterator(di);
@ -1552,8 +1552,7 @@ NULL
return;
}
size_t usage = objectComputeSize(c->argv[2], dictGetVal(de), samples, c->db->id);
usage += sdsZmallocSize(dictGetKey(de));
usage += dictEntryMemUsage();
usage += dictEntryMemUsage(de);
addReplyLongLong(c, usage);
} else if (!strcasecmp(c->argv[1]->ptr, "stats") && c->argc == 2) {
struct serverMemOverhead *mh = getMemoryOverheadData();

View File

@ -3324,6 +3324,9 @@ int rdbLoadRioWithLoadingCtx(rio *rdb, int rdbflags, rdbSaveInfo *rsi, rdbLoadin
/* call key space notification on key loaded for modules only */
moduleNotifyKeyspaceEvent(NOTIFY_LOADED, "loaded", &keyobj, db->id);
/* Release key (sds), dictEntry stores a copy of it in embedded data */
sdsfree(key);
}
/* Loading the database more slowly is useful in order to test

View File

@ -192,6 +192,25 @@ sds sdsdup(const sds s) {
return sdsnewlen(s, sdslen(s));
}
/* Returns the smallest number of bytes able to hold the sds `s`: its header,
 * its string data and the terminating NULL byte. */
static inline size_t sdsminlen(sds s) {
    size_t header_bytes = sdsHdrSize(s[-1]);
    size_t payload_bytes = sdslen(s);
    return payload_bytes + header_bytes + 1;
}
/* Copies the sds `s` (header, data and NULL terminator) into the character
 * buffer `buf` of `buf_len` bytes and stores the size of the sds header in
 * `*hdr_size`. When `buf` is NULL nothing is copied and `hdr_size` is not
 * touched. In both cases the number of bytes required for the copy is
 * returned. `buf_len` must be at least that many bytes (asserted). */
size_t sdscopytobuffer(unsigned char *buf, size_t buf_len, sds s, uint8_t *hdr_size) {
    size_t total = sdsminlen(s);
    if (buf != NULL) {
        assert(buf_len >= total);
        memcpy(buf, sdsAllocPtr(s), total);
        *hdr_size = sdsHdrSize(s[-1]);
    }
    return total;
}
/* Free an sds string. No operation is performed if 's' is NULL. */
void sdsfree(sds s) {
if (s == NULL) return;

View File

@ -182,6 +182,7 @@ sds sdstrynewlen(const void *init, size_t initlen);
sds sdsnew(const char *init);
sds sdsempty(void);
sds sdsdup(const sds s);
size_t sdscopytobuffer(unsigned char *buf, size_t buf_len, sds s, uint8_t *hdr_size);
void sdsfree(sds s);
sds sdsgrowzero(sds s, size_t len);
sds sdscatlen(sds s, const void *t, size_t len);

View File

@ -289,6 +289,10 @@ int dictSdsKeyCompare(dict *d, const void *key1, const void *key2) {
return memcmp(key1, key2, l1) == 0;
}
size_t dictSdsEmbedKey(unsigned char *buf, size_t buf_len, const void *key, uint8_t *key_offset) {
return sdscopytobuffer(buf, buf_len, (sds)key, key_offset);
}
/* A case insensitive version used for the command lookup table and other
* places where case insensitive non binary-safe comparison is needed. */
int dictSdsKeyCaseCompare(dict *d, const void *key1, const void *key2) {
@ -468,9 +472,11 @@ dictType dbDictType = {
dictSdsHash, /* hash function */
NULL, /* key dup */
dictSdsKeyCompare, /* key compare */
dictSdsDestructor, /* key destructor */
NULL, /* key is embedded in the dictEntry and freed internally */
dictObjectDestructor, /* val destructor */
dictResizeAllowed, /* allow to resize */
.embedKey = dictSdsEmbedKey,
.embedded_entry = 1,
};
/* Db->expires */