/* Object implementation.
 *
 * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *   * Neither the name of Redis nor the names of its contributors may be used
 *     to endorse or promote products derived from this software without
 *     specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "server.h"
#include "functions.h"
#include "intset.h" /* Compact integer set structure */
#include <math.h>
#include <ctype.h>

#ifdef __CYGWIN__
#define strtold(a, b) ((long double)strtod((a), (b)))
#endif

/* ===================== Creation and parsing of objects ==================== */

robj *createObject(int type, void *ptr) {
    robj *o = zmalloc(sizeof(*o));
    o->type = type;
    o->encoding = OBJ_ENCODING_RAW;
    o->ptr = ptr;
    o->refcount = 1;
    o->lru = 0;
    return o;
}

void initObjectLRUOrLFU(robj *o) {
    if (o->refcount == OBJ_SHARED_REFCOUNT) return;
    /* Set the LRU to the current lruclock (minutes resolution), or
     * alternatively the LFU counter. */
    if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
        o->lru = (LFUGetTimeInMinutes() << 8) | LFU_INIT_VAL;
    } else {
        o->lru = LRU_CLOCK();
    }
    return;
}
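
/* Illustrative sketch of how the 24-bit 'lru' field is packed under the LFU policy
 * (assuming LFU_INIT_VAL == 5, its usual default): the high 16 bits hold the last
 * decay time in minutes and the low 8 bits hold a logarithmic access counter, which
 * is exactly what the shift/or above encodes. For a hypothetical
 * LFUGetTimeInMinutes() value of 0x1234:
 *
 *     o->lru = (0x1234 << 8) | 5;     // 0x123405
 *     int counter = o->lru & 255;     // 5      (access frequency estimate)
 *     int decay_time = o->lru >> 8;   // 0x1234 (last decay time, in minutes)
 *
 * Under the LRU policy the whole field simply stores LRU_CLOCK(). */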

/* Set a special refcount in the object to make it "shared":
 * incrRefCount and decrRefCount() will test for this special refcount
 * and will not touch the object. This way it is free to access shared
 * objects such as small integers from different threads without any
 * mutex.
 *
 * A common pattern to create shared objects:
 *
 * robj *myobject = makeObjectShared(createObject(...));
 *
 */
robj *makeObjectShared(robj *o) {
    serverAssert(o->refcount == 1);
    o->refcount = OBJ_SHARED_REFCOUNT;
    return o;
}

/* Create a string object with encoding OBJ_ENCODING_RAW, that is a plain
 * string object where o->ptr points to a proper sds string. */
robj *createRawStringObject(const char *ptr, size_t len) {
    return createObject(OBJ_STRING, sdsnewlen(ptr, len));
}

/* Create a string object with encoding OBJ_ENCODING_EMBSTR, that is
 * an object where the sds string is actually an unmodifiable string
 * allocated in the same chunk as the object itself. */
robj *createEmbeddedStringObject(const char *ptr, size_t len) {
    robj *o = zmalloc(sizeof(robj) + sizeof(struct sdshdr8) + len + 1);
    struct sdshdr8 *sh = (void *)(o + 1);

    o->type = OBJ_STRING;
    o->encoding = OBJ_ENCODING_EMBSTR;
    o->ptr = sh + 1;
    o->refcount = 1;
    o->lru = 0;

    sh->len = len;
    sh->alloc = len;
    sh->flags = SDS_TYPE_8;
    if (ptr == SDS_NOINIT)
        sh->buf[len] = '\0';
    else if (ptr) {
        memcpy(sh->buf, ptr, len);
        sh->buf[len] = '\0';
    } else {
        memset(sh->buf, 0, len + 1);
    }
    return o;
}

/* Create a string object with EMBSTR encoding if it is smaller than
 * OBJ_ENCODING_EMBSTR_SIZE_LIMIT, otherwise the RAW encoding is
 * used.
 *
 * The current limit of 44 is chosen so that the biggest string object
 * we allocate as EMBSTR will still fit into the 64 byte arena of jemalloc. */
#define OBJ_ENCODING_EMBSTR_SIZE_LIMIT 44
robj *createStringObject(const char *ptr, size_t len) {
    if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT)
        return createEmbeddedStringObject(ptr, len);
    else
        return createRawStringObject(ptr, len);
}
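
/* A minimal worked example of the 44-byte limit above (assuming a 64-bit build where
 * sizeof(robj) == 16 and sizeof(struct sdshdr8) == 3, which is how these structures are
 * commonly laid out):
 *
 *     16 (robj) + 3 (sdshdr8) + 44 (payload) + 1 (trailing '\0') == 64 bytes
 *
 * i.e. the largest EMBSTR object fills a 64-byte jemalloc size class exactly; one more
 * byte of payload would spill into the next class and waste memory, so longer strings
 * fall back to createRawStringObject(). */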

/* Same as createRawStringObject, can return NULL if allocation fails */
robj *tryCreateRawStringObject(const char *ptr, size_t len) {
    sds str = sdstrynewlen(ptr, len);
    if (!str) return NULL;
    return createObject(OBJ_STRING, str);
}

/* Same as createStringObject, can return NULL if allocation fails */
robj *tryCreateStringObject(const char *ptr, size_t len) {
    if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT)
        return createEmbeddedStringObject(ptr, len);
    else
        return tryCreateRawStringObject(ptr, len);
}

/* Create a string object from a long long value according to the specified flag. */
#define LL2STROBJ_AUTO 0       /* automatically create the optimal string object */
#define LL2STROBJ_NO_SHARED 1  /* disallow shared objects */
#define LL2STROBJ_NO_INT_ENC 2 /* disallow integer encoded objects. */
robj *createStringObjectFromLongLongWithOptions(long long value, int flag) {
    robj *o;

    if (value >= 0 && value < OBJ_SHARED_INTEGERS && flag == LL2STROBJ_AUTO) {
        o = shared.integers[value];
    } else {
        if ((value >= LONG_MIN && value <= LONG_MAX) && flag != LL2STROBJ_NO_INT_ENC) {
            o = createObject(OBJ_STRING, NULL);
            o->encoding = OBJ_ENCODING_INT;
            o->ptr = (void *)((long)value);
        } else {
            char buf[LONG_STR_SIZE];
            int len = ll2string(buf, sizeof(buf), value);
            o = createStringObject(buf, len);
        }
    }
    return o;
}

/* Wrapper for createStringObjectFromLongLongWithOptions() always demanding
 * to create a shared object if possible. */
robj *createStringObjectFromLongLong(long long value) {
    return createStringObjectFromLongLongWithOptions(value, LL2STROBJ_AUTO);
}

/* The function avoids returning a shared integer when LFU/LRU info
 * are needed, that is, when the object is used as a value in the key
 * space (for instance when the INCR command is used), and the server is
 * configured to evict based on LFU/LRU, so we want LFU/LRU values
 * specific for each key. */
robj *createStringObjectFromLongLongForValue(long long value) {
    if (server.maxmemory == 0 || !(server.maxmemory_policy & MAXMEMORY_FLAG_NO_SHARED_INTEGERS)) {
        /* If the maxmemory policy permits, we can still return shared integers */
        return createStringObjectFromLongLongWithOptions(value, LL2STROBJ_AUTO);
    } else {
        return createStringObjectFromLongLongWithOptions(value, LL2STROBJ_NO_SHARED);
    }
}

/* Create a string object that contains an sds inside it. That means it can't be
 * integer encoded (OBJ_ENCODING_INT), and it'll always be an EMBSTR type. */
robj *createStringObjectFromLongLongWithSds(long long value) {
    return createStringObjectFromLongLongWithOptions(value, LL2STROBJ_NO_INT_ENC);
}
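
/* A small usage sketch of the wrappers above (illustrative only; the encodings noted in
 * the trailing comments assume the default OBJ_SHARED_INTEGERS of 10000 and a maxmemory
 * policy that still permits shared integers):
 *
 *     robj *a = createStringObjectFromLongLong(42);         // shared.integers[42]
 *     robj *b = createStringObjectFromLongLong(123456789);  // fresh OBJ_ENCODING_INT object
 *     robj *c = createStringObjectFromLongLongForValue(42); // may skip sharing so LRU/LFU is per key
 *     robj *d = createStringObjectFromLongLongWithSds(42);  // "42" as an EMBSTR sds, never INT encoded
 *
 * b and d are owned by the caller and must eventually be decrRefCount()'d; a (and possibly
 * c) is shared, so refcount operations on it are no-ops. */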

/* Create a string object from a long double. If humanfriendly is non-zero
 * it does not use exponential format and trims trailing zeroes at the end,
 * however this results in loss of precision. Otherwise exp format is used
 * and the output of snprintf() is not modified.
 *
 * The 'humanfriendly' option is used for INCRBYFLOAT and HINCRBYFLOAT. */
robj *createStringObjectFromLongDouble(long double value, int humanfriendly) {
    char buf[MAX_LONG_DOUBLE_CHARS];
    int len = ld2string(buf, sizeof(buf), value, humanfriendly ? LD_STR_HUMAN : LD_STR_AUTO);
    return createStringObject(buf, len);
}

/* Duplicate a string object, with the guarantee that the returned object
 * has the same encoding as the original one.
 *
 * This function also guarantees that duplicating a small integer object
 * (or a string object that contains a representation of a small integer)
 * will always result in a fresh object that is unshared (refcount == 1).
 *
 * The resulting object always has refcount set to 1. */
robj *dupStringObject(const robj *o) {
    robj *d;

    serverAssert(o->type == OBJ_STRING);

    switch (o->encoding) {
    case OBJ_ENCODING_RAW: return createRawStringObject(o->ptr, sdslen(o->ptr));
    case OBJ_ENCODING_EMBSTR: return createEmbeddedStringObject(o->ptr, sdslen(o->ptr));
    case OBJ_ENCODING_INT:
        d = createObject(OBJ_STRING, NULL);
        d->encoding = OBJ_ENCODING_INT;
        d->ptr = o->ptr;
        return d;
    default: serverPanic("Wrong encoding."); break;
    }
}

robj *createQuicklistObject(int fill, int compress) {
    quicklist *l = quicklistNew(fill, compress);
    robj *o = createObject(OBJ_LIST, l);
    o->encoding = OBJ_ENCODING_QUICKLIST;
    return o;
}

robj *createListListpackObject(void) {
    unsigned char *lp = lpNew(0);
    robj *o = createObject(OBJ_LIST, lp);
    o->encoding = OBJ_ENCODING_LISTPACK;
    return o;
}

robj *createSetObject(void) {
    dict *d = dictCreate(&setDictType);
    robj *o = createObject(OBJ_SET, d);
    o->encoding = OBJ_ENCODING_HT;
    return o;
}

robj *createIntsetObject(void) {
    intset *is = intsetNew();
    robj *o = createObject(OBJ_SET, is);
    o->encoding = OBJ_ENCODING_INTSET;
    return o;
}

robj *createSetListpackObject(void) {
    unsigned char *lp = lpNew(0);
    robj *o = createObject(OBJ_SET, lp);
    o->encoding = OBJ_ENCODING_LISTPACK;
    return o;
}

robj *createHashObject(void) {
    unsigned char *zl = lpNew(0);
    robj *o = createObject(OBJ_HASH, zl);
    o->encoding = OBJ_ENCODING_LISTPACK;
    return o;
}

robj *createZsetObject(void) {
    zset *zs = zmalloc(sizeof(*zs));
    robj *o;

    zs->dict = dictCreate(&zsetDictType);
    zs->zsl = zslCreate();
    o = createObject(OBJ_ZSET, zs);
    o->encoding = OBJ_ENCODING_SKIPLIST;
    return o;
}

robj *createZsetListpackObject(void) {
    unsigned char *lp = lpNew(0);
    robj *o = createObject(OBJ_ZSET, lp);
    o->encoding = OBJ_ENCODING_LISTPACK;
    return o;
}

robj *createStreamObject(void) {
    stream *s = streamNew();
    robj *o = createObject(OBJ_STREAM, s);
    o->encoding = OBJ_ENCODING_STREAM;
    return o;
}

robj *createModuleObject(moduleType *mt, void *value) {
    moduleValue *mv = zmalloc(sizeof(*mv));
    mv->type = mt;
    mv->value = value;
    return createObject(OBJ_MODULE, mv);
}

void freeStringObject(robj *o) {
    if (o->encoding == OBJ_ENCODING_RAW) {
        sdsfree(o->ptr);
    }
}

void freeListObject(robj *o) {
    if (o->encoding == OBJ_ENCODING_QUICKLIST) {
        quicklistRelease(o->ptr);
    } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
        lpFree(o->ptr);
    } else {
        serverPanic("Unknown list encoding type");
    }
}

void freeSetObject(robj *o) {
    switch (o->encoding) {
    case OBJ_ENCODING_HT: dictRelease((dict *)o->ptr); break;
    case OBJ_ENCODING_INTSET:
    case OBJ_ENCODING_LISTPACK: zfree(o->ptr); break;
    default: serverPanic("Unknown set encoding type");
    }
}

void freeZsetObject(robj *o) {
    zset *zs;
    switch (o->encoding) {
    case OBJ_ENCODING_SKIPLIST:
        zs = o->ptr;
        dictRelease(zs->dict);
        zslFree(zs->zsl);
        zfree(zs);
        break;
    case OBJ_ENCODING_LISTPACK: zfree(o->ptr); break;
    default: serverPanic("Unknown sorted set encoding");
    }
}

void freeHashObject(robj *o) {
    switch (o->encoding) {
    case OBJ_ENCODING_HT: dictRelease((dict *)o->ptr); break;
    case OBJ_ENCODING_LISTPACK: lpFree(o->ptr); break;
    default: serverPanic("Unknown hash encoding type"); break;
    }
}

void freeModuleObject(robj *o) {
    moduleValue *mv = o->ptr;
    mv->type->free(mv->value);
    zfree(mv);
}

void freeStreamObject(robj *o) {
    freeStream(o->ptr);
}

void incrRefCount(robj *o) {
    if (o->refcount < OBJ_FIRST_SPECIAL_REFCOUNT) {
        o->refcount++;
    } else {
        if (o->refcount == OBJ_SHARED_REFCOUNT) {
            /* Nothing to do: this refcount is immutable. */
        } else if (o->refcount == OBJ_STATIC_REFCOUNT) {
            serverPanic("You tried to retain an object allocated in the stack");
        }
    }
}

void decrRefCount(robj *o) {
    if (o->refcount == 1) {
        switch (o->type) {
        case OBJ_STRING: freeStringObject(o); break;
        case OBJ_LIST: freeListObject(o); break;
        case OBJ_SET: freeSetObject(o); break;
        case OBJ_ZSET: freeZsetObject(o); break;
        case OBJ_HASH: freeHashObject(o); break;
        case OBJ_MODULE: freeModuleObject(o); break;
        case OBJ_STREAM: freeStreamObject(o); break;
        default: serverPanic("Unknown object type"); break;
        }
        zfree(o);
    } else {
        if (o->refcount <= 0) serverPanic("decrRefCount against refcount <= 0");
        if (o->refcount != OBJ_SHARED_REFCOUNT) o->refcount--;
    }
}

/* See dismissObject() */
void dismissSds(sds s) {
    dismissMemory(sdsAllocPtr(s), sdsAllocSize(s));
}

/* See dismissObject() */
void dismissStringObject(robj *o) {
    if (o->encoding == OBJ_ENCODING_RAW) {
        dismissSds(o->ptr);
    }
}

/* See dismissObject() */
void dismissListObject(robj *o, size_t size_hint) {
    if (o->encoding == OBJ_ENCODING_QUICKLIST) {
        quicklist *ql = o->ptr;
        serverAssert(ql->len != 0);
        /* We iterate all nodes only when average node size is bigger than a
         * page size, and there's a high chance we'll actually dismiss something. */
        if (size_hint / ql->len >= server.page_size) {
            quicklistNode *node = ql->head;
            while (node) {
                if (quicklistNodeIsCompressed(node)) {
                    dismissMemory(node->entry, ((quicklistLZF *)node->entry)->sz);
                } else {
                    dismissMemory(node->entry, node->sz);
                }
                node = node->next;
            }
        }
    } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
        dismissMemory(o->ptr, lpBytes((unsigned char *)o->ptr));
    } else {
        serverPanic("Unknown list encoding type");
    }
}

/* See dismissObject() */
void dismissSetObject(robj *o, size_t size_hint) {
    if (o->encoding == OBJ_ENCODING_HT) {
        dict *set = o->ptr;
        serverAssert(dictSize(set) != 0);
        /* We iterate all nodes only when average member size is bigger than a
         * page size, and there's a high chance we'll actually dismiss something. */
        if (size_hint / dictSize(set) >= server.page_size) {
            dictEntry *de;
            dictIterator *di = dictGetIterator(set);
            while ((de = dictNext(di)) != NULL) {
                dismissSds(dictGetKey(de));
            }
            dictReleaseIterator(di);
        }

        /* Dismiss hash table memory. */
        dismissMemory(set->ht_table[0], DICTHT_SIZE(set->ht_size_exp[0]) * sizeof(dictEntry *));
        dismissMemory(set->ht_table[1], DICTHT_SIZE(set->ht_size_exp[1]) * sizeof(dictEntry *));
    } else if (o->encoding == OBJ_ENCODING_INTSET) {
        dismissMemory(o->ptr, intsetBlobLen((intset *)o->ptr));
    } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
        dismissMemory(o->ptr, lpBytes((unsigned char *)o->ptr));
    } else {
        serverPanic("Unknown set encoding type");
    }
}

/* See dismissObject() */
void dismissZsetObject(robj *o, size_t size_hint) {
    if (o->encoding == OBJ_ENCODING_SKIPLIST) {
        zset *zs = o->ptr;
        zskiplist *zsl = zs->zsl;
        serverAssert(zsl->length != 0);
        /* We iterate all nodes only when average member size is bigger than a
         * page size, and there's a high chance we'll actually dismiss something. */
        if (size_hint / zsl->length >= server.page_size) {
            zskiplistNode *zn = zsl->tail;
            while (zn != NULL) {
                dismissSds(zn->ele);
                zn = zn->backward;
            }
        }

        /* Dismiss hash table memory. */
        dict *d = zs->dict;
        dismissMemory(d->ht_table[0], DICTHT_SIZE(d->ht_size_exp[0]) * sizeof(dictEntry *));
        dismissMemory(d->ht_table[1], DICTHT_SIZE(d->ht_size_exp[1]) * sizeof(dictEntry *));
    } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
        dismissMemory(o->ptr, lpBytes((unsigned char *)o->ptr));
    } else {
        serverPanic("Unknown zset encoding type");
    }
}

/* See dismissObject() */
void dismissHashObject(robj *o, size_t size_hint) {
    if (o->encoding == OBJ_ENCODING_HT) {
        dict *d = o->ptr;
        serverAssert(dictSize(d) != 0);
        /* We iterate all fields only when average field/value size is bigger than
         * a page size, and there's a high chance we'll actually dismiss something. */
        if (size_hint / dictSize(d) >= server.page_size) {
            dictEntry *de;
            dictIterator *di = dictGetIterator(d);
            while ((de = dictNext(di)) != NULL) {
                /* Only dismiss values memory since the field size
                 * usually is small. */
                dismissSds(dictGetVal(de));
            }
            dictReleaseIterator(di);
        }

        /* Dismiss hash table memory. */
        dismissMemory(d->ht_table[0], DICTHT_SIZE(d->ht_size_exp[0]) * sizeof(dictEntry *));
        dismissMemory(d->ht_table[1], DICTHT_SIZE(d->ht_size_exp[1]) * sizeof(dictEntry *));
    } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
        dismissMemory(o->ptr, lpBytes((unsigned char *)o->ptr));
    } else {
        serverPanic("Unknown hash encoding type");
    }
}

/* See dismissObject() */
void dismissStreamObject(robj *o, size_t size_hint) {
    stream *s = o->ptr;
    rax *rax = s->rax;
    if (raxSize(rax) == 0) return;

    /* Iterate only on stream entries, although size_hint may include serialized
     * consumer groups info, but usually, stream entries take up most of
     * the space. */
    if (size_hint / raxSize(rax) >= server.page_size) {
        raxIterator ri;
        raxStart(&ri, rax);
        raxSeek(&ri, "^", NULL, 0);
        while (raxNext(&ri)) {
            dismissMemory(ri.data, lpBytes(ri.data));
        }
        raxStop(&ri);
    }
}

/* When creating a snapshot in a fork child process, the main process and child
 * process share the same physical memory pages, and if / when the parent
 * modifies any keys due to write traffic, it'll cause CoW which consumes
 * physical memory. In the child process, after serializing the key and value,
 * the data is definitely not accessed again, so to avoid unnecessary CoW, we
 * try to release their memory back to the OS. See dismissMemory().
 *
 * Because of the cost of iterating all nodes/fields/members/entries of complex
 * data types, we iterate and dismiss them only when the estimated average size
 * of an individual allocation is larger than the OS page size. 'size_hint' is
 * the size of the serialized value. This method is not accurate, but it can
 * reduce unnecessary iteration for complex data types that are probably not
 * going to release any memory. */
void dismissObject(robj *o, size_t size_hint) {
    /* madvise(MADV_DONTNEED) may not work if Transparent Huge Pages is enabled. */
    if (server.thp_enabled) return;

    /* Currently we use zmadvise_dontneed only when we use jemalloc on Linux,
     * so we avoid these pointless loops when they're not going to do anything. */
#if defined(USE_JEMALLOC) && defined(__linux__)
    if (o->refcount != 1) return;
    switch (o->type) {
    case OBJ_STRING: dismissStringObject(o); break;
    case OBJ_LIST: dismissListObject(o, size_hint); break;
    case OBJ_SET: dismissSetObject(o, size_hint); break;
    case OBJ_ZSET: dismissZsetObject(o, size_hint); break;
    case OBJ_HASH: dismissHashObject(o, size_hint); break;
    case OBJ_STREAM: dismissStreamObject(o, size_hint); break;
    default: break;
    }
#else
    UNUSED(o);
    UNUSED(size_hint);
#endif
}
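
/* A worked example of the size_hint heuristic above (assuming server.page_size == 4096):
 * a list whose serialized size_hint is 8 MiB and that has 100 quicklist nodes averages
 * roughly 80 KiB per node, so 8388608 / 100 >= 4096 and every node is dismissed; the same
 * 8 MiB spread over 1,000,000 nodes averages only ~8 bytes per node, so the walk is
 * skipped, since madvise(MADV_DONTNEED) could not release sub-page allocations anyway. */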

/* This variant of decrRefCount() gets its argument as void, and is useful
 * as free method in data structures that expect a 'void free_object(void*)'
 * prototype for the free method. */
void decrRefCountVoid(void *o) {
    decrRefCount(o);
}

int checkType(client *c, robj *o, int type) {
    /* A NULL is considered an empty key */
    if (o && o->type != type) {
        addReplyErrorObject(c, shared.wrongtypeerr);
        return 1;
    }
    return 0;
}

int isSdsRepresentableAsLongLong(sds s, long long *llval) {
    return string2ll(s, sdslen(s), llval) ? C_OK : C_ERR;
}

int isObjectRepresentableAsLongLong(robj *o, long long *llval) {
    serverAssertWithInfo(NULL, o, o->type == OBJ_STRING);
    if (o->encoding == OBJ_ENCODING_INT) {
        if (llval) *llval = (long)o->ptr;
        return C_OK;
    } else {
        return isSdsRepresentableAsLongLong(o->ptr, llval);
    }
}

/* Optimize the SDS string inside the string object to require little space,
 * in case there is more than 10% of free space at the end of the SDS. */
void trimStringObjectIfNeeded(robj *o, int trim_small_values) {
    if (o->encoding != OBJ_ENCODING_RAW) return;
    /* A string may have free space in the following cases:
     * 1. When an arg len is greater than PROTO_MBULK_BIG_ARG the query buffer may be used directly as the SDS string.
     * 2. When utilizing the argument caching mechanism in Lua.
     * 3. When calling from RM_TrimStringAllocation (trim_small_values is true). */
    size_t len = sdslen(o->ptr);
    if (len >= PROTO_MBULK_BIG_ARG || trim_small_values ||
        (server.executing_client && server.executing_client->flag.script && len < LUA_CMD_OBJCACHE_MAX_LEN)) {
        if (sdsavail(o->ptr) > len / 10) {
            o->ptr = sdsRemoveFreeSpace(o->ptr, 0);
        }
    }
}
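
/* A quick numeric example of the 10% rule above (illustrative values only): a 100 KiB
 * RAW string sitting in a buffer with 32 KiB of unused capacity has
 * sdsavail() == 32 KiB > 100 KiB / 10, so sdsRemoveFreeSpace() shrinks the allocation;
 * with only 5 KiB of spare capacity the trim is skipped because the realloc would not
 * be worth it. */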

/* Try to encode a string object in order to save space */
robj *tryObjectEncodingEx(robj *o, int try_trim) {
    long value;
    sds s = o->ptr;
    size_t len;

    /* Make sure this is a string object, the only type we encode
     * in this function. Other types use encoded memory efficient
     * representations but are handled by the commands implementing
     * the type. */
    serverAssertWithInfo(NULL, o, o->type == OBJ_STRING);

    /* We try some specialized encoding only for objects that are
     * RAW or EMBSTR encoded, in other words objects that are still
     * represented by an actual array of chars. */
    if (!sdsEncodedObject(o)) return o;

    /* It's not safe to encode shared objects: shared objects can be shared
     * everywhere in the "object space" of the server and may end in places where
     * they are not handled. We handle them only as values in the keyspace. */
    if (o->refcount > 1) return o;

    /* Check if we can represent this string as a long integer.
     * Note that we are sure that a string larger than 20 chars is not
     * representable as a 32 nor 64 bit integer. */
    len = sdslen(s);
    if (len <= 20 && string2l(s, len, &value)) {
        /* This object is encodable as a long. Try to use a shared object.
         * Note that we avoid using shared integers when maxmemory is used
         * because every object needs to have a private LRU field for the LRU
         * algorithm to work well. */
        if (canUseSharedObject() && value >= 0 && value < OBJ_SHARED_INTEGERS) {
            decrRefCount(o);
            return shared.integers[value];
        } else {
            if (o->encoding == OBJ_ENCODING_RAW) {
                sdsfree(o->ptr);
                o->encoding = OBJ_ENCODING_INT;
                o->ptr = (void *)value;
                return o;
            } else if (o->encoding == OBJ_ENCODING_EMBSTR) {
                decrRefCount(o);
                return createStringObjectFromLongLongForValue(value);
            }
        }
    }

    /* If the string is small and is still RAW encoded,
     * try the EMBSTR encoding which is more efficient.
     * In this representation the object and the SDS string are allocated
     * in the same chunk of memory to save space and cache misses. */
    if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT) {
        robj *emb;

        if (o->encoding == OBJ_ENCODING_EMBSTR) return o;
        emb = createEmbeddedStringObject(s, sdslen(s));
        decrRefCount(o);
        return emb;
    }

    /* We can't encode the object...
     * Do the last try, and at least optimize the SDS string inside */
    if (try_trim) trimStringObjectIfNeeded(o, 0);

    /* Return the original object. */
    return o;
}

robj *tryObjectEncoding(robj *o) {
    return tryObjectEncodingEx(o, 1);
}
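
/* A minimal caller-side sketch (how write paths typically use this helper; the
 * reassignment is the important part, since the function may free the object it was
 * given and return a different one, e.g. a shared integer or a fresh EMBSTR):
 *
 *     robj *val = createStringObject(buf, buflen);
 *     val = tryObjectEncoding(val);   // never keep using the old pointer here
 *     dbAdd(db, key, val);            // insertion into the keyspace, shown only for illustration
 */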

/* Get a decoded version of an encoded object (returned as a new object).
 * If the object is already raw-encoded just increment the ref count. */
robj *getDecodedObject(robj *o) {
    robj *dec;

    if (sdsEncodedObject(o)) {
        incrRefCount(o);
        return o;
    }
    if (o->type == OBJ_STRING && o->encoding == OBJ_ENCODING_INT) {
        char buf[32];

        ll2string(buf, 32, (long)o->ptr);
        dec = createStringObject(buf, strlen(buf));
        return dec;
    } else {
        serverPanic("Unknown encoding type");
    }
}

/* Compare two string objects via strcmp() or strcoll() depending on flags.
 * Note that the objects may be integer-encoded. In such a case we
 * use ll2string() to get a string representation of the numbers on the stack
 * and compare the strings; it's much faster than calling getDecodedObject().
 *
 * Important note: when STRING_COMPARE_BINARY is used a binary-safe comparison
 * is used. */

#define STRING_COMPARE_BINARY (1 << 0)
#define STRING_COMPARE_COLL (1 << 1)

int compareStringObjectsWithFlags(const robj *a, const robj *b, int flags) {
    serverAssertWithInfo(NULL, a, a->type == OBJ_STRING && b->type == OBJ_STRING);
    char bufa[128], bufb[128], *astr, *bstr;
    size_t alen, blen, minlen;

    if (a == b) return 0;
    if (sdsEncodedObject(a)) {
        astr = a->ptr;
        alen = sdslen(astr);
    } else {
        alen = ll2string(bufa, sizeof(bufa), (long)a->ptr);
        astr = bufa;
    }
    if (sdsEncodedObject(b)) {
        bstr = b->ptr;
        blen = sdslen(bstr);
    } else {
        blen = ll2string(bufb, sizeof(bufb), (long)b->ptr);
        bstr = bufb;
    }
    if (flags & STRING_COMPARE_COLL) {
        return strcoll(astr, bstr);
    } else {
        int cmp;

        minlen = (alen < blen) ? alen : blen;
        cmp = memcmp(astr, bstr, minlen);
        if (cmp == 0) return alen - blen;
        return cmp;
    }
}

/* Wrapper for compareStringObjectsWithFlags() using binary comparison. */
int compareStringObjects(const robj *a, const robj *b) {
    return compareStringObjectsWithFlags(a, b, STRING_COMPARE_BINARY);
}

/* Wrapper for compareStringObjectsWithFlags() using collation. */
int collateStringObjects(const robj *a, const robj *b) {
    return compareStringObjectsWithFlags(a, b, STRING_COMPARE_COLL);
}

/* Equal string objects return 1 if the two objects are the same from the
 * point of view of a string comparison, otherwise 0 is returned. Note that
 * this function is faster than checking for (compareStringObject(a,b) == 0)
 * because it can perform some more optimization. */
int equalStringObjects(robj *a, robj *b) {
    if (a->encoding == OBJ_ENCODING_INT && b->encoding == OBJ_ENCODING_INT) {
        /* If both strings are integer encoded just check if the stored
         * long is the same. */
        return a->ptr == b->ptr;
    } else {
        return compareStringObjects(a, b) == 0;
    }
}

size_t stringObjectLen(robj *o) {
    serverAssertWithInfo(NULL, o, o->type == OBJ_STRING);
    if (sdsEncodedObject(o)) {
        return sdslen(o->ptr);
    } else {
        return sdigits10((long)o->ptr);
    }
}

int getDoubleFromObject(const robj *o, double *target) {
    double value;

    if (o == NULL) {
        value = 0;
    } else {
        serverAssertWithInfo(NULL, o, o->type == OBJ_STRING);
        if (sdsEncodedObject(o)) {
            if (!string2d(o->ptr, sdslen(o->ptr), &value)) return C_ERR;
        } else if (o->encoding == OBJ_ENCODING_INT) {
            value = (long)o->ptr;
        } else {
            serverPanic("Unknown string encoding");
        }
    }
    *target = value;
    return C_OK;
}

int getDoubleFromObjectOrReply(client *c, robj *o, double *target, const char *msg) {
    double value;
    if (getDoubleFromObject(o, &value) != C_OK) {
        if (msg != NULL) {
            addReplyError(c, (char *)msg);
        } else {
            addReplyError(c, "value is not a valid float");
        }
        return C_ERR;
    }
    *target = value;
    return C_OK;
}

int getLongDoubleFromObject(robj *o, long double *target) {
    long double value;

    if (o == NULL) {
        value = 0;
    } else {
        serverAssertWithInfo(NULL, o, o->type == OBJ_STRING);
        if (sdsEncodedObject(o)) {
            if (!string2ld(o->ptr, sdslen(o->ptr), &value)) return C_ERR;
        } else if (o->encoding == OBJ_ENCODING_INT) {
            value = (long)o->ptr;
        } else {
            serverPanic("Unknown string encoding");
        }
    }
    *target = value;
    return C_OK;
}

int getLongDoubleFromObjectOrReply(client *c, robj *o, long double *target, const char *msg) {
    long double value;
    if (getLongDoubleFromObject(o, &value) != C_OK) {
        if (msg != NULL) {
            addReplyError(c, (char *)msg);
        } else {
            addReplyError(c, "value is not a valid float");
        }
        return C_ERR;
    }
    *target = value;
    return C_OK;
}

int getLongLongFromObject(robj *o, long long *target) {
    long long value;

    if (o == NULL) {
        value = 0;
    } else {
        serverAssertWithInfo(NULL, o, o->type == OBJ_STRING);
        if (sdsEncodedObject(o)) {
            if (string2ll(o->ptr, sdslen(o->ptr), &value) == 0) return C_ERR;
        } else if (o->encoding == OBJ_ENCODING_INT) {
            value = (long)o->ptr;
        } else {
            serverPanic("Unknown string encoding");
        }
    }
    if (target) *target = value;
    return C_OK;
}

int getLongLongFromObjectOrReply(client *c, robj *o, long long *target, const char *msg) {
    long long value;
    if (getLongLongFromObject(o, &value) != C_OK) {
        if (msg != NULL) {
            addReplyError(c, (char *)msg);
        } else {
            addReplyError(c, "value is not an integer or out of range");
        }
        return C_ERR;
    }
    *target = value;
    return C_OK;
}

int getLongFromObjectOrReply(client *c, robj *o, long *target, const char *msg) {
    long long value;

    if (getLongLongFromObjectOrReply(c, o, &value, msg) != C_OK) return C_ERR;
    if (value < LONG_MIN || value > LONG_MAX) {
        if (msg != NULL) {
            addReplyError(c, (char *)msg);
        } else {
            addReplyError(c, "value is out of range");
        }
        return C_ERR;
    }
    *target = value;
    return C_OK;
}

int getRangeLongFromObjectOrReply(client *c, robj *o, long min, long max, long *target, const char *msg) {
    if (getLongFromObjectOrReply(c, o, target, msg) != C_OK) return C_ERR;
    if (*target < min || *target > max) {
        if (msg != NULL) {
            addReplyError(c, (char *)msg);
        } else {
            addReplyErrorFormat(c, "value is out of range, value must between %ld and %ld", min, max);
        }
        return C_ERR;
    }
    return C_OK;
}

int getPositiveLongFromObjectOrReply(client *c, robj *o, long *target, const char *msg) {
    if (msg) {
        return getRangeLongFromObjectOrReply(c, o, 0, LONG_MAX, target, msg);
    } else {
        return getRangeLongFromObjectOrReply(c, o, 0, LONG_MAX, target, "value is out of range, must be positive");
    }
}

int getIntFromObjectOrReply(client *c, robj *o, int *target, const char *msg) {
    long value;

    if (getRangeLongFromObjectOrReply(c, o, INT_MIN, INT_MAX, &value, msg) != C_OK) return C_ERR;

    *target = value;
    return C_OK;
}

char *strEncoding(int encoding) {
    switch (encoding) {
    case OBJ_ENCODING_RAW: return "raw";
    case OBJ_ENCODING_INT: return "int";
    case OBJ_ENCODING_HT: return "hashtable";
    case OBJ_ENCODING_QUICKLIST: return "quicklist";
    case OBJ_ENCODING_LISTPACK: return "listpack";
    case OBJ_ENCODING_INTSET: return "intset";
    case OBJ_ENCODING_SKIPLIST: return "skiplist";
    case OBJ_ENCODING_EMBSTR: return "embstr";
    case OBJ_ENCODING_STREAM: return "stream";
    default: return "unknown";
    }
}

/* =========================== Memory introspection ========================= */


/* This is a helper function with the goal of estimating the memory
 * size of a radix tree that is used to store Stream IDs.
 *
 * Note: to guess the size of the radix tree is not trivial, so we
 * approximate it considering 16 bytes of data overhead for each
 * key (the ID), and then adding the number of bare nodes, plus some
 * overhead due to the data and child pointers. This secret recipe
 * was obtained by checking the average radix tree created by real
 * workloads, and then adjusting the constants to get numbers that
 * more or less match the real memory usage.
 *
 * Actually the number of nodes and keys may be different depending
 * on the insertion speed and thus the ability of the radix tree
 * to compress prefixes. */
size_t streamRadixTreeMemoryUsage(rax *rax) {
    size_t size = sizeof(*rax);
    size += rax->numele * sizeof(streamID);
    size += rax->numnodes * sizeof(raxNode);
    /* Add a fixed overhead due to the aux data pointer, children, ... */
    size += rax->numnodes * sizeof(long) * 30;
    return size;
}
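
/* Plugging in round numbers to make the approximation above concrete (assuming a 64-bit
 * build where sizeof(streamID) == 16 and sizeof(long) == 8): a rax with 1,000 keys spread
 * over 100 nodes is estimated at roughly
 *
 *     1000 * 16  +  100 * sizeof(raxNode)  +  100 * 8 * 30
 *
 * which comes to about 16 KB of per-key overhead plus roughly 24 KB of per-node overhead,
 * a ballpark figure only, as the comment above stresses. */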

/* Returns the size in bytes consumed by the key's value in RAM.
 * Note that the returned value is just an approximation, especially in the
 * case of aggregated data types where only "sample_size" elements
 * are checked and averaged to estimate the total size. */
#define OBJ_COMPUTE_SIZE_DEF_SAMPLES 5 /* Default sample size. */
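
/* The sampling works by averaging the first sample_size elements and extrapolating.
 * With illustrative numbers: for a hash of 1,000,000 fields and sample_size == 5, if the
 * five sampled field/value pairs take 80, 96, 88, 80 and 96 bytes (440 bytes total,
 * 88 on average), the entries contribute 88 * 1,000,000 = 88 MB to the estimate, on top
 * of the fixed robj/dict/bucket overhead computed below. */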
size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
    sds ele, ele2;
    dict *d;
    dictIterator *di;
    struct dictEntry *de;
    size_t asize = 0, elesize = 0, samples = 0;

    if (o->type == OBJ_STRING) {
        if (o->encoding == OBJ_ENCODING_INT) {
            asize = sizeof(*o);
        } else if (o->encoding == OBJ_ENCODING_RAW) {
            asize = sdsZmallocSize(o->ptr) + sizeof(*o);
        } else if (o->encoding == OBJ_ENCODING_EMBSTR) {
            asize = zmalloc_size((void *)o);
        } else {
            serverPanic("Unknown string encoding");
        }
    } else if (o->type == OBJ_LIST) {
        if (o->encoding == OBJ_ENCODING_QUICKLIST) {
            quicklist *ql = o->ptr;
            quicklistNode *node = ql->head;
            asize = sizeof(*o) + sizeof(quicklist);
            do {
                elesize += sizeof(quicklistNode) + zmalloc_size(node->entry);
                samples++;
            } while ((node = node->next) && samples < sample_size);
            asize += (double)elesize / samples * ql->len;
        } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
            asize = sizeof(*o) + zmalloc_size(o->ptr);
        } else {
            serverPanic("Unknown list encoding");
        }
    } else if (o->type == OBJ_SET) {
        if (o->encoding == OBJ_ENCODING_HT) {
            d = o->ptr;
            di = dictGetIterator(d);
            asize = sizeof(*o) + sizeof(dict) + (sizeof(struct dictEntry *) * dictBuckets(d));
            while ((de = dictNext(di)) != NULL && samples < sample_size) {
                ele = dictGetKey(de);
                elesize += dictEntryMemUsage() + sdsZmallocSize(ele);
                samples++;
            }
            dictReleaseIterator(di);
            if (samples) asize += (double)elesize / samples * dictSize(d);
        } else if (o->encoding == OBJ_ENCODING_INTSET) {
            asize = sizeof(*o) + zmalloc_size(o->ptr);
        } else if (o->encoding == OBJ_ENCODING_LISTPACK) {
            asize = sizeof(*o) + zmalloc_size(o->ptr);
        } else {
            serverPanic("Unknown set encoding");
        }
    } else if (o->type == OBJ_ZSET) {
        if (o->encoding == OBJ_ENCODING_LISTPACK) {
            asize = sizeof(*o) + zmalloc_size(o->ptr);
        } else if (o->encoding == OBJ_ENCODING_SKIPLIST) {
            d = ((zset *)o->ptr)->dict;
            zskiplist *zsl = ((zset *)o->ptr)->zsl;
            zskiplistNode *znode = zsl->header->level[0].forward;
            asize = sizeof(*o) + sizeof(zset) + sizeof(zskiplist) + sizeof(dict) +
                    (sizeof(struct dictEntry *) * dictBuckets(d)) + zmalloc_size(zsl->header);
            while (znode != NULL && samples < sample_size) {
                elesize += sdsZmallocSize(znode->ele);
                elesize += dictEntryMemUsage() + zmalloc_size(znode);
                samples++;
                znode = znode->level[0].forward;
            }
            if (samples) asize += (double)elesize / samples * dictSize(d);
        } else {
            serverPanic("Unknown sorted set encoding");
        }
    } else if (o->type == OBJ_HASH) {
        if (o->encoding == OBJ_ENCODING_LISTPACK) {
            asize = sizeof(*o) + zmalloc_size(o->ptr);
        } else if (o->encoding == OBJ_ENCODING_HT) {
            d = o->ptr;
            di = dictGetIterator(d);
            asize = sizeof(*o) + sizeof(dict) + (sizeof(struct dictEntry *) * dictBuckets(d));
            while ((de = dictNext(di)) != NULL && samples < sample_size) {
                ele = dictGetKey(de);
                ele2 = dictGetVal(de);
                elesize += sdsZmallocSize(ele) + sdsZmallocSize(ele2);
                elesize += dictEntryMemUsage();
                samples++;
            }
            dictReleaseIterator(di);
            if (samples) asize += (double)elesize / samples * dictSize(d);
        } else {
            serverPanic("Unknown hash encoding");
        }
    } else if (o->type == OBJ_STREAM) {
        stream *s = o->ptr;
        asize = sizeof(*o) + sizeof(*s);
        asize += streamRadixTreeMemoryUsage(s->rax);

        /* Now we have to add the listpacks. The last listpack is often non
         * complete, so we estimate the size of the first N listpacks, and
         * use the average to compute the size of the first N-1 listpacks, and
         * finally add the real size of the last node. */
        raxIterator ri;
        raxStart(&ri, s->rax);
        raxSeek(&ri, "^", NULL, 0);
        size_t lpsize = 0, samples = 0;
        while (samples < sample_size && raxNext(&ri)) {
            unsigned char *lp = ri.data;
            /* Use the allocated size, since we overprovision the node initially. */
            lpsize += zmalloc_size(lp);
            samples++;
        }
        if (s->rax->numele <= samples) {
            asize += lpsize;
        } else {
            if (samples) lpsize /= samples; /* Compute the average. */
            asize += lpsize * (s->rax->numele - 1);
            /* No need to check if seek succeeded, we enter this branch only
             * if there are a few elements in the radix tree. */
            raxSeek(&ri, "$", NULL, 0);
            raxNext(&ri);
            /* Use the allocated size, since we overprovision the node initially. */
            asize += zmalloc_size(ri.data);
        }
        raxStop(&ri);

        /* Consumer groups also have a non trivial memory overhead if there
         * are many consumers and many groups, let's count at least the
         * overhead of the pending entries in the groups and consumers
         * PELs. */
        if (s->cgroups) {
            raxStart(&ri, s->cgroups);
            raxSeek(&ri, "^", NULL, 0);
            while (raxNext(&ri)) {
                streamCG *cg = ri.data;
                asize += sizeof(*cg);
                asize += streamRadixTreeMemoryUsage(cg->pel);
                asize += sizeof(streamNACK) * raxSize(cg->pel);

                /* For each consumer we also need to add the basic data
                 * structures and the PEL memory usage. */
                raxIterator cri;
                raxStart(&cri, cg->consumers);
                raxSeek(&cri, "^", NULL, 0);
                while (raxNext(&cri)) {
                    streamConsumer *consumer = cri.data;
                    asize += sizeof(*consumer);
                    asize += sdslen(consumer->name);
                    asize += streamRadixTreeMemoryUsage(consumer->pel);
                    /* Don't count NACKs again, they are shared with the
                     * consumer group PEL. */
                }
                raxStop(&cri);
            }
            raxStop(&ri);
        }
    } else if (o->type == OBJ_MODULE) {
        asize = moduleGetMemUsage(key, o, sample_size, dbid);
    } else {
        serverPanic("Unknown object type");
    }
    return asize;
}

/* Release data obtained with getMemoryOverheadData(). */
void freeMemoryOverheadData(struct serverMemOverhead *mh) {
    zfree(mh->db);
    zfree(mh);
}

/* Return a struct serverMemOverhead filled with memory overhead
 * information used for the MEMORY OVERHEAD and INFO command. The returned
 * structure pointer should be freed calling freeMemoryOverheadData(). */
struct serverMemOverhead *getMemoryOverheadData(void) {
    int j;
    size_t mem_total = 0;
    size_t mem = 0;
    size_t zmalloc_used = zmalloc_used_memory();
    struct serverMemOverhead *mh = zcalloc(sizeof(*mh));

    mh->total_allocated = zmalloc_used;
    mh->startup_allocated = server.initial_memory_usage;
    mh->peak_allocated = server.stat_peak_memory;
    mh->total_frag = (float)server.cron_malloc_stats.process_rss / server.cron_malloc_stats.zmalloc_used;
    mh->total_frag_bytes = server.cron_malloc_stats.process_rss - server.cron_malloc_stats.zmalloc_used;
    mh->allocator_frag =
        (float)server.cron_malloc_stats.allocator_frag_smallbins_bytes / server.cron_malloc_stats.allocator_allocated +
        1;
    mh->allocator_frag_bytes = server.cron_malloc_stats.allocator_frag_smallbins_bytes;
    mh->allocator_rss = (float)server.cron_malloc_stats.allocator_resident / server.cron_malloc_stats.allocator_active;
    mh->allocator_rss_bytes = server.cron_malloc_stats.allocator_resident - server.cron_malloc_stats.allocator_active;
    mh->rss_extra = (float)server.cron_malloc_stats.process_rss / server.cron_malloc_stats.allocator_resident;
    mh->rss_extra_bytes = server.cron_malloc_stats.process_rss - server.cron_malloc_stats.allocator_resident;

    mem_total += server.initial_memory_usage;

    /* Replication backlog and replicas share one global replication buffer,
     * only if replication buffer memory is more than the repl backlog setting,
     * we consider the excess as replicas' memory. Otherwise, replication buffer
     * memory is the consumption of repl backlog. */
    if (listLength(server.replicas) && (long long)server.repl_buffer_mem > server.repl_backlog_size) {
        mh->clients_replicas = server.repl_buffer_mem - server.repl_backlog_size;
        mh->repl_backlog = server.repl_backlog_size;
    } else {
        mh->clients_replicas = 0;
        mh->repl_backlog = server.repl_buffer_mem;
    }
    if (server.repl_backlog) {
        /* The approximate memory of rax tree for indexed blocks. */
        mh->repl_backlog += server.repl_backlog->blocks_index->numnodes * sizeof(raxNode) +
                            raxSize(server.repl_backlog->blocks_index) * sizeof(void *);
    }
    mem_total += mh->repl_backlog;
    mem_total += mh->clients_replicas;

    /* Computing the memory used by the clients would be O(N) if done
     * here online. We use our values computed incrementally by
     * updateClientMemoryUsage(). */
    mh->clients_normal = server.stat_clients_type_memory[CLIENT_TYPE_PRIMARY] +
                         server.stat_clients_type_memory[CLIENT_TYPE_PUBSUB] +
                         server.stat_clients_type_memory[CLIENT_TYPE_NORMAL];
    mem_total += mh->clients_normal;

    mh->cluster_links = server.stat_cluster_links_memory;
    mem_total += mh->cluster_links;

    mem = 0;
    if (server.aof_state != AOF_OFF) {
        mem += sdsZmallocSize(server.aof_buf);
    }
    mh->aof_buffer = mem;
    mem_total += mem;

    mem = evalScriptsMemory();
    mh->lua_caches = mem;
    mem_total += mem;
    mh->functions_caches = functionsMemoryOverhead();
    mem_total += mh->functions_caches;

    for (j = 0; j < server.dbnum; j++) {
        serverDb *db = server.db + j;
        if (!kvstoreNumAllocatedDicts(db->keys)) continue;

        unsigned long long keyscount = kvstoreSize(db->keys);

        mh->total_keys += keyscount;
        mh->db = zrealloc(mh->db, sizeof(mh->db[0]) * (mh->num_dbs + 1));
        mh->db[mh->num_dbs].dbid = j;

        mem = kvstoreMemUsage(db->keys) + keyscount * sizeof(robj);
        mh->db[mh->num_dbs].overhead_ht_main = mem;
        mem_total += mem;

        mem = kvstoreMemUsage(db->expires);
        mh->db[mh->num_dbs].overhead_ht_expires = mem;
        mem_total += mem;

        mh->num_dbs++;

        mh->overhead_db_hashtable_lut += kvstoreOverheadHashtableLut(db->keys);
        mh->overhead_db_hashtable_lut += kvstoreOverheadHashtableLut(db->expires);
        mh->overhead_db_hashtable_rehashing += kvstoreOverheadHashtableRehashing(db->keys);
        mh->overhead_db_hashtable_rehashing += kvstoreOverheadHashtableRehashing(db->expires);
        mh->db_dict_rehashing_count += kvstoreDictRehashingCount(db->keys);
        mh->db_dict_rehashing_count += kvstoreDictRehashingCount(db->expires);
    }

    mh->overhead_total = mem_total;
    mh->dataset = zmalloc_used - mem_total;
    mh->peak_perc = (float)zmalloc_used * 100 / mh->peak_allocated;

    /* Metrics computed after subtracting the startup memory from
     * the total memory. */
    size_t net_usage = 1;
    if (zmalloc_used > mh->startup_allocated) net_usage = zmalloc_used - mh->startup_allocated;
    mh->dataset_perc = (float)mh->dataset * 100 / net_usage;
    mh->bytes_per_key = mh->total_keys ? (mh->dataset / mh->total_keys) : 0;

    return mh;
}
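
/* A compact numeric illustration of how the fields above relate (made-up numbers): with
 * zmalloc_used == 1000 MB, startup_allocated == 50 MB and an overhead_total (startup +
 * replication buffers + client buffers + script caches + per-db hashtable overhead) of
 * 200 MB, the report ends up with dataset == 800 MB and net usage == 950 MB, hence
 * dataset_perc of roughly 84.2%, while bytes_per_key is simply dataset / total_keys. */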

/* Helper for "MEMORY allocator-stats", used as a callback for the jemalloc
 * stats output. */
void inputCatSds(void *result, const char *str) {
    /* result is actually a (sds *), so re-cast it here */
    sds *info = (sds *)result;
    *info = sdscat(*info, str);
}

/* This implements MEMORY DOCTOR. A human-readable analysis of the server
 * memory condition. */
sds getMemoryDoctorReport(void) {
    int empty = 0;           /* Instance is empty or almost empty. */
    int big_peak = 0;        /* Memory peak is much larger than used mem. */
    int high_frag = 0;       /* High fragmentation. */
    int high_alloc_frag = 0; /* High allocator fragmentation. */
    int high_proc_rss = 0;   /* High process rss overhead. */
    int high_alloc_rss = 0;  /* High rss overhead. */
    int big_replica_buf = 0; /* Replica buffers are too big. */
    int big_client_buf = 0;  /* Client buffers are too big. */
    int many_scripts = 0;    /* Script cache has too many scripts. */
    int num_reports = 0;
    struct serverMemOverhead *mh = getMemoryOverheadData();

    if (mh->total_allocated < (1024 * 1024 * 5)) {
        empty = 1;
        num_reports++;
    } else {
        /* Peak is > 150% of current used memory? */
        if (((float)mh->peak_allocated / mh->total_allocated) > 1.5) {
            big_peak = 1;
            num_reports++;
        }

        /* Fragmentation is higher than 1.4 and 10MB ?*/
        if (mh->total_frag > 1.4 && mh->total_frag_bytes > 10 << 20) {
            high_frag = 1;
            num_reports++;
        }

        /* External fragmentation is higher than 1.1 and 10MB? */
        if (mh->allocator_frag > 1.1 && mh->allocator_frag_bytes > 10 << 20) {
            high_alloc_frag = 1;
            num_reports++;
        }

        /* Allocator rss is higher than 1.1 and 10MB ? */
        if (mh->allocator_rss > 1.1 && mh->allocator_rss_bytes > 10 << 20) {
            high_alloc_rss = 1;
            num_reports++;
        }

        /* Non-Allocator rss is higher than 1.1 and 10MB ? */
        if (mh->rss_extra > 1.1 && mh->rss_extra_bytes > 10 << 20) {
            high_proc_rss = 1;
            num_reports++;
        }

        /* Clients using more than 200k each average? */
        long num_replicas = listLength(server.replicas);
        long numclients = listLength(server.clients) - num_replicas;
        if (mh->clients_normal / numclients > (1024 * 200)) {
            big_client_buf = 1;
            num_reports++;
        }

        /* Replicas using more than 10 MB each? */
        if (num_replicas > 0 && mh->clients_replicas > (1024 * 1024 * 10)) {
            big_replica_buf = 1;
            num_reports++;
        }

        /* Too many scripts are cached? */
        if (dictSize(evalScriptsDict()) > 1000) {
            many_scripts = 1;
            num_reports++;
        }
    }

    sds s;
    if (num_reports == 0) {
        s = sdsnew("Hi Sam, I can't find any memory issue in your instance. "
                   "I can only account for what occurs on this base.\n");
    } else if (empty == 1) {
        s = sdsnew("Hi Sam, this instance is empty or is using very little memory, "
                   "my issues detector can't be used in these conditions. "
                   "Please, leave for your mission on Earth and fill it with some data. "
                   "The new Sam and I will be back to our programming as soon as I "
                   "finished rebooting.\n");
    } else {
        s = sdsnew("Sam, I detected a few issues in this Valkey instance memory implants:\n\n");
        if (big_peak) {
            s = sdscat(s,
                       " * Peak memory: In the past this instance used more than 150% the memory that is currently "
                       "using. The allocator is normally not able to release memory after a peak, so you can expect to "
                       "see a big fragmentation ratio, however this is actually harmless and is only due to the memory "
                       "peak, and if the Valkey instance Resident Set Size (RSS) is currently bigger than expected, "
                       "the memory will be used as soon as you fill the Valkey instance with more data. If the memory "
                       "peak was only occasional and you want to try to reclaim memory, please try the MEMORY PURGE "
                       "command, otherwise the only other option is to shutdown and restart the instance.\n\n");
        }
        if (high_frag) {
            s = sdscatprintf(
                s,
                " * High total RSS: This instance has a memory fragmentation and RSS overhead greater than 1.4 (this "
                "means that the Resident Set Size of the Valkey process is much larger than the sum of the logical "
                "allocations Valkey performed). This problem is usually due either to a large peak memory (check if "
                "there is a peak memory entry above in the report) or may result from a workload that causes the "
                "allocator to fragment memory a lot. If the problem is a large peak memory, then there is no issue. "
                "Otherwise, make sure you are using the Jemalloc allocator and not the default libc malloc. Note: The "
                "currently used allocator is \"%s\".\n\n",
                ZMALLOC_LIB);
        }
        if (high_alloc_frag) {
            s = sdscatprintf(
                s, " * High allocator fragmentation: This instance has an allocator external fragmentation greater "
                   "than 1.1. This problem is usually due either to a large peak memory (check if there is a peak "
                   "memory entry above in the report) or may result from a workload that causes the allocator to "
                   "fragment memory a lot. You can try enabling 'activedefrag' config option.\n\n");
        }
        if (high_alloc_rss) {
            s = sdscatprintf(
                s, " * High allocator RSS overhead: This instance has an RSS memory overhead is greater than 1.1 (this "
                   "means that the Resident Set Size of the allocator is much larger than the sum what the allocator "
                   "actually holds). This problem is usually due to a large peak memory (check if there is a peak "
                   "memory entry above in the report), you can try the MEMORY PURGE command to reclaim it.\n\n");
        }
        if (high_proc_rss) {
            s = sdscatprintf(
                s, " * High process RSS overhead: This instance has non-allocator RSS memory overhead is greater than "
                   "1.1 (this means that the Resident Set Size of the Valkey process is much larger than the RSS the "
                   "allocator holds). This problem may be due to Lua scripts or Modules.\n\n");
        }
        if (big_replica_buf) {
            s = sdscat(s,
                       " * Big replica buffers: The replica output buffers in this instance are greater than 10MB for "
                       "each replica (on average). This likely means that there is some replica instance that is "
                       "struggling receiving data, either because it is too slow or because of networking issues. As a "
                       "result, data piles on the primary output buffers. Please try to identify what replica is not "
                       "receiving data correctly and why. You can use the INFO output in order to check the replicas "
                       "delays and the CLIENT LIST command to check the output buffers of each replica.\n\n");
        }
        if (big_client_buf) {
            s = sdscat(s, " * Big client buffers: The clients output buffers in this instance are greater than 200K "
                          "per client (on average). This may result from different causes, like Pub/Sub clients "
                          "subscribed to channels but not receiving data fast enough, so that data piles on the Valkey "
                          "instance output buffer, or clients sending commands with large replies or very large "
                          "sequences of commands in the same pipeline. Please use the CLIENT LIST command in order to "
                          "investigate the issue if it causes problems in your instance, or to understand better why "
                          "certain clients are using a big amount of memory.\n\n");
        }
        if (many_scripts) {
            s = sdscat(s, " * Many scripts: There seem to be many cached scripts in this instance (more than 1000). "
                          "This may be because scripts are generated and `EVAL`ed, instead of being parameterized "
                          "(with KEYS and ARGV), `SCRIPT LOAD`ed and `EVALSHA`ed. Unless `SCRIPT FLUSH` is called "
                          "periodically, the scripts' caches may end up consuming most of your memory.\n\n");
        }
        s = sdscat(s, "I'm here to keep you safe, Sam. I want to help you.\n");
    }
    freeMemoryOverheadData(mh);
    return s;
}

/* Set the object LRU/LFU depending on server.maxmemory_policy.
 * The lfu_freq arg is only relevant if policy is MAXMEMORY_FLAG_LFU.
 * The lru_idle and lru_clock args are only relevant if policy
 * is MAXMEMORY_FLAG_LRU.
 * Either or both of them may be <0, in that case, nothing is set. */
int objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle, long long lru_clock, int lru_multiplier) {
    if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
        if (lfu_freq >= 0) {
            serverAssert(lfu_freq <= 255);
            val->lru = (LFUGetTimeInMinutes() << 8) | lfu_freq;
            return 1;
        }
    } else if (lru_idle >= 0) {
        /* Provided LRU idle time is in seconds. Scale
         * according to the LRU clock resolution this
         * instance was compiled with (normally 1000 ms, so the
         * below statement will expand to lru_idle*1000/1000). */
        lru_idle = lru_idle * lru_multiplier / LRU_CLOCK_RESOLUTION;
        long lru_abs = lru_clock - lru_idle; /* Absolute access time. */
        /* If the LRU field underflows (since lru_clock is a wrapping clock),
         * we need to make it positive again. This is handled by the unwrapping
         * code in estimateObjectIdleTime. I.e. imagine a day when lru_clock
         * wraps around (happens once in some 6 months), and becomes a low
         * value, like 10, an lru_idle of 1000 should be near LRU_CLOCK_MAX. */
        if (lru_abs < 0) lru_abs += LRU_CLOCK_MAX;
        val->lru = lru_abs;
        return 1;
    }
    return 0;
}
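
/* Two quick numeric checks of the logic above (assuming LRU_CLOCK_RESOLUTION == 1000 ms
 * and an lru_multiplier of 1000, as when restoring data produced by the same build):
 *   - plain case: lru_idle = 30 s scales to 30 clock ticks, so with lru_clock == 5000 the
 *     stored access time becomes 5000 - 30 = 4970;
 *   - wraparound case: with lru_clock == 10 and lru_idle == 1000, lru_abs == -990 < 0, so
 *     LRU_CLOCK_MAX is added back and the value lands just below the top of the wrapping
 *     clock, which estimateObjectIdleTime() later unwraps correctly. */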
|
|
|
|
/* ======================= The OBJECT and MEMORY commands =================== */
|
|
|
|
/* This is a helper function for the OBJECT command. We need to lookup keys
|
|
* without any modification of LRU or other parameters. */
|
|
robj *objectCommandLookup(client *c, robj *key) {
|
|
return lookupKeyReadWithFlags(c->db, key, LOOKUP_NOTOUCH | LOOKUP_NONOTIFY);
|
|
}
|
|
|
|
robj *objectCommandLookupOrReply(client *c, robj *key, robj *reply) {
|
|
robj *o = objectCommandLookup(c, key);
|
|
if (!o) addReplyOrErrorObject(c, reply);
|
|
return o;
|
|
}
|
|
|
|
/* Object command allows to inspect the internals of an Object.
|
|
* Usage: OBJECT <refcount|encoding|idletime|freq> <key> */
void objectCommand(client *c) {
    robj *o;

    if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr, "help")) {
        const char *help[] = {"ENCODING <key>",
                              " Return the kind of internal representation used in order to store the value",
                              " associated with a <key>.",
                              "FREQ <key>",
                              " Return the access frequency index of the <key>. The returned integer is",
                              " proportional to the logarithm of the recent access frequency of the key.",
                              "IDLETIME <key>",
                              " Return the idle time of the <key>, that is the approximated number of",
                              " seconds elapsed since the last access to the key.",
                              "REFCOUNT <key>",
                              " Return the number of references of the value associated with the specified",
                              " <key>.",
                              NULL};
        addReplyHelp(c, help);
    } else if (!strcasecmp(c->argv[1]->ptr, "refcount") && c->argc == 3) {
        if ((o = objectCommandLookupOrReply(c, c->argv[2], shared.null[c->resp])) == NULL) return;
        addReplyLongLong(c, o->refcount);
    } else if (!strcasecmp(c->argv[1]->ptr, "encoding") && c->argc == 3) {
        if ((o = objectCommandLookupOrReply(c, c->argv[2], shared.null[c->resp])) == NULL) return;
        addReplyBulkCString(c, strEncoding(o->encoding));
    } else if (!strcasecmp(c->argv[1]->ptr, "idletime") && c->argc == 3) {
        if ((o = objectCommandLookupOrReply(c, c->argv[2], shared.null[c->resp])) == NULL) return;
        if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
            addReplyError(c, "An LFU maxmemory policy is selected, idle time not tracked. Please note that when "
                             "switching between policies at runtime LRU and LFU data will take some time to adjust.");
            return;
        }
        addReplyLongLong(c, estimateObjectIdleTime(o) / 1000);
    } else if (!strcasecmp(c->argv[1]->ptr, "freq") && c->argc == 3) {
        if ((o = objectCommandLookupOrReply(c, c->argv[2], shared.null[c->resp])) == NULL) return;
        if (!(server.maxmemory_policy & MAXMEMORY_FLAG_LFU)) {
            addReplyError(c,
                          "An LFU maxmemory policy is not selected, access frequency not tracked. Please note that "
                          "when switching between policies at runtime LRU and LFU data will take some time to adjust.");
            return;
        }
        /* LFUDecrAndReturn should be called in case the key has not been
         * accessed for a long time, because we update the access time only
         * when the key is read or overwritten. */
        addReplyLongLong(c, LFUDecrAndReturn(o));
    } else {
        addReplySubcommandSyntaxError(c);
    }
}

/* The memory command will eventually be a complete interface for the
 * memory introspection capabilities of the server.
 *
 * Usage: MEMORY usage <key> */
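/* Example (illustrative):
 *
 *     > MEMORY USAGE mykey             => bytes used by the key and its value,
 *                                         sampling up to 5 nested elements
 *     > MEMORY USAGE mykey SAMPLES 0   => sample every nested element
 */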
void memoryCommand(client *c) {
    if (!strcasecmp(c->argv[1]->ptr, "help") && c->argc == 2) {
        /* clang-format off */
        const char *help[] = {
            "DOCTOR",
            " Return memory problems reports.",
            "MALLOC-STATS",
            " Return internal statistics report from the memory allocator.",
            "PURGE",
            " Attempt to purge dirty pages for reclamation by the allocator.",
            "STATS",
            " Return information about the memory usage of the server.",
            "USAGE <key> [SAMPLES <count>]",
            " Return memory in bytes used by <key> and its value. Nested values are",
            " sampled up to <count> times (default: 5, 0 means sample all).",
            NULL
        };
        /* clang-format on */
        addReplyHelp(c, help);
    } else if (!strcasecmp(c->argv[1]->ptr, "usage") && c->argc >= 3) {
        dictEntry *de;
        long long samples = OBJ_COMPUTE_SIZE_DEF_SAMPLES;
        for (int j = 3; j < c->argc; j++) {
            if (!strcasecmp(c->argv[j]->ptr, "samples") && j + 1 < c->argc) {
                if (getLongLongFromObjectOrReply(c, c->argv[j + 1], &samples, NULL) == C_ERR) return;
                if (samples < 0) {
                    addReplyErrorObject(c, shared.syntaxerr);
                    return;
                }
                if (samples == 0) samples = LLONG_MAX;
                j++; /* skip option argument. */
            } else {
                addReplyErrorObject(c, shared.syntaxerr);
                return;
            }
        }
        if ((de = dbFind(c->db, c->argv[2]->ptr)) == NULL) {
            addReplyNull(c);
            return;
        }
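        /* The reported size is the sum of the (sampled) value size, the key's
         * sds allocation and the main dict entry overhead, as computed below. */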
        size_t usage = objectComputeSize(c->argv[2], dictGetVal(de), samples, c->db->id);
        usage += sdsZmallocSize(dictGetKey(de));
        usage += dictEntryMemUsage();
        addReplyLongLong(c, usage);
    } else if (!strcasecmp(c->argv[1]->ptr, "stats") && c->argc == 2) {
        struct serverMemOverhead *mh = getMemoryOverheadData();

        addReplyMapLen(c, 31 + mh->num_dbs);
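        /* Note: the map length above (31 fixed fields plus one per database)
         * must be kept in sync with the number of fields emitted below. */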

        addReplyBulkCString(c, "peak.allocated");
        addReplyLongLong(c, mh->peak_allocated);

        addReplyBulkCString(c, "total.allocated");
        addReplyLongLong(c, mh->total_allocated);

        addReplyBulkCString(c, "startup.allocated");
        addReplyLongLong(c, mh->startup_allocated);

        addReplyBulkCString(c, "replication.backlog");
        addReplyLongLong(c, mh->repl_backlog);

        addReplyBulkCString(c, "clients.slaves");
        addReplyLongLong(c, mh->clients_replicas);

        addReplyBulkCString(c, "clients.normal");
        addReplyLongLong(c, mh->clients_normal);

        addReplyBulkCString(c, "cluster.links");
        addReplyLongLong(c, mh->cluster_links);

        addReplyBulkCString(c, "aof.buffer");
        addReplyLongLong(c, mh->aof_buffer);

        addReplyBulkCString(c, "lua.caches");
        addReplyLongLong(c, mh->lua_caches);

        addReplyBulkCString(c, "functions.caches");
        addReplyLongLong(c, mh->functions_caches);

        for (size_t j = 0; j < mh->num_dbs; j++) {
            char dbname[32];
            snprintf(dbname, sizeof(dbname), "db.%zd", mh->db[j].dbid);
            addReplyBulkCString(c, dbname);
            addReplyMapLen(c, 2);

            addReplyBulkCString(c, "overhead.hashtable.main");
            addReplyLongLong(c, mh->db[j].overhead_ht_main);

            addReplyBulkCString(c, "overhead.hashtable.expires");
            addReplyLongLong(c, mh->db[j].overhead_ht_expires);
        }

        addReplyBulkCString(c, "overhead.db.hashtable.lut");
        addReplyLongLong(c, mh->overhead_db_hashtable_lut);

        addReplyBulkCString(c, "overhead.db.hashtable.rehashing");
        addReplyLongLong(c, mh->overhead_db_hashtable_rehashing);

        addReplyBulkCString(c, "overhead.total");
        addReplyLongLong(c, mh->overhead_total);

        addReplyBulkCString(c, "db.dict.rehashing.count");
        addReplyLongLong(c, mh->db_dict_rehashing_count);

        addReplyBulkCString(c, "keys.count");
        addReplyLongLong(c, mh->total_keys);

        addReplyBulkCString(c, "keys.bytes-per-key");
        addReplyLongLong(c, mh->bytes_per_key);

        addReplyBulkCString(c, "dataset.bytes");
        addReplyLongLong(c, mh->dataset);

        addReplyBulkCString(c, "dataset.percentage");
        addReplyDouble(c, mh->dataset_perc);

        addReplyBulkCString(c, "peak.percentage");
        addReplyDouble(c, mh->peak_perc);

        addReplyBulkCString(c, "allocator.allocated");
        addReplyLongLong(c, server.cron_malloc_stats.allocator_allocated);

        addReplyBulkCString(c, "allocator.active");
        addReplyLongLong(c, server.cron_malloc_stats.allocator_active);

        addReplyBulkCString(c, "allocator.resident");
        addReplyLongLong(c, server.cron_malloc_stats.allocator_resident);

        addReplyBulkCString(c, "allocator.muzzy");
        addReplyLongLong(c, server.cron_malloc_stats.allocator_muzzy);

        addReplyBulkCString(c, "allocator-fragmentation.ratio");
        addReplyDouble(c, mh->allocator_frag);

        addReplyBulkCString(c, "allocator-fragmentation.bytes");
        addReplyLongLong(c, mh->allocator_frag_bytes);

        addReplyBulkCString(c, "allocator-rss.ratio");
        addReplyDouble(c, mh->allocator_rss);

        addReplyBulkCString(c, "allocator-rss.bytes");
        addReplyLongLong(c, mh->allocator_rss_bytes);

        addReplyBulkCString(c, "rss-overhead.ratio");
        addReplyDouble(c, mh->rss_extra);

        addReplyBulkCString(c, "rss-overhead.bytes");
        addReplyLongLong(c, mh->rss_extra_bytes);

        addReplyBulkCString(c, "fragmentation"); /* this is the total RSS overhead, including fragmentation */
        addReplyDouble(c, mh->total_frag); /* it is kept here for backwards compatibility */

        addReplyBulkCString(c, "fragmentation.bytes");
        addReplyLongLong(c, mh->total_frag_bytes);

        freeMemoryOverheadData(mh);
    } else if (!strcasecmp(c->argv[1]->ptr, "malloc-stats") && c->argc == 2) {
#if defined(USE_JEMALLOC)
        sds info = sdsempty();
        je_malloc_stats_print(inputCatSds, &info, NULL);
        addReplyVerbatim(c, info, sdslen(info), "txt");
        sdsfree(info);
#else
        addReplyBulkCString(c, "Stats not supported for the current allocator");
#endif
    } else if (!strcasecmp(c->argv[1]->ptr, "doctor") && c->argc == 2) {
        sds report = getMemoryDoctorReport();
        addReplyVerbatim(c, report, sdslen(report), "txt");
        sdsfree(report);
    } else if (!strcasecmp(c->argv[1]->ptr, "purge") && c->argc == 2) {
        if (jemalloc_purge() == 0)
            addReply(c, shared.ok);
        else
            addReplyError(c, "Error purging dirty pages");
    } else {
        addReplySubcommandSyntaxError(c);
    }
}