From 4cae99e785ee3302adc37144f07f737b9b7bfc7d Mon Sep 17 00:00:00 2001 From: Chen Tianjie Date: Fri, 1 Mar 2024 13:41:24 +0800 Subject: [PATCH] Add overhead of all DBs and rehashing dict count to info. (#12913) Sometimes we need to judge quickly why Redis is suddenly taking more memory. One of the reasons is that the main DB's dicts are rehashing. We may use `MEMORY STATS` to monitor the overhead memory of each DB, but there is still no total sum to show an overall trend. So this PR adds the total overhead of all DBs to the `INFO MEMORY` section, together with the total count of rehashing DB dicts, providing some intuitive metrics about main dicts rehashing. This PR adds the following metric to INFO MEMORY: * `mem_overhead_db_hashtable_rehashing` - only the size of ht[0] in dictionaries we're rehashing (i.e. the memory that is going to be released soon) and similar ones to MEMORY STATS: * `overhead.db.hashtable.lut` (complements the existing `overhead.hashtable.main` and `overhead.hashtable.expires`, which also count the `dictEntry` structs) * `overhead.db.hashtable.rehashing` - temporary rehashing overhead. * `db.dict.rehashing.count` - number of top level dictionaries being rehashed. 
--------- Co-authored-by: zhaozhao.zz Co-authored-by: Oran Agra --- src/commands/memory-stats.json | 11 ++++++++++- src/kvstore.c | 30 ++++++++++++++++++++++++---- src/kvstore.h | 4 ++++ src/object.c | 21 ++++++++++++++++++-- src/server.c | 1 + src/server.h | 3 +++ tests/unit/info.tcl | 36 ++++++++++++++++++++++++++++++++++ tests/unit/other.tcl | 17 +--------------- 8 files changed, 100 insertions(+), 23 deletions(-) diff --git a/src/commands/memory-stats.json b/src/commands/memory-stats.json index 916c4bdcb..98e49b7d2 100644 --- a/src/commands/memory-stats.json +++ b/src/commands/memory-stats.json @@ -47,9 +47,18 @@ "functions.caches": { "type": "integer" }, + "overhead.db.hashtable.lut": { + "type": "integer" + }, + "overhead.db.hashtable.rehashing": { + "type": "integer" + }, "overhead.total": { "type": "integer" }, + "db.dict.rehashing.count": { + "type": "integer" + }, "keys.count": { "type": "integer" }, @@ -103,7 +112,7 @@ } }, "patternProperties": { - "^db.": { + "^db\\.\\d+$": { "type": "object", "properties": { "overhead.hashtable.main": { diff --git a/src/kvstore.c b/src/kvstore.c index 61f7e43ac..624af75be 100644 --- a/src/kvstore.c +++ b/src/kvstore.c @@ -61,6 +61,8 @@ struct _kvstore { unsigned long long key_count; /* Total number of keys in this kvstore. */ unsigned long long bucket_count; /* Total number of buckets in this kvstore across dictionaries. */ unsigned long long *dict_size_index; /* Binary indexed tree (BIT) that describes cumulative key frequencies up until given dict-index. */ + size_t overhead_hashtable_lut; /* The overhead of all dictionaries. */ + size_t overhead_hashtable_rehashing; /* The overhead of dictionaries rehashing. */ }; /* Structure for kvstore iterator that allows iterating across multiple dicts. 
*/ @@ -191,11 +193,11 @@ static void kvstoreDictRehashingStarted(dict *d) { listAddNodeTail(kvs->rehashing, d); metadata->rehashing_node = listLast(kvs->rehashing); - if (kvs->num_dicts == 1) - return; unsigned long long from, to; dictRehashingInfo(d, &from, &to); kvs->bucket_count += to; /* Started rehashing (Add the new ht size) */ + kvs->overhead_hashtable_lut += to; + kvs->overhead_hashtable_rehashing += from; } /* Remove dictionary from the rehashing list. @@ -210,11 +212,11 @@ static void kvstoreDictRehashingCompleted(dict *d) { metadata->rehashing_node = NULL; } - if (kvs->num_dicts == 1) - return; unsigned long long from, to; dictRehashingInfo(d, &from, &to); kvs->bucket_count -= from; /* Finished rehashing (Remove the old ht size) */ + kvs->overhead_hashtable_lut -= from; + kvs->overhead_hashtable_rehashing -= from; } /* Returns the size of the DB dict metadata in bytes. */ @@ -264,6 +266,8 @@ kvstore *kvstoreCreate(dictType *type, int num_dicts_bits, int flags) { kvs->resize_cursor = 0; kvs->dict_size_index = kvs->num_dicts > 1? 
zcalloc(sizeof(unsigned long long) * (kvs->num_dicts + 1)) : NULL; kvs->bucket_count = 0; + kvs->overhead_hashtable_lut = 0; + kvs->overhead_hashtable_rehashing = 0; return kvs; } @@ -288,6 +292,8 @@ void kvstoreEmpty(kvstore *kvs, void(callback)(dict*)) { kvs->bucket_count = 0; if (kvs->dict_size_index) memset(kvs->dict_size_index, 0, sizeof(unsigned long long) * (kvs->num_dicts + 1)); + kvs->overhead_hashtable_lut = 0; + kvs->overhead_hashtable_rehashing = 0; } void kvstoreRelease(kvstore *kvs) { @@ -529,6 +535,10 @@ int kvstoreNumNonEmptyDicts(kvstore *kvs) { return kvs->non_empty_dicts; } +int kvstoreNumAllocatedDicts(kvstore *kvs) { + return kvs->allocated_dicts; +} + int kvstoreNumDicts(kvstore *kvs) { return kvs->num_dicts; } @@ -629,6 +639,18 @@ uint64_t kvstoreIncrementallyRehash(kvstore *kvs, uint64_t threshold_us) { return elapsed_us; } +size_t kvstoreOverheadHashtableLut(kvstore *kvs) { + return kvs->overhead_hashtable_lut * sizeof(dictEntry *); +} + +size_t kvstoreOverheadHashtableRehashing(kvstore *kvs) { + return kvs->overhead_hashtable_rehashing * sizeof(dictEntry *); +} + +unsigned long kvstoreDictRehashingCount(kvstore *kvs) { + return listLength(kvs->rehashing); +} + unsigned long kvstoreDictSize(kvstore *kvs, int didx) { dict *d = kvstoreGetDict(kvs, didx); diff --git a/src/kvstore.h b/src/kvstore.h index b1a6a6783..fdd06ba92 100644 --- a/src/kvstore.h +++ b/src/kvstore.h @@ -30,6 +30,7 @@ void kvstoreGetStats(kvstore *kvs, char *buf, size_t bufsize, int full); int kvstoreFindDictIndexByKeyIndex(kvstore *kvs, unsigned long target); int kvstoreGetNextNonEmptyDictIndex(kvstore *kvs, int didx); int kvstoreNumNonEmptyDicts(kvstore *kvs); +int kvstoreNumAllocatedDicts(kvstore *kvs); int kvstoreNumDicts(kvstore *kvs); uint64_t kvstoreGetHash(kvstore *kvs, const void *key); @@ -43,6 +44,9 @@ dictEntry *kvstoreIteratorNext(kvstoreIterator *kvs_it); /* Rehashing */ void kvstoreTryResizeDicts(kvstore *kvs, int limit); uint64_t 
kvstoreIncrementallyRehash(kvstore *kvs, uint64_t threshold_us); +size_t kvstoreOverheadHashtableLut(kvstore *kvs); +size_t kvstoreOverheadHashtableRehashing(kvstore *kvs); +unsigned long kvstoreDictRehashingCount(kvstore *kvs); /* Specific dict access by dict-index */ unsigned long kvstoreDictSize(kvstore *kvs, int didx); diff --git a/src/object.c b/src/object.c index d5bb74f8f..ee3564657 100644 --- a/src/object.c +++ b/src/object.c @@ -1246,8 +1246,9 @@ struct redisMemOverhead *getMemoryOverheadData(void) { for (j = 0; j < server.dbnum; j++) { redisDb *db = server.db+j; + if (!kvstoreNumAllocatedDicts(db->keys)) continue; + unsigned long long keyscount = kvstoreSize(db->keys); - if (keyscount == 0) continue; mh->total_keys += keyscount; mh->db = zrealloc(mh->db,sizeof(mh->db[0])*(mh->num_dbs+1)); @@ -1263,6 +1264,13 @@ struct redisMemOverhead *getMemoryOverheadData(void) { mem_total+=mem; mh->num_dbs++; + + mh->overhead_db_hashtable_lut += kvstoreOverheadHashtableLut(db->keys); + mh->overhead_db_hashtable_lut += kvstoreOverheadHashtableLut(db->expires); + mh->overhead_db_hashtable_rehashing += kvstoreOverheadHashtableRehashing(db->keys); + mh->overhead_db_hashtable_rehashing += kvstoreOverheadHashtableRehashing(db->expires); + mh->db_dict_rehashing_count += kvstoreDictRehashingCount(db->keys); + mh->db_dict_rehashing_count += kvstoreDictRehashingCount(db->expires); } mh->overhead_total = mem_total; @@ -1556,7 +1564,7 @@ NULL } else if (!strcasecmp(c->argv[1]->ptr,"stats") && c->argc == 2) { struct redisMemOverhead *mh = getMemoryOverheadData(); - addReplyMapLen(c,28+mh->num_dbs); + addReplyMapLen(c,31+mh->num_dbs); addReplyBulkCString(c,"peak.allocated"); addReplyLongLong(c,mh->peak_allocated); @@ -1601,9 +1609,18 @@ NULL addReplyLongLong(c,mh->db[j].overhead_ht_expires); } + addReplyBulkCString(c,"overhead.db.hashtable.lut"); + addReplyLongLong(c, mh->overhead_db_hashtable_lut); + + addReplyBulkCString(c,"overhead.db.hashtable.rehashing"); + addReplyLongLong(c, 
mh->overhead_db_hashtable_rehashing); + addReplyBulkCString(c,"overhead.total"); addReplyLongLong(c,mh->overhead_total); + addReplyBulkCString(c,"db.dict.rehashing.count"); + addReplyLongLong(c, mh->db_dict_rehashing_count); + addReplyBulkCString(c,"keys.count"); addReplyLongLong(c,mh->total_keys); diff --git a/src/server.c b/src/server.c index adcc9641c..d1093feba 100644 --- a/src/server.c +++ b/src/server.c @@ -5685,6 +5685,7 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) { "mem_cluster_links:%zu\r\n", mh->cluster_links, "mem_aof_buffer:%zu\r\n", mh->aof_buffer, "mem_allocator:%s\r\n", ZMALLOC_LIB, + "mem_overhead_db_hashtable_rehashing:%zu\r\n", mh->overhead_db_hashtable_rehashing, "active_defrag_running:%d\r\n", server.active_defrag_running, "lazyfree_pending_objects:%zu\r\n", lazyfreeGetPendingObjectsCount(), "lazyfreed_objects:%zu\r\n", lazyfreeGetFreedObjectsCount())); diff --git a/src/server.h b/src/server.h index 09ab74075..de490f702 100644 --- a/src/server.h +++ b/src/server.h @@ -1422,6 +1422,9 @@ struct redisMemOverhead { float rss_extra; size_t rss_extra_bytes; size_t num_dbs; + size_t overhead_db_hashtable_lut; + size_t overhead_db_hashtable_rehashing; + unsigned long db_dict_rehashing_count; struct { size_t dbid; size_t overhead_ht_main; diff --git a/tests/unit/info.tcl b/tests/unit/info.tcl index cb4397cc8..fdd736ee2 100644 --- a/tests/unit/info.tcl +++ b/tests/unit/info.tcl @@ -465,3 +465,39 @@ start_server {tags {"info" "external:skip"}} { } } } + +start_server {tags {"info" "external:skip"}} { + test {memory: database and pubsub overhead and rehashing dict count} { + r flushall + set info_mem [r info memory] + set mem_stats [r memory stats] + assert_equal [getInfoProperty $info_mem mem_overhead_db_hashtable_rehashing] {0} + assert_equal [dict get $mem_stats overhead.db.hashtable.lut] {0} + assert_equal [dict get $mem_stats overhead.db.hashtable.rehashing] {0} + assert_equal [dict get $mem_stats 
db.dict.rehashing.count] {0} + # Initial dict expand is not rehashing + r set a b + set info_mem [r info memory] + set mem_stats [r memory stats] + assert_equal [getInfoProperty $info_mem mem_overhead_db_hashtable_rehashing] {0} + assert_range [dict get $mem_stats overhead.db.hashtable.lut] 1 64 + assert_equal [dict get $mem_stats overhead.db.hashtable.rehashing] {0} + assert_equal [dict get $mem_stats db.dict.rehashing.count] {0} + # set 4 more keys to trigger rehashing + # get the info within a transaction to make sure the rehashing is not completed + r multi + r set b c + r set c d + r set d e + r set e f + r info memory + r memory stats + set res [r exec] + set info_mem [lindex $res 4] + set mem_stats [lindex $res 5] + assert_range [getInfoProperty $info_mem mem_overhead_db_hashtable_rehashing] 1 64 + assert_range [dict get $mem_stats overhead.db.hashtable.lut] 1 192 + assert_range [dict get $mem_stats overhead.db.hashtable.rehashing] 1 64 + assert_equal [dict get $mem_stats db.dict.rehashing.count] {1} + } +} diff --git a/tests/unit/other.tcl b/tests/unit/other.tcl index b42abaafe..1ba0e62fa 100644 --- a/tests/unit/other.tcl +++ b/tests/unit/other.tcl @@ -489,19 +489,6 @@ start_cluster 1 0 {tags {"other external:skip cluster slow"}} { } {} {needs:debug} } -proc get_overhead_hashtable_main {} { - set main 0 - set stats [r memory stats] - set list_stats [split $stats " "] - for {set j 0} {$j < [llength $list_stats]} {incr j} { - if {[string equal -nocase "\{overhead.hashtable.main" [lindex $list_stats $j]]} { - set main [lindex $list_stats [expr $j+1]] - break - } - } - return $main -} - start_server {tags {"other external:skip"}} { test "Redis can resize empty dict" { # Write and then delete 128 keys, creating an empty dict @@ -512,12 +499,10 @@ start_server {tags {"other external:skip"}} { for {set j 1} {$j <= 128} {incr j} { r del $j{b} } - # Set a key to enable overhead display of db 0 - r set a b # The dict containing 128 keys must have expanded, # its hash 
table itself takes a lot more than 400 bytes wait_for_condition 100 50 { - [get_overhead_hashtable_main] < 400 + [dict get [r memory stats] db.9 overhead.hashtable.main] < 400 } else { fail "dict did not resize in time" }