Merge pull request #204 from Snapchat/acope-evict-main

Force evictions when system available memory is low
This commit is contained in:
Alex Cope 2023-07-10 06:38:23 -07:00 committed by GitHub Enterprise
commit 72b47eddc8
7 changed files with 111 additions and 8 deletions

View File

@ -1145,6 +1145,11 @@ acllog-max-len 128
#
# active-expire-effort 1
# Force evictions when used system memory reaches X% of total system memory.
# This is useful as a safeguard to prevent OOM kills (0 to disable).
#
# force-eviction-percent 0
############################# LAZY FREEING ####################################
# KeyDB has two primitives to delete keys. One is called DEL and is a blocking

View File

@ -384,7 +384,7 @@ endif
REDIS_SERVER_NAME=keydb-server$(PROG_SUFFIX)
REDIS_SENTINEL_NAME=keydb-sentinel$(PROG_SUFFIX)
REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o t_nhash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crcspeed.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o acl.o storage.o rdb-s3.o fastlock.o new.o tracking.o cron.o connection.o tls.o sha256.o motd_server.o timeout.o setcpuaffinity.o AsyncWorkQueue.o snapshot.o storage/teststorageprovider.o keydbutils.o StorageCache.o monotonic.o cli_common.o mt19937-64.o $(ASM_OBJ) $(STORAGE_OBJ)
REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o t_nhash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crcspeed.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o acl.o storage.o rdb-s3.o fastlock.o new.o tracking.o cron.o connection.o tls.o sha256.o motd_server.o timeout.o setcpuaffinity.o AsyncWorkQueue.o snapshot.o storage/teststorageprovider.o keydbutils.o StorageCache.o monotonic.o cli_common.o mt19937-64.o meminfo.o $(ASM_OBJ) $(STORAGE_OBJ)
KEYDB_SERVER_OBJ=SnapshotPayloadParseState.o
REDIS_CLI_NAME=keydb-cli$(PROG_SUFFIX)
REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o redis-cli-cpphelper.o zmalloc.o release.o anet.o ae.o crcspeed.o crc64.o siphash.o crc16.o storage-lite.o fastlock.o motd_client.o monotonic.o cli_common.o mt19937-64.o $(ASM_OBJ)

View File

@ -2968,6 +2968,7 @@ standardConfig configs[] = {
createSizeTConfig("semi-ordered-set-bucket-size", NULL, MODIFIABLE_CONFIG, 0, 1024, g_semiOrderedSetTargetBucketSize, 0, INTEGER_CONFIG, NULL, NULL),
createSDSConfig("availability-zone", NULL, MODIFIABLE_CONFIG, 0, g_pserver->sdsAvailabilityZone, "", NULL, NULL),
createIntConfig("overload-protect-percent", NULL, MODIFIABLE_CONFIG, 0, 200, g_pserver->overload_protect_threshold, 0, INTEGER_CONFIG, NULL, NULL),
createIntConfig("force-eviction-percent", NULL, MODIFIABLE_CONFIG, 0, 100, g_pserver->force_eviction_percent, 0, INTEGER_CONFIG, NULL, NULL),
#ifdef USE_OPENSSL
createIntConfig("tls-port", NULL, MODIFIABLE_CONFIG, 0, 65535, g_pserver->tls_port, 0, INTEGER_CONFIG, NULL, updateTLSPort), /* TCP port. */

View File

@ -411,8 +411,13 @@ size_t freeMemoryGetNotCountedMemory(void) {
* memory currently used. May be > 1 if we are over the memory
* limit.
* (Populated both for C_ERR and C_OK)
*
* 'reason' the reason why the memory limit was exceeded
* EVICT_REASON_USER: reported user memory exceeded maxmemory
* EVICT_REASON_SYS: available system memory under configurable threshold
* (Populated when C_ERR is returned)
*/
int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level, bool fQuickCycle, bool fPreSnapshot) {
int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level, EvictReason *reason, bool fQuickCycle, bool fPreSnapshot) {
size_t mem_reported, mem_used, mem_tofree;
/* Check if we are over the memory usage limit. If we are not, no need
@ -421,10 +426,22 @@ int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *lev
if (total) *total = mem_reported;
size_t maxmemory = g_pserver->maxmemory;
if (fPreSnapshot)
maxmemory = static_cast<size_t>(maxmemory * 0.9); // derate memory by 10% since we won't be able to free during snapshot
maxmemory = static_cast<size_t>(maxmemory*0.9); // derate memory by 10% since we won't be able to free during snapshot
if (g_pserver->FRdbSaveInProgress())
maxmemory = static_cast<size_t>(maxmemory*1.2);
/* If available system memory is below a certain threshold, force eviction */
long long sys_available_mem_buffer = 0;
if (g_pserver->force_eviction_percent && g_pserver->cron_malloc_stats.sys_total) {
float available_mem_ratio = (float)(100 - g_pserver->force_eviction_percent)/100;
size_t min_available_mem = static_cast<size_t>(g_pserver->cron_malloc_stats.sys_total * available_mem_ratio);
sys_available_mem_buffer = static_cast<long>(g_pserver->cron_malloc_stats.sys_available - min_available_mem);
if (sys_available_mem_buffer < 0) {
long long mem_threshold = mem_reported + sys_available_mem_buffer;
maxmemory = ((long long)maxmemory < mem_threshold) ? maxmemory : static_cast<size_t>(mem_threshold);
}
}
/* We may return ASAP if there is no need to compute the level. */
int return_ok_asap = !maxmemory || mem_reported <= maxmemory;
if (return_ok_asap && !level) return C_OK;
@ -435,6 +452,12 @@ int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *lev
size_t overhead = freeMemoryGetNotCountedMemory();
mem_used = (mem_used > overhead) ? mem_used-overhead : 0;
/* If system available memory is too low, we want to force evictions no matter
* what so we also offset the overhead from maxmemory. */
if (sys_available_mem_buffer < 0) {
maxmemory = (maxmemory > overhead) ? maxmemory-overhead : 0;
}
/* Compute the ratio of memory usage. */
if (level) {
if (!maxmemory) {
@ -459,6 +482,8 @@ int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *lev
if (logical) *logical = mem_used;
if (tofree) *tofree = mem_tofree;
if (reason) *reason = sys_available_mem_buffer < 0 ? EvictReason::System : EvictReason::User;
return C_ERR;
}
@ -604,6 +629,12 @@ static unsigned long evictionTimeLimitUs() {
return ULONG_MAX; /* No limit to eviction time */
}
/* Re-sample the system's available memory into cron_malloc_stats.sys_available.
 * Does nothing when force-eviction-percent is 0 (feature disabled), so the
 * stored value is left untouched in that case. */
static void updateSysAvailableMemory() {
    if (!g_pserver->force_eviction_percent)
        return;
    g_pserver->cron_malloc_stats.sys_available = getMemAvailable();
}
/* Check that memory usage is within the current "maxmemory" limit. If over
* "maxmemory", attempt to free memory by evicting data (if it's safe to do so).
*
@ -641,10 +672,11 @@ int performEvictions(bool fPreSnapshot) {
const bool fEvictToStorage = !cserver.delete_on_evict && g_pserver->db[0]->FStorageProvider();
int result = EVICT_FAIL;
int ckeysFailed = 0;
EvictReason evictReason;
std::unique_ptr<FreeMemoryLazyFree> splazy = std::make_unique<FreeMemoryLazyFree>();
if (getMaxmemoryState(&mem_reported,NULL,&mem_tofree,NULL,false,fPreSnapshot) == C_OK)
if (getMaxmemoryState(&mem_reported,NULL,&mem_tofree,NULL,&evictReason,false,fPreSnapshot) == C_OK)
return EVICT_OK;
if (g_pserver->maxmemory_policy == MAXMEMORY_NO_EVICTION)
@ -827,6 +859,9 @@ int performEvictions(bool fPreSnapshot) {
* across the dbAsyncDelete() call, while the thread can
* release the memory all the time. */
if (g_pserver->lazyfree_lazy_eviction) {
if (evictReason == EvictReason::System) {
updateSysAvailableMemory();
}
if (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_OK) {
break;
}
@ -854,9 +889,13 @@ int performEvictions(bool fPreSnapshot) {
if (splazy != nullptr && splazy->memory_queued() > 0 && !serverTL->gcEpoch.isReset()) {
g_pserver->garbageCollector.enqueue(serverTL->gcEpoch, std::move(splazy));
}
}
cant_free:
if (mem_freed > 0 && evictReason == EvictReason::System) {
updateSysAvailableMemory();
}
if (g_pserver->m_pstorageFactory)
{
if (mem_reported < g_pserver->maxmemory*1.2) {

31
src/meminfo.cpp Normal file
View File

@ -0,0 +1,31 @@
#include <string>
#include <fstream>
/* Look up one entry of /proc/meminfo and return its value converted to bytes.
 *
 * 'key' is the entry label exactly as it appears in the file, including the
 * trailing colon (e.g. "MemTotal:").
 *
 * Returns 0 when the file cannot be read, the label is not found, or the
 * value following the label is not a number. On non-Linux builds the function
 * always returns 0. */
static size_t getMemKey(const std::string &key) {
#ifdef __linux__
    std::ifstream meminfo("/proc/meminfo");
    std::string label;
    std::string rest_of_line;
    /* Only the first token of each line is a label, so after a mismatch the
     * remainder of that line is discarded instead of being compared too. */
    while (meminfo >> label) {
        if (label == key) {
            size_t kilobytes = 0;
            if (!(meminfo >> kilobytes))
                return 0;
            /* Values are reported in kB; saturate rather than wrap around
             * where size_t is too narrow to hold the byte count. */
            const size_t max_kilobytes = static_cast<size_t>(-1) / 1024;
            if (kilobytes > max_kilobytes)
                return static_cast<size_t>(-1);
            return kilobytes * 1024;
        }
        std::getline(meminfo, rest_of_line);
    }
    return 0;
#else
    (void)key; /* no /proc/meminfo to consult on this platform */
    return 0;
#endif
}
/* Returns the value of the "MemAvailable:" entry as reported by getMemKey()
 * (bytes, or 0 when the entry could not be read). */
size_t getMemAvailable() {
    const size_t available_bytes = getMemKey("MemAvailable:");
    return available_bytes;
}
/* Returns the value of the "MemTotal:" entry as reported by getMemKey()
 * (bytes, or 0 when the entry could not be read). */
size_t getMemTotal() {
    const size_t total_bytes = getMemKey("MemTotal:");
    return total_bytes;
}

View File

@ -70,6 +70,7 @@
#ifdef __linux__
#include <sys/prctl.h>
#include <sys/mman.h>
#include <sys/sysinfo.h>
#endif
int g_fTestMode = false;
@ -2312,6 +2313,10 @@ void cronUpdateMemoryStats() {
g_pserver->cron_malloc_stats.allocator_active = g_pserver->cron_malloc_stats.allocator_resident;
if (!g_pserver->cron_malloc_stats.allocator_allocated)
g_pserver->cron_malloc_stats.allocator_allocated = g_pserver->cron_malloc_stats.zmalloc_used;
if (g_pserver->force_eviction_percent) {
g_pserver->cron_malloc_stats.sys_available = getMemAvailable();
}
}
}
@ -4031,6 +4036,8 @@ void initServer(void) {
g_pserver->cron_malloc_stats.allocator_allocated = 0;
g_pserver->cron_malloc_stats.allocator_active = 0;
g_pserver->cron_malloc_stats.allocator_resident = 0;
g_pserver->cron_malloc_stats.sys_available = 0;
g_pserver->cron_malloc_stats.sys_total = g_pserver->force_eviction_percent ? getMemTotal() : 0;
g_pserver->lastbgsave_status = C_OK;
g_pserver->aof_last_write_status = C_OK;
g_pserver->aof_last_write_errno = 0;
@ -4038,6 +4045,7 @@ void initServer(void) {
g_pserver->mvcc_tstamp = 0;
/* Create the timer callback, this is our way to process many background
* operations incrementally, like clients timeout, eviction of unaccessed
* expired keys and so forth. */
@ -5729,6 +5737,7 @@ sds genRedisInfoString(const char *section) {
const char *evict_policy = evictPolicyToString();
long long memory_lua = g_pserver->lua ? (long long)lua_gc(g_pserver->lua,LUA_GCCOUNT,0)*1024 : 0;
struct redisMemOverhead *mh = getMemoryOverheadData();
char available_system_mem[64] = "unavailable";
/* Peak memory is updated from time to time by serverCron() so it
* may happen that the instantaneous value is slightly bigger than
@ -5737,6 +5746,10 @@ sds genRedisInfoString(const char *section) {
if (zmalloc_used > g_pserver->stat_peak_memory)
g_pserver->stat_peak_memory = zmalloc_used;
if (g_pserver->cron_malloc_stats.sys_available) {
snprintf(available_system_mem, 64, "%lu", g_pserver->cron_malloc_stats.sys_available);
}
bytesToHuman(hmem,zmalloc_used,sizeof(hmem));
bytesToHuman(peak_hmem,g_pserver->stat_peak_memory,sizeof(peak_hmem));
bytesToHuman(total_system_hmem,total_system_mem,sizeof(total_system_hmem));
@ -5789,7 +5802,8 @@ sds genRedisInfoString(const char *section) {
"active_defrag_running:%d\r\n"
"lazyfree_pending_objects:%zu\r\n"
"lazyfreed_objects:%zu\r\n"
"storage_provider:%s\r\n",
"storage_provider:%s\r\n"
"available_system_memory:%s\r\n",
zmalloc_used,
hmem,
g_pserver->cron_malloc_stats.process_rss,
@ -5834,7 +5848,8 @@ sds genRedisInfoString(const char *section) {
g_pserver->active_defrag_running,
lazyfreeGetPendingObjectsCount(),
lazyfreeGetFreedObjectsCount(),
g_pserver->m_pstorageFactory ? g_pserver->m_pstorageFactory->name() : "none"
g_pserver->m_pstorageFactory ? g_pserver->m_pstorageFactory->name() : "none",
available_system_mem
);
freeMemoryOverheadData(mh);
}

View File

@ -2014,6 +2014,8 @@ struct malloc_stats {
size_t allocator_allocated;
size_t allocator_active;
size_t allocator_resident;
size_t sys_total;
size_t sys_available;
};
typedef struct socketFds {
@ -2576,6 +2578,7 @@ struct redisServer {
int maxmemory_policy; /* Policy for key eviction */
int maxmemory_samples; /* Precision of random sampling */
int maxmemory_eviction_tenacity;/* Aggressiveness of eviction processing */
int force_eviction_percent; /* Force eviction when this percent of system memory is remaining */
int lfu_log_factor; /* LFU logarithmic counter factor. */
int lfu_decay_time; /* LFU counter decay factor. */
long long proto_max_bulk_len; /* Protocol bulk length maximum size. */
@ -2871,6 +2874,12 @@ typedef struct {
#define OBJ_HASH_KEY 1
#define OBJ_HASH_VALUE 2
/* Why the memory limit was reported as exceeded (used in evict.cpp). */
enum class EvictReason {
    User = 0,   /* Reported user memory exceeded maxmemory */
    System = 1  /* Available system memory fell under the configured threshold */
};
/*-----------------------------------------------------------------------------
* Extern declarations
*----------------------------------------------------------------------------*/
@ -3375,7 +3384,7 @@ int zslLexValueGteMin(sds value, zlexrangespec *spec);
int zslLexValueLteMax(sds value, zlexrangespec *spec);
/* Core functions */
int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level, bool fQuickCycle = false, bool fPreSnapshot=false);
int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level, EvictReason *reason=nullptr, bool fQuickCycle=false, bool fPreSnapshot=false);
size_t freeMemoryGetNotCountedMemory();
int overMaxmemoryAfterAlloc(size_t moremem);
int processCommand(client *c, int callFlags);
@ -3660,6 +3669,9 @@ unsigned long LFUDecrAndReturn(robj_roptr o);
#define EVICT_FAIL 2
int performEvictions(bool fPreSnapshot);
/* meminfo.cpp -- get memory info from /proc/meminfo on Linux (returns 0 elsewhere) */
size_t getMemAvailable();
size_t getMemTotal();
/* Keys hashing / comparison functions for dict.c hash tables. */
uint64_t dictSdsHash(const void *key);