diff --git a/keydb.conf b/keydb.conf index 59ddf5abb..02e97d4be 100644 --- a/keydb.conf +++ b/keydb.conf @@ -1145,6 +1145,11 @@ acllog-max-len 128 # # active-expire-effort 1 +# Force evictions when used system memory reaches X% of total system memory. +# This is useful as a safeguard to prevent OOM kills (0 to disable). +# +# force-eviction-percent 0 + ############################# LAZY FREEING #################################### # KeyDB has two primitives to delete keys. One is called DEL and is a blocking diff --git a/src/Makefile b/src/Makefile index 01c24b0df..a3cd741f4 100644 --- a/src/Makefile +++ b/src/Makefile @@ -384,7 +384,7 @@ endif REDIS_SERVER_NAME=keydb-server$(PROG_SUFFIX) REDIS_SENTINEL_NAME=keydb-sentinel$(PROG_SUFFIX) -REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o t_nhash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crcspeed.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o acl.o storage.o rdb-s3.o fastlock.o new.o tracking.o cron.o connection.o tls.o sha256.o motd_server.o timeout.o setcpuaffinity.o AsyncWorkQueue.o snapshot.o storage/teststorageprovider.o keydbutils.o StorageCache.o monotonic.o cli_common.o mt19937-64.o $(ASM_OBJ) $(STORAGE_OBJ) +REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o t_nhash.o config.o aof.o pubsub.o multi.o 
debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crcspeed.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o acl.o storage.o rdb-s3.o fastlock.o new.o tracking.o cron.o connection.o tls.o sha256.o motd_server.o timeout.o setcpuaffinity.o AsyncWorkQueue.o snapshot.o storage/teststorageprovider.o keydbutils.o StorageCache.o monotonic.o cli_common.o mt19937-64.o meminfo.o $(ASM_OBJ) $(STORAGE_OBJ) KEYDB_SERVER_OBJ=SnapshotPayloadParseState.o REDIS_CLI_NAME=keydb-cli$(PROG_SUFFIX) REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o redis-cli-cpphelper.o zmalloc.o release.o anet.o ae.o crcspeed.o crc64.o siphash.o crc16.o storage-lite.o fastlock.o motd_client.o monotonic.o cli_common.o mt19937-64.o $(ASM_OBJ) diff --git a/src/config.cpp b/src/config.cpp index 5d1827c5c..dbe7377bc 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -2968,6 +2968,7 @@ standardConfig configs[] = { createSizeTConfig("semi-ordered-set-bucket-size", NULL, MODIFIABLE_CONFIG, 0, 1024, g_semiOrderedSetTargetBucketSize, 0, INTEGER_CONFIG, NULL, NULL), createSDSConfig("availability-zone", NULL, MODIFIABLE_CONFIG, 0, g_pserver->sdsAvailabilityZone, "", NULL, NULL), createIntConfig("overload-protect-percent", NULL, MODIFIABLE_CONFIG, 0, 200, g_pserver->overload_protect_threshold, 0, INTEGER_CONFIG, NULL, NULL), + createIntConfig("force-eviction-percent", NULL, MODIFIABLE_CONFIG, 0, 100, g_pserver->force_eviction_percent, 0, INTEGER_CONFIG, NULL, NULL), #ifdef USE_OPENSSL createIntConfig("tls-port", NULL, MODIFIABLE_CONFIG, 0, 65535, g_pserver->tls_port, 0, INTEGER_CONFIG, NULL, updateTLSPort), /* TCP port. 
*/ diff --git a/src/evict.cpp b/src/evict.cpp index 94bc132c1..740a6d2b2 100644 --- a/src/evict.cpp +++ b/src/evict.cpp @@ -411,8 +411,13 @@ size_t freeMemoryGetNotCountedMemory(void) { * memory currently used. May be > 1 if we are over the memory * limit. * (Populated both for C_ERR and C_OK) + * + * 'reason' the reason why the memory limit was exceeded + * EVICT_REASON_USER: reported user memory exceeded maxmemory + * EVICT_REASON_SYS: available system memory under configurable threshold + * (Populated when C_ERR is returned) */ -int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level, bool fQuickCycle, bool fPreSnapshot) { +int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level, EvictReason *reason, bool fQuickCycle, bool fPreSnapshot) { size_t mem_reported, mem_used, mem_tofree; /* Check if we are over the memory usage limit. If we are not, no need @@ -421,10 +426,22 @@ int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *lev if (total) *total = mem_reported; size_t maxmemory = g_pserver->maxmemory; if (fPreSnapshot) - maxmemory = static_cast(maxmemory * 0.9); // derate memory by 10% since we won't be able to free during snapshot + maxmemory = static_cast(maxmemory*0.9); // derate memory by 10% since we won't be able to free during snapshot if (g_pserver->FRdbSaveInProgress()) maxmemory = static_cast(maxmemory*1.2); + /* If available system memory is below a certain threshold, force eviction */ + long long sys_available_mem_buffer = 0; + if (g_pserver->force_eviction_percent && g_pserver->cron_malloc_stats.sys_total) { + float available_mem_ratio = (float)(100 - g_pserver->force_eviction_percent)/100; + size_t min_available_mem = static_cast(g_pserver->cron_malloc_stats.sys_total * available_mem_ratio); + sys_available_mem_buffer = static_cast(g_pserver->cron_malloc_stats.sys_available - min_available_mem); + if (sys_available_mem_buffer < 0) { + long long mem_threshold = 
mem_reported + sys_available_mem_buffer; + maxmemory = ((long long)maxmemory < mem_threshold) ? maxmemory : static_cast(mem_threshold); + } + } + /* We may return ASAP if there is no need to compute the level. */ int return_ok_asap = !maxmemory || mem_reported <= maxmemory; if (return_ok_asap && !level) return C_OK; @@ -435,6 +452,12 @@ int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *lev size_t overhead = freeMemoryGetNotCountedMemory(); mem_used = (mem_used > overhead) ? mem_used-overhead : 0; + /* If system available memory is too low, we want to force evictions no matter + * what so we also offset the overhead from maxmemory. */ + if (sys_available_mem_buffer < 0) { + maxmemory = (maxmemory > overhead) ? maxmemory-overhead : 0; + } + /* Compute the ratio of memory usage. */ if (level) { if (!maxmemory) { @@ -459,6 +482,8 @@ int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *lev if (logical) *logical = mem_used; if (tofree) *tofree = mem_tofree; + if (reason) *reason = sys_available_mem_buffer < 0 ? EvictReason::System : EvictReason::User; + return C_ERR; } @@ -604,6 +629,12 @@ static unsigned long evictionTimeLimitUs() { return ULONG_MAX; /* No limit to eviction time */ } +static void updateSysAvailableMemory() { + if (g_pserver->force_eviction_percent) { + g_pserver->cron_malloc_stats.sys_available = getMemAvailable(); + } +} + /* Check that memory usage is within the current "maxmemory" limit. If over * "maxmemory", attempt to free memory by evicting data (if it's safe to do so). 
* @@ -641,10 +672,11 @@ int performEvictions(bool fPreSnapshot) { const bool fEvictToStorage = !cserver.delete_on_evict && g_pserver->db[0]->FStorageProvider(); int result = EVICT_FAIL; int ckeysFailed = 0; + EvictReason evictReason; std::unique_ptr splazy = std::make_unique(); - if (getMaxmemoryState(&mem_reported,NULL,&mem_tofree,NULL,false,fPreSnapshot) == C_OK) + if (getMaxmemoryState(&mem_reported,NULL,&mem_tofree,NULL,&evictReason,false,fPreSnapshot) == C_OK) return EVICT_OK; if (g_pserver->maxmemory_policy == MAXMEMORY_NO_EVICTION) @@ -827,6 +859,9 @@ int performEvictions(bool fPreSnapshot) { * across the dbAsyncDelete() call, while the thread can * release the memory all the time. */ if (g_pserver->lazyfree_lazy_eviction) { + if (evictReason == EvictReason::System) { + updateSysAvailableMemory(); + } if (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_OK) { break; } @@ -854,9 +889,13 @@ int performEvictions(bool fPreSnapshot) { if (splazy != nullptr && splazy->memory_queued() > 0 && !serverTL->gcEpoch.isReset()) { g_pserver->garbageCollector.enqueue(serverTL->gcEpoch, std::move(splazy)); - } + } cant_free: + if (mem_freed > 0 && evictReason == EvictReason::System) { + updateSysAvailableMemory(); + } + if (g_pserver->m_pstorageFactory) { if (mem_reported < g_pserver->maxmemory*1.2) { diff --git a/src/meminfo.cpp b/src/meminfo.cpp new file mode 100644 index 000000000..5808c05b0 --- /dev/null +++ b/src/meminfo.cpp @@ -0,0 +1,31 @@ +#include +#include + +static size_t getMemKey(std::string key) { +# ifdef __linux__ + std::string token; + std::ifstream f("/proc/meminfo"); + while (f >> token) { + if (token == key) { + size_t mem_val; + if (f >> mem_val) { + return mem_val * 1024; // values are in kB + } else { + return 0; + } + f.ignore(std::numeric_limits::max(), '\n'); + } + } + return 0; +# else + return 0; +# endif +} + +size_t getMemAvailable() { + return getMemKey("MemAvailable:"); +} + +size_t getMemTotal() { + return getMemKey("MemTotal:"); +} \ No newline 
at end of file diff --git a/src/server.cpp b/src/server.cpp index c2a988ae4..8ab67ad40 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -70,6 +70,7 @@ #ifdef __linux__ #include #include +#include #endif int g_fTestMode = false; @@ -2312,6 +2313,10 @@ void cronUpdateMemoryStats() { g_pserver->cron_malloc_stats.allocator_active = g_pserver->cron_malloc_stats.allocator_resident; if (!g_pserver->cron_malloc_stats.allocator_allocated) g_pserver->cron_malloc_stats.allocator_allocated = g_pserver->cron_malloc_stats.zmalloc_used; + + if (g_pserver->force_eviction_percent) { + g_pserver->cron_malloc_stats.sys_available = getMemAvailable(); + } } } @@ -4031,6 +4036,8 @@ void initServer(void) { g_pserver->cron_malloc_stats.allocator_allocated = 0; g_pserver->cron_malloc_stats.allocator_active = 0; g_pserver->cron_malloc_stats.allocator_resident = 0; + g_pserver->cron_malloc_stats.sys_available = 0; + g_pserver->cron_malloc_stats.sys_total = g_pserver->force_eviction_percent ? getMemTotal() : 0; g_pserver->lastbgsave_status = C_OK; g_pserver->aof_last_write_status = C_OK; g_pserver->aof_last_write_errno = 0; @@ -4038,6 +4045,7 @@ void initServer(void) { g_pserver->mvcc_tstamp = 0; + /* Create the timer callback, this is our way to process many background * operations incrementally, like clients timeout, eviction of unaccessed * expired keys and so forth. */ @@ -5729,6 +5737,7 @@ sds genRedisInfoString(const char *section) { const char *evict_policy = evictPolicyToString(); long long memory_lua = g_pserver->lua ? 
(long long)lua_gc(g_pserver->lua,LUA_GCCOUNT,0)*1024 : 0; struct redisMemOverhead *mh = getMemoryOverheadData(); + char available_system_mem[64] = "unavailable"; /* Peak memory is updated from time to time by serverCron() so it * may happen that the instantaneous value is slightly bigger than @@ -5737,6 +5746,10 @@ sds genRedisInfoString(const char *section) { if (zmalloc_used > g_pserver->stat_peak_memory) g_pserver->stat_peak_memory = zmalloc_used; + if (g_pserver->cron_malloc_stats.sys_available) { + snprintf(available_system_mem, 64, "%lu", g_pserver->cron_malloc_stats.sys_available); + } + bytesToHuman(hmem,zmalloc_used,sizeof(hmem)); bytesToHuman(peak_hmem,g_pserver->stat_peak_memory,sizeof(peak_hmem)); bytesToHuman(total_system_hmem,total_system_mem,sizeof(total_system_hmem)); @@ -5789,7 +5802,8 @@ sds genRedisInfoString(const char *section) { "active_defrag_running:%d\r\n" "lazyfree_pending_objects:%zu\r\n" "lazyfreed_objects:%zu\r\n" - "storage_provider:%s\r\n", + "storage_provider:%s\r\n" + "available_system_memory:%s\r\n", zmalloc_used, hmem, g_pserver->cron_malloc_stats.process_rss, @@ -5834,7 +5848,8 @@ sds genRedisInfoString(const char *section) { g_pserver->active_defrag_running, lazyfreeGetPendingObjectsCount(), lazyfreeGetFreedObjectsCount(), - g_pserver->m_pstorageFactory ? g_pserver->m_pstorageFactory->name() : "none" + g_pserver->m_pstorageFactory ? 
g_pserver->m_pstorageFactory->name() : "none", + available_system_mem ); freeMemoryOverheadData(mh); } diff --git a/src/server.h b/src/server.h index 422cb9495..9ad1aab8d 100644 --- a/src/server.h +++ b/src/server.h @@ -2014,6 +2014,8 @@ struct malloc_stats { size_t allocator_allocated; size_t allocator_active; size_t allocator_resident; + size_t sys_total; + size_t sys_available; }; typedef struct socketFds { @@ -2576,6 +2578,7 @@ struct redisServer { int maxmemory_policy; /* Policy for key eviction */ int maxmemory_samples; /* Precision of random sampling */ int maxmemory_eviction_tenacity;/* Aggressiveness of eviction processing */ + int force_eviction_percent; /* Force eviction when used system memory reaches this percent of total (0 = disabled) */ int lfu_log_factor; /* LFU logarithmic counter factor. */ int lfu_decay_time; /* LFU counter decay factor. */ long long proto_max_bulk_len; /* Protocol bulk length maximum size. */ @@ -2871,6 +2874,12 @@ typedef struct { #define OBJ_HASH_KEY 1 #define OBJ_HASH_VALUE 2 +/* Used in evict.cpp */ +enum class EvictReason { + User, /* User memory exceeded limit */ + System /* System memory exceeded limit */ +}; + /*----------------------------------------------------------------------------- * Extern declarations *----------------------------------------------------------------------------*/ @@ -3375,7 +3384,7 @@ int zslLexValueGteMin(sds value, zlexrangespec *spec); int zslLexValueLteMax(sds value, zlexrangespec *spec); /* Core functions */ -int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level, bool fQuickCycle = false, bool fPreSnapshot=false); +int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level, EvictReason *reason=nullptr, bool fQuickCycle=false, bool fPreSnapshot=false); size_t freeMemoryGetNotCountedMemory(); int overMaxmemoryAfterAlloc(size_t moremem); int processCommand(client *c, int callFlags); @@ -3660,6 +3669,9 @@ unsigned long LFUDecrAndReturn(robj_roptr o); #define 
EVICT_FAIL 2 int performEvictions(bool fPreSnapshot); +/* meminfo.cpp -- get memory info from /proc/meminfo on Linux */ +size_t getMemAvailable(); +size_t getMemTotal(); /* Keys hashing / comparison functions for dict.c hash tables. */ uint64_t dictSdsHash(const void *key);