Incremental eviction processing (#7653)

Rather than blindly evicting until maxmemory limit is achieved, this
update adds a time limit to eviction.  While over the maxmemory limit,
eviction will process before each command AND as a timeProc when no
commands are running.

This will reduce the latency impact on many cases, especially pathological
cases like massive used memory increase during dict rehashing.

There is a risk that some other edge cases (like massive pipelined use
of MGET) could cause Redis memory usage to keep growing despite the
eviction attempts, so a new maxmemory-eviction-tenacity config is
introduced to let users mitigate that.
This commit is contained in:
Jim Brunner 2020-09-15 23:16:01 -07:00 committed by GitHub
parent 6ff741b5cf
commit 810e28a397
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 163 additions and 97 deletions

View File

@ -354,7 +354,7 @@ Inside server.c you can find code that handles other vital things of the Redis s
* `call()` is used in order to call a given command in the context of a given client. * `call()` is used in order to call a given command in the context of a given client.
* `activeExpireCycle()` handles eviction of keys with a time to live set via the `EXPIRE` command. * `activeExpireCycle()` handles eviction of keys with a time to live set via the `EXPIRE` command.
* `freeMemoryIfNeeded()` is called when a new write command should be performed but Redis is out of memory according to the `maxmemory` directive. * `performEvictions()` is called when a new write command should be performed but Redis is out of memory according to the `maxmemory` directive.
* The global variable `redisCommandTable` defines all the Redis commands, specifying the name of the command, the function implementing the command, the number of arguments required, and other properties of each command. * The global variable `redisCommandTable` defines all the Redis commands, specifying the name of the command, the function implementing the command, the number of arguments required, and other properties of each command.
networking.c networking.c

View File

@ -912,6 +912,14 @@ acllog-max-len 128
# #
# maxmemory-samples 5 # maxmemory-samples 5
# Eviction processing is designed to function well with the default setting.
# If there is an unusually large amount of write traffic, this value may need to
# be increased. Decreasing this value may reduce latency at the risk of
# eviction processing effectiveness
# 0 = minimum latency, 10 = default, 100 = process without regard to latency
#
# maxmemory-eviction-tenacity 10
# Starting from Redis 5, by default a replica will ignore its maxmemory setting # Starting from Redis 5, by default a replica will ignore its maxmemory setting
# (unless it is promoted to master after a failover or manually). It means # (unless it is promoted to master after a failover or manually). It means
# that the eviction of keys will be just handled by the master, sending the # that the eviction of keys will be just handled by the master, sending the

View File

@ -2139,7 +2139,7 @@ static int updateMaxmemory(long long val, long long prev, char **err) {
if ((unsigned long long)val < used) { if ((unsigned long long)val < used) {
serverLog(LL_WARNING,"WARNING: the new maxmemory value set via CONFIG SET (%llu) is smaller than the current memory usage (%zu). This will result in key eviction and/or the inability to accept new write commands depending on the maxmemory-policy.", server.maxmemory, used); serverLog(LL_WARNING,"WARNING: the new maxmemory value set via CONFIG SET (%llu) is smaller than the current memory usage (%zu). This will result in key eviction and/or the inability to accept new write commands depending on the maxmemory-policy.", server.maxmemory, used);
} }
freeMemoryIfNeededAndSafe(); performEvictions();
} }
return 1; return 1;
} }
@ -2327,6 +2327,7 @@ standardConfig configs[] = {
createIntConfig("replica-priority", "slave-priority", MODIFIABLE_CONFIG, 0, INT_MAX, server.slave_priority, 100, INTEGER_CONFIG, NULL, NULL), createIntConfig("replica-priority", "slave-priority", MODIFIABLE_CONFIG, 0, INT_MAX, server.slave_priority, 100, INTEGER_CONFIG, NULL, NULL),
createIntConfig("repl-diskless-sync-delay", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.repl_diskless_sync_delay, 5, INTEGER_CONFIG, NULL, NULL), createIntConfig("repl-diskless-sync-delay", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.repl_diskless_sync_delay, 5, INTEGER_CONFIG, NULL, NULL),
createIntConfig("maxmemory-samples", NULL, MODIFIABLE_CONFIG, 1, INT_MAX, server.maxmemory_samples, 5, INTEGER_CONFIG, NULL, NULL), createIntConfig("maxmemory-samples", NULL, MODIFIABLE_CONFIG, 1, INT_MAX, server.maxmemory_samples, 5, INTEGER_CONFIG, NULL, NULL),
createIntConfig("maxmemory-eviction-tenacity", NULL, MODIFIABLE_CONFIG, 0, 100, server.maxmemory_eviction_tenacity, 10, INTEGER_CONFIG, NULL, NULL),
createIntConfig("timeout", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.maxidletime, 0, INTEGER_CONFIG, NULL, NULL), /* Default client timeout: infinite */ createIntConfig("timeout", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.maxidletime, 0, INTEGER_CONFIG, NULL, NULL), /* Default client timeout: infinite */
createIntConfig("replica-announce-port", "slave-announce-port", MODIFIABLE_CONFIG, 0, 65535, server.slave_announce_port, 0, INTEGER_CONFIG, NULL, NULL), createIntConfig("replica-announce-port", "slave-announce-port", MODIFIABLE_CONFIG, 0, 65535, server.slave_announce_port, 0, INTEGER_CONFIG, NULL, NULL),
createIntConfig("tcp-backlog", NULL, IMMUTABLE_CONFIG, 0, INT_MAX, server.tcp_backlog, 511, INTEGER_CONFIG, NULL, NULL), /* TCP listen backlog. */ createIntConfig("tcp-backlog", NULL, IMMUTABLE_CONFIG, 0, INT_MAX, server.tcp_backlog, 511, INTEGER_CONFIG, NULL, NULL), /* TCP listen backlog. */

View File

@ -33,13 +33,14 @@
#include "server.h" #include "server.h"
#include "bio.h" #include "bio.h"
#include "atomicvar.h" #include "atomicvar.h"
#include <math.h>
/* ---------------------------------------------------------------------------- /* ----------------------------------------------------------------------------
* Data structures * Data structures
* --------------------------------------------------------------------------*/ * --------------------------------------------------------------------------*/
/* To improve the quality of the LRU approximation we take a set of keys /* To improve the quality of the LRU approximation we take a set of keys
* that are good candidate for eviction across freeMemoryIfNeeded() calls. * that are good candidate for eviction across performEvictions() calls.
* *
* Entries inside the eviction pool are taken ordered by idle time, putting * Entries inside the eviction pool are taken ordered by idle time, putting
* greater idle times to the right (ascending order). * greater idle times to the right (ascending order).
@ -97,25 +98,7 @@ unsigned long long estimateObjectIdleTime(robj *o) {
} }
} }
/* freeMemoryIfNeeded() gets called when 'maxmemory' is set on the config /* LRU approximation algorithm
* file to limit the max memory used by the server, before processing a
* command.
*
* The goal of the function is to free enough memory to keep Redis under the
* configured memory limit.
*
* The function starts calculating how many bytes should be freed to keep
* Redis under the limit, and enters a loop selecting the best keys to
* evict accordingly to the configured policy.
*
* If all the bytes needed to return back under the limit were freed the
* function returns C_OK, otherwise C_ERR is returned, and the caller
* should block the execution of commands that will result in more memory
* used by the server.
*
* ------------------------------------------------------------------------
*
* LRU approximation algorithm
* *
* Redis uses an approximation of the LRU algorithm that runs in constant * Redis uses an approximation of the LRU algorithm that runs in constant
* memory. Every time there is a key to expire, we sample N keys (with * memory. Every time there is a key to expire, we sample N keys (with
@ -150,7 +133,7 @@ void evictionPoolAlloc(void) {
EvictionPoolLRU = ep; EvictionPoolLRU = ep;
} }
/* This is an helper function for freeMemoryIfNeeded(), it is used in order /* This is an helper function for performEvictions(), it is used in order
* to populate the evictionPool with a few entries every time we want to * to populate the evictionPool with a few entries every time we want to
* expire a key. Keys with idle time smaller than one of the current * expire a key. Keys with idle time smaller than one of the current
* keys are added. Keys are always added if there are free entries. * keys are added. Keys are always added if there are free entries.
@ -341,11 +324,6 @@ unsigned long LFUDecrAndReturn(robj *o) {
return counter; return counter;
} }
/* ----------------------------------------------------------------------------
* The external API for eviction: freeMemoryIfNeeded() is called by the
* server when there is data to add in order to make space if needed.
* --------------------------------------------------------------------------*/
/* We don't want to count AOF buffers and slaves output buffers as /* We don't want to count AOF buffers and slaves output buffers as
* used memory: the eviction should use mostly data size. This function * used memory: the eviction should use mostly data size. This function
* returns the sum of AOF and slaves buffer. */ * returns the sum of AOF and slaves buffer. */
@ -434,39 +412,113 @@ int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *lev
return C_ERR; return C_ERR;
} }
/* This function is periodically called to see if there is memory to free
* according to the current "maxmemory" settings. In case we are over the /* The evictionTimeProc is started when "maxmemory" has been breached and
* memory limit, the function will try to free some memory to return back * could not immediately be resolved. This will spin the event loop with short
* under the limit. * eviction cycles until the "maxmemory" condition has resolved or there are no
* * more evictable items. */
* The function returns C_OK if we are under the memory limit or if we static int isEvictionProcRunning = 0;
* were over the limit, but the attempt to free memory was successful. static int evictionTimeProc(
* Otherwise if we are over the memory limit, but not enough memory struct aeEventLoop *eventLoop, long long id, void *clientData) {
* was freed to return back under the limit, the function returns C_ERR. */ UNUSED(eventLoop);
int freeMemoryIfNeeded(void) { UNUSED(id);
int keys_freed = 0; UNUSED(clientData);
if (performEvictions() == EVICT_RUNNING) return 0; /* keep evicting */
/* For EVICT_OK - things are good, no need to keep evicting.
* For EVICT_FAIL - there is nothing left to evict. */
isEvictionProcRunning = 0;
return AE_NOMORE;
}
/* Check if it's safe to perform evictions.
* Returns 1 if evictions can be performed
* Returns 0 if eviction processing should be skipped
*/
static int isSafeToPerformEvictions(void) {
/* - There must be no script in timeout condition.
* - Nor we are loading data right now. */
if (server.lua_timedout || server.loading) return 0;
/* By default replicas should ignore maxmemory /* By default replicas should ignore maxmemory
* and just be masters exact copies. */ * and just be masters exact copies. */
if (server.masterhost && server.repl_slave_ignore_maxmemory) return C_OK; if (server.masterhost && server.repl_slave_ignore_maxmemory) return 0;
size_t mem_reported, mem_tofree, mem_freed;
mstime_t latency, eviction_latency, lazyfree_latency;
long long delta;
int slaves = listLength(server.slaves);
int result = C_ERR;
/* When clients are paused the dataset should be static not just from the /* When clients are paused the dataset should be static not just from the
* POV of clients not being able to write, but also from the POV of * POV of clients not being able to write, but also from the POV of
* expires and evictions of keys not being performed. */ * expires and evictions of keys not being performed. */
if (clientsArePaused()) return C_OK; if (clientsArePaused()) return 0;
return 1;
}
/* Algorithm for converting tenacity (0-100) to a time limit. */
static unsigned long evictionTimeLimitUs() {
serverAssert(server.maxmemory_eviction_tenacity >= 0);
serverAssert(server.maxmemory_eviction_tenacity <= 100);
if (server.maxmemory_eviction_tenacity <= 10) {
/* A linear progression from 0..500us */
return 50uL * server.maxmemory_eviction_tenacity;
}
if (server.maxmemory_eviction_tenacity < 100) {
/* A 15% geometric progression, resulting in a limit of ~2 min at tenacity==99 */
return (unsigned long)(500.0 * pow(1.15, server.maxmemory_eviction_tenacity - 10.0));
}
return ULONG_MAX; /* No limit to eviction time */
}
/* Check that memory usage is within the current "maxmemory" limit. If over
* "maxmemory", attempt to free memory by evicting data (if it's safe to do so).
*
* It's possible for Redis to suddenly be significantly over the "maxmemory"
* setting. This can happen if there is a large allocation (like a hash table
* resize) or even if the "maxmemory" setting is manually adjusted. Because of
* this, it's important to evict for a managed period of time - otherwise Redis
* would become unresponsive while evicting.
*
* The goal of this function is to improve the memory situation - not to
* immediately resolve it. In the case that some items have been evicted but
* the "maxmemory" limit has not been achieved, an aeTimeProc will be started
* which will continue to evict items until memory limits are achieved or
* nothing more is evictable.
*
* This should be called before execution of commands. If EVICT_FAIL is
* returned, commands which will result in increased memory usage should be
* rejected.
*
* Returns:
* EVICT_OK - memory is OK or it's not possible to perform evictions now
* EVICT_RUNNING - memory is over the limit, but eviction is still processing
* EVICT_FAIL - memory is over the limit, and there's nothing to evict
* */
int performEvictions(void) {
if (!isSafeToPerformEvictions()) return EVICT_OK;
int keys_freed = 0;
size_t mem_reported, mem_tofree, mem_freed;
mstime_t latency, eviction_latency;
long long delta;
int slaves = listLength(server.slaves);
int result = EVICT_FAIL;
if (getMaxmemoryState(&mem_reported,NULL,&mem_tofree,NULL) == C_OK) if (getMaxmemoryState(&mem_reported,NULL,&mem_tofree,NULL) == C_OK)
return C_OK; return EVICT_OK;
if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION)
return EVICT_FAIL; /* We need to free memory, but policy forbids. */
unsigned long eviction_time_limit_us = evictionTimeLimitUs();
mem_freed = 0; mem_freed = 0;
latencyStartMonitor(latency); latencyStartMonitor(latency);
if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION)
goto cant_free; /* We need to free memory, but policy forbids. */ monotime evictionTimer;
elapsedStart(&evictionTimer);
while (mem_freed < mem_tofree) { while (mem_freed < mem_tofree) {
int j, k, i; int j, k, i;
@ -581,60 +633,61 @@ int freeMemoryIfNeeded(void) {
decrRefCount(keyobj); decrRefCount(keyobj);
keys_freed++; keys_freed++;
/* When the memory to free starts to be big enough, we may if (keys_freed % 16 == 0) {
* start spending so much time here that is impossible to /* When the memory to free starts to be big enough, we may
* deliver data to the slaves fast enough, so we force the * start spending so much time here that is impossible to
* transmission here inside the loop. */ * deliver data to the replicas fast enough, so we force the
if (slaves) flushSlavesOutputBuffers(); * transmission here inside the loop. */
if (slaves) flushSlavesOutputBuffers();
/* Normally our stop condition is the ability to release /* Normally our stop condition is the ability to release
* a fixed, pre-computed amount of memory. However when we * a fixed, pre-computed amount of memory. However when we
* are deleting objects in another thread, it's better to * are deleting objects in another thread, it's better to
* check, from time to time, if we already reached our target * check, from time to time, if we already reached our target
* memory, since the "mem_freed" amount is computed only * memory, since the "mem_freed" amount is computed only
* across the dbAsyncDelete() call, while the thread can * across the dbAsyncDelete() call, while the thread can
* release the memory all the time. */ * release the memory all the time. */
if (server.lazyfree_lazy_eviction && !(keys_freed % 16)) { if (server.lazyfree_lazy_eviction) {
if (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_OK) { if (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_OK) {
/* Let's satisfy our stop condition. */ break;
mem_freed = mem_tofree; }
}
/* After some time, exit the loop early - even if memory limit
* hasn't been reached. If we suddenly need to free a lot of
* memory, don't want to spend too much time here. */
if (elapsedUs(evictionTimer) > eviction_time_limit_us) {
// We still need to free memory - start eviction timer proc
if (!isEvictionProcRunning) {
isEvictionProcRunning = 1;
aeCreateTimeEvent(server.el, 0,
evictionTimeProc, NULL, NULL);
}
break;
} }
} }
} else { } else {
goto cant_free; /* nothing to free... */ goto cant_free; /* nothing to free... */
} }
} }
result = C_OK; /* at this point, the memory is OK, or we have reached the time limit */
result = (isEvictionProcRunning) ? EVICT_RUNNING : EVICT_OK;
cant_free: cant_free:
/* We are here if we are not able to reclaim memory. There is only one if (result == EVICT_FAIL) {
* last thing we can try: check if the lazyfree thread has jobs in queue /* At this point, we have run out of evictable items. It's possible
* and wait... */ * that some items are being freed in the lazyfree thread. Perform a
if (result != C_OK) { * short wait here if such jobs exist, but don't wait long. */
latencyStartMonitor(lazyfree_latency); if (bioPendingJobsOfType(BIO_LAZY_FREE)) {
while(bioPendingJobsOfType(BIO_LAZY_FREE)) { usleep(eviction_time_limit_us);
if (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_OK) { if (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_OK) {
result = C_OK; result = EVICT_OK;
break;
} }
usleep(1000);
} }
latencyEndMonitor(lazyfree_latency);
latencyAddSampleIfNeeded("eviction-lazyfree",lazyfree_latency);
} }
latencyEndMonitor(latency); latencyEndMonitor(latency);
latencyAddSampleIfNeeded("eviction-cycle",latency); latencyAddSampleIfNeeded("eviction-cycle",latency);
return result; return result;
} }
/* This is a wrapper for freeMemoryIfNeeded() that only really calls the
* function if right now there are the conditions to do so safely:
*
* - There must be no script in timeout condition.
* - Nor we are loading data right now.
*
*/
int freeMemoryIfNeededAndSafe(void) {
if (server.lua_timedout || server.loading) return C_OK;
return freeMemoryIfNeeded();
}

View File

@ -1830,8 +1830,8 @@ int processCommandAndResetClient(client *c) {
} }
if (server.current_client == NULL) deadclient = 1; if (server.current_client == NULL) deadclient = 1;
server.current_client = NULL; server.current_client = NULL;
/* freeMemoryIfNeeded may flush slave output buffers. This may /* performEvictions may flush slave output buffers. This may
* result into a slave, that may be the active client, to be * result in a slave, that may be the active client, to be
* freed. */ * freed. */
return deadclient ? C_ERR : C_OK; return deadclient ? C_ERR : C_OK;
} }
@ -2797,7 +2797,7 @@ void asyncCloseClientOnOutputBufferLimitReached(client *c) {
} }
} }
/* Helper function used by freeMemoryIfNeeded() in order to flush slaves /* Helper function used by performEvictions() in order to flush slaves
* output buffers without returning control to the event loop. * output buffers without returning control to the event loop.
* This is also called by SHUTDOWN for a best-effort attempt to send * This is also called by SHUTDOWN for a best-effort attempt to send
* slaves the latest writes. */ * slaves the latest writes. */

View File

@ -3681,9 +3681,9 @@ int processCommand(client *c) {
* condition, to avoid mixing the propagation of scripts with the * condition, to avoid mixing the propagation of scripts with the
* propagation of DELs due to eviction. */ * propagation of DELs due to eviction. */
if (server.maxmemory && !server.lua_timedout) { if (server.maxmemory && !server.lua_timedout) {
int out_of_memory = freeMemoryIfNeededAndSafe() == C_ERR; int out_of_memory = (performEvictions() == EVICT_FAIL);
/* freeMemoryIfNeeded may flush slave output buffers. This may result /* performEvictions may flush slave output buffers. This may result
* into a slave, that may be the active client, to be freed. */ * in a slave, that may be the active client, to be freed. */
if (server.current_client == NULL) return C_ERR; if (server.current_client == NULL) return C_ERR;
int reject_cmd_on_oom = is_denyoom_command; int reject_cmd_on_oom = is_denyoom_command;

View File

@ -1358,6 +1358,7 @@ struct redisServer {
unsigned long long maxmemory; /* Max number of memory bytes to use */ unsigned long long maxmemory; /* Max number of memory bytes to use */
int maxmemory_policy; /* Policy for key eviction */ int maxmemory_policy; /* Policy for key eviction */
int maxmemory_samples; /* Precision of random sampling */ int maxmemory_samples; /* Precision of random sampling */
int maxmemory_eviction_tenacity;/* Aggressiveness of eviction processing */
int lfu_log_factor; /* LFU logarithmic counter factor. */ int lfu_log_factor; /* LFU logarithmic counter factor. */
int lfu_decay_time; /* LFU counter decay factor. */ int lfu_decay_time; /* LFU counter decay factor. */
long long proto_max_bulk_len; /* Protocol bulk length maximum size. */ long long proto_max_bulk_len; /* Protocol bulk length maximum size. */
@ -1996,8 +1997,6 @@ int zslLexValueLteMax(sds value, zlexrangespec *spec);
/* Core functions */ /* Core functions */
int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level); int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level);
size_t freeMemoryGetNotCountedMemory(); size_t freeMemoryGetNotCountedMemory();
int freeMemoryIfNeeded(void);
int freeMemoryIfNeededAndSafe(void);
int processCommand(client *c); int processCommand(client *c);
void setupSignalHandlers(void); void setupSignalHandlers(void);
void removeSignalHandlers(void); void removeSignalHandlers(void);
@ -2234,6 +2233,11 @@ void evictionPoolAlloc(void);
unsigned long LFUGetTimeInMinutes(void); unsigned long LFUGetTimeInMinutes(void);
uint8_t LFULogIncr(uint8_t value); uint8_t LFULogIncr(uint8_t value);
unsigned long LFUDecrAndReturn(robj *o); unsigned long LFUDecrAndReturn(robj *o);
#define EVICT_OK 0
#define EVICT_RUNNING 1
#define EVICT_FAIL 2
int performEvictions(void);
/* Keys hashing / comparison functions for dict.c hash tables. */ /* Keys hashing / comparison functions for dict.c hash tables. */
uint64_t dictSdsHash(const void *key); uint64_t dictSdsHash(const void *key);