diff --git a/redis.conf b/redis.conf index 561aaa9db..119f7b6f9 100644 --- a/redis.conf +++ b/redis.conf @@ -1088,6 +1088,19 @@ oom-score-adj no oom-score-adj-values 0 200 800 + +#################### KERNEL transparent hugepage CONTROL ###################### + +# Usually the kernel Transparent Huge Pages control is set to "madvise" or +# "never" by default (/sys/kernel/mm/transparent_hugepage/enabled), in which +# case this config has no effect. On systems in which it is set to "always", +# Redis will attempt to disable it specifically for the Redis process in order +# to avoid latency problems with fork(2) and CoW. +# If for some reason you prefer to keep it enabled, you can set this config to +# "no" and the kernel global to "always". + +disable-thp yes + ############################## APPEND ONLY MODE ############################### # By default Redis asynchronously dumps the dataset on disk. This mode is diff --git a/src/config.c b/src/config.c index aaee2db65..07ed91e5f 100644 --- a/src/config.c +++ b/src/config.c @@ -2314,6 +2314,7 @@ standardConfig configs[] = { createBoolConfig("crash-memcheck-enabled", NULL, MODIFIABLE_CONFIG, server.memcheck_enabled, 1, NULL, NULL), createBoolConfig("use-exit-on-panic", NULL, MODIFIABLE_CONFIG, server.use_exit_on_panic, 0, NULL, NULL), createBoolConfig("oom-score-adj", NULL, MODIFIABLE_CONFIG, server.oom_score_adj, 0, NULL, updateOOMScoreAdj), + createBoolConfig("disable-thp", NULL, MODIFIABLE_CONFIG, server.disable_thp, 1, NULL, NULL), /* String Configs */ createStringConfig("aclfile", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.acl_filename, "", NULL, NULL), diff --git a/src/latency.c b/src/latency.c index 6148543c8..c661c5d6d 100644 --- a/src/latency.c +++ b/src/latency.c @@ -59,6 +59,7 @@ dictType latencyTimeSeriesDictType = { /* ------------------------- Utility functions ------------------------------ */ #ifdef __linux__ +#include <sys/prctl.h> /* Returns 1 if Transparent Huge Pages support is enabled in 
the kernel. * Otherwise (or if we are unable to check) 0 is returned. */ int THPIsEnabled(void) { @@ -73,6 +74,21 @@ int THPIsEnabled(void) { fclose(fp); return (strstr(buf,"[always]") != NULL) ? 1 : 0; } + +/* since linux-3.5, kernel supports to set the state of the "THP disable" flag + * for the calling thread. PR_SET_THP_DISABLE is defined in linux/prctl.h */ +int THPDisable(void) { + int ret = -EINVAL; + + if (!server.disable_thp) + return ret; + +#ifdef PR_SET_THP_DISABLE + ret = prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0); +#endif + + return ret; +} #endif /* Report the amount of AnonHugePages in smap, in bytes. If the return diff --git a/src/latency.h b/src/latency.h index 76640cfce..b17f403a5 100644 --- a/src/latency.h +++ b/src/latency.h @@ -64,6 +64,7 @@ struct latencyStats { void latencyMonitorInit(void); void latencyAddSample(const char *event, mstime_t latency); int THPIsEnabled(void); +int THPDisable(void); /* Latency monitoring macros. */ diff --git a/src/server.c b/src/server.c index 67d050ea0..093c20630 100644 --- a/src/server.c +++ b/src/server.c @@ -4874,7 +4874,7 @@ void linuxMemoryWarnings(void) { if (linuxOvercommitMemoryValue() == 0) { serverLog(LL_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect."); } - if (THPIsEnabled()) { + if (THPIsEnabled() && THPDisable()) { serverLog(LL_WARNING,"WARNING you have Transparent Huge Pages (THP) support enabled in your kernel. This will create latency and memory usage issues with Redis. To fix this issue run the command 'echo madvise > /sys/kernel/mm/transparent_hugepage/enabled' as root, and add it to your /etc/rc.local in order to retain the setting after a reboot. 
Redis must be restarted after THP is disabled (set to 'madvise' or 'never')."); } } diff --git a/src/server.h b/src/server.h index 5d53d9473..b68d06497 100644 --- a/src/server.h +++ b/src/server.h @@ -1381,6 +1381,7 @@ struct redisServer { int oom_score_adj_base; /* Base oom_score_adj value, as observed on startup */ int oom_score_adj_values[CONFIG_OOM_COUNT]; /* Linux oom_score_adj configuration */ int oom_score_adj; /* If true, oom_score_adj is managed */ + int disable_thp; /* If true, disable THP by syscall */ /* Blocked clients */ unsigned int blocked_clients; /* # of clients executing a blocking cmd.*/ unsigned int blocked_clients_by_type[BLOCKED_NUM];