diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index c06d73440..44386f5ff 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -689,6 +689,52 @@ jobs: if: true && !contains(github.event.inputs.skiptests, 'unittest') run: ./src/valkey-unit-tests --accurate + test-sanitizer-force-defrag: + runs-on: ubuntu-latest + if: | + (github.event_name == 'workflow_dispatch' || + (github.event_name == 'schedule' && github.repository == 'valkey-io/valkey') || + (github.event_name == 'pull_request' && github.event.pull_request.base.ref != 'unstable')) && + !contains(github.event.inputs.skipjobs, 'sanitizer') + timeout-minutes: 14400 + strategy: + fail-fast: false + steps: + - name: prep + if: github.event_name == 'workflow_dispatch' + run: | + echo "GITHUB_REPOSITORY=${{github.event.inputs.use_repo}}" >> $GITHUB_ENV + echo "GITHUB_HEAD_REF=${{github.event.inputs.use_git_ref}}" >> $GITHUB_ENV + echo "skipjobs: ${{github.event.inputs.skipjobs}}" + echo "skiptests: ${{github.event.inputs.skiptests}}" + echo "test_args: ${{github.event.inputs.test_args}}" + echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}" + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + repository: ${{ env.GITHUB_REPOSITORY }} + ref: ${{ env.GITHUB_HEAD_REF }} + - name: make + run: make all-with-unit-tests OPT=-O3 SANITIZER=address DEBUG_FORCE_DEFRAG=yes USE_JEMALLOC=no SERVER_CFLAGS='-Werror' + - name: testprep + run: | + sudo apt-get update + sudo apt-get install tcl8.6 tclx -y + - name: test + if: true && !contains(github.event.inputs.skiptests, 'valkey') + run: ./runtest --accurate --verbose --dump-logs ${{github.event.inputs.test_args}} + - name: module api test + if: true && !contains(github.event.inputs.skiptests, 'modules') + run: CFLAGS='-Werror' ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}} + - name: sentinel tests + if: true && !contains(github.event.inputs.skiptests, 'sentinel') + run: ./runtest-sentinel ${{github.event.inputs.cluster_test_args}} + - name: cluster tests + if: true && !contains(github.event.inputs.skiptests, 'cluster') + run: ./runtest-cluster ${{github.event.inputs.cluster_test_args}} + - name: unittest + if: true && !contains(github.event.inputs.skiptests, 'unittest') + run: ./src/valkey-unit-tests + test-rpm-distros-jemalloc: if: | (github.event_name == 'workflow_dispatch' || diff --git a/CMakeLists.txt b/CMakeLists.txt index 77d0c4e7d..55b18cb99 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,3 +41,4 @@ unset(BUILD_UNIT_TESTS CACHE) unset(BUILD_TEST_MODULES CACHE) unset(BUILD_EXAMPLE_MODULES CACHE) unset(USE_TLS CACHE) +unset(DEBUG_FORCE_DEFRAG CACHE) diff --git a/deps/CMakeLists.txt b/deps/CMakeLists.txt index c904b9403..3f5b04dc2 100644 --- a/deps/CMakeLists.txt +++ b/deps/CMakeLists.txt @@ -1,4 +1,6 @@ -add_subdirectory(jemalloc) +if (USE_JEMALLOC) + add_subdirectory(jemalloc) +endif () add_subdirectory(lua) # Set hiredis options. We need to disable the defaults set in the OPTION(..) we do this by setting them in the CACHE diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b87dff3db..90d7e25cf 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -22,6 +22,12 @@ if (VALKEY_RELEASE_BUILD) set_property(TARGET valkey-server PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) endif () +if (DEBUG_FORCE_DEFRAG) + message(STATUS "Forcing Active Defrag run on valkey-server") + target_compile_definitions(valkey-server PRIVATE DEBUG_FORCE_DEFRAG) + target_compile_definitions(valkey-server PRIVATE HAVE_DEFRAG) +endif () + if (BUILD_SANITIZER) # 'BUILD_SANITIZER' is defined in ValkeySetup module (based on user input) # If defined, the variables 'VALKEY_SANITAIZER_CFLAGS' and 'VALKEY_SANITAIZER_LDFLAGS' diff --git a/src/Makefile b/src/Makefile index 8552deb3d..e52f4f08d 100644 --- a/src/Makefile +++ b/src/Makefile @@ -130,6 +130,11 @@ ifdef REDIS_LDFLAGS SERVER_LDFLAGS := $(REDIS_LDFLAGS) endif +# Special case of forcing defrag to run even though we have no Jemlloc support +ifeq ($(DEBUG_FORCE_DEFRAG), yes) + SERVER_CFLAGS +=-DHAVE_DEFRAG -DDEBUG_FORCE_DEFRAG +endif + FINAL_CFLAGS=$(STD) $(WARN) $(OPT) $(DEBUG) $(CFLAGS) $(SERVER_CFLAGS) FINAL_LDFLAGS=$(LDFLAGS) $(OPT) $(SERVER_LDFLAGS) $(DEBUG) FINAL_LIBS=-lm diff --git a/src/allocator_defrag.c b/src/allocator_defrag.c index b2330c95e..5e805b304 100644 --- a/src/allocator_defrag.c +++ b/src/allocator_defrag.c @@ -43,12 +43,10 @@ * the other component to ensure both are using the same allocator configuration. */ -#include +#include "server.h" #include "serverassert.h" #include "allocator_defrag.h" -#define UNUSED(x) (void)(x) - #if defined(HAVE_DEFRAG) && defined(USE_JEMALLOC) #define STRINGIFY_(x) #x @@ -402,8 +400,56 @@ int allocatorShouldDefrag(void *ptr) { je_cb.bin_info[binind].nregs - SLAB_NFREE(out, 0)); } -#else +/* Utility function to get the fragmentation ratio from jemalloc. + * It is critical to do that by comparing only heap maps that belong to + * jemalloc, and skip ones the jemalloc keeps as spare. Since we use this + * fragmentation ratio in order to decide if a defrag action should be taken + * or not, a false detection can cause the defragmenter to waste a lot of CPU + * without the possibility of getting any results. */ +float getAllocatorFragmentation(size_t *out_frag_bytes) { + size_t resident, active, allocated, frag_smallbins_bytes; + zmalloc_get_allocator_info(&allocated, &active, &resident, NULL, NULL); + frag_smallbins_bytes = allocatorDefragGetFragSmallbins(); + /* Calculate the fragmentation ratio as the proportion of wasted memory in small + * bins (which are defraggable) relative to the total allocated memory (including large bins). + * This is because otherwise, if most of the memory usage is large bins, we may show high percentage, + * despite the fact it's not a lot of memory for the user. */ + float frag_pct = (float)frag_smallbins_bytes / allocated * 100; + float rss_pct = ((float)resident / allocated) * 100 - 100; + size_t rss_bytes = resident - allocated; + if (out_frag_bytes) *out_frag_bytes = frag_smallbins_bytes; + serverLog(LL_DEBUG, "allocated=%zu, active=%zu, resident=%zu, frag=%.2f%% (%.2f%% rss), frag_bytes=%zu (%zu rss)", + allocated, active, resident, frag_pct, rss_pct, frag_smallbins_bytes, rss_bytes); + return frag_pct; +} +#elif defined(DEBUG_FORCE_DEFRAG) +int allocatorDefragInit(void) { + return 0; +} +void allocatorDefragFree(void *ptr, size_t size) { + UNUSED(size); + zfree(ptr); +} +__attribute__((malloc)) void *allocatorDefragAlloc(size_t size) { + return zmalloc(size); + return NULL; +} +unsigned long allocatorDefragGetFragSmallbins(void) { + return 0; +} + +int allocatorShouldDefrag(void *ptr) { + UNUSED(ptr); + return 1; +} + +float getAllocatorFragmentation(size_t *out_frag_bytes) { + *out_frag_bytes = server.active_defrag_ignore_bytes + 1; + return server.active_defrag_threshold_upper; +} + +#else int allocatorDefragInit(void) { return -1; } @@ -423,4 +469,9 @@ int allocatorShouldDefrag(void *ptr) { UNUSED(ptr); return 0; } + +float getAllocatorFragmentation(size_t *out_frag_bytes) { + UNUSED(out_frag_bytes); + return 0; +} #endif diff --git a/src/allocator_defrag.h b/src/allocator_defrag.h index 7fb56208b..7947bef72 100644 --- a/src/allocator_defrag.h +++ b/src/allocator_defrag.h @@ -5,10 +5,11 @@ #include /* We can enable the server defrag capabilities only if we are using Jemalloc * and the version that has the experimental.utilization namespace in mallctl . */ -#if defined(JEMALLOC_VERSION_MAJOR) && \ - (JEMALLOC_VERSION_MAJOR > 5 || \ - (JEMALLOC_VERSION_MAJOR == 5 && JEMALLOC_VERSION_MINOR > 2) || \ - (JEMALLOC_VERSION_MAJOR == 5 && JEMALLOC_VERSION_MINOR == 2 && JEMALLOC_VERSION_BUGFIX >= 1)) +#if (defined(JEMALLOC_VERSION_MAJOR) && \ + (JEMALLOC_VERSION_MAJOR > 5 || \ + (JEMALLOC_VERSION_MAJOR == 5 && JEMALLOC_VERSION_MINOR > 2) || \ + (JEMALLOC_VERSION_MAJOR == 5 && JEMALLOC_VERSION_MINOR == 2 && JEMALLOC_VERSION_BUGFIX >= 1))) || \ + defined(DEBUG_FORCE_DEFRAG) #define HAVE_DEFRAG #endif #endif @@ -18,5 +19,6 @@ void allocatorDefragFree(void *ptr, size_t size); __attribute__((malloc)) void *allocatorDefragAlloc(size_t size); unsigned long allocatorDefragGetFragSmallbins(void); int allocatorShouldDefrag(void *ptr); +float getAllocatorFragmentation(size_t *out_frag_bytes); #endif /* __ALLOCATOR_DEFRAG_H */ diff --git a/src/config.c b/src/config.c index cc0f8d2dd..e1cee3f95 100644 --- a/src/config.c +++ b/src/config.c @@ -3186,7 +3186,7 @@ standardConfig static_configs[] = { createBoolConfig("replica-read-only", "slave-read-only", DEBUG_CONFIG | MODIFIABLE_CONFIG, server.repl_replica_ro, 1, NULL, NULL), createBoolConfig("replica-ignore-maxmemory", "slave-ignore-maxmemory", MODIFIABLE_CONFIG, server.repl_replica_ignore_maxmemory, 1, NULL, NULL), createBoolConfig("jemalloc-bg-thread", NULL, MODIFIABLE_CONFIG, server.jemalloc_bg_thread, 1, NULL, updateJemallocBgThread), - createBoolConfig("activedefrag", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.active_defrag_enabled, 0, isValidActiveDefrag, NULL), + createBoolConfig("activedefrag", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.active_defrag_enabled, CONFIG_ACTIVE_DEFRAG_DEFAULT, isValidActiveDefrag, NULL), createBoolConfig("syslog-enabled", NULL, IMMUTABLE_CONFIG, server.syslog_enabled, 0, NULL, NULL), createBoolConfig("cluster-enabled", NULL, IMMUTABLE_CONFIG, server.cluster_enabled, 0, NULL, NULL), createBoolConfig("appendonly", NULL, MODIFIABLE_CONFIG | DENY_LOADING_CONFIG, server.aof_enabled, 0, NULL, updateAppendonly), diff --git a/src/defrag.c b/src/defrag.c index 8e7fc8449..6522d9aa7 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -149,11 +149,6 @@ static_assert(offsetof(defragPubSubCtx, kvstate) == 0, "defragStageKvstoreHelper static list *defrag_later; static unsigned long defrag_later_cursor; - -/* this method was added to jemalloc in order to help us understand which - * pointers are worthwhile moving and which aren't */ -int je_get_defrag_hint(void *ptr); - /* Defrag function which allocates and copies memory if needed, but DOESN'T free the old block. * It is the responsibility of the caller to free the old block if a non-NULL value (new block) * is returned. (Returns NULL if no relocation was needed.) @@ -824,29 +819,6 @@ static void dbKeysScanCallback(void *privdata, void *elemref) { server.stat_active_defrag_scanned++; } -/* Utility function to get the fragmentation ratio from jemalloc. - * It is critical to do that by comparing only heap maps that belong to - * jemalloc, and skip ones the jemalloc keeps as spare. Since we use this - * fragmentation ratio in order to decide if a defrag action should be taken - * or not, a false detection can cause the defragmenter to waste a lot of CPU - * without the possibility of getting any results. */ -static float getAllocatorFragmentation(size_t *out_frag_bytes) { - size_t resident, active, allocated, frag_smallbins_bytes; - zmalloc_get_allocator_info(&allocated, &active, &resident, NULL, NULL); - frag_smallbins_bytes = allocatorDefragGetFragSmallbins(); - /* Calculate the fragmentation ratio as the proportion of wasted memory in small - * bins (which are defraggable) relative to the total allocated memory (including large bins). - * This is because otherwise, if most of the memory usage is large bins, we may show high percentage, - * despite the fact it's not a lot of memory for the user. */ - float frag_pct = (float)frag_smallbins_bytes / allocated * 100; - float rss_pct = ((float)resident / allocated) * 100 - 100; - size_t rss_bytes = resident - allocated; - if (out_frag_bytes) *out_frag_bytes = frag_smallbins_bytes; - serverLog(LL_DEBUG, "allocated=%zu, active=%zu, resident=%zu, frag=%.2f%% (%.2f%% rss), frag_bytes=%zu (%zu rss)", - allocated, active, resident, frag_pct, rss_pct, frag_smallbins_bytes, rss_bytes); - return frag_pct; -} - /* Defrag scan callback for a pubsub channels hashtable. */ static void defragPubsubScanCallback(void *privdata, void *elemref) { defragPubSubCtx *ctx = privdata; diff --git a/src/server.h b/src/server.h index dc4d2e880..1aafcaeb5 100644 --- a/src/server.h +++ b/src/server.h @@ -148,6 +148,11 @@ struct hdr_histogram; #define DEFAULT_WAIT_BEFORE_RDB_CLIENT_FREE 60 /* Grace period in seconds for replica main \ * channel to establish psync. */ #define LOADING_PROCESS_EVENTS_INTERVAL_DEFAULT 100 /* Default: 0.1 seconds */ +#if !defined(DEBUG_FORCE_DEFRAG) +#define CONFIG_ACTIVE_DEFRAG_DEFAULT 0 +#else +#define CONFIG_ACTIVE_DEFRAG_DEFAULT 1 +#endif /* Bucket sizes for client eviction pools. Each bucket stores clients with * memory usage of up to twice the size of the bucket below it. */ diff --git a/tests/support/server.tcl b/tests/support/server.tcl index 725733904..8c545d900 100644 --- a/tests/support/server.tcl +++ b/tests/support/server.tcl @@ -221,6 +221,11 @@ proc tags_acceptable {tags err_return} { return 0 } + if {$::debug_defrag && [lsearch $tags "debug_defrag:skip"] >= 0} { + set err "Not supported on server compiled with DEBUG_FORCE_DEFRAG option" + return 0 + } + if {$::singledb && [lsearch $tags "singledb:skip"] >= 0} { set err "Not supported on singledb" return 0 diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index 1f0658071..8a4125e48 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -92,6 +92,7 @@ set ::large_memory 0 set ::log_req_res 0 set ::force_resp3 0 set ::solo_tests_count 0 +set ::debug_defrag 0 # Set to 1 when we are running in client mode. The server test uses a # server-client model to run tests simultaneously. The server instance @@ -607,6 +608,7 @@ proc print_help_screen {} { "--ignore-encoding Don't validate object encoding." "--ignore-digest Don't use debug digest validations." "--large-memory Run tests using over 100mb." + "--debug-defrag Indicate the test is running against server compiled with DEBUG_FORCE_DEFRAG option" "--help Print this help screen." } "\n"] } @@ -748,6 +750,8 @@ for {set j 0} {$j < [llength $argv]} {incr j} { set ::ignoreencoding 1 } elseif {$opt eq {--ignore-digest}} { set ::ignoredigest 1 + } elseif {$opt eq {--debug-defrag}} { + set ::debug_defrag 1 } elseif {$opt eq {--help}} { print_help_screen exit 0 diff --git a/tests/unit/info.tcl b/tests/unit/info.tcl index e50faba62..a27043fa8 100644 --- a/tests/unit/info.tcl +++ b/tests/unit/info.tcl @@ -10,7 +10,7 @@ proc latency_percentiles_usec {cmd} { return [latencyrstat_percentiles $cmd r] } -start_server {tags {"info" "external:skip"}} { +start_server {tags {"info" "external:skip" "debug_defrag:skip"}} { start_server {} { test {latencystats: disable/enable} {