From f20774eced7eba5dfb9cf9b76ed5d6aa3950177c Mon Sep 17 00:00:00 2001 From: Binbin Date: Mon, 5 Feb 2024 22:56:46 +0800 Subject: [PATCH] Fix active expire timeout when db done the scanning (#13030) When db->expires_cursor==0, it means the DB is done the scanning, we should exit the loop to avoid the useless scanning. It is easy to see the active expire timeout in the modified test, for example, let's assume that there is only 1 expired key in the DB, and the size / buckets ratio is less than 1%, which means that we will skip it in isExpiryDictValidForSamplingCb, and the return value of expires_cursor is 0. Because `data.sampled == 0` is always true, so `repeat` is also always true, we will keep scanning the DB, but every time it is skipped by the previous judgment (expires_cursor = 0), until the timelimit is finally exhausted. --- src/expire.c | 13 ++++++++----- tests/unit/expire.tcl | 3 +++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/expire.c b/src/expire.c index cf846c802..a1006809c 100644 --- a/src/expire.c +++ b/src/expire.c @@ -245,6 +245,7 @@ void activeExpireCycle(int type) { redisDb *db = server.db+(current_db % server.dbnum); data.db = db; + int db_done = 0; /* The scan of the current DB is done? */ int update_avg_ttl_times = 0, repeat = 0; /* Increment the DB now so we are sure if we run out of time @@ -295,6 +296,7 @@ void activeExpireCycle(int type) { while (data.sampled < num && checked_buckets < max_buckets) { db->expires_cursor = dbScan(db, DB_EXPIRES, db->expires_cursor, -1, expireScanCallback, isExpiryDictValidForSamplingCb, &data); if (db->expires_cursor == 0) { + db_done = 1; break; } checked_buckets++; @@ -305,7 +307,11 @@ void activeExpireCycle(int type) { /* If find keys with ttl not yet expired, we need to update the average TTL stats once. */ if (data.ttl_samples - origin_ttl_samples > 0) update_avg_ttl_times++; - repeat = data.sampled == 0 || (data.expired * 100 / data.sampled) > config_cycle_acceptable_stale; + /* We don't repeat the cycle for the current database if the db is done + * for scanning or an acceptable number of stale keys (logically expired + * but yet not reclaimed). */ + repeat = db_done ? 0 : (data.sampled == 0 || (data.expired * 100 / data.sampled) > config_cycle_acceptable_stale); + /* We can't block forever here even if there are many keys to * expire. So after a given amount of microseconds return to the * caller waiting for the other active expire cycle. */ @@ -321,7 +327,7 @@ void activeExpireCycle(int type) { if (db->avg_ttl == 0) { db->avg_ttl = avg_ttl; } else { - /* Thr origin code is as follow. + /* The origin code is as follow. * for (int i = 0; i < update_avg_ttl_times; i++) { * db->avg_ttl = (db->avg_ttl/50)*49 + (avg_ttl/50); * } @@ -348,9 +354,6 @@ void activeExpireCycle(int type) { } } } - /* We don't repeat the cycle for the current database if there are - * an acceptable amount of stale keys (logically expired but yet - * not reclaimed). */ } while (repeat); } diff --git a/tests/unit/expire.tcl b/tests/unit/expire.tcl index 5aa763578..f05a4407d 100644 --- a/tests/unit/expire.tcl +++ b/tests/unit/expire.tcl @@ -898,5 +898,8 @@ start_cluster 1 0 {tags {"expire external:skip cluster slow"}} { flush stdout fail "Keys did not actively expire." } + + # Make sure we don't have any timeouts. + assert_equal 0 [s 0 expired_time_cap_reached_count] } {} {needs:debug} }