diff --git a/src/expire.c b/src/expire.c index 3d0bae249..af9eaebf1 100644 --- a/src/expire.c +++ b/src/expire.c @@ -78,24 +78,40 @@ int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) { * it will get more aggressive to avoid that too much memory is used by * keys that can be removed from the keyspace. * - * No more than CRON_DBS_PER_CALL databases are tested at every - * iteration. + * Every expire cycle tests multiple databases: the next call will start + * again from the next db, with the exception of exists for time limit: in that + * case we restart again from the last database we were processing. Anyway + * no more than CRON_DBS_PER_CALL databases are tested at every iteration. * - * This kind of call is used when Redis detects that timelimit_exit is - * true, so there is more work to do, and we do it more incrementally from - * the beforeSleep() function of the event loop. + * The function can perform more or less work, depending on the "type" + * argument. It can execute a "fast cycle" or a "slow cycle". The slow + * cycle is the main way we collect expired cycles: this happens with + * the "server.hz" frequency (usually 10 hertz). * - * Expire cycle type: + * However the slow cycle can exit for timeout, since it used too much time. + * For this reason the function is also invoked to perform a fast cycle + * at every event loop cycle, in the beforeSleep() function. The fast cycle + * will try to perform less work, but will do it much more often. + * + * The following are the details of the two expire cycles and their stop + * conditions: * * If type is ACTIVE_EXPIRE_CYCLE_FAST the function will try to run a * "fast" expire cycle that takes no longer than EXPIRE_FAST_CYCLE_DURATION * microseconds, and is not repeated again before the same amount of time. + * The cycle will also refuse to run at all if the latest slow cycle did not + * terminate because of a time limit condition. * * If type is ACTIVE_EXPIRE_CYCLE_SLOW, that normal expire cycle is * executed, where the time limit is a percentage of the REDIS_HZ period - * as specified by the ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC define. */ + * as specified by the ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC define. In the + * fast cycle, the check of every database is interrupted once the number + * of already expired keys in the database is estimated to be lower than + * a given percentage, in order to avoid doing too much work to gain too + * little memory. + */ -#define ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP 20 /* Loopkups per loop. */ +#define ACTIVE_EXPIRE_CYCLE_BUCKETS_PER_LOOP 20 /* HT buckets checked. */ #define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds. */ #define ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC 25 /* Percentage of CPU to use. */ @@ -152,7 +168,9 @@ void activeExpireCycle(int type) { long total_expired = 0; for (j = 0; j < dbs_per_call && timelimit_exit == 0; j++) { - int expired; + /* Expired and checked in a single loop. */ + unsigned long expired, sampled; + redisDb *db = server.db+(current_db % server.dbnum); /* Increment the DB now so we are sure if we run out of time @@ -176,8 +194,8 @@ void activeExpireCycle(int type) { slots = dictSlots(db->expires); now = mstime(); - /* When there are less than 1% filled slots getting random - * keys is expensive, so stop here waiting for better times... + /* When there are less than 1% filled slots, sampling the key + * space is expensive, so stop here waiting for better times... * The dictionary will be resized asap. */ if (num && slots > DICT_HT_INITIAL_SIZE && (num*100/slots < 1)) break; @@ -185,27 +203,47 @@ void activeExpireCycle(int type) { /* The main collection cycle. Sample random keys among keys * with an expire set, checking for expired ones. */ expired = 0; + sampled = 0; ttl_sum = 0; ttl_samples = 0; - if (num > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP) - num = ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP; + if (num > ACTIVE_EXPIRE_CYCLE_BUCKETS_PER_LOOP) + num = ACTIVE_EXPIRE_CYCLE_BUCKETS_PER_LOOP; - while (num--) { - dictEntry *de; - long long ttl; + /* Here we access the low level representation of the hash table + * for speed concerns: this makes this code coupled with dict.c, + * but it hardly changed in ten years. */ + while (sampled < num) { + for (int table = 0; table < 2; table++) { + if (table == 1 && !dictIsRehashing(db->expires)) break; - if ((de = dictGetRandomKey(db->expires)) == NULL) break; - ttl = dictGetSignedIntegerVal(de)-now; - if (activeExpireCycleTryExpire(db,de,now)) expired++; - if (ttl > 0) { - /* We want the average TTL of keys yet not expired. */ - ttl_sum += ttl; - ttl_samples++; + unsigned long idx = db->expires_cursor; + idx &= db->expires->ht[table].sizemask; + dictEntry *de = db->expires->ht[table].table[idx]; + long long ttl; + + /* Scan the current bucket of the current table. */ + while(de) { + /* Get the next entry now since this entry may get + * deleted. */ + dictEntry *e = de; + de = de->next; + + ttl = dictGetSignedIntegerVal(e)-now; + if (activeExpireCycleTryExpire(db,e,now)) expired++; + if (ttl > 0) { + /* We want the average TTL of keys yet + * not expired. */ + ttl_sum += ttl; + ttl_samples++; + } + sampled++; + } } - total_sampled++; + db->expires_cursor++; } total_expired += expired; + total_sampled += sampled; /* Update the average TTL stats for this database. */ if (ttl_samples) { @@ -229,9 +267,10 @@ void activeExpireCycle(int type) { break; } } - /* We don't repeat the cycle if there are less than 25% of keys - * found expired in the current DB. */ - } while (expired > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP/4); + /* We don't repeat the cycle for the current database if there are + * less than 25% of keys found expired in the current DB. */ + // printf("[%d] Expired %d, sampled %d\n", type, (int) expired, (int) sampled); + } while (expired > sampled/4); } elapsed = ustime()-start;