From 29ca87955ec71ddf4525c52d86781222e1901c4f Mon Sep 17 00:00:00 2001 From: Chen Tianjie Date: Sat, 13 May 2023 01:13:15 +0800 Subject: [PATCH] Add basic eventloop latency measurement. (#11963) The measured latency(duration) includes the list below, which can be shown by `INFO STATS`. ``` eventloop_cycles // ever increasing counter eventloop_duration_sum // cumulative duration of eventloop in microseconds eventloop_duration_cmd_sum // cumulative duration of executing commands in microseconds instantaneous_eventloop_cycles_per_sec // average eventloop count per second in recent 1.6s instantaneous_eventloop_duration_usec // average single eventloop duration in recent 1.6s ``` Also added some experimental metrics, which are shown only when `INFO DEBUG` is called. This section isn't included in the default INFO, or even in `INFO ALL` and the fields in this section can change in the future without considering backwards compatibility. ``` eventloop_duration_aof_sum // cumulative duration of writing AOF eventloop_duration_cron_sum // cumulative duration cron jobs (serverCron, beforeSleep excluding IO and AOF) eventloop_cmd_per_cycle_max // max number of commands executed in one eventloop eventloop_duration_max // max duration of one eventloop ``` All of these are being reset by CONFIG RESETSTAT --- src/latency.c | 11 ++++ src/latency.h | 16 +++++ src/server.c | 142 ++++++++++++++++++++++++++++++++++---------- src/server.h | 17 +++++- tests/unit/info.tcl | 63 ++++++++++++++++++++ 5 files changed, 215 insertions(+), 34 deletions(-) diff --git a/src/latency.c b/src/latency.c index 3aaf16b5e..d46890e82 100644 --- a/src/latency.c +++ b/src/latency.c @@ -726,3 +726,14 @@ nodataerr: "No samples available for event '%s'", (char*) c->argv[2]->ptr); } +void durationAddSample(int type, monotime duration) { + if (type >= EL_DURATION_TYPE_NUM) { + return; + } + durationStats* ds = &server.duration_stats[type]; + ds->cnt++; + ds->sum += duration; + if (duration > ds->max) { + ds->max = duration; + } +} diff --git a/src/latency.h b/src/latency.h index 6f2a854dc..13503d5c0 100644 --- a/src/latency.h +++ b/src/latency.h @@ -89,4 +89,20 @@ void latencyAddSample(const char *event, mstime_t latency); #define latencyRemoveNestedEvent(event_var,nested_var) \ event_var += nested_var; +typedef struct durationStats { + unsigned long long cnt; + unsigned long long sum; + unsigned long long max; +} durationStats; + +typedef enum { + EL_DURATION_TYPE_EL = 0, // cumulative time duration metric of the whole eventloop + EL_DURATION_TYPE_CMD, // cumulative time duration metric of executing commands + EL_DURATION_TYPE_AOF, // cumulative time duration metric of flushing AOF in eventloop + EL_DURATION_TYPE_CRON, // cumulative time duration metric of cron (serverCron and beforeSleep, but excluding IO and AOF) + EL_DURATION_TYPE_NUM +} DurationType; + +void durationAddSample(int type, monotime duration); + #endif /* __LATENCY_H */ diff --git a/src/server.c b/src/server.c index b6ed1d9c5..9389e707f 100644 --- a/src/server.c +++ b/src/server.c @@ -694,22 +694,24 @@ int allPersistenceDisabled(void) { /* ======================= Cron: called every 100 ms ======================== */ -/* Add a sample to the operations per second array of samples. */ -void trackInstantaneousMetric(int metric, long long current_reading) { - long long now = mstime(); - long long t = now - server.inst_metric[metric].last_sample_time; - long long ops = current_reading - - server.inst_metric[metric].last_sample_count; - long long ops_sec; - - ops_sec = t > 0 ? (ops*1000/t) : 0; - - server.inst_metric[metric].samples[server.inst_metric[metric].idx] = - ops_sec; - server.inst_metric[metric].idx++; - server.inst_metric[metric].idx %= STATS_METRIC_SAMPLES; - server.inst_metric[metric].last_sample_time = now; - server.inst_metric[metric].last_sample_count = current_reading; +/* Add a sample to the instantaneous metric. This function computes the quotient + * of the increment of value and base, which is useful to record operation count + * per second, or the average time consumption of an operation. + * + * current_value - The dividend + * current_base - The divisor + * */ +void trackInstantaneousMetric(int metric, long long current_value, long long current_base, long long factor) { + if (server.inst_metric[metric].last_sample_base > 0) { + long long base = current_base - server.inst_metric[metric].last_sample_base; + long long value = current_value - server.inst_metric[metric].last_sample_value; + long long avg = base > 0 ? (value * factor / base) : 0; + server.inst_metric[metric].samples[server.inst_metric[metric].idx] = avg; + server.inst_metric[metric].idx++; + server.inst_metric[metric].idx %= STATS_METRIC_SAMPLES; + } + server.inst_metric[metric].last_sample_base = current_base; + server.inst_metric[metric].last_sample_value = current_value; } /* Return the mean of all the samples. */ @@ -1285,6 +1287,8 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { /* for debug purposes: skip actual cron work if pause_cron is on */ if (server.pause_cron) return 1000/server.hz; + monotime cron_start = getMonotonicUs(); + run_with_period(100) { long long stat_net_input_bytes, stat_net_output_bytes; long long stat_net_repl_input_bytes, stat_net_repl_output_bytes; @@ -1292,16 +1296,21 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { atomicGet(server.stat_net_output_bytes, stat_net_output_bytes); atomicGet(server.stat_net_repl_input_bytes, stat_net_repl_input_bytes); atomicGet(server.stat_net_repl_output_bytes, stat_net_repl_output_bytes); - - trackInstantaneousMetric(STATS_METRIC_COMMAND,server.stat_numcommands); - trackInstantaneousMetric(STATS_METRIC_NET_INPUT, - stat_net_input_bytes + stat_net_repl_input_bytes); - trackInstantaneousMetric(STATS_METRIC_NET_OUTPUT, - stat_net_output_bytes + stat_net_repl_output_bytes); - trackInstantaneousMetric(STATS_METRIC_NET_INPUT_REPLICATION, - stat_net_repl_input_bytes); - trackInstantaneousMetric(STATS_METRIC_NET_OUTPUT_REPLICATION, - stat_net_repl_output_bytes); + monotime current_time = getMonotonicUs(); + long long factor = 1000000; // us + trackInstantaneousMetric(STATS_METRIC_COMMAND, server.stat_numcommands, current_time, factor); + trackInstantaneousMetric(STATS_METRIC_NET_INPUT, stat_net_input_bytes + stat_net_repl_input_bytes, + current_time, factor); + trackInstantaneousMetric(STATS_METRIC_NET_OUTPUT, stat_net_output_bytes + stat_net_repl_output_bytes, + current_time, factor); + trackInstantaneousMetric(STATS_METRIC_NET_INPUT_REPLICATION, stat_net_repl_input_bytes, current_time, + factor); + trackInstantaneousMetric(STATS_METRIC_NET_OUTPUT_REPLICATION, stat_net_repl_output_bytes, + current_time, factor); + trackInstantaneousMetric(STATS_METRIC_EL_CYCLE, server.duration_stats[EL_DURATION_TYPE_EL].cnt, + current_time, factor); + trackInstantaneousMetric(STATS_METRIC_EL_DURATION, server.duration_stats[EL_DURATION_TYPE_EL].sum, + server.duration_stats[EL_DURATION_TYPE_EL].cnt, 1); } /* We have just LRU_BITS bits per object for LRU information. @@ -1514,6 +1523,9 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { &ei); server.cronloops++; + + server.el_cron_duration = getMonotonicUs() - cron_start; + return 1000/server.hz; } @@ -1649,6 +1661,11 @@ void beforeSleep(struct aeEventLoop *eventLoop) { /* If any connection type(typical TLS) still has pending unread data don't sleep at all. */ aeSetDontWait(server.el, connTypeHasPendingData()); + /* Record cron time in beforeSleep, which is the sum of active-expire, active-defrag and all other + * tasks done by cron and beforeSleep, but excluding read, write and AOF, that are counted by other + * sets of metrics. */ + monotime cron_start_time_before_aof = getMonotonicUs(); + /* Call the Redis Cluster before sleep function. Note that this function * may change the state of Redis Cluster (from ok to fail or vice versa), * so it's a good idea to call it before serving the unblocked clients @@ -1715,12 +1732,20 @@ void beforeSleep(struct aeEventLoop *eventLoop) { * since the unblocked clients may write data. */ handleClientsBlockedOnKeys(); + /* Record time consumption of AOF writing. */ + monotime aof_start_time = getMonotonicUs(); + /* Record cron time in beforeSleep. This does not include the time consumed by AOF writing and IO writing below. */ + monotime duration_before_aof = aof_start_time - cron_start_time_before_aof; + /* Write the AOF buffer on disk, * must be done before handleClientsWithPendingWritesUsingThreads, * in case of appendfsync=always. */ if (server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE) flushAppendOnlyFile(0); + /* Record time consumption of AOF writing. */ + durationAddSample(EL_DURATION_TYPE_AOF, getMonotonicUs() - aof_start_time); + /* Update the fsynced replica offset. * If an initial rewrite is in progress then not all data is guaranteed to have actually been * persisted to disk yet, so we cannot update the field. We will wait for the rewrite to complete. */ @@ -1733,6 +1758,9 @@ void beforeSleep(struct aeEventLoop *eventLoop) { /* Handle writes with pending output buffers. */ handleClientsWithPendingWritesUsingThreads(); + /* Record cron time in beforeSleep. This does not include the time consumed by AOF writing and IO writing above. */ + monotime cron_start_time_after_write = getMonotonicUs(); + /* Close clients that need to be closed asynchronous */ freeClientsInAsyncFreeQueue(); @@ -1744,6 +1772,25 @@ void beforeSleep(struct aeEventLoop *eventLoop) { /* Disconnect some clients if they are consuming too much memory. */ evictClients(); + /* Record cron time in beforeSleep. */ + monotime duration_after_write = getMonotonicUs() - cron_start_time_after_write; + + /* Record eventloop latency. */ + if (server.el_start > 0) { + monotime el_duration = getMonotonicUs() - server.el_start; + durationAddSample(EL_DURATION_TYPE_EL, el_duration); + } + server.el_cron_duration += duration_before_aof + duration_after_write; + durationAddSample(EL_DURATION_TYPE_CRON, server.el_cron_duration); + server.el_cron_duration = 0; + /* Record max command count per cycle. */ + if (server.stat_numcommands > server.el_cmd_cnt_start) { + long long el_command_cnt = server.stat_numcommands - server.el_cmd_cnt_start; + if (el_command_cnt > server.el_cmd_cnt_max) { + server.el_cmd_cnt_max = el_command_cnt; + } + } + /* Before we are going to sleep, let the threads access the dataset by * releasing the GIL. Redis main thread will not touch anything at this * time. */ @@ -1774,6 +1821,10 @@ void afterSleep(struct aeEventLoop *eventLoop) { latencyEndMonitor(latency); latencyAddSampleIfNeeded("module-acquire-GIL",latency); } + /* Set the eventloop start time. */ + server.el_start = getMonotonicUs(); + /* Set the eventloop command count at start. */ + server.el_cmd_cnt_start = server.stat_numcommands; } /* Update the time cache. */ @@ -2494,8 +2545,8 @@ void resetServerStats(void) { atomicSet(server.stat_total_writes_processed, 0); for (j = 0; j < STATS_METRIC_COUNT; j++) { server.inst_metric[j].idx = 0; - server.inst_metric[j].last_sample_time = mstime(); - server.inst_metric[j].last_sample_count = 0; + server.inst_metric[j].last_sample_base = 0; + server.inst_metric[j].last_sample_value = 0; memset(server.inst_metric[j].samples,0, sizeof(server.inst_metric[j].samples)); } @@ -2512,6 +2563,8 @@ void resetServerStats(void) { server.aof_delayed_fsync = 0; server.stat_reply_buffer_shrinks = 0; server.stat_reply_buffer_expands = 0; + memset(server.duration_stats, 0, sizeof(durationStats) * EL_DURATION_TYPE_NUM); + server.el_cmd_cnt_max = 0; lazyfreeResetStats(); } @@ -3122,7 +3175,7 @@ struct redisCommand *lookupCommandBySdsLogic(dict *commands, sds s) { sds *strings = sdssplitlen(s,sdslen(s),"|",1,&argc); if (strings == NULL) return NULL; - if (argc > 2) { + if (argc < 1 || argc > 2) { /* Currently we support just one level of subcommands */ sdsfreesplitres(strings,argc); return NULL; @@ -3531,6 +3584,8 @@ void call(client *c, int flags) { char *latency_event = (real_cmd->flags & CMD_FAST) ? "fast-command" : "command"; latencyAddSampleIfNeeded(latency_event,duration/1000); + if (server.execution_nesting == 0) + durationAddSample(EL_DURATION_TYPE_CMD, duration); } /* Log the command into the Slow log if needed. @@ -5887,7 +5942,12 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) { "io_threaded_reads_processed:%lld\r\n" "io_threaded_writes_processed:%lld\r\n" "reply_buffer_shrinks:%lld\r\n" - "reply_buffer_expands:%lld\r\n", + "reply_buffer_expands:%lld\r\n" + "eventloop_cycles:%llu\r\n" + "eventloop_duration_sum:%llu\r\n" + "eventloop_duration_cmd_sum:%llu\r\n" + "instantaneous_eventloop_cycles_per_sec:%llu\r\n" + "instantaneous_eventloop_duration_usec:%llu\r\n", server.stat_numconnections, server.stat_numcommands, getInstantaneousMetric(STATS_METRIC_COMMAND), @@ -5937,7 +5997,12 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) { server.stat_io_reads_processed, server.stat_io_writes_processed, server.stat_reply_buffer_shrinks, - server.stat_reply_buffer_expands); + server.stat_reply_buffer_expands, + server.duration_stats[EL_DURATION_TYPE_EL].cnt, + server.duration_stats[EL_DURATION_TYPE_EL].sum, + server.duration_stats[EL_DURATION_TYPE_CMD].sum, + getInstantaneousMetric(STATS_METRIC_EL_CYCLE), + getInstantaneousMetric(STATS_METRIC_EL_DURATION)); info = genRedisInfoStringACLStats(info); } @@ -6187,6 +6252,21 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) { 0, /* not a crash report */ sections); } + + if (dictFind(section_dict, "debug") != NULL) { + if (sections++) info = sdscat(info,"\r\n"); + info = sdscatprintf(info, + "# Debug\r\n" + "eventloop_duration_aof_sum:%llu\r\n" + "eventloop_duration_cron_sum:%llu\r\n" + "eventloop_duration_max:%llu\r\n" + "eventloop_cmd_per_cycle_max:%lld\r\n", + server.duration_stats[EL_DURATION_TYPE_AOF].sum, + server.duration_stats[EL_DURATION_TYPE_CRON].sum, + server.duration_stats[EL_DURATION_TYPE_EL].max, + server.el_cmd_cnt_max); + } + return info; } diff --git a/src/server.h b/src/server.h index af6847f13..5d37cd823 100644 --- a/src/server.h +++ b/src/server.h @@ -168,7 +168,9 @@ struct hdr_histogram; #define STATS_METRIC_NET_OUTPUT 2 /* Bytes written to network. */ #define STATS_METRIC_NET_INPUT_REPLICATION 3 /* Bytes read to network during replication. */ #define STATS_METRIC_NET_OUTPUT_REPLICATION 4 /* Bytes written to network during replication. */ -#define STATS_METRIC_COUNT 5 +#define STATS_METRIC_EL_CYCLE 5 /* Number of eventloop cycled. */ +#define STATS_METRIC_EL_DURATION 6 /* Eventloop duration. */ +#define STATS_METRIC_COUNT 7 /* Protocol and I/O related defines */ #define PROTO_IOBUF_LEN (1024*16) /* Generic I/O buffer size */ @@ -1694,13 +1696,22 @@ struct redisServer { /* The following two are used to track instantaneous metrics, like * number of operations per second, network traffic. */ struct { - long long last_sample_time; /* Timestamp of last sample in ms */ - long long last_sample_count;/* Count in last sample */ + long long last_sample_base; /* The divisor of last sample window */ + long long last_sample_value; /* The dividend of last sample window */ long long samples[STATS_METRIC_SAMPLES]; int idx; } inst_metric[STATS_METRIC_COUNT]; long long stat_reply_buffer_shrinks; /* Total number of output buffer shrinks */ long long stat_reply_buffer_expands; /* Total number of output buffer expands */ + monotime el_start; + /* The following two are used to record the max number of commands executed in one eventloop. + * Note that commands in transactions are also counted. */ + long long el_cmd_cnt_start; + long long el_cmd_cnt_max; + /* The sum of active-expire, active-defrag and all other tasks done by cron and beforeSleep, + but excluding read, write and AOF, which are counted by other sets of metrics. */ + monotime el_cron_duration; + durationStats duration_stats[EL_DURATION_TYPE_NUM]; /* Configuration */ int verbosity; /* Loglevel in redis.conf */ diff --git a/tests/unit/info.tcl b/tests/unit/info.tcl index 759e5bc0b..eb5eee4de 100644 --- a/tests/unit/info.tcl +++ b/tests/unit/info.tcl @@ -274,5 +274,68 @@ start_server {tags {"info" "external:skip"}} { $rd close } + test {stats: eventloop metrics} { + set info1 [r info stats] + set cycle1 [getInfoProperty $info1 eventloop_cycles] + set el_sum1 [getInfoProperty $info1 eventloop_duration_sum] + set cmd_sum1 [getInfoProperty $info1 eventloop_duration_cmd_sum] + assert_morethan $cycle1 0 + assert_morethan $el_sum1 0 + assert_morethan $cmd_sum1 0 + after 110 ;# default hz is 10, wait for a cron tick. + set info2 [r info stats] + set cycle2 [getInfoProperty $info2 eventloop_cycles] + set el_sum2 [getInfoProperty $info2 eventloop_duration_sum] + set cmd_sum2 [getInfoProperty $info2 eventloop_duration_cmd_sum] + assert_morethan $cycle2 $cycle1 + assert_lessthan $cycle2 [expr $cycle1+10] ;# we expect 2 or 3 cycles here, but allow some tolerance + assert_morethan $el_sum2 $el_sum1 + assert_lessthan $el_sum2 [expr $el_sum1+5000] ;# we expect roughly 100ms here, but allow some tolerance + assert_morethan $cmd_sum2 $cmd_sum1 + assert_lessthan $cmd_sum2 [expr $cmd_sum1+3000] ;# we expect about tens of ms here, but allow some tolerance + } + + test {stats: instantaneous metrics} { + r config resetstat + after 1600 ;# hz is 10, wait for 16 cron tick so that sample array is fulfilled + set value [s instantaneous_eventloop_cycles_per_sec] + assert_morethan $value 0 + assert_lessthan $value 15 ;# default hz is 10 + set value [s instantaneous_eventloop_duration_usec] + assert_morethan $value 0 + assert_lessthan $value 1000 ;# default hz is 10, so duration < 1000 / 10, allow some tolerance + } + + test {stats: debug metrics} { + # make sure debug info is hidden + set info [r info] + assert_equal [getInfoProperty $info eventloop_duration_aof_sum] {} + set info_all [r info all] + assert_equal [getInfoProperty $info_all eventloop_duration_aof_sum] {} + + set info1 [r info debug] + + set aof1 [getInfoProperty $info1 eventloop_duration_aof_sum] + assert {$aof1 >= 0} + set cron1 [getInfoProperty $info1 eventloop_duration_cron_sum] + assert {$cron1 > 0} + set cycle_max1 [getInfoProperty $info1 eventloop_cmd_per_cycle_max] + assert {$cycle_max1 > 0} + set duration_max1 [getInfoProperty $info1 eventloop_duration_max] + assert {$duration_max1 > 0} + + after 110 ;# hz is 10, wait for a cron tick. + set info2 [r info debug] + + set aof2 [getInfoProperty $info2 eventloop_duration_aof_sum] + assert {$aof2 >= $aof1} ;# AOF is disabled, we expect $aof2 == $aof1, but allow some tolerance. + set cron2 [getInfoProperty $info2 eventloop_duration_cron_sum] + assert_morethan $cron2 $cron1 + set cycle_max2 [getInfoProperty $info2 eventloop_cmd_per_cycle_max] + assert {$cycle_max2 >= $cycle_max1} + set duration_max2 [getInfoProperty $info2 eventloop_duration_max] + assert {$duration_max2 >= $duration_max1} + } + } }