Add basic eventloop latency measurement. (#11963)
The measured latency(duration) includes the list below, which can be shown by `INFO STATS`. ``` eventloop_cycles // ever increasing counter eventloop_duration_sum // cumulative duration of eventloop in microseconds eventloop_duration_cmd_sum // cumulative duration of executing commands in microseconds instantaneous_eventloop_cycles_per_sec // average eventloop count per second in recent 1.6s instantaneous_eventloop_duration_usec // average single eventloop duration in recent 1.6s ``` Also added some experimental metrics, which are shown only when `INFO DEBUG` is called. This section isn't included in the default INFO, or even in `INFO ALL` and the fields in this section can change in the future without considering backwards compatibility. ``` eventloop_duration_aof_sum // cumulative duration of writing AOF eventloop_duration_cron_sum // cumulative duration cron jobs (serverCron, beforeSleep excluding IO and AOF) eventloop_cmd_per_cycle_max // max number of commands executed in one eventloop eventloop_duration_max // max duration of one eventloop ``` All of these are being reset by CONFIG RESETSTAT
This commit is contained in:
parent
38b8440b39
commit
29ca87955e
@ -726,3 +726,14 @@ nodataerr:
|
|||||||
"No samples available for event '%s'", (char*) c->argv[2]->ptr);
|
"No samples available for event '%s'", (char*) c->argv[2]->ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void durationAddSample(int type, monotime duration) {
|
||||||
|
if (type >= EL_DURATION_TYPE_NUM) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
durationStats* ds = &server.duration_stats[type];
|
||||||
|
ds->cnt++;
|
||||||
|
ds->sum += duration;
|
||||||
|
if (duration > ds->max) {
|
||||||
|
ds->max = duration;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -89,4 +89,20 @@ void latencyAddSample(const char *event, mstime_t latency);
|
|||||||
#define latencyRemoveNestedEvent(event_var,nested_var) \
|
#define latencyRemoveNestedEvent(event_var,nested_var) \
|
||||||
event_var += nested_var;
|
event_var += nested_var;
|
||||||
|
|
||||||
|
typedef struct durationStats {
|
||||||
|
unsigned long long cnt;
|
||||||
|
unsigned long long sum;
|
||||||
|
unsigned long long max;
|
||||||
|
} durationStats;
|
||||||
|
|
||||||
|
typedef enum {
|
||||||
|
EL_DURATION_TYPE_EL = 0, // cumulative time duration metric of the whole eventloop
|
||||||
|
EL_DURATION_TYPE_CMD, // cumulative time duration metric of executing commands
|
||||||
|
EL_DURATION_TYPE_AOF, // cumulative time duration metric of flushing AOF in eventloop
|
||||||
|
EL_DURATION_TYPE_CRON, // cumulative time duration metric of cron (serverCron and beforeSleep, but excluding IO and AOF)
|
||||||
|
EL_DURATION_TYPE_NUM
|
||||||
|
} DurationType;
|
||||||
|
|
||||||
|
void durationAddSample(int type, monotime duration);
|
||||||
|
|
||||||
#endif /* __LATENCY_H */
|
#endif /* __LATENCY_H */
|
||||||
|
138
src/server.c
138
src/server.c
@ -694,22 +694,24 @@ int allPersistenceDisabled(void) {
|
|||||||
|
|
||||||
/* ======================= Cron: called every 100 ms ======================== */
|
/* ======================= Cron: called every 100 ms ======================== */
|
||||||
|
|
||||||
/* Add a sample to the operations per second array of samples. */
|
/* Add a sample to the instantaneous metric. This function computes the quotient
|
||||||
void trackInstantaneousMetric(int metric, long long current_reading) {
|
* of the increment of value and base, which is useful to record operation count
|
||||||
long long now = mstime();
|
* per second, or the average time consumption of an operation.
|
||||||
long long t = now - server.inst_metric[metric].last_sample_time;
|
*
|
||||||
long long ops = current_reading -
|
* current_value - The dividend
|
||||||
server.inst_metric[metric].last_sample_count;
|
* current_base - The divisor
|
||||||
long long ops_sec;
|
* */
|
||||||
|
void trackInstantaneousMetric(int metric, long long current_value, long long current_base, long long factor) {
|
||||||
ops_sec = t > 0 ? (ops*1000/t) : 0;
|
if (server.inst_metric[metric].last_sample_base > 0) {
|
||||||
|
long long base = current_base - server.inst_metric[metric].last_sample_base;
|
||||||
server.inst_metric[metric].samples[server.inst_metric[metric].idx] =
|
long long value = current_value - server.inst_metric[metric].last_sample_value;
|
||||||
ops_sec;
|
long long avg = base > 0 ? (value * factor / base) : 0;
|
||||||
|
server.inst_metric[metric].samples[server.inst_metric[metric].idx] = avg;
|
||||||
server.inst_metric[metric].idx++;
|
server.inst_metric[metric].idx++;
|
||||||
server.inst_metric[metric].idx %= STATS_METRIC_SAMPLES;
|
server.inst_metric[metric].idx %= STATS_METRIC_SAMPLES;
|
||||||
server.inst_metric[metric].last_sample_time = now;
|
}
|
||||||
server.inst_metric[metric].last_sample_count = current_reading;
|
server.inst_metric[metric].last_sample_base = current_base;
|
||||||
|
server.inst_metric[metric].last_sample_value = current_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Return the mean of all the samples. */
|
/* Return the mean of all the samples. */
|
||||||
@ -1285,6 +1287,8 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
|
|||||||
/* for debug purposes: skip actual cron work if pause_cron is on */
|
/* for debug purposes: skip actual cron work if pause_cron is on */
|
||||||
if (server.pause_cron) return 1000/server.hz;
|
if (server.pause_cron) return 1000/server.hz;
|
||||||
|
|
||||||
|
monotime cron_start = getMonotonicUs();
|
||||||
|
|
||||||
run_with_period(100) {
|
run_with_period(100) {
|
||||||
long long stat_net_input_bytes, stat_net_output_bytes;
|
long long stat_net_input_bytes, stat_net_output_bytes;
|
||||||
long long stat_net_repl_input_bytes, stat_net_repl_output_bytes;
|
long long stat_net_repl_input_bytes, stat_net_repl_output_bytes;
|
||||||
@ -1292,16 +1296,21 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
|
|||||||
atomicGet(server.stat_net_output_bytes, stat_net_output_bytes);
|
atomicGet(server.stat_net_output_bytes, stat_net_output_bytes);
|
||||||
atomicGet(server.stat_net_repl_input_bytes, stat_net_repl_input_bytes);
|
atomicGet(server.stat_net_repl_input_bytes, stat_net_repl_input_bytes);
|
||||||
atomicGet(server.stat_net_repl_output_bytes, stat_net_repl_output_bytes);
|
atomicGet(server.stat_net_repl_output_bytes, stat_net_repl_output_bytes);
|
||||||
|
monotime current_time = getMonotonicUs();
|
||||||
trackInstantaneousMetric(STATS_METRIC_COMMAND,server.stat_numcommands);
|
long long factor = 1000000; // us
|
||||||
trackInstantaneousMetric(STATS_METRIC_NET_INPUT,
|
trackInstantaneousMetric(STATS_METRIC_COMMAND, server.stat_numcommands, current_time, factor);
|
||||||
stat_net_input_bytes + stat_net_repl_input_bytes);
|
trackInstantaneousMetric(STATS_METRIC_NET_INPUT, stat_net_input_bytes + stat_net_repl_input_bytes,
|
||||||
trackInstantaneousMetric(STATS_METRIC_NET_OUTPUT,
|
current_time, factor);
|
||||||
stat_net_output_bytes + stat_net_repl_output_bytes);
|
trackInstantaneousMetric(STATS_METRIC_NET_OUTPUT, stat_net_output_bytes + stat_net_repl_output_bytes,
|
||||||
trackInstantaneousMetric(STATS_METRIC_NET_INPUT_REPLICATION,
|
current_time, factor);
|
||||||
stat_net_repl_input_bytes);
|
trackInstantaneousMetric(STATS_METRIC_NET_INPUT_REPLICATION, stat_net_repl_input_bytes, current_time,
|
||||||
trackInstantaneousMetric(STATS_METRIC_NET_OUTPUT_REPLICATION,
|
factor);
|
||||||
stat_net_repl_output_bytes);
|
trackInstantaneousMetric(STATS_METRIC_NET_OUTPUT_REPLICATION, stat_net_repl_output_bytes,
|
||||||
|
current_time, factor);
|
||||||
|
trackInstantaneousMetric(STATS_METRIC_EL_CYCLE, server.duration_stats[EL_DURATION_TYPE_EL].cnt,
|
||||||
|
current_time, factor);
|
||||||
|
trackInstantaneousMetric(STATS_METRIC_EL_DURATION, server.duration_stats[EL_DURATION_TYPE_EL].sum,
|
||||||
|
server.duration_stats[EL_DURATION_TYPE_EL].cnt, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* We have just LRU_BITS bits per object for LRU information.
|
/* We have just LRU_BITS bits per object for LRU information.
|
||||||
@ -1514,6 +1523,9 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
|
|||||||
&ei);
|
&ei);
|
||||||
|
|
||||||
server.cronloops++;
|
server.cronloops++;
|
||||||
|
|
||||||
|
server.el_cron_duration = getMonotonicUs() - cron_start;
|
||||||
|
|
||||||
return 1000/server.hz;
|
return 1000/server.hz;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1649,6 +1661,11 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
|
|||||||
/* If any connection type(typical TLS) still has pending unread data don't sleep at all. */
|
/* If any connection type(typical TLS) still has pending unread data don't sleep at all. */
|
||||||
aeSetDontWait(server.el, connTypeHasPendingData());
|
aeSetDontWait(server.el, connTypeHasPendingData());
|
||||||
|
|
||||||
|
/* Record cron time in beforeSleep, which is the sum of active-expire, active-defrag and all other
|
||||||
|
* tasks done by cron and beforeSleep, but excluding read, write and AOF, that are counted by other
|
||||||
|
* sets of metrics. */
|
||||||
|
monotime cron_start_time_before_aof = getMonotonicUs();
|
||||||
|
|
||||||
/* Call the Redis Cluster before sleep function. Note that this function
|
/* Call the Redis Cluster before sleep function. Note that this function
|
||||||
* may change the state of Redis Cluster (from ok to fail or vice versa),
|
* may change the state of Redis Cluster (from ok to fail or vice versa),
|
||||||
* so it's a good idea to call it before serving the unblocked clients
|
* so it's a good idea to call it before serving the unblocked clients
|
||||||
@ -1715,12 +1732,20 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
|
|||||||
* since the unblocked clients may write data. */
|
* since the unblocked clients may write data. */
|
||||||
handleClientsBlockedOnKeys();
|
handleClientsBlockedOnKeys();
|
||||||
|
|
||||||
|
/* Record time consumption of AOF writing. */
|
||||||
|
monotime aof_start_time = getMonotonicUs();
|
||||||
|
/* Record cron time in beforeSleep. This does not include the time consumed by AOF writing and IO writing below. */
|
||||||
|
monotime duration_before_aof = aof_start_time - cron_start_time_before_aof;
|
||||||
|
|
||||||
/* Write the AOF buffer on disk,
|
/* Write the AOF buffer on disk,
|
||||||
* must be done before handleClientsWithPendingWritesUsingThreads,
|
* must be done before handleClientsWithPendingWritesUsingThreads,
|
||||||
* in case of appendfsync=always. */
|
* in case of appendfsync=always. */
|
||||||
if (server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE)
|
if (server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE)
|
||||||
flushAppendOnlyFile(0);
|
flushAppendOnlyFile(0);
|
||||||
|
|
||||||
|
/* Record time consumption of AOF writing. */
|
||||||
|
durationAddSample(EL_DURATION_TYPE_AOF, getMonotonicUs() - aof_start_time);
|
||||||
|
|
||||||
/* Update the fsynced replica offset.
|
/* Update the fsynced replica offset.
|
||||||
* If an initial rewrite is in progress then not all data is guaranteed to have actually been
|
* If an initial rewrite is in progress then not all data is guaranteed to have actually been
|
||||||
* persisted to disk yet, so we cannot update the field. We will wait for the rewrite to complete. */
|
* persisted to disk yet, so we cannot update the field. We will wait for the rewrite to complete. */
|
||||||
@ -1733,6 +1758,9 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
|
|||||||
/* Handle writes with pending output buffers. */
|
/* Handle writes with pending output buffers. */
|
||||||
handleClientsWithPendingWritesUsingThreads();
|
handleClientsWithPendingWritesUsingThreads();
|
||||||
|
|
||||||
|
/* Record cron time in beforeSleep. This does not include the time consumed by AOF writing and IO writing above. */
|
||||||
|
monotime cron_start_time_after_write = getMonotonicUs();
|
||||||
|
|
||||||
/* Close clients that need to be closed asynchronous */
|
/* Close clients that need to be closed asynchronous */
|
||||||
freeClientsInAsyncFreeQueue();
|
freeClientsInAsyncFreeQueue();
|
||||||
|
|
||||||
@ -1744,6 +1772,25 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
|
|||||||
/* Disconnect some clients if they are consuming too much memory. */
|
/* Disconnect some clients if they are consuming too much memory. */
|
||||||
evictClients();
|
evictClients();
|
||||||
|
|
||||||
|
/* Record cron time in beforeSleep. */
|
||||||
|
monotime duration_after_write = getMonotonicUs() - cron_start_time_after_write;
|
||||||
|
|
||||||
|
/* Record eventloop latency. */
|
||||||
|
if (server.el_start > 0) {
|
||||||
|
monotime el_duration = getMonotonicUs() - server.el_start;
|
||||||
|
durationAddSample(EL_DURATION_TYPE_EL, el_duration);
|
||||||
|
}
|
||||||
|
server.el_cron_duration += duration_before_aof + duration_after_write;
|
||||||
|
durationAddSample(EL_DURATION_TYPE_CRON, server.el_cron_duration);
|
||||||
|
server.el_cron_duration = 0;
|
||||||
|
/* Record max command count per cycle. */
|
||||||
|
if (server.stat_numcommands > server.el_cmd_cnt_start) {
|
||||||
|
long long el_command_cnt = server.stat_numcommands - server.el_cmd_cnt_start;
|
||||||
|
if (el_command_cnt > server.el_cmd_cnt_max) {
|
||||||
|
server.el_cmd_cnt_max = el_command_cnt;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Before we are going to sleep, let the threads access the dataset by
|
/* Before we are going to sleep, let the threads access the dataset by
|
||||||
* releasing the GIL. Redis main thread will not touch anything at this
|
* releasing the GIL. Redis main thread will not touch anything at this
|
||||||
* time. */
|
* time. */
|
||||||
@ -1774,6 +1821,10 @@ void afterSleep(struct aeEventLoop *eventLoop) {
|
|||||||
latencyEndMonitor(latency);
|
latencyEndMonitor(latency);
|
||||||
latencyAddSampleIfNeeded("module-acquire-GIL",latency);
|
latencyAddSampleIfNeeded("module-acquire-GIL",latency);
|
||||||
}
|
}
|
||||||
|
/* Set the eventloop start time. */
|
||||||
|
server.el_start = getMonotonicUs();
|
||||||
|
/* Set the eventloop command count at start. */
|
||||||
|
server.el_cmd_cnt_start = server.stat_numcommands;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Update the time cache. */
|
/* Update the time cache. */
|
||||||
@ -2494,8 +2545,8 @@ void resetServerStats(void) {
|
|||||||
atomicSet(server.stat_total_writes_processed, 0);
|
atomicSet(server.stat_total_writes_processed, 0);
|
||||||
for (j = 0; j < STATS_METRIC_COUNT; j++) {
|
for (j = 0; j < STATS_METRIC_COUNT; j++) {
|
||||||
server.inst_metric[j].idx = 0;
|
server.inst_metric[j].idx = 0;
|
||||||
server.inst_metric[j].last_sample_time = mstime();
|
server.inst_metric[j].last_sample_base = 0;
|
||||||
server.inst_metric[j].last_sample_count = 0;
|
server.inst_metric[j].last_sample_value = 0;
|
||||||
memset(server.inst_metric[j].samples,0,
|
memset(server.inst_metric[j].samples,0,
|
||||||
sizeof(server.inst_metric[j].samples));
|
sizeof(server.inst_metric[j].samples));
|
||||||
}
|
}
|
||||||
@ -2512,6 +2563,8 @@ void resetServerStats(void) {
|
|||||||
server.aof_delayed_fsync = 0;
|
server.aof_delayed_fsync = 0;
|
||||||
server.stat_reply_buffer_shrinks = 0;
|
server.stat_reply_buffer_shrinks = 0;
|
||||||
server.stat_reply_buffer_expands = 0;
|
server.stat_reply_buffer_expands = 0;
|
||||||
|
memset(server.duration_stats, 0, sizeof(durationStats) * EL_DURATION_TYPE_NUM);
|
||||||
|
server.el_cmd_cnt_max = 0;
|
||||||
lazyfreeResetStats();
|
lazyfreeResetStats();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3122,7 +3175,7 @@ struct redisCommand *lookupCommandBySdsLogic(dict *commands, sds s) {
|
|||||||
sds *strings = sdssplitlen(s,sdslen(s),"|",1,&argc);
|
sds *strings = sdssplitlen(s,sdslen(s),"|",1,&argc);
|
||||||
if (strings == NULL)
|
if (strings == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
if (argc > 2) {
|
if (argc < 1 || argc > 2) {
|
||||||
/* Currently we support just one level of subcommands */
|
/* Currently we support just one level of subcommands */
|
||||||
sdsfreesplitres(strings,argc);
|
sdsfreesplitres(strings,argc);
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -3531,6 +3584,8 @@ void call(client *c, int flags) {
|
|||||||
char *latency_event = (real_cmd->flags & CMD_FAST) ?
|
char *latency_event = (real_cmd->flags & CMD_FAST) ?
|
||||||
"fast-command" : "command";
|
"fast-command" : "command";
|
||||||
latencyAddSampleIfNeeded(latency_event,duration/1000);
|
latencyAddSampleIfNeeded(latency_event,duration/1000);
|
||||||
|
if (server.execution_nesting == 0)
|
||||||
|
durationAddSample(EL_DURATION_TYPE_CMD, duration);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Log the command into the Slow log if needed.
|
/* Log the command into the Slow log if needed.
|
||||||
@ -5887,7 +5942,12 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
|
|||||||
"io_threaded_reads_processed:%lld\r\n"
|
"io_threaded_reads_processed:%lld\r\n"
|
||||||
"io_threaded_writes_processed:%lld\r\n"
|
"io_threaded_writes_processed:%lld\r\n"
|
||||||
"reply_buffer_shrinks:%lld\r\n"
|
"reply_buffer_shrinks:%lld\r\n"
|
||||||
"reply_buffer_expands:%lld\r\n",
|
"reply_buffer_expands:%lld\r\n"
|
||||||
|
"eventloop_cycles:%llu\r\n"
|
||||||
|
"eventloop_duration_sum:%llu\r\n"
|
||||||
|
"eventloop_duration_cmd_sum:%llu\r\n"
|
||||||
|
"instantaneous_eventloop_cycles_per_sec:%llu\r\n"
|
||||||
|
"instantaneous_eventloop_duration_usec:%llu\r\n",
|
||||||
server.stat_numconnections,
|
server.stat_numconnections,
|
||||||
server.stat_numcommands,
|
server.stat_numcommands,
|
||||||
getInstantaneousMetric(STATS_METRIC_COMMAND),
|
getInstantaneousMetric(STATS_METRIC_COMMAND),
|
||||||
@ -5937,7 +5997,12 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
|
|||||||
server.stat_io_reads_processed,
|
server.stat_io_reads_processed,
|
||||||
server.stat_io_writes_processed,
|
server.stat_io_writes_processed,
|
||||||
server.stat_reply_buffer_shrinks,
|
server.stat_reply_buffer_shrinks,
|
||||||
server.stat_reply_buffer_expands);
|
server.stat_reply_buffer_expands,
|
||||||
|
server.duration_stats[EL_DURATION_TYPE_EL].cnt,
|
||||||
|
server.duration_stats[EL_DURATION_TYPE_EL].sum,
|
||||||
|
server.duration_stats[EL_DURATION_TYPE_CMD].sum,
|
||||||
|
getInstantaneousMetric(STATS_METRIC_EL_CYCLE),
|
||||||
|
getInstantaneousMetric(STATS_METRIC_EL_DURATION));
|
||||||
info = genRedisInfoStringACLStats(info);
|
info = genRedisInfoStringACLStats(info);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -6187,6 +6252,21 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
|
|||||||
0, /* not a crash report */
|
0, /* not a crash report */
|
||||||
sections);
|
sections);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (dictFind(section_dict, "debug") != NULL) {
|
||||||
|
if (sections++) info = sdscat(info,"\r\n");
|
||||||
|
info = sdscatprintf(info,
|
||||||
|
"# Debug\r\n"
|
||||||
|
"eventloop_duration_aof_sum:%llu\r\n"
|
||||||
|
"eventloop_duration_cron_sum:%llu\r\n"
|
||||||
|
"eventloop_duration_max:%llu\r\n"
|
||||||
|
"eventloop_cmd_per_cycle_max:%lld\r\n",
|
||||||
|
server.duration_stats[EL_DURATION_TYPE_AOF].sum,
|
||||||
|
server.duration_stats[EL_DURATION_TYPE_CRON].sum,
|
||||||
|
server.duration_stats[EL_DURATION_TYPE_EL].max,
|
||||||
|
server.el_cmd_cnt_max);
|
||||||
|
}
|
||||||
|
|
||||||
return info;
|
return info;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
17
src/server.h
17
src/server.h
@ -168,7 +168,9 @@ struct hdr_histogram;
|
|||||||
#define STATS_METRIC_NET_OUTPUT 2 /* Bytes written to network. */
|
#define STATS_METRIC_NET_OUTPUT 2 /* Bytes written to network. */
|
||||||
#define STATS_METRIC_NET_INPUT_REPLICATION 3 /* Bytes read to network during replication. */
|
#define STATS_METRIC_NET_INPUT_REPLICATION 3 /* Bytes read to network during replication. */
|
||||||
#define STATS_METRIC_NET_OUTPUT_REPLICATION 4 /* Bytes written to network during replication. */
|
#define STATS_METRIC_NET_OUTPUT_REPLICATION 4 /* Bytes written to network during replication. */
|
||||||
#define STATS_METRIC_COUNT 5
|
#define STATS_METRIC_EL_CYCLE 5 /* Number of eventloop cycled. */
|
||||||
|
#define STATS_METRIC_EL_DURATION 6 /* Eventloop duration. */
|
||||||
|
#define STATS_METRIC_COUNT 7
|
||||||
|
|
||||||
/* Protocol and I/O related defines */
|
/* Protocol and I/O related defines */
|
||||||
#define PROTO_IOBUF_LEN (1024*16) /* Generic I/O buffer size */
|
#define PROTO_IOBUF_LEN (1024*16) /* Generic I/O buffer size */
|
||||||
@ -1694,13 +1696,22 @@ struct redisServer {
|
|||||||
/* The following two are used to track instantaneous metrics, like
|
/* The following two are used to track instantaneous metrics, like
|
||||||
* number of operations per second, network traffic. */
|
* number of operations per second, network traffic. */
|
||||||
struct {
|
struct {
|
||||||
long long last_sample_time; /* Timestamp of last sample in ms */
|
long long last_sample_base; /* The divisor of last sample window */
|
||||||
long long last_sample_count;/* Count in last sample */
|
long long last_sample_value; /* The dividend of last sample window */
|
||||||
long long samples[STATS_METRIC_SAMPLES];
|
long long samples[STATS_METRIC_SAMPLES];
|
||||||
int idx;
|
int idx;
|
||||||
} inst_metric[STATS_METRIC_COUNT];
|
} inst_metric[STATS_METRIC_COUNT];
|
||||||
long long stat_reply_buffer_shrinks; /* Total number of output buffer shrinks */
|
long long stat_reply_buffer_shrinks; /* Total number of output buffer shrinks */
|
||||||
long long stat_reply_buffer_expands; /* Total number of output buffer expands */
|
long long stat_reply_buffer_expands; /* Total number of output buffer expands */
|
||||||
|
monotime el_start;
|
||||||
|
/* The following two are used to record the max number of commands executed in one eventloop.
|
||||||
|
* Note that commands in transactions are also counted. */
|
||||||
|
long long el_cmd_cnt_start;
|
||||||
|
long long el_cmd_cnt_max;
|
||||||
|
/* The sum of active-expire, active-defrag and all other tasks done by cron and beforeSleep,
|
||||||
|
but excluding read, write and AOF, which are counted by other sets of metrics. */
|
||||||
|
monotime el_cron_duration;
|
||||||
|
durationStats duration_stats[EL_DURATION_TYPE_NUM];
|
||||||
|
|
||||||
/* Configuration */
|
/* Configuration */
|
||||||
int verbosity; /* Loglevel in redis.conf */
|
int verbosity; /* Loglevel in redis.conf */
|
||||||
|
@ -274,5 +274,68 @@ start_server {tags {"info" "external:skip"}} {
|
|||||||
$rd close
|
$rd close
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test {stats: eventloop metrics} {
|
||||||
|
set info1 [r info stats]
|
||||||
|
set cycle1 [getInfoProperty $info1 eventloop_cycles]
|
||||||
|
set el_sum1 [getInfoProperty $info1 eventloop_duration_sum]
|
||||||
|
set cmd_sum1 [getInfoProperty $info1 eventloop_duration_cmd_sum]
|
||||||
|
assert_morethan $cycle1 0
|
||||||
|
assert_morethan $el_sum1 0
|
||||||
|
assert_morethan $cmd_sum1 0
|
||||||
|
after 110 ;# default hz is 10, wait for a cron tick.
|
||||||
|
set info2 [r info stats]
|
||||||
|
set cycle2 [getInfoProperty $info2 eventloop_cycles]
|
||||||
|
set el_sum2 [getInfoProperty $info2 eventloop_duration_sum]
|
||||||
|
set cmd_sum2 [getInfoProperty $info2 eventloop_duration_cmd_sum]
|
||||||
|
assert_morethan $cycle2 $cycle1
|
||||||
|
assert_lessthan $cycle2 [expr $cycle1+10] ;# we expect 2 or 3 cycles here, but allow some tolerance
|
||||||
|
assert_morethan $el_sum2 $el_sum1
|
||||||
|
assert_lessthan $el_sum2 [expr $el_sum1+5000] ;# we expect roughly 100ms here, but allow some tolerance
|
||||||
|
assert_morethan $cmd_sum2 $cmd_sum1
|
||||||
|
assert_lessthan $cmd_sum2 [expr $cmd_sum1+3000] ;# we expect about tens of ms here, but allow some tolerance
|
||||||
|
}
|
||||||
|
|
||||||
|
test {stats: instantaneous metrics} {
|
||||||
|
r config resetstat
|
||||||
|
after 1600 ;# hz is 10, wait for 16 cron tick so that sample array is fulfilled
|
||||||
|
set value [s instantaneous_eventloop_cycles_per_sec]
|
||||||
|
assert_morethan $value 0
|
||||||
|
assert_lessthan $value 15 ;# default hz is 10
|
||||||
|
set value [s instantaneous_eventloop_duration_usec]
|
||||||
|
assert_morethan $value 0
|
||||||
|
assert_lessthan $value 1000 ;# default hz is 10, so duration < 1000 / 10, allow some tolerance
|
||||||
|
}
|
||||||
|
|
||||||
|
test {stats: debug metrics} {
|
||||||
|
# make sure debug info is hidden
|
||||||
|
set info [r info]
|
||||||
|
assert_equal [getInfoProperty $info eventloop_duration_aof_sum] {}
|
||||||
|
set info_all [r info all]
|
||||||
|
assert_equal [getInfoProperty $info_all eventloop_duration_aof_sum] {}
|
||||||
|
|
||||||
|
set info1 [r info debug]
|
||||||
|
|
||||||
|
set aof1 [getInfoProperty $info1 eventloop_duration_aof_sum]
|
||||||
|
assert {$aof1 >= 0}
|
||||||
|
set cron1 [getInfoProperty $info1 eventloop_duration_cron_sum]
|
||||||
|
assert {$cron1 > 0}
|
||||||
|
set cycle_max1 [getInfoProperty $info1 eventloop_cmd_per_cycle_max]
|
||||||
|
assert {$cycle_max1 > 0}
|
||||||
|
set duration_max1 [getInfoProperty $info1 eventloop_duration_max]
|
||||||
|
assert {$duration_max1 > 0}
|
||||||
|
|
||||||
|
after 110 ;# hz is 10, wait for a cron tick.
|
||||||
|
set info2 [r info debug]
|
||||||
|
|
||||||
|
set aof2 [getInfoProperty $info2 eventloop_duration_aof_sum]
|
||||||
|
assert {$aof2 >= $aof1} ;# AOF is disabled, we expect $aof2 == $aof1, but allow some tolerance.
|
||||||
|
set cron2 [getInfoProperty $info2 eventloop_duration_cron_sum]
|
||||||
|
assert_morethan $cron2 $cron1
|
||||||
|
set cycle_max2 [getInfoProperty $info2 eventloop_cmd_per_cycle_max]
|
||||||
|
assert {$cycle_max2 >= $cycle_max1}
|
||||||
|
set duration_max2 [getInfoProperty $info2 eventloop_duration_max]
|
||||||
|
assert {$duration_max2 >= $duration_max1}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user