Fix slowdown due to child reporting CoW. (#8645)

Reading CoW from /proc/<pid>/smaps can be slow with large processes on
some platforms.

This measures the time it takes to read CoW info and limits the duty
cycle of future updates to roughly 1/100.

As current_cow_size no longer represnets a current, fixed interval value
there is also a new current_cow_size_age field that provides information
about the age of the size value, in seconds.
This commit is contained in:
Yossi Gottlieb 2021-03-22 13:25:58 +02:00 committed by GitHub
parent 2f717c156a
commit c3df27d1ea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 53 additions and 12 deletions

View File

@ -33,6 +33,7 @@
typedef struct {
size_t keys;
size_t cow;
monotime cow_updated;
double progress;
childInfoType information_type; /* Type of information */
} child_info_data;
@ -69,18 +70,39 @@ void closeChildInfoPipe(void) {
void sendChildInfoGeneric(childInfoType info_type, size_t keys, double progress, char *pname) {
if (server.child_info_pipe[1] == -1) return;
child_info_data data = {0}; /* zero everything, including padding to sattisfy valgrind */
static monotime cow_updated = 0;
static uint64_t cow_update_cost = 0;
static size_t cow = 0;
child_info_data data = {0}; /* zero everything, including padding to satisfy valgrind */
/* When called to report current info, we need to throttle down CoW updates as they
* can be very expensive. To do that, we measure the time it takes to get a reading
* and schedule the next reading to happen not before time*CHILD_COW_COST_FACTOR
* passes. */
monotime now = getMonotonicUs();
if (info_type != CHILD_INFO_TYPE_CURRENT_INFO ||
!cow_updated ||
now - cow_updated > cow_update_cost * CHILD_COW_DUTY_CYCLE)
{
cow = zmalloc_get_private_dirty(-1);
cow_updated = getMonotonicUs();
cow_update_cost = cow_updated - now;
if (cow) {
serverLog((info_type == CHILD_INFO_TYPE_CURRENT_INFO) ? LL_VERBOSE : LL_NOTICE,
"%s: %zu MB of memory used by copy-on-write",
pname, data.cow / (1024 * 1024));
}
}
data.information_type = info_type;
data.keys = keys;
data.cow = zmalloc_get_private_dirty(-1);
data.cow = cow;
data.cow_updated = cow_updated;
data.progress = progress;
if (data.cow) {
serverLog((info_type == CHILD_INFO_TYPE_CURRENT_INFO) ? LL_VERBOSE : LL_NOTICE,
"%s: %zu MB of memory used by copy-on-write",
pname, data.cow/(1024*1024));
}
ssize_t wlen = sizeof(data);
if (write(server.child_info_pipe[1], &data, wlen) != wlen) {
@ -89,9 +111,10 @@ void sendChildInfoGeneric(childInfoType info_type, size_t keys, double progress,
}
/* Update Child info. */
void updateChildInfo(childInfoType information_type, size_t cow, size_t keys, double progress) {
void updateChildInfo(childInfoType information_type, size_t cow, monotime cow_updated, size_t keys, double progress) {
if (information_type == CHILD_INFO_TYPE_CURRENT_INFO) {
server.stat_current_cow_bytes = cow;
server.stat_current_cow_updated = cow_updated;
server.stat_current_save_keys_processed = keys;
if (progress != -1) server.stat_module_progress = progress;
} else if (information_type == CHILD_INFO_TYPE_AOF_COW_SIZE) {
@ -107,7 +130,7 @@ void updateChildInfo(childInfoType information_type, size_t cow, size_t keys, do
* if complete data read into the buffer,
* data is stored into *buffer, and returns 1.
* otherwise, the partial data is left in the buffer, waiting for the next read, and returns 0. */
int readChildInfo(childInfoType *information_type, size_t *cow, size_t *keys, double* progress) {
int readChildInfo(childInfoType *information_type, size_t *cow, monotime *cow_updated, size_t *keys, double* progress) {
/* We are using here a static buffer in combination with the server.child_info_nread to handle short reads */
static child_info_data buffer;
ssize_t wlen = sizeof(buffer);
@ -124,6 +147,7 @@ int readChildInfo(childInfoType *information_type, size_t *cow, size_t *keys, do
if (server.child_info_nread == wlen) {
*information_type = buffer.information_type;
*cow = buffer.cow;
*cow_updated = buffer.cow_updated;
*keys = buffer.keys;
*progress = buffer.progress;
return 1;
@ -137,12 +161,13 @@ void receiveChildInfo(void) {
if (server.child_info_pipe[0] == -1) return;
size_t cow;
monotime cow_updated;
size_t keys;
double progress;
childInfoType information_type;
/* Drain the pipe and update child info so that we get the final message. */
while (readChildInfo(&information_type, &cow, &keys, &progress)) {
updateChildInfo(information_type, cow, keys, progress);
while (readChildInfo(&information_type, &cow, &cow_updated, &keys, &progress)) {
updateChildInfo(information_type, cow, cow_updated, keys, progress);
}
}

View File

@ -1614,6 +1614,7 @@ void resetChildState() {
server.child_type = CHILD_TYPE_NONE;
server.child_pid = -1;
server.stat_current_cow_bytes = 0;
server.stat_current_cow_updated = 0;
server.stat_current_save_keys_processed = 0;
server.stat_module_progress = 0;
server.stat_current_save_keys_total = 0;
@ -3267,6 +3268,7 @@ void initServer(void) {
server.stat_starttime = time(NULL);
server.stat_peak_memory = 0;
server.stat_current_cow_bytes = 0;
server.stat_current_cow_updated = 0;
server.stat_current_save_keys_processed = 0;
server.stat_current_save_keys_total = 0;
server.stat_rdb_cow_bytes = 0;
@ -4835,6 +4837,7 @@ sds genRedisInfoString(const char *section) {
"# Persistence\r\n"
"loading:%d\r\n"
"current_cow_size:%zu\r\n"
"current_cow_size_age:%lu\r\n"
"current_fork_perc:%.2f\r\n"
"current_save_keys_processed:%zu\r\n"
"current_save_keys_total:%zu\r\n"
@ -4857,6 +4860,7 @@ sds genRedisInfoString(const char *section) {
"module_fork_last_cow_size:%zu\r\n",
(int)server.loading,
server.stat_current_cow_bytes,
server.stat_current_cow_updated ? (unsigned long) elapsedMs(server.stat_current_cow_updated) / 1000 : 0,
fork_perc,
server.stat_current_save_keys_processed,
server.stat_current_save_keys_total,
@ -5829,6 +5833,7 @@ int redisFork(int purpose) {
server.child_pid = childpid;
server.child_type = purpose;
server.stat_current_cow_bytes = 0;
server.stat_current_cow_updated = 0;
server.stat_current_save_keys_processed = 0;
server.stat_module_progress = 0;
server.stat_current_save_keys_total = dbTotalServerKeyCount();

View File

@ -130,6 +130,11 @@ typedef long long ustime_t; /* microsecond time type. */
* special code. */
#define SERVER_CHILD_NOERROR_RETVAL 255
/* Reading copy-on-write info is sometimes expensive and may slow down child
* processes that report it continuously. We measure the cost of obtaining it
* and hold back additional reading based on this factor. */
#define CHILD_COW_DUTY_CYCLE 100
/* Instantaneous metrics tracking. */
#define STATS_METRIC_SAMPLES 16 /* Number of samples per metric. */
#define STATS_METRIC_COMMAND 0 /* Number of commands executed. */
@ -1279,6 +1284,7 @@ struct redisServer {
redisAtomic long long stat_net_input_bytes; /* Bytes read from network. */
redisAtomic long long stat_net_output_bytes; /* Bytes written to network. */
size_t stat_current_cow_bytes; /* Copy on write bytes while child is active. */
monotime stat_current_cow_updated; /* Last update time of stat_current_cow_bytes */
size_t stat_current_save_keys_processed; /* Processed keys while child is active. */
size_t stat_current_save_keys_total; /* Number of keys when child started. */
size_t stat_rdb_cow_bytes; /* Copy on write bytes during RDB saving. */

View File

@ -613,6 +613,11 @@ size_t zmalloc_get_smap_bytes_by_field(char *field, long pid) {
}
#endif
/* Return the total number bytes in pages marked as Private Dirty.
*
* Note: depending on the platform and memory footprint of the process, this
* call can be slow, exceeding 1000ms!
*/
size_t zmalloc_get_private_dirty(long pid) {
return zmalloc_get_smap_bytes_by_field("Private_Dirty:",pid);
}