Fix slowdown due to child reporting CoW. (#8645)
Reading CoW from /proc/<pid>/smaps can be slow with large processes on some platforms. This measures the time it takes to read CoW info and limits the duty cycle of future updates to roughly 1/100. As current_cow_size no longer represnets a current, fixed interval value there is also a new current_cow_size_age field that provides information about the age of the size value, in seconds.
This commit is contained in:
parent
2f717c156a
commit
c3df27d1ea
@ -33,6 +33,7 @@
|
||||
typedef struct {
|
||||
size_t keys;
|
||||
size_t cow;
|
||||
monotime cow_updated;
|
||||
double progress;
|
||||
childInfoType information_type; /* Type of information */
|
||||
} child_info_data;
|
||||
@ -69,18 +70,39 @@ void closeChildInfoPipe(void) {
|
||||
void sendChildInfoGeneric(childInfoType info_type, size_t keys, double progress, char *pname) {
|
||||
if (server.child_info_pipe[1] == -1) return;
|
||||
|
||||
child_info_data data = {0}; /* zero everything, including padding to sattisfy valgrind */
|
||||
static monotime cow_updated = 0;
|
||||
static uint64_t cow_update_cost = 0;
|
||||
static size_t cow = 0;
|
||||
|
||||
child_info_data data = {0}; /* zero everything, including padding to satisfy valgrind */
|
||||
|
||||
/* When called to report current info, we need to throttle down CoW updates as they
|
||||
* can be very expensive. To do that, we measure the time it takes to get a reading
|
||||
* and schedule the next reading to happen not before time*CHILD_COW_COST_FACTOR
|
||||
* passes. */
|
||||
|
||||
monotime now = getMonotonicUs();
|
||||
if (info_type != CHILD_INFO_TYPE_CURRENT_INFO ||
|
||||
!cow_updated ||
|
||||
now - cow_updated > cow_update_cost * CHILD_COW_DUTY_CYCLE)
|
||||
{
|
||||
cow = zmalloc_get_private_dirty(-1);
|
||||
cow_updated = getMonotonicUs();
|
||||
cow_update_cost = cow_updated - now;
|
||||
|
||||
if (cow) {
|
||||
serverLog((info_type == CHILD_INFO_TYPE_CURRENT_INFO) ? LL_VERBOSE : LL_NOTICE,
|
||||
"%s: %zu MB of memory used by copy-on-write",
|
||||
pname, data.cow / (1024 * 1024));
|
||||
}
|
||||
}
|
||||
|
||||
data.information_type = info_type;
|
||||
data.keys = keys;
|
||||
data.cow = zmalloc_get_private_dirty(-1);
|
||||
data.cow = cow;
|
||||
data.cow_updated = cow_updated;
|
||||
data.progress = progress;
|
||||
|
||||
if (data.cow) {
|
||||
serverLog((info_type == CHILD_INFO_TYPE_CURRENT_INFO) ? LL_VERBOSE : LL_NOTICE,
|
||||
"%s: %zu MB of memory used by copy-on-write",
|
||||
pname, data.cow/(1024*1024));
|
||||
}
|
||||
|
||||
ssize_t wlen = sizeof(data);
|
||||
|
||||
if (write(server.child_info_pipe[1], &data, wlen) != wlen) {
|
||||
@ -89,9 +111,10 @@ void sendChildInfoGeneric(childInfoType info_type, size_t keys, double progress,
|
||||
}
|
||||
|
||||
/* Update Child info. */
|
||||
void updateChildInfo(childInfoType information_type, size_t cow, size_t keys, double progress) {
|
||||
void updateChildInfo(childInfoType information_type, size_t cow, monotime cow_updated, size_t keys, double progress) {
|
||||
if (information_type == CHILD_INFO_TYPE_CURRENT_INFO) {
|
||||
server.stat_current_cow_bytes = cow;
|
||||
server.stat_current_cow_updated = cow_updated;
|
||||
server.stat_current_save_keys_processed = keys;
|
||||
if (progress != -1) server.stat_module_progress = progress;
|
||||
} else if (information_type == CHILD_INFO_TYPE_AOF_COW_SIZE) {
|
||||
@ -107,7 +130,7 @@ void updateChildInfo(childInfoType information_type, size_t cow, size_t keys, do
|
||||
* if complete data read into the buffer,
|
||||
* data is stored into *buffer, and returns 1.
|
||||
* otherwise, the partial data is left in the buffer, waiting for the next read, and returns 0. */
|
||||
int readChildInfo(childInfoType *information_type, size_t *cow, size_t *keys, double* progress) {
|
||||
int readChildInfo(childInfoType *information_type, size_t *cow, monotime *cow_updated, size_t *keys, double* progress) {
|
||||
/* We are using here a static buffer in combination with the server.child_info_nread to handle short reads */
|
||||
static child_info_data buffer;
|
||||
ssize_t wlen = sizeof(buffer);
|
||||
@ -124,6 +147,7 @@ int readChildInfo(childInfoType *information_type, size_t *cow, size_t *keys, do
|
||||
if (server.child_info_nread == wlen) {
|
||||
*information_type = buffer.information_type;
|
||||
*cow = buffer.cow;
|
||||
*cow_updated = buffer.cow_updated;
|
||||
*keys = buffer.keys;
|
||||
*progress = buffer.progress;
|
||||
return 1;
|
||||
@ -137,12 +161,13 @@ void receiveChildInfo(void) {
|
||||
if (server.child_info_pipe[0] == -1) return;
|
||||
|
||||
size_t cow;
|
||||
monotime cow_updated;
|
||||
size_t keys;
|
||||
double progress;
|
||||
childInfoType information_type;
|
||||
|
||||
/* Drain the pipe and update child info so that we get the final message. */
|
||||
while (readChildInfo(&information_type, &cow, &keys, &progress)) {
|
||||
updateChildInfo(information_type, cow, keys, progress);
|
||||
while (readChildInfo(&information_type, &cow, &cow_updated, &keys, &progress)) {
|
||||
updateChildInfo(information_type, cow, cow_updated, keys, progress);
|
||||
}
|
||||
}
|
||||
|
@ -1614,6 +1614,7 @@ void resetChildState() {
|
||||
server.child_type = CHILD_TYPE_NONE;
|
||||
server.child_pid = -1;
|
||||
server.stat_current_cow_bytes = 0;
|
||||
server.stat_current_cow_updated = 0;
|
||||
server.stat_current_save_keys_processed = 0;
|
||||
server.stat_module_progress = 0;
|
||||
server.stat_current_save_keys_total = 0;
|
||||
@ -3267,6 +3268,7 @@ void initServer(void) {
|
||||
server.stat_starttime = time(NULL);
|
||||
server.stat_peak_memory = 0;
|
||||
server.stat_current_cow_bytes = 0;
|
||||
server.stat_current_cow_updated = 0;
|
||||
server.stat_current_save_keys_processed = 0;
|
||||
server.stat_current_save_keys_total = 0;
|
||||
server.stat_rdb_cow_bytes = 0;
|
||||
@ -4835,6 +4837,7 @@ sds genRedisInfoString(const char *section) {
|
||||
"# Persistence\r\n"
|
||||
"loading:%d\r\n"
|
||||
"current_cow_size:%zu\r\n"
|
||||
"current_cow_size_age:%lu\r\n"
|
||||
"current_fork_perc:%.2f\r\n"
|
||||
"current_save_keys_processed:%zu\r\n"
|
||||
"current_save_keys_total:%zu\r\n"
|
||||
@ -4857,6 +4860,7 @@ sds genRedisInfoString(const char *section) {
|
||||
"module_fork_last_cow_size:%zu\r\n",
|
||||
(int)server.loading,
|
||||
server.stat_current_cow_bytes,
|
||||
server.stat_current_cow_updated ? (unsigned long) elapsedMs(server.stat_current_cow_updated) / 1000 : 0,
|
||||
fork_perc,
|
||||
server.stat_current_save_keys_processed,
|
||||
server.stat_current_save_keys_total,
|
||||
@ -5829,6 +5833,7 @@ int redisFork(int purpose) {
|
||||
server.child_pid = childpid;
|
||||
server.child_type = purpose;
|
||||
server.stat_current_cow_bytes = 0;
|
||||
server.stat_current_cow_updated = 0;
|
||||
server.stat_current_save_keys_processed = 0;
|
||||
server.stat_module_progress = 0;
|
||||
server.stat_current_save_keys_total = dbTotalServerKeyCount();
|
||||
|
@ -130,6 +130,11 @@ typedef long long ustime_t; /* microsecond time type. */
|
||||
* special code. */
|
||||
#define SERVER_CHILD_NOERROR_RETVAL 255
|
||||
|
||||
/* Reading copy-on-write info is sometimes expensive and may slow down child
|
||||
* processes that report it continuously. We measure the cost of obtaining it
|
||||
* and hold back additional reading based on this factor. */
|
||||
#define CHILD_COW_DUTY_CYCLE 100
|
||||
|
||||
/* Instantaneous metrics tracking. */
|
||||
#define STATS_METRIC_SAMPLES 16 /* Number of samples per metric. */
|
||||
#define STATS_METRIC_COMMAND 0 /* Number of commands executed. */
|
||||
@ -1279,6 +1284,7 @@ struct redisServer {
|
||||
redisAtomic long long stat_net_input_bytes; /* Bytes read from network. */
|
||||
redisAtomic long long stat_net_output_bytes; /* Bytes written to network. */
|
||||
size_t stat_current_cow_bytes; /* Copy on write bytes while child is active. */
|
||||
monotime stat_current_cow_updated; /* Last update time of stat_current_cow_bytes */
|
||||
size_t stat_current_save_keys_processed; /* Processed keys while child is active. */
|
||||
size_t stat_current_save_keys_total; /* Number of keys when child started. */
|
||||
size_t stat_rdb_cow_bytes; /* Copy on write bytes during RDB saving. */
|
||||
|
@ -613,6 +613,11 @@ size_t zmalloc_get_smap_bytes_by_field(char *field, long pid) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Return the total number bytes in pages marked as Private Dirty.
|
||||
*
|
||||
* Note: depending on the platform and memory footprint of the process, this
|
||||
* call can be slow, exceeding 1000ms!
|
||||
*/
|
||||
size_t zmalloc_get_private_dirty(long pid) {
|
||||
return zmalloc_get_smap_bytes_by_field("Private_Dirty:",pid);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user