Add latency stats around cluster config file operations (#1534)
When the cluster changes, we need to persist the cluster configuration, and these file I/O operations may cause latency, so we add latency stats around them. Signed-off-by: Binbin <binloveplay1314@qq.com>
This commit is contained in:
parent
10357ceda5
commit
11cb8ee27c
@ -817,6 +817,7 @@ int clusterSaveConfig(int do_fsync) {
|
||||
ssize_t written_bytes;
|
||||
int fd = -1;
|
||||
int retval = C_ERR;
|
||||
mstime_t latency;
|
||||
|
||||
server.cluster->todo_before_sleep &= ~CLUSTER_TODO_SAVE_CONFIG;
|
||||
|
||||
@ -830,11 +831,15 @@ int clusterSaveConfig(int do_fsync) {
|
||||
|
||||
/* Create a temp file with the new content. */
|
||||
tmpfilename = sdscatfmt(sdsempty(), "%s.tmp-%i-%I", server.cluster_configfile, (int)getpid(), mstime());
|
||||
latencyStartMonitor(latency);
|
||||
if ((fd = open(tmpfilename, O_WRONLY | O_CREAT, 0644)) == -1) {
|
||||
serverLog(LL_WARNING, "Could not open temp cluster config file: %s", strerror(errno));
|
||||
goto cleanup;
|
||||
}
|
||||
latencyEndMonitor(latency);
|
||||
latencyAddSampleIfNeeded("cluster-config-open", latency);
|
||||
|
||||
latencyStartMonitor(latency);
|
||||
while (offset < content_size) {
|
||||
written_bytes = write(fd, ci + offset, content_size - offset);
|
||||
if (written_bytes <= 0) {
|
||||
@ -845,31 +850,52 @@ int clusterSaveConfig(int do_fsync) {
|
||||
}
|
||||
offset += written_bytes;
|
||||
}
|
||||
latencyEndMonitor(latency);
|
||||
latencyAddSampleIfNeeded("cluster-config-write", latency);
|
||||
|
||||
if (do_fsync) {
|
||||
latencyStartMonitor(latency);
|
||||
server.cluster->todo_before_sleep &= ~CLUSTER_TODO_FSYNC_CONFIG;
|
||||
if (valkey_fsync(fd) == -1) {
|
||||
serverLog(LL_WARNING, "Could not sync tmp cluster config file: %s", strerror(errno));
|
||||
goto cleanup;
|
||||
}
|
||||
latencyEndMonitor(latency);
|
||||
latencyAddSampleIfNeeded("cluster-config-fsync", latency);
|
||||
}
|
||||
|
||||
latencyStartMonitor(latency);
|
||||
if (rename(tmpfilename, server.cluster_configfile) == -1) {
|
||||
serverLog(LL_WARNING, "Could not rename tmp cluster config file: %s", strerror(errno));
|
||||
goto cleanup;
|
||||
}
|
||||
latencyEndMonitor(latency);
|
||||
latencyAddSampleIfNeeded("cluster-config-rename", latency);
|
||||
|
||||
if (do_fsync) {
|
||||
latencyStartMonitor(latency);
|
||||
if (fsyncFileDir(server.cluster_configfile) == -1) {
|
||||
serverLog(LL_WARNING, "Could not sync cluster config file dir: %s", strerror(errno));
|
||||
goto cleanup;
|
||||
}
|
||||
latencyEndMonitor(latency);
|
||||
latencyAddSampleIfNeeded("cluster-config-dir-fsync", latency);
|
||||
}
|
||||
retval = C_OK; /* If we reached this point, everything is fine. */
|
||||
|
||||
cleanup:
|
||||
if (fd != -1) close(fd);
|
||||
if (retval == C_ERR) unlink(tmpfilename);
|
||||
if (fd != -1) {
|
||||
latencyStartMonitor(latency);
|
||||
close(fd);
|
||||
latencyEndMonitor(latency);
|
||||
latencyAddSampleIfNeeded("cluster-config-close", latency);
|
||||
}
|
||||
if (retval == C_ERR) {
|
||||
latencyStartMonitor(latency);
|
||||
unlink(tmpfilename);
|
||||
latencyEndMonitor(latency);
|
||||
latencyAddSampleIfNeeded("cluster-config-unlink", latency);
|
||||
}
|
||||
sdsfree(tmpfilename);
|
||||
sdsfree(ci);
|
||||
return retval;
|
||||
|
@ -189,3 +189,15 @@ tags {"needs:debug"} {
|
||||
assert_match "*wrong number of arguments for 'latency|help' command" $e
|
||||
}
|
||||
}
|
||||
|
||||
start_cluster 1 1 {tags {"latency-monitor cluster external:skip needs:latency"} overrides {latency-monitor-threshold 1}} {
|
||||
test "Cluster config file latency" {
|
||||
# This test is just a sanity check to make sure the code path is covered.
|
||||
# We don't assert anything since we can't be sure whether it will be counted.
|
||||
R 0 cluster saveconfig
|
||||
R 1 cluster saveconfig
|
||||
R 1 cluster failover force
|
||||
R 0 latency latest
|
||||
R 1 latency latest
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user