From 248059e50c59c9c9b896f81bf6f82d0b85e45c53 Mon Sep 17 00:00:00 2001 From: christianEQ Date: Fri, 18 Jun 2021 19:01:51 +0000 Subject: [PATCH] detect stagnating server load before 100% (diagnostic tool) Former-commit-id: 534b70643b8f39303331048d3e86475caa08b864 --- src/keydb-diagnostic-tool.cpp | 57 ++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 15 deletions(-) diff --git a/src/keydb-diagnostic-tool.cpp b/src/keydb-diagnostic-tool.cpp index b5986127a..506cc3ebe 100644 --- a/src/keydb-diagnostic-tool.cpp +++ b/src/keydb-diagnostic-tool.cpp @@ -42,6 +42,7 @@ #include #include #include +#include extern "C" { #include /* Use hiredis sds. */ #include "hiredis.h" @@ -891,23 +892,31 @@ double getServerCpuTime(redisContext *ctx) { redisReply *reply = (redisReply*)redisCommand(ctx, "INFO CPU"); if (reply->type != REDIS_REPLY_STRING) { freeReplyObject(reply); - printf("Error executing INFO command. Exiting.\r\n"); + printf("Error executing INFO command. Exiting.\n"); return -1; } double used_cpu_user, used_cpu_sys; if (extractPropertyFromInfo(reply->str, "used_cpu_user", used_cpu_user)) { - printf("Error reading user CPU usage from INFO command. Exiting.\r\n"); + printf("Error reading user CPU usage from INFO command. Exiting.\n"); return -1; } if (extractPropertyFromInfo(reply->str, "used_cpu_sys", used_cpu_sys)) { - printf("Error reading system CPU usage from INFO command. Exiting.\r\n"); + printf("Error reading system CPU usage from INFO command. Exiting.\n"); return -1; } freeReplyObject(reply); return used_cpu_user + used_cpu_sys; } +double getMean(std::deque *q) { + double sum = 0; + for (long unsigned int i = 0; i < q->size(); i++) { + sum += (*q)[i]; + } + return sum / q->size(); +} + bool isAtFullLoad(double cpuPercent, unsigned int threads) { return cpuPercent / threads >= 96; } @@ -954,7 +963,9 @@ int main(int argc, const char **argv) { double server_cpu_time, last_server_cpu_time = getServerCpuTime(ctx); struct rusage self_ru; double self_cpu_time, last_self_cpu_time = getSelfCpuTime(&self_ru); - double server_cpu_load, last_server_cpu_load, self_cpu_load, server_cpu_gain, last_server_cpu_gain; + double server_cpu_load, last_server_cpu_load = 0, self_cpu_load, server_cpu_gain; + std::deque load_gain_history = {}; + double current_gain_avg, peak_gain_avg = 0; redisReply *reply = (redisReply*)redisCommand(ctx, "INFO CPU"); if (reply->type != REDIS_REPLY_STRING) { @@ -971,19 +982,15 @@ int main(int argc, const char **argv) { printf("Server has %d threads.\n", server_threads); - while (self_threads < config.max_threads) { - printf("Creating %d clients for thread %d...\n", config.numclients, self_threads); for (int i = 0; i < config.numclients; i++) { sprintf(command, "SET %d %s\r\n", self_threads * config.numclients + i, set_value); createClient(command, strlen(command), NULL,self_threads); } - printf("Starting thread %d\n", self_threads); - benchmarkThread *t = config.threads[self_threads]; if (pthread_create(&(t->thread), NULL, execBenchmarkThread, t)){ - fprintf(stderr, "FATAL: Failed to start thread %d.\n", self_threads); + fprintf(stderr, "FATAL: Failed to start thread %d. Exiting.\n", self_threads); exit(1); } self_threads++; @@ -997,23 +1004,42 @@ int main(int argc, const char **argv) { if (server_cpu_time < 0) { break; } - printf("CPU Usage Self: %.1f%%, Server: %.1f%%\r\n", self_cpu_load, server_cpu_load); + printf("%d threads, %d total clients. CPU Usage Self: %.1f%% (%.1f%% per thread), Server: %.1f%% (%.1f%% per thread)\r", + self_threads, + self_threads * config.numclients, + self_cpu_load, + self_cpu_load / self_threads, + server_cpu_load, + server_cpu_load / server_threads); + fflush(stdout); server_cpu_gain = server_cpu_load - last_server_cpu_load; + load_gain_history.push_back(server_cpu_gain); + if (load_gain_history.size() > 5) { + load_gain_history.pop_front(); + } + current_gain_avg = getMean(&load_gain_history); + if (current_gain_avg > peak_gain_avg) { + peak_gain_avg = current_gain_avg; + } last_server_cpu_time = server_cpu_time; last_self_cpu_time = self_cpu_time; last_server_cpu_load = server_cpu_load; - - if (isAtFullLoad(server_cpu_load, server_threads)) { - printf("Server is at full CPU load.\n"); + printf("\nServer is at full CPU load. If higher performance is expected, check server configuration.\n"); break; } - if (isAtFullLoad(self_cpu_load, self_threads)) { - printf("Diagnostic tool is at full CPU load.\n"); + if (current_gain_avg <= 0.05 * peak_gain_avg) { + printf("\nServer CPU load appears to have stagnated with increasing clients.\n" + "Server does not appear to be at full load. Check network for throughput.\n"); break; } + + if (self_threads * config.numclients > 2000) { + printf("\nClient limit of 2000 reached. Server is not at full load and appears to be increasing.\n" + "2000 clients should be more than enough to reach a bottleneck. Check all configuration.\n"); + } } printf("Done.\n"); @@ -1023,3 +1049,4 @@ int main(int argc, const char **argv) { return 0; } + \ No newline at end of file