Add client info to SHUTDOWN / CLUSTER FAILOVER logs (#875)

Print the full client info using catClientInfoString. This
info is useful when we want to identify the source of a request.

Signed-off-by: Binbin <binloveplay1314@qq.com>
This commit is contained in:
Binbin 2024-09-08 16:26:56 +08:00 committed by GitHub
parent 6478526597
commit c642cf0134
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 27 additions and 14 deletions

View File

@ -6656,25 +6656,27 @@ int clusterCommandSpecial(client *c) {
} }
resetManualFailover(); resetManualFailover();
server.cluster->mf_end = mstime() + CLUSTER_MF_TIMEOUT; server.cluster->mf_end = mstime() + CLUSTER_MF_TIMEOUT;
sds client = catClientInfoString(sdsempty(), c, server.hide_user_data_from_log);
if (takeover) { if (takeover) {
/* A takeover does not perform any initial check. It just /* A takeover does not perform any initial check. It just
* generates a new configuration epoch for this node without * generates a new configuration epoch for this node without
* consensus, claims the primary's slots, and broadcast the new * consensus, claims the primary's slots, and broadcast the new
* configuration. */ * configuration. */
serverLog(LL_NOTICE, "Taking over the primary (user request)."); serverLog(LL_NOTICE, "Taking over the primary (user request from '%s').", client);
clusterBumpConfigEpochWithoutConsensus(); clusterBumpConfigEpochWithoutConsensus();
clusterFailoverReplaceYourPrimary(); clusterFailoverReplaceYourPrimary();
} else if (force) { } else if (force) {
/* If this is a forced failover, we don't need to talk with our /* If this is a forced failover, we don't need to talk with our
* primary to agree about the offset. We just failover taking over * primary to agree about the offset. We just failover taking over
* it without coordination. */ * it without coordination. */
serverLog(LL_NOTICE, "Forced failover user request accepted."); serverLog(LL_NOTICE, "Forced failover user request accepted (user request from '%s').", client);
server.cluster->mf_can_start = 1; server.cluster->mf_can_start = 1;
} else { } else {
serverLog(LL_NOTICE, "Manual failover user request accepted."); serverLog(LL_NOTICE, "Manual failover user request accepted (user request from '%s').", client);
clusterSendMFStart(myself->replicaof); clusterSendMFStart(myself->replicaof);
} }
sdsfree(client);
addReply(c, shared.ok); addReply(c, shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr, "set-config-epoch") && c->argc == 3) { } else if (!strcasecmp(c->argv[1]->ptr, "set-config-epoch") && c->argc == 3) {
/* CLUSTER SET-CONFIG-EPOCH <epoch> /* CLUSTER SET-CONFIG-EPOCH <epoch>

View File

@ -1278,7 +1278,7 @@ void shutdownCommand(client *c) {
} }
blockClientShutdown(c); blockClientShutdown(c);
if (prepareForShutdown(flags) == C_OK) exit(0); if (prepareForShutdown(c, flags) == C_OK) exit(0);
/* If we're here, then shutdown is ongoing (the client is still blocked) or /* If we're here, then shutdown is ongoing (the client is still blocked) or
* failed (the client has received an error). */ * failed (the client has received an error). */
} }

View File

@ -522,7 +522,7 @@ void debugCommand(client *c) {
int flags = !strcasecmp(c->argv[1]->ptr, "restart") int flags = !strcasecmp(c->argv[1]->ptr, "restart")
? (RESTART_SERVER_GRACEFULLY | RESTART_SERVER_CONFIG_REWRITE) ? (RESTART_SERVER_GRACEFULLY | RESTART_SERVER_CONFIG_REWRITE)
: RESTART_SERVER_NONE; : RESTART_SERVER_NONE;
restartServer(flags, delay); restartServer(c, flags, delay);
addReplyError(c, "failed to restart the server. Check server logs."); addReplyError(c, "failed to restart the server. Check server logs.");
} else if (!strcasecmp(c->argv[1]->ptr, "oom")) { } else if (!strcasecmp(c->argv[1]->ptr, "oom")) {
void *ptr = zmalloc(SIZE_MAX / 2); /* Should trigger an out of memory. */ void *ptr = zmalloc(SIZE_MAX / 2); /* Should trigger an out of memory. */

View File

@ -1328,7 +1328,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
else if (server.last_sig_received == SIGTERM && server.shutdown_on_sigterm) else if (server.last_sig_received == SIGTERM && server.shutdown_on_sigterm)
shutdownFlags = server.shutdown_on_sigterm; shutdownFlags = server.shutdown_on_sigterm;
if (prepareForShutdown(shutdownFlags) == C_OK) exit(0); if (prepareForShutdown(NULL, shutdownFlags) == C_OK) exit(0);
} else if (isShutdownInitiated()) { } else if (isShutdownInitiated()) {
if (server.mstime >= server.shutdown_mstime || isReadyToShutdown()) { if (server.mstime >= server.shutdown_mstime || isReadyToShutdown()) {
if (finishShutdown() == C_OK) exit(0); if (finishShutdown() == C_OK) exit(0);
@ -1560,7 +1560,7 @@ void whileBlockedCron(void) {
/* We received a SIGTERM during loading, shutting down here in a safe way, /* We received a SIGTERM during loading, shutting down here in a safe way,
* as it isn't ok doing so inside the signal handler. */ * as it isn't ok doing so inside the signal handler. */
if (server.shutdown_asap && server.loading) { if (server.shutdown_asap && server.loading) {
if (prepareForShutdown(SHUTDOWN_NOSAVE) == C_OK) exit(0); if (prepareForShutdown(NULL, SHUTDOWN_NOSAVE) == C_OK) exit(0);
serverLog(LL_WARNING, serverLog(LL_WARNING,
"SIGTERM received but errors trying to shut down the server, check the logs for more information"); "SIGTERM received but errors trying to shut down the server, check the logs for more information");
server.shutdown_asap = 0; server.shutdown_asap = 0;
@ -2139,7 +2139,7 @@ extern char **environ;
* *
* On success the function does not return, because the process turns into * On success the function does not return, because the process turns into
* a different process. On error C_ERR is returned. */ * a different process. On error C_ERR is returned. */
int restartServer(int flags, mstime_t delay) { int restartServer(client *c, int flags, mstime_t delay) {
int j; int j;
/* Check if we still have accesses to the executable that started this /* Check if we still have accesses to the executable that started this
@ -2162,7 +2162,7 @@ int restartServer(int flags, mstime_t delay) {
} }
/* Perform a proper shutdown. We don't wait for lagging replicas though. */ /* Perform a proper shutdown. We don't wait for lagging replicas though. */
if (flags & RESTART_SERVER_GRACEFULLY && prepareForShutdown(SHUTDOWN_NOW) != C_OK) { if (flags & RESTART_SERVER_GRACEFULLY && prepareForShutdown(c, SHUTDOWN_NOW) != C_OK) {
serverLog(LL_WARNING, "Can't restart: error preparing for shutdown"); serverLog(LL_WARNING, "Can't restart: error preparing for shutdown");
return C_ERR; return C_ERR;
} }
@ -4189,7 +4189,12 @@ void closeListeningSockets(int unlink_unix_socket) {
} }
} }
/* Prepare for shutting down the server. Flags: /* Prepare for shutting down the server.
*
* The client *c can be NULL, e.g. when the shutdown is triggered by a signal.
* If a client is passed in, it is used to log the client info.
*
* Flags:
* *
* - SHUTDOWN_SAVE: Save a database dump even if the server is configured not to * - SHUTDOWN_SAVE: Save a database dump even if the server is configured not to
* save any dump. * save any dump.
@ -4212,7 +4217,7 @@ void closeListeningSockets(int unlink_unix_socket) {
* errors are logged but ignored and C_OK is returned. * errors are logged but ignored and C_OK is returned.
* *
* On success, this function returns C_OK and then it's OK to call exit(0). */ * On success, this function returns C_OK and then it's OK to call exit(0). */
int prepareForShutdown(int flags) { int prepareForShutdown(client *c, int flags) {
if (isShutdownInitiated()) return C_ERR; if (isShutdownInitiated()) return C_ERR;
/* When SHUTDOWN is called while the server is loading a dataset in /* When SHUTDOWN is called while the server is loading a dataset in
@ -4225,7 +4230,13 @@ int prepareForShutdown(int flags) {
server.shutdown_flags = flags; server.shutdown_flags = flags;
serverLog(LL_NOTICE, "User requested shutdown..."); if (c != NULL) {
sds client = catClientInfoString(sdsempty(), c, server.hide_user_data_from_log);
serverLog(LL_NOTICE, "User requested shutdown... (user request from '%s')", client);
sdsfree(client);
} else {
serverLog(LL_NOTICE, "User requested shutdown...");
}
if (server.supervised_mode == SUPERVISED_SYSTEMD) serverCommunicateSystemd("STOPPING=1\n"); if (server.supervised_mode == SUPERVISED_SYSTEMD) serverCommunicateSystemd("STOPPING=1\n");
/* If we have any replicas, let them catch up the replication offset before /* If we have any replicas, let them catch up the replication offset before

View File

@ -3296,7 +3296,7 @@ void preventCommandAOF(client *c);
void preventCommandReplication(client *c); void preventCommandReplication(client *c);
void slowlogPushCurrentCommand(client *c, struct serverCommand *cmd, ustime_t duration); void slowlogPushCurrentCommand(client *c, struct serverCommand *cmd, ustime_t duration);
void updateCommandLatencyHistogram(struct hdr_histogram **latency_histogram, int64_t duration_hist); void updateCommandLatencyHistogram(struct hdr_histogram **latency_histogram, int64_t duration_hist);
int prepareForShutdown(int flags); int prepareForShutdown(client *c, int flags);
void replyToClientsBlockedOnShutdown(void); void replyToClientsBlockedOnShutdown(void);
int abortShutdown(void); int abortShutdown(void);
void afterCommand(client *c); void afterCommand(client *c);
@ -3341,7 +3341,7 @@ void dismissMemoryInChild(void);
#define RESTART_SERVER_NONE 0 #define RESTART_SERVER_NONE 0
#define RESTART_SERVER_GRACEFULLY (1 << 0) /* Do proper shutdown. */ #define RESTART_SERVER_GRACEFULLY (1 << 0) /* Do proper shutdown. */
#define RESTART_SERVER_CONFIG_REWRITE (1 << 1) /* CONFIG REWRITE before restart.*/ #define RESTART_SERVER_CONFIG_REWRITE (1 << 1) /* CONFIG REWRITE before restart.*/
int restartServer(int flags, mstime_t delay); int restartServer(client *c, int flags, mstime_t delay);
int getKeySlot(sds key); int getKeySlot(sds key);
int calculateKeySlot(sds key); int calculateKeySlot(sds key);