Speedup cluster failover. (#7948)
This commit deals with manual failover as well as non-manual failover. We tested manual failover as follows: 1. Set up a Redis cluster holding 16 partitions, each having only 1 corresponding replica. 2. Write a batch of data to the Redis cluster and make sure Redis is doing an active expire in serverCron. 3. Do a manual failover on each partition sequentially, with a time interval of 3 minutes. 4. Collect logs and do some computation work. The result (case: avgTime / maxTime / minTime): C1: 95.8ms / 227ms / 25ms; C2: 47.9ms / 96ms / 12ms; C3: 12.6ms / 27ms / 7ms. Explanation: case C1: all nodes use the version before optimization; case C2: masters use the older version while replicas use the optimized version; case C3: all nodes use the optimized version. failover time: the time between when the replica got a `manual failover request` and when it `won the failover election`. avgTime: average failover time; maxTime: maximum failover time; minTime: minimum failover time; ms: millisecond. Co-authored-by: chendq8 <c.d_q@163.com>
This commit is contained in:
parent
dac26729a9
commit
7fa56dd773
@ -1816,6 +1816,7 @@ int clusterProcessPacket(clusterLink *link) {
|
|||||||
server.cluster->mf_master_offset == 0)
|
server.cluster->mf_master_offset == 0)
|
||||||
{
|
{
|
||||||
server.cluster->mf_master_offset = sender->repl_offset;
|
server.cluster->mf_master_offset = sender->repl_offset;
|
||||||
|
clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_MANUALFAILOVER);
|
||||||
serverLog(LL_WARNING,
|
serverLog(LL_WARNING,
|
||||||
"Received replication offset for paused "
|
"Received replication offset for paused "
|
||||||
"master manual failover: %lld",
|
"master manual failover: %lld",
|
||||||
@ -2160,6 +2161,12 @@ int clusterProcessPacket(clusterLink *link) {
|
|||||||
pauseClients(now+(CLUSTER_MF_TIMEOUT*CLUSTER_MF_PAUSE_MULT));
|
pauseClients(now+(CLUSTER_MF_TIMEOUT*CLUSTER_MF_PAUSE_MULT));
|
||||||
serverLog(LL_WARNING,"Manual failover requested by replica %.40s.",
|
serverLog(LL_WARNING,"Manual failover requested by replica %.40s.",
|
||||||
sender->name);
|
sender->name);
|
||||||
|
/* We need to send a ping message to the replica, as it would carry
|
||||||
|
* `server.cluster->mf_master_offset`, which means the master paused clients
|
||||||
|
* at offset `server.cluster->mf_master_offset`, so that the replica would
|
||||||
|
* know that it is safe to set its `server.cluster->mf_can_start` to 1 so as
|
||||||
|
* to complete failover as quickly as possible. */
|
||||||
|
clusterSendPing(link, CLUSTERMSG_TYPE_PING);
|
||||||
} else if (type == CLUSTERMSG_TYPE_UPDATE) {
|
} else if (type == CLUSTERMSG_TYPE_UPDATE) {
|
||||||
clusterNode *n; /* The node the update is about. */
|
clusterNode *n; /* The node the update is about. */
|
||||||
uint64_t reportedConfigEpoch =
|
uint64_t reportedConfigEpoch =
|
||||||
@ -3434,7 +3441,10 @@ void clusterHandleManualFailover(void) {
|
|||||||
serverLog(LL_WARNING,
|
serverLog(LL_WARNING,
|
||||||
"All master replication stream processed, "
|
"All master replication stream processed, "
|
||||||
"manual failover can start.");
|
"manual failover can start.");
|
||||||
|
clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_FAILOVER);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_MANUALFAILOVER);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* -----------------------------------------------------------------------------
|
/* -----------------------------------------------------------------------------
|
||||||
@ -3709,25 +3719,35 @@ void clusterCron(void) {
|
|||||||
* handlers, or to perform potentially expensive tasks that we need to do
|
* handlers, or to perform potentially expensive tasks that we need to do
|
||||||
* a single time before replying to clients. */
|
* a single time before replying to clients. */
|
||||||
void clusterBeforeSleep(void) {
|
void clusterBeforeSleep(void) {
|
||||||
/* Handle failover, this is needed when it is likely that there is already
|
int flags = server.cluster->todo_before_sleep;
|
||||||
* the quorum from masters in order to react fast. */
|
|
||||||
if (server.cluster->todo_before_sleep & CLUSTER_TODO_HANDLE_FAILOVER)
|
|
||||||
clusterHandleSlaveFailover();
|
|
||||||
|
|
||||||
/* Update the cluster state. */
|
|
||||||
if (server.cluster->todo_before_sleep & CLUSTER_TODO_UPDATE_STATE)
|
|
||||||
clusterUpdateState();
|
|
||||||
|
|
||||||
/* Save the config, possibly using fsync. */
|
|
||||||
if (server.cluster->todo_before_sleep & CLUSTER_TODO_SAVE_CONFIG) {
|
|
||||||
int fsync = server.cluster->todo_before_sleep &
|
|
||||||
CLUSTER_TODO_FSYNC_CONFIG;
|
|
||||||
clusterSaveConfigOrDie(fsync);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Reset our flags (not strictly needed since every single function
|
/* Reset our flags (not strictly needed since every single function
|
||||||
* called for flags set should be able to clear its flag). */
|
* called for flags set should be able to clear its flag). */
|
||||||
server.cluster->todo_before_sleep = 0;
|
server.cluster->todo_before_sleep = 0;
|
||||||
|
|
||||||
|
if (flags & CLUSTER_TODO_HANDLE_MANUALFAILOVER) {
|
||||||
|
/* Handle manual failover as soon as possible, so that it does not incur the
|
||||||
|
* 100ms delay it had when it was handled only in clusterCron. */
|
||||||
|
if(nodeIsSlave(myself)) {
|
||||||
|
clusterHandleManualFailover();
|
||||||
|
if (!(server.cluster_module_flags & CLUSTER_MODULE_FLAG_NO_FAILOVER))
|
||||||
|
clusterHandleSlaveFailover();
|
||||||
|
}
|
||||||
|
} else if (flags & CLUSTER_TODO_HANDLE_FAILOVER) {
|
||||||
|
/* Handle failover, this is needed when it is likely that there is already
|
||||||
|
* the quorum from masters in order to react fast. */
|
||||||
|
clusterHandleSlaveFailover();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Update the cluster state. */
|
||||||
|
if (flags & CLUSTER_TODO_UPDATE_STATE)
|
||||||
|
clusterUpdateState();
|
||||||
|
|
||||||
|
/* Save the config, possibly using fsync. */
|
||||||
|
if (flags & CLUSTER_TODO_SAVE_CONFIG) {
|
||||||
|
int fsync = flags & CLUSTER_TODO_FSYNC_CONFIG;
|
||||||
|
clusterSaveConfigOrDie(fsync);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void clusterDoBeforeSleep(int flags) {
|
void clusterDoBeforeSleep(int flags) {
|
||||||
|
@ -77,6 +77,7 @@ typedef struct clusterLink {
|
|||||||
#define CLUSTER_TODO_UPDATE_STATE (1<<1)
|
#define CLUSTER_TODO_UPDATE_STATE (1<<1)
|
||||||
#define CLUSTER_TODO_SAVE_CONFIG (1<<2)
|
#define CLUSTER_TODO_SAVE_CONFIG (1<<2)
|
||||||
#define CLUSTER_TODO_FSYNC_CONFIG (1<<3)
|
#define CLUSTER_TODO_FSYNC_CONFIG (1<<3)
|
||||||
|
#define CLUSTER_TODO_HANDLE_MANUALFAILOVER (1<<4)
|
||||||
|
|
||||||
/* Message types.
|
/* Message types.
|
||||||
*
|
*
|
||||||
|
Loading…
x
Reference in New Issue
Block a user