Cluster: refactor ping/data delay handling.
This commit is contained in:
parent
00a3bc4359
commit
79de9d6c78
@ -3564,15 +3564,17 @@ void clusterCron(void) {
|
|||||||
/* If we are not receiving any data for more than half the cluster
|
/* If we are not receiving any data for more than half the cluster
|
||||||
* timeout, reconnect the link: maybe there is a connection
|
* timeout, reconnect the link: maybe there is a connection
|
||||||
* issue even if the node is alive. */
|
* issue even if the node is alive. */
|
||||||
|
mstime_t ping_delay = now - node->ping_sent;
|
||||||
|
mstime_t data_delay = now - node->data_received;
|
||||||
if (node->link && /* is connected */
|
if (node->link && /* is connected */
|
||||||
now - node->link->ctime >
|
now - node->link->ctime >
|
||||||
server.cluster_node_timeout && /* was not already reconnected */
|
server.cluster_node_timeout && /* was not already reconnected */
|
||||||
node->ping_sent && /* we already sent a ping */
|
node->ping_sent && /* we already sent a ping */
|
||||||
node->pong_received < node->ping_sent && /* still waiting pong */
|
node->pong_received < node->ping_sent && /* still waiting pong */
|
||||||
/* and we are waiting for the pong more than timeout/2 */
|
/* and we are waiting for the pong more than timeout/2 */
|
||||||
now - node->ping_sent > server.cluster_node_timeout/2 &&
|
ping_delay > server.cluster_node_timeout/2 &&
|
||||||
/* and in such interval we are not seeing any traffic at all. */
|
/* and in such interval we are not seeing any traffic at all. */
|
||||||
now - node->data_received > server.cluster_node_timeout/2)
|
data_delay > server.cluster_node_timeout/2)
|
||||||
{
|
{
|
||||||
/* Disconnect the link, it will be reconnected automatically. */
|
/* Disconnect the link, it will be reconnected automatically. */
|
||||||
freeClusterLink(node->link);
|
freeClusterLink(node->link);
|
||||||
@ -3604,18 +3606,18 @@ void clusterCron(void) {
|
|||||||
/* Check only if we have an active ping for this instance. */
|
/* Check only if we have an active ping for this instance. */
|
||||||
if (node->ping_sent == 0) continue;
|
if (node->ping_sent == 0) continue;
|
||||||
|
|
||||||
/* Compute the delay of the PONG. Note that if we already received
|
/* Check if this node looks unreachable.
|
||||||
* the PONG, then node->ping_sent is zero, so can't reach this
|
* Note that if we already received the PONG, then node->ping_sent
|
||||||
* code at all. */
|
* is zero, so we can't reach this code at all, so we don't risk
|
||||||
mstime_t delay = now - node->ping_sent;
|
* checking for a PONG delay if we didn't send the PING.
|
||||||
|
*
|
||||||
/* We consider every incoming data as proof of liveness, since
|
* We also consider every incoming data as proof of liveness, since
|
||||||
* our cluster bus link is also used for data: under heavy data
|
* our cluster bus link is also used for data: under heavy data
|
||||||
* load pong delays are possible. */
|
* load pong delays are possible. */
|
||||||
mstime_t data_delay = now - node->data_received;
|
mstime_t node_delay = (ping_delay < data_delay) ? ping_delay :
|
||||||
if (data_delay < delay) delay = data_delay;
|
data_delay;
|
||||||
|
|
||||||
if (delay > server.cluster_node_timeout) {
|
if (node_delay > server.cluster_node_timeout) {
|
||||||
/* Timeout reached. Set the node as possibly failing if it is
|
/* Timeout reached. Set the node as possibly failing if it is
|
||||||
* not already in this state. */
|
* not already in this state. */
|
||||||
if (!(node->flags & (CLUSTER_NODE_PFAIL|CLUSTER_NODE_FAIL))) {
|
if (!(node->flags & (CLUSTER_NODE_PFAIL|CLUSTER_NODE_FAIL))) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user