futriix/tests/unit/cluster/failover2.tcl

# Check the basic monitoring and failover capabilities.

start_cluster 3 4 {tags {external:skip cluster} overrides {cluster-ping-interval 1000 cluster-node-timeout 5000}} {

    test "Cluster is up" {
        wait_for_cluster_state ok
    }

    test "Cluster is writable" {
        cluster_write_test [srv 0 port]
    }

    set paused_pid [srv 0 pid]
    test "Killing one primary node" {
        pause_process $paused_pid
    }

    test "Wait for failover" {
        wait_for_condition 1000 50 {
            [s -3 role] == "master" || [s -6 role] == "master"
        } else {
            fail "No failover detected"
        }
    }

    test "Killing the new primary node" {
        if {[s -3 role] == "master"} {
            set replica_to_be_primary -6
            set paused_pid2 [srv -3 pid]
        } else {
            set replica_to_be_primary -3
            set paused_pid2 [srv -6 pid]
        }
        pause_process $paused_pid2
    }

    test "Cluster should eventually be up again" {
        for {set j 0} {$j < [llength $::servers]} {incr j} {
            if {[process_is_paused [srv -$j pid]]} continue
            wait_for_condition 1000 50 {
                [CI $j cluster_state] eq "ok"
            } else {
                fail "Cluster node $j cluster_state:[CI $j cluster_state]"
            }
        }
    }

    test "wait for new failover" {
        wait_for_condition 1000 50 {
            [s $replica_to_be_primary role] == "master"
        } else {
            fail "No failover detected"
        }
    }

    test "Restarting the previously killed primary nodes" {
        resume_process $paused_pid
        resume_process $paused_pid2
    }

    test "Make sure there is no failover timeout" {
        verify_no_log_message -3 "*Failover attempt expired*" 0
        verify_no_log_message -6 "*Failover attempt expired*" 0
    }

} ;# start_cluster
Optimize failover time when the new primary node is down again (#782) We will not reset failover_auth_time after setting it, this is used to check auth_timeout and auth_retry_time, but we should at least reset it after a successful failover. Let's assume the following scenario: 1. Two replicas initiate an election. 2. Replica 1 is elected as the primary node, and replica 2 does not have enough votes. 3. Replica 1 is down, ie the new primary node down again in a short time. 4. Replica 2 know that the new primary node is down and wants to initiate a failover, but because the failover_auth_time of the previous round has not been reset, it needs to wait for it to time out and then wait for the next retry time, which will take cluster-node-timeout * 4 times, this adds a lot of delay. There is another problem. Like we will set additional random time for failover_auth_time, such as random 500ms and replicas ranking 1s. If replica 2 receives PONG from the new primary node before sending the FAILOVER_AUTH_REQUEST, that is, before the failover_auth_time, it will change itself to a replica. If the new primary node goes down again at this time, replica 2 will use the previous failover_auth_time to initiate an election instead of going through the logic of random 500ms and replicas ranking 1s again, which may lead to unexpected consequences (for example, a low-ranking replica initiates an election and becomes the new primary node). That is, we need to reset failover_auth_time at the appropriate time. When the replica switches to a new primary, we reset it, because the existing failover_auth_time is already out of date in this case. --------- Signed-off-by: Binbin <binloveplay1314@qq.com> 2024-07-20 03:27:49 +08:00			`# Check the basic monitoring and failover capabilities.`

			`start_cluster 3 4 {tags {external:skip cluster} overrides {cluster-ping-interval 1000 cluster-node-timeout 5000}} {`

			`test "Cluster is up" {`
			`wait_for_cluster_state ok`
			`}`

			`test "Cluster is writable" {`
			`cluster_write_test [srv 0 port]`
			`}`

			`set paused_pid [srv 0 pid]`
			`test "Killing one primary node" {`
			`pause_process $paused_pid`
			`}`

			`test "Wait for failover" {`
			`wait_for_condition 1000 50 {`
			`[s -3 role] == "master" \|\| [s -6 role] == "master"`
			`} else {`
			`fail "No failover detected"`
			`}`
			`}`

			`test "Killing the new primary node" {`
			`if {[s -3 role] == "master"} {`
			`set replica_to_be_primary -6`
			`set paused_pid2 [srv -3 pid]`
			`} else {`
			`set replica_to_be_primary -3`
			`set paused_pid2 [srv -6 pid]`
			`}`
			`pause_process $paused_pid2`
			`}`

			`test "Cluster should eventually be up again" {`
			`for {set j 0} {$j < [llength $::servers]} {incr j} {`
			`if {[process_is_paused [srv -$j pid]]} continue`
			`wait_for_condition 1000 50 {`
			`[CI $j cluster_state] eq "ok"`
			`} else {`
			`fail "Cluster node $j cluster_state:[CI $j cluster_state]"`
			`}`
			`}`
			`}`

			`test "wait for new failover" {`
			`wait_for_condition 1000 50 {`
			`[s $replica_to_be_primary role] == "master"`
			`} else {`
			`fail "No failover detected"`
			`}`
			`}`

			`test "Restarting the previously killed primary nodes" {`
			`resume_process $paused_pid`
			`resume_process $paused_pid2`
			`}`

			`test "Make sure there is no failover timeout" {`
			`verify_no_log_message -3 "Failover attempt expired" 0`
			`verify_no_log_message -6 "Failover attempt expired" 0`
			`}`

			`} ;# start_cluster`