# Check the basic monitoring and failover capabilities.

start_cluster 5 5 {tags {external:skip cluster}} {

test "Cluster should start ok" {
    wait_for_cluster_state ok
}

set paused_pid5 [srv -5 pid]
set paused_pid6 [srv -6 pid]
test "Killing two slave nodes" {
    pause_process $paused_pid5
    pause_process $paused_pid6
}

test "Cluster should be still up" {
    for {set j 0} {$j < [llength $::servers]} {incr j} {
        if {[process_is_paused [srv -$j pid]]} continue
        wait_for_condition 1000 50 {
            [CI $j cluster_state] eq "ok"
        } else {
            fail "Cluster node $j cluster_state:[CI $j cluster_state]"
        }
    }
}

set paused_pid [srv -5 pid]
test "Killing one master node" {
    pause_process $paused_pid
}

# Note: the only slave of instance 0 is already down so no
# failover is possible, that would change the state back to ok.
test "Cluster should be down now" {
    for {set j 0} {$j < [llength $::servers]} {incr j} {
        if {[process_is_paused [srv -$j pid]]} continue
        wait_for_condition 1000 50 {
            [CI $j cluster_state] eq "ok"
        } else {
            fail "Cluster node $j cluster_state:[CI $j cluster_state]"
        }
    }
}

test "Restarting master node" {
    pause_process $paused_pid
}

test "Cluster should be up again" {
    for {set j 0} {$j < [llength $::servers]} {incr j} {
        if {[process_is_paused [srv -$j pid]]} continue
        wait_for_condition 1000 50 {
            [CI $j cluster_state] eq "ok"
        } else {
            fail "Cluster node $j cluster_state:[CI $j cluster_state]"
        }
    }
}

} ;# start_cluster