Try to stabilize the failover call in the slot migration test (#1078)
CI reported that a replica returns the following error when performing CLUSTER FAILOVER:

```
-ERR Master is down or failed, please use CLUSTER FAILOVER FORCE
```

This may be because the primary is in the fail state, or because the cluster connection is disconnected during the primary pause. In this PR, we add some waits to wait_for_role: if the role is replica, we also wait for the replication link and the cluster link to be ok.

Signed-off-by: Binbin <binloveplay1314@qq.com>
This commit is contained in:
parent
a0b1cbad83
commit
22bc49c4a6
@ -277,6 +277,14 @@ proc cluster_get_myself id {
|
||||
return {}
|
||||
}
|
||||
|
||||
# Returns the parsed "myself's primary" CLUSTER NODES entry as a dictionary.
|
||||
proc cluster_get_myself_primary id {
    # Read the "slaveof" field of this instance's own CLUSTER NODES entry,
    # then look that node ID up from the same instance's point of view.
    set primary_id [dict get [cluster_get_myself $id] slaveof]
    return [cluster_get_node_by_id $id $primary_id]
}
|
||||
|
||||
# Get a specific node by ID by parsing the CLUSTER NODES output
|
||||
# of the instance Number 'instance_id'
|
||||
proc cluster_get_node_by_id {instance_id node_id} {
|
||||
|
@ -14,17 +14,61 @@ proc get_cluster_role {srv_idx} {
|
||||
return $role
|
||||
}
|
||||
|
||||
proc get_myself_primary_flags {srv_idx} {
    # Return the "flags" field of the CLUSTER NODES entry describing the
    # primary of server $srv_idx, as seen by that server.
    return [dict get [cluster_get_myself_primary $srv_idx] flags]
}
|
||||
|
||||
proc get_myself_primary_linkstate {srv_idx} {
    # Return the "linkstate" field of the CLUSTER NODES entry describing the
    # primary of server $srv_idx, as seen by that server.
    return [dict get [cluster_get_myself_primary $srv_idx] linkstate]
}
|
||||
|
||||
proc wait_for_role {srv_idx role} {
    # Block until server $srv_idx has settled into $role, failing the test
    # if any individual check does not become true in time.
    #
    # The link and primary-state checks only apply when a replica role
    # ("slave") is expected, so decide that once up front.
    set expect_replica [expr {$role eq "slave"}]

    # 1. Replication layer: the first element of the ROLE reply must match.
    wait_for_condition 100 100 {
        [lindex [split [R $srv_idx ROLE] " "] 0] eq $role
    } else {
        puts "R $srv_idx ROLE: [R $srv_idx ROLE]"
        fail "R $srv_idx didn't assume the replication $role in time"
    }

    # 2. Replica only: the replication link to the primary must be up.
    if {$expect_replica} {
        wait_for_condition 100 100 {
            [s -$srv_idx master_link_status] eq "up"
        } else {
            puts "R $srv_idx INFO REPLICATION: [R $srv_idx INFO REPLICATION]"
            fail "R $srv_idx didn't assume the replication link in time"
        }
    }

    # 3. Cluster layer: the role reported by the cluster view must match too.
    wait_for_condition 100 100 {
        [get_cluster_role $srv_idx] eq $role
    } else {
        puts "R $srv_idx CLUSTER NODES: [R $srv_idx CLUSTER NODES]"
        fail "R $srv_idx didn't assume the cluster $role in time"
    }

    if {$expect_replica} {
        # 4. Replica only: the primary's flags must be exactly "master",
        #    i.e. no additional flag is set on it.
        wait_for_condition 100 100 {
            [get_myself_primary_flags $srv_idx] eq "master"
        } else {
            puts "R $srv_idx CLUSTER NODES: [R $srv_idx CLUSTER NODES]"
            fail "R $srv_idx didn't assume the primary state in time"
        }

        # 5. Replica only: the cluster bus link to the primary must be
        #    reported as connected.
        wait_for_condition 100 100 {
            [get_myself_primary_linkstate $srv_idx] eq "connected"
        } else {
            puts "R $srv_idx CLUSTER NODES: [R $srv_idx CLUSTER NODES]"
            fail "R $srv_idx didn't assume the cluster primary link in time"
        }
    }

    # Finally, let the rest of the cluster catch up with the new state.
    wait_for_cluster_propagation
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user