From e2d64485b8262971776fb1be803c7296c98d1572 Mon Sep 17 00:00:00 2001 From: Oran Agra Date: Sun, 9 Aug 2020 06:08:00 +0300 Subject: [PATCH] Reduce the probability of failure when start redis in runtest-cluster #7554 (#7635) When running runtest-cluster, we first need to create a cluster using spawn_instance. A port that is not in use is chosen; however, sometimes we can't run the server on that port, possibly due to a race with another process taking it first, such as in redis/redis/runs/896537490. It may also be due to a machine problem. In order to reduce the probability of failure when starting redis in runtest-cluster, we attempt to use another port when we find that the server did not start up. Co-authored-by: Oran Agra Co-authored-by: yanhui13 --- tests/instances.tcl | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/tests/instances.tcl b/tests/instances.tcl index 677af6427..e2aa4ab13 100644 --- a/tests/instances.tcl +++ b/tests/instances.tcl @@ -59,8 +59,6 @@ proc exec_instance {type cfgfile} { proc spawn_instance {type base_port count {conf {}}} { for {set j 0} {$j < $count} {incr j} { set port [find_available_port $base_port $::redis_port_count] - incr base_port - puts "Starting $type #$j at port $port" # Create a directory for this instance. set dirname "${type}_${j}" @@ -93,10 +91,30 @@ proc spawn_instance {type base_port count {conf {}}} { close $cfg # Finally exec it and remember the pid for later cleanup. 
- set pid [exec_instance $type $cfgfile] - lappend ::pids $pid + set retry 100 + while {$retry} { + set pid [exec_instance $type $cfgfile] - # Check availability + # Check availability + if {[server_is_up 127.0.0.1 $port 100] == 0} { + puts "Starting $type #$j at port $port failed, try another" + incr retry -1 + set port [find_available_port $base_port $::redis_port_count] + set cfg [open $cfgfile a+] + if {$::tls} { + puts $cfg "tls-port $port" + } else { + puts $cfg "port $port" + } + close $cfg + } else { + puts "Starting $type #$j at port $port" + lappend ::pids $pid + break + } + } + + # Check availability finally if {[server_is_up 127.0.0.1 $port 100] == 0} { abort_sentinel_test "Problems starting $type #$j: ping timeout" }