diff --git a/tests/sentinel-tests/00-base.tcl b/tests/sentinel-tests/00-base.tcl
index 0587c625c..b8dfa70ca 100644
--- a/tests/sentinel-tests/00-base.tcl
+++ b/tests/sentinel-tests/00-base.tcl
@@ -23,20 +23,11 @@ test "Sentinels are able to auto-discover slaves" {
     }
 }
 
-test "Can change master parameters via SENTINEL SET" {
-    foreach_sentinel_id id {
-        S $id SENTINEL SET mymaster down-after-milliseconds 2000
-    }
-    foreach_sentinel_id id {
-        assert {[dict get [S $id sentinel master mymaster] down-after-milliseconds] == 2000}
-    }
-}
-
 test "Basic failover works if the master is down" {
     set old_port [RI $master_id tcp_port]
     set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
     assert {[lindex $addr 1] == $old_port}
-    R $master_id debug sleep 5
+    R $master_id debug sleep 10 ;# DEBUG SLEEP 10 on the master (assumed to stall it for 10 s)
     foreach_sentinel_id id {
         wait_for_condition 100 50 {
             [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
@@ -79,7 +70,7 @@ test "ODOWN is not possible without enough Sentinels reports" {
     set old_port [RI $master_id tcp_port]
     set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
     assert {[lindex $addr 1] == $old_port}
-    R $master_id debug sleep 5
+    R $master_id debug sleep 10 ;# DEBUG SLEEP 10 on the master (assumed to stall it for 10 s)
 
     # Make sure failover did not happened.
     set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
@@ -95,7 +86,7 @@ test "Failover is not possible without majority agreement" {
     for {set id 0} {$id < $quorum} {incr id} {
         S $id SENTINEL REMOVE mymaster
     }
-    R $master_id debug sleep 5
+    R $master_id debug sleep 10 ;# DEBUG SLEEP 10 on the master (assumed to stall it for 10 s)
 
     # Make sure failover did not happened.
     set addr [S $quorum SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
@@ -124,7 +115,7 @@ test "Failover works if we configure for absolute agreement" {
         }
     }
 
-    R $master_id debug sleep 5
+    R $master_id debug sleep 10 ;# DEBUG SLEEP 10 on the master (assumed to stall it for 10 s)
     foreach_sentinel_id id {
         wait_for_condition 1000 50 {
             [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
diff --git a/tests/sentinel-tests/01-conf-update.tcl b/tests/sentinel-tests/01-conf-update.tcl
new file mode 100644
index 000000000..493237075
--- /dev/null
+++ b/tests/sentinel-tests/01-conf-update.tcl
@@ -0,0 +1,38 @@
+# Test Sentinel configuration consistency after partitions heal.
+
+source "../sentinel-tests/includes/init-tests.tcl"
+
+test "We can failover with Sentinel 1 crashed" {
+    foreach_sentinel_id id {
+        S $id SENTINEL SET mymaster down-after-milliseconds 2000
+    }
+
+    set old_port [RI $master_id tcp_port]
+    set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    assert {[lindex $addr 1] == $old_port}
+
+    # Crash Sentinel 1 (SIGKILL via kill_instance) before stalling the master.
+    kill_instance sentinel 1
+
+    R $master_id debug sleep 10 ;# DEBUG SLEEP 10 on the master (assumed to stall it for 10 s)
+    foreach_sentinel_id id {
+        if {$id != 1} {
+            wait_for_condition 1000 50 {
+                [lindex [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+            } else {
+                fail "Sentinel $id did not received failover info"
+            }
+        }
+    }
+    set addr [S 0 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
+    set master_id [get_instance_id_by_port redis [lindex $addr 1]] ;# id of the instance now reported as master
+}
+
+test "After Sentinel 1 is restarted, its config gets updated" {
+    restart_instance sentinel 1 ;# NOTE(review): the check below relies on $old_port set by the previous test
+    wait_for_condition 1000 50 {
+        [lindex [S 1 SENTINEL GET-MASTER-ADDR-BY-NAME mymaster] 1] != $old_port
+    } else {
+        fail "Restarted Sentinel did not received failover info"
+    }
+}
diff --git a/tests/sentinel-tests/includes/init-tests.tcl b/tests/sentinel-tests/includes/init-tests.tcl
index 302f64b65..82beeea4f 100644
--- a/tests/sentinel-tests/includes/init-tests.tcl
+++ b/tests/sentinel-tests/includes/init-tests.tcl
@@ -17,7 +17,16 @@ test "Sentinels can start monitoring a master" {
     }
     foreach_sentinel_id id {
         assert {[S $id sentinel master mymaster] ne {}}
+        S $id SENTINEL SET mymaster down-after-milliseconds 2000 ;# applied to every Sentinel at init
     }
 }
 
-
+test "Sentinels can talk with the master" {
+    foreach_sentinel_id id {
+        wait_for_condition 100 50 {
+            [catch {S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster}] == 0
+        } else {
+            fail "Sentinel $id can't talk with the master."
+        }
+    }
+}
diff --git a/tests/sentinel.tcl b/tests/sentinel.tcl
index 4b49ed4c7..f1c1669ac 100644
--- a/tests/sentinel.tcl
+++ b/tests/sentinel.tcl
@@ -53,8 +53,8 @@ proc spawn_instance {type base_port count} {
         } else {
             set prgname redis-sentinel
         }
-        set sentinel_pid [exec ../../src/${prgname} $cfgfile &]
-        lappend ::pids $sentinel_pid
+        set pid [exec ../../src/${prgname} $cfgfile &]
+        lappend ::pids $pid
 
         # Check availability
         if {[server_is_up 127.0.0.1 $port 100] == 0} {
@@ -63,7 +63,7 @@ proc spawn_instance {type base_port count} {
 
         # Push the instance into the right list
         lappend ::${type}_instances [list \
-            pid $sentinel_pid \
+            pid $pid \
             host 127.0.0.1 \
             port $port \
             link [redis 127.0.0.1 $port] \
@@ -212,6 +212,13 @@ proc get_instance_attrib {type id attrib} {
     dict get [lindex [set ::${type}_instances] $id] $attrib
 }
 
+# Set the given attribute of the instance identified by type and id.
+proc set_instance_attrib {type id attrib newval} {
+    set d [lindex [set ::${type}_instances] $id] ;# copy of the instance's attribute dict
+    dict set d $attrib $newval
+    lset ::${type}_instances $id $d ;# store the updated dict back in the global list
+}
+
 # Create a master-slave cluster of the given number of total instances.
 # The first instance "0" is the master, all others are configured as
 # slaves.
@@ -219,8 +226,8 @@ proc create_redis_master_slave_cluster n {
     foreach_redis_id id {
         if {$id == 0} {
             # Our master.
-            R $id flushall
             R $id slaveof no one
+            R $id flushall ;# NOTE(review): now after SLAVEOF NO ONE, presumably so the flush runs on a master; confirm
         } elseif {$id < $n} {
             R $id slaveof [get_instance_attrib redis 0 host] \
                           [get_instance_attrib redis 0 port]
@@ -246,6 +253,47 @@ proc get_instance_id_by_port {type port} {
     fail "Instance $type port $port not found."
} +# Kill an instance of the specified type/id with SIGKILL. +# This function will mark the instance PID as -1 to remember that this instance +# is no longer running and will remove its PID from the list of pids that +# we kill at cleanup. +# +# The instance can be restarted with restart-instance. +proc kill_instance {type id} { + set pid [get_instance_attrib $type $id pid] + exec kill -9 $pid + set_instance_attrib $type $id pid -1 + set_instance_attrib $type $id link you_tried_to_talk_with_killed_instance + + # Remove the PID from the list of pids to kill at exit. + set ::pids [lsearch -all -inline -not -exact $::pids $pid] +} + +# Restart an instance previously killed by kill_instance +proc restart_instance {type id} { + set dirname "${type}_${id}" + set cfgfile [file join $dirname $type.conf] + set port [get_instance_attrib $type $id port] + + # Execute the instance with its old setup and append the new pid + # file for cleanup. + if {$type eq "redis"} { + set prgname redis-server + } else { + set prgname redis-sentinel + } + set pid [exec ../../src/${prgname} $cfgfile &] + lappend ::pids $pid + + # Check that the instance is running + if {[server_is_up 127.0.0.1 $port 100] == 0} { + abort_sentinel_test "Problems starting $type #$j: ping timeout" + } + + # Connect with it with a fresh link + set_instance_attrib $type $id link [redis 127.0.0.1 $port] +} + if {[catch main e]} { puts $::errorInfo cleanup diff --git a/tests/support/test.tcl b/tests/support/test.tcl index 96b529d7a..bf2cb0e2f 100644 --- a/tests/support/test.tcl +++ b/tests/support/test.tcl @@ -53,11 +53,17 @@ proc assert_type {type key} { # executed. 
proc wait_for_condition {maxtries delay e _else_ elsescript} { while {[incr maxtries -1] >= 0} { - if {[uplevel 1 [list expr $e]]} break + set errcode [catch {uplevel 1 [list expr $e]} result] + if {$errcode == 0} { + if {$result} break + } else { + return -code $errcode $result + } after $delay } if {$maxtries == -1} { - uplevel 1 $elsescript + set errcode [catch [uplevel 1 $elsescript] result] + return -code $errcode $result } }