From db4153645458bbc23c0394a8974b734825a656e5 Mon Sep 17 00:00:00 2001 From: Wen Hui Date: Thu, 29 Jul 2021 05:32:28 -0400 Subject: [PATCH] Remove duplicate zero-port sentinels (#9240) The issue is that when a sentinel with the same IP address and port is turned on with a different runid, its port is set to 0 but it is still present in the dictionary master->sentinels which contains all the sentinels for a master. This causes a problem when we do INFO SENTINEL because it takes the size of the dictionary of sentinels. This might also cause a problem for failover if enough sentinels have their port set to 0 since the number of voters in failover is also determined by the size of the dictionary of sentinels. This commit removes the sentinels with the port set to zero from the dictionary of sentinels. Fixes #8786 --- src/sentinel.c | 4 ++++ tests/sentinel/tests/11-port-0.tcl | 33 ++++++++++++++++++++++++++++++ tests/support/util.tcl | 13 ++++++++++++ 3 files changed, 50 insertions(+) create mode 100644 tests/sentinel/tests/11-port-0.tcl diff --git a/src/sentinel.c b/src/sentinel.c index 038240b27..40f9a2ac4 100644 --- a/src/sentinel.c +++ b/src/sentinel.c @@ -1212,6 +1212,10 @@ int sentinelUpdateSentinelAddressInAllMasters(sentinelRedisInstance *ri) { if (match->link->pc != NULL) instanceLinkCloseConnection(match->link,match->link->pc); + /* Remove any sentinel with port number set to 0 */ + if (match->addr->port == 0) + dictDelete(master->sentinels,match->name); + if (match == ri) continue; /* Address already updated for it. 
*/ /* Update the address of the matching Sentinel by copying the address diff --git a/tests/sentinel/tests/11-port-0.tcl b/tests/sentinel/tests/11-port-0.tcl new file mode 100644 index 000000000..e7bfdcc73 --- /dev/null +++ b/tests/sentinel/tests/11-port-0.tcl @@ -0,0 +1,33 @@ +source "../tests/includes/init-tests.tcl" + +test "Start/Stop sentinel on same port with a different runID should not change the total number of sentinels" { + set sentinel_id [expr $::instances_count-1] + # Kill sentinel instance + kill_instance sentinel $sentinel_id + + # Delete line with myid in sentinels config file + set orgfilename [file join "sentinel_$sentinel_id" "sentinel.conf"] + set tmpfilename "sentinel.conf_tmp" + set dirname "sentinel_$sentinel_id" + + delete_lines_with_pattern $orgfilename $tmpfilename "myid" + + # Get count of total sentinels + set a [S 0 SENTINEL master mymaster] + set original_count [lindex $a 33] + + # Restart sentinel with the modified config file + set pid [exec_instance "sentinel" $dirname $orgfilename] + lappend ::pids $pid + + after 5000 + + # Get new count of total sentinel + set b [S 0 SENTINEL master mymaster] + set curr_count [lindex $b 33] + + # If the count is not the same then fail the test + if {$original_count != $curr_count} { + fail "Sentinel count is incorrect, original count being $original_count and current count is $curr_count" + } +} diff --git a/tests/support/util.tcl b/tests/support/util.tcl index dd1a75801..69fa46c2a 100644 --- a/tests/support/util.tcl +++ b/tests/support/util.tcl @@ -865,3 +865,16 @@ proc config_set {param value {options {}}} { } } } + +proc delete_lines_with_pattern {filename tmpfilename pattern} { + set fh_in [open $filename r] + set fh_out [open $tmpfilename w] + while {[gets $fh_in line] != -1} { + if {![regexp $pattern $line]} { + puts $fh_out $line + } + } + close $fh_in + close $fh_out + file rename -force $tmpfilename $filename +}