Sentinel: fix no reconnect after auth-pass is changed (#10400)

When updating SENTINEL with master’s new password (command:
`SENTINEL SET mymaster auth-pass some-new-password`), 
sentinel might still keep the old connection and avoid reconnecting 
with the new password. This is because of wrong logic that traces 
the last ping (pong) time to servers. In fact it worked fine until 8631e64 
changed the condition to send ping. To resolve it with minimal risk, 
let’s disconnect master and replicas once changing password/user. 

Based on earlier work of yz1509.
This commit is contained in:
Moti Cohen 2022-03-13 10:13:47 +02:00 committed by GitHub
parent b576fbc474
commit a6bf509810
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 187 additions and 8 deletions

View File

@ -1134,6 +1134,27 @@ int sentinelTryConnectionSharing(sentinelRedisInstance *ri) {
return C_ERR;
}
/* Disconnect the relevant master and its replicas. */
void dropInstanceConnections(sentinelRedisInstance *ri) {
serverAssert(ri->flags & SRI_MASTER);
/* Disconnect with the master. */
instanceLinkCloseConnection(ri->link, ri->link->cc);
instanceLinkCloseConnection(ri->link, ri->link->pc);
/* Disconnect with all replicas. */
dictIterator *di;
dictEntry *de;
sentinelRedisInstance *repl_ri;
di = dictGetIterator(ri->slaves);
while ((de = dictNext(di)) != NULL) {
repl_ri = dictGetVal(de);
instanceLinkCloseConnection(repl_ri->link, repl_ri->link->cc);
instanceLinkCloseConnection(repl_ri->link, repl_ri->link->pc);
}
dictReleaseIterator(di);
}
/* Drop all connections to other sentinels. Returns the number of connections
* dropped.*/
int sentinelDropConnections(void) {
@ -4297,6 +4318,7 @@ void sentinelSetCommand(client *c) {
char *value = c->argv[++j]->ptr;
sdsfree(ri->auth_pass);
ri->auth_pass = strlen(value) ? sdsnew(value) : NULL;
dropInstanceConnections(ri);
changes++;
redacted = 1;
} else if (!strcasecmp(option,"auth-user") && moreargs > 0) {
@ -4304,6 +4326,7 @@ void sentinelSetCommand(client *c) {
char *value = c->argv[++j]->ptr;
sdsfree(ri->auth_user);
ri->auth_user = strlen(value) ? sdsnew(value) : NULL;
dropInstanceConnections(ri);
changes++;
} else if (!strcasecmp(option,"quorum") && moreargs > 0) {
/* quorum <count> */

View File

@ -2,6 +2,158 @@
source "../tests/includes/init-tests.tcl"
set num_sentinels [llength $::sentinel_instances]
set ::user "testuser"
set ::password "secret"
proc server_set_password {} {
foreach_redis_id id {
assert_equal {OK} [R $id CONFIG SET requirepass $::password]
assert_equal {OK} [R $id AUTH $::password]
assert_equal {OK} [R $id CONFIG SET masterauth $::password]
}
}
proc server_reset_password {} {
foreach_redis_id id {
assert_equal {OK} [R $id CONFIG SET requirepass ""]
assert_equal {OK} [R $id CONFIG SET masterauth ""]
}
}
proc server_set_acl {id} {
assert_equal {OK} [R $id ACL SETUSER $::user on >$::password allchannels +@all ]
assert_equal {OK} [R $id ACL SETUSER default off]
R $id CLIENT KILL USER default SKIPME no
assert_equal {OK} [R $id AUTH $::user $::password]
assert_equal {OK} [R $id CONFIG SET masteruser $::user]
assert_equal {OK} [R $id CONFIG SET masterauth $::password]
}
proc server_reset_acl {id} {
assert_equal {OK} [R $id ACL SETUSER default on]
assert_equal {1} [R $id ACL DELUSER $::user]
assert_equal {OK} [R $id CONFIG SET masteruser ""]
assert_equal {OK} [R $id CONFIG SET masterauth ""]
}
proc verify_sentinel_connect_replicas {id} {
foreach replica [S $id SENTINEL REPLICAS mymaster] {
if {[string match "*disconnected*" [dict get $replica flags]]} {
return 0
}
}
return 1
}
proc wait_for_sentinels_connect_servers { {is_connect 1} } {
foreach_sentinel_id id {
wait_for_condition 1000 50 {
[string match "*disconnected*" [dict get [S $id SENTINEL MASTER mymaster] flags]] != $is_connect
} else {
fail "At least some sentinel can't connect to master"
}
wait_for_condition 1000 50 {
[verify_sentinel_connect_replicas $id] == $is_connect
} else {
fail "At least some sentinel can't connect to replica"
}
}
}
test "Sentinels (re)connection following SENTINEL SET mymaster auth-pass" {
# 3 types of sentinels to test:
# (re)started while master changed pwd. Manage to connect only after setting pwd
set sent2re 0
# (up)dated in advance with master new password
set sent2up 1
# (un)touched. Yet manage to maintain (old) connection
set sent2un 2
wait_for_sentinels_connect_servers
kill_instance sentinel $sent2re
assert_equal {OK} [S $sent2up SENTINEL SET mymaster auth-pass $::password]
server_set_password
restart_instance sentinel $sent2re
# Verify sentinel that restarted failed to connect master
if {![string match "*disconnected*" [dict get [S $sent2re SENTINEL MASTER mymaster] flags]]} {
fail "Expected to be disconnected from master due to wrong password"
}
# Update restarted sentinel with master password
assert_equal {OK} [S $sent2re SENTINEL SET mymaster auth-pass $::password]
# All sentinels expected to connect successfully
wait_for_sentinels_connect_servers
# remove requirepass and verify sentinels manage to connect servers
server_reset_password
wait_for_sentinels_connect_servers
# Sanity check
verify_sentinel_auto_discovery
}
test "Sentinels (re)connection following master ACL change" {
# 2 types of sentinels to test:
# (re)started while master changed pwd. Manage to connect only after setting pwd
set sent2re 0
# (up)dated in advance with master new password
set sent2up 1
# (un)touched. Note,sentinel that kept old connection with master and didn't
# set new ACL password won't persist ACL pwd change, unlike legacy auth-pass
set sent2un 2
wait_for_sentinels_connect_servers
# kill sentinel 'sent2re' and restart it after
kill_instance sentinel $sent2re
# Update sentinel 'sent2up' with new user and pwd
assert_equal {OK} [S $sent2up SENTINEL SET mymaster auth-user $::user]
assert_equal {OK} [S $sent2up SENTINEL SET mymaster auth-pass $::password]
foreach_redis_id id {
server_set_acl $id
}
restart_instance sentinel $sent2re
# Verify sentinel that restarted failed to connect master
if {![string match "*disconnected*" [dict get [S $sent2re SENTINEL MASTER mymaster] flags]]} {
fail "Expected: Sentinel to be disconnected from master due to wrong password"
}
# Verify sentinel with updated password managed to connect
if {[string match "*disconnected*" [dict get [S $sent2up SENTINEL MASTER mymaster] flags]]} {
fail "Expected: Sentinel to be connected to master"
}
# Verify sentinel untouched gets failed to connect master
if {![string match "*disconnected*" [dict get [S $sent2un SENTINEL MASTER mymaster] flags]]} {
fail "Expected: Sentinel to be disconnected from master due to wrong password"
}
# Now update all sentinels with new password
foreach_sentinel_id id {
assert_equal {OK} [S $id SENTINEL SET mymaster auth-user $::user]
assert_equal {OK} [S $id SENTINEL SET mymaster auth-pass $::password]
}
# All sentinels expected to connect successfully
wait_for_sentinels_connect_servers
# remove requirepass and verify sentinels manage to connect servers
foreach_redis_id id {
server_reset_acl $id
}
wait_for_sentinels_connect_servers
# Sanity check
verify_sentinel_auto_discovery
}
test "Set parameters in normal case" {
set info [S 0 SENTINEL master mymaster]

View File

@ -12,6 +12,17 @@ proc restart_killed_instances {} {
}
}
proc verify_sentinel_auto_discovery {} {
set sentinels [llength $::sentinel_instances]
foreach_sentinel_id id {
wait_for_condition 1000 50 {
[dict get [S $id SENTINEL MASTER mymaster] num-other-sentinels] == ($sentinels-1)
} else {
fail "At least some sentinel can't detect some other sentinel"
}
}
}
test "(init) Restart killed instances" {
restart_killed_instances
}
@ -59,14 +70,7 @@ test "(init) Sentinels can talk with the master" {
}
test "(init) Sentinels are able to auto-discover other sentinels" {
set sentinels [llength $::sentinel_instances]
foreach_sentinel_id id {
wait_for_condition 1000 50 {
[dict get [S $id SENTINEL MASTER mymaster] num-other-sentinels] == ($sentinels-1)
} else {
fail "At least some sentinel can't detect some other sentinel"
}
}
verify_sentinel_auto_discovery
}
test "(init) Sentinels are able to auto-discover slaves" {