Fix WAITAOF mix-use last_offset and last_numreplicas (#11922)

There be a situation that satisfies WAIT, and then wrongly unblock
WAITAOF because we mix-use last_offset and last_numreplicas.

We update last_offset and last_numreplicas only when the condition
matches. i.e. output of either replicationCountAOFAcksByOffset or
replicationCountAcksByOffset is right.

In this case, we need to have separate last_ variables for each of
them. Added a last_aof_offset and last_aof_numreplicas for WAITAOF.

WAITAOF was added in #11713. Found while coding #11917.
A Test was added to validate that case.
This commit is contained in:
Binbin 2023-03-16 00:16:16 +08:00 committed by GitHub
parent 72f5aad014
commit 58285a6e92
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 86 additions and 5 deletions

View File

@ -3612,7 +3612,9 @@ void unblockClientWaitingReplicas(client *c) {
* since we received enough ACKs from slaves. */
void processClientsWaitingReplicas(void) {
long long last_offset = 0;
long long last_aof_offset = 0;
int last_numreplicas = 0;
int last_aof_numreplicas = 0;
listIter li;
listNode *ln;
@ -3628,7 +3630,7 @@ void processClientsWaitingReplicas(void) {
if (is_wait_aof && c->bstate.numlocal && !server.aof_enabled) {
addReplyError(c, "WAITAOF cannot be used when numlocal is set but appendonly is disabled.");
unblockClient(c);
return;
continue;
}
/* Every time we find a client that is satisfied for a given
@ -3636,10 +3638,14 @@ void processClientsWaitingReplicas(void) {
* may be unblocked without calling replicationCountAcksByOffset()
* or calling replicationCountAOFAcksByOffset()
* if the requested offset / replicas were equal or less. */
if (last_offset && last_offset >= c->bstate.reploffset &&
if (!is_wait_aof && last_offset && last_offset >= c->bstate.reploffset &&
last_numreplicas >= c->bstate.numreplicas)
{
numreplicas = last_numreplicas;
} else if (is_wait_aof && last_aof_offset && last_aof_offset >= c->bstate.reploffset &&
last_aof_numreplicas >= c->bstate.numreplicas)
{
numreplicas = last_aof_numreplicas;
} else {
numreplicas = is_wait_aof ?
replicationCountAOFAcksByOffset(c->bstate.reploffset) :
@ -3648,8 +3654,13 @@ void processClientsWaitingReplicas(void) {
/* Check if the number of replicas is satisfied. */
if (numreplicas < c->bstate.numreplicas) continue;
last_offset = c->bstate.reploffset;
last_numreplicas = numreplicas;
if (is_wait_aof) {
last_aof_offset = c->bstate.reploffset;
last_aof_numreplicas = numreplicas;
} else {
last_offset = c->bstate.reploffset;
last_numreplicas = numreplicas;
}
}
/* Check if the local constraint of WAITAOF is served */

View File

@ -1009,7 +1009,7 @@ typedef struct blockingState {
/* BLOCKED_LIST, BLOCKED_ZSET and BLOCKED_STREAM or any other Keys related blocking */
dict *keys; /* The keys we are blocked on */
/* BLOCKED_WAIT */
/* BLOCKED_WAIT and BLOCKED_WAITAOF */
int numreplicas; /* Number of replicas we are waiting for ACK. */
int numlocal; /* Indication if WAITAOF is waiting for local fsync. */
long long reploffset; /* Replication offset to reach. */

View File

@ -360,3 +360,73 @@ tags {"wait aof network external:skip"} {
}
}
}
start_server {tags {"failover external:skip"}} {
start_server {} {
start_server {} {
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
set replica1 [srv -1 client]
set replica1_pid [srv -1 pid]
set replica2 [srv -2 client]
test {setup replication for following tests} {
$replica1 replicaof $master_host $master_port
$replica2 replicaof $master_host $master_port
wait_for_sync $replica1
wait_for_sync $replica2
}
test {WAIT and WAITAOF replica multiple clients unblock - reuse last result} {
set rd [redis_deferring_client]
set rd2 [redis_deferring_client]
$master config set appendonly yes
$replica1 config set appendonly yes
$replica2 config set appendonly yes
$master config set appendfsync always
$replica1 config set appendfsync no
$replica2 config set appendfsync no
waitForBgrewriteaof $master
waitForBgrewriteaof $replica1
waitForBgrewriteaof $replica2
exec kill -SIGSTOP $replica1_pid
$rd incr foo
$rd read
$rd waitaof 0 1 0
# rd2 has a newer repl_offset
$rd2 incr foo
$rd2 read
$rd2 wait 2 0
wait_for_blocked_clients_count 2
exec kill -SIGCONT $replica1_pid
# WAIT will unblock the client first.
assert_equal [$rd2 read] {2}
# Make $replica1 catch up the repl_aof_off, then WAITAOF will unblock the client.
$replica1 config set appendfsync always
$master incr foo
assert_equal [$rd read] {1 1}
$rd ping
assert_equal [$rd read] {PONG}
$rd2 ping
assert_equal [$rd2 read] {PONG}
$rd close
$rd2 close
}
}
}
}