client struct: lazy init components and optimize struct layout (#1405)
# Refactor client structure to use modular data components

## Current State

The client structure allocates memory for replication / pubsub / multi-keys / module / blocked data for every client, even though these features are used by only a small subset of clients. In addition, the current field layout in the client struct is suboptimal, with poor alignment and unnecessary padding between fields, leading to a larger-than-necessary memory footprint of 896 bytes per client. Furthermore, fields that are frequently accessed together during operations are scattered throughout the struct, resulting in poor cache locality.

## This PR's Change

1. Lazy Initialization - **Components are only allocated when first used** (a minimal sketch of the pattern follows this description):
   - PubSubData: Created on first SUBSCRIBE/PUBLISH operation
   - ReplicationData: Initialized only for replica connections
   - ModuleData: Allocated when module interaction begins
   - BlockingState: Created when first blocking command is issued
   - MultiState: Initialized on MULTI command
2. Memory Layout Optimization (a padding example also follows below):
   - Grouped related fields for better locality
   - Moved rarely accessed fields (e.g., client->name) to the end of the struct
   - Optimized field alignment to eliminate padding
3. Additional changes:
   - Moved watched_keys to be statically allocated in the `mstate` struct
   - Relocated replication init logic to replication.c

### Key Benefits

- **Efficient Memory Usage:**
  - 45% smaller base client structure; basic clients now use 528 bytes (down from 896)
  - Better memory locality for related operations
- Performance improvement in high-throughput scenarios, with no regressions in other cases

### Performance Impact

Tested with 650 clients and 512-byte values.

#### Single Thread Performance

| Operation | Dataset | New (ops/sec) | Old (ops/sec) | Change % |
|-----------|---------|---------------|---------------|----------|
| SET | 1 key | 261,799 | 258,261 | +1.37% |
| SET | 3M keys | 209,134 | ~209,000 | ~0% |
| GET | 1 key | 281,564 | 277,965 | +1.29% |
| GET | 3M keys | 231,158 | 228,410 | +1.20% |

#### 8 IO Threads Performance

| Operation | Dataset | New (ops/sec) | Old (ops/sec) | Change % |
|-----------|---------|---------------|---------------|----------|
| SET | 1 key | 1,331,578 | 1,331,626 | -0.00% |
| SET | 3M keys | 1,254,441 | 1,152,645 | +8.83% |
| GET | 1 key | 1,293,149 | 1,289,503 | +0.28% |
| GET | 3M keys | 1,152,898 | 1,101,791 | +4.64% |

#### Pipeline Performance (3M keys)

| Operation | Pipeline Size | New (ops/sec) | Old (ops/sec) | Change % |
|-----------|---------------|---------------|---------------|----------|
| SET | 10 | 548,964 | 538,498 | +1.94% |
| SET | 20 | 606,148 | 594,872 | +1.89% |
| SET | 30 | 631,122 | 616,606 | +2.35% |
| GET | 10 | 628,482 | 624,166 | +0.69% |
| GET | 20 | 687,371 | 681,659 | +0.84% |
| GET | 30 | 725,855 | 721,102 | +0.66% |

### Observations

1. Single-threaded operations show consistent improvements (1-1.4%)
2. Multi-threaded performance shows significant gains for large datasets:
   - SET with 3M keys: +8.83% improvement
   - GET with 3M keys: +4.64% improvement
3. Pipeline operations show consistent improvements:
   - SET operations: +1.89% to +2.35%
   - GET operations: +0.66% to +0.84%
4. No performance regressions observed in any test scenario

Related issue: https://github.com/valkey-io/valkey/issues/761

---------

Signed-off-by: Uri Yagelnik <uriy@amazon.com>
Signed-off-by: uriyage <78144248+uriyage@users.noreply.github.com>
Co-authored-by: Viktor Söderqvist <viktor.soderqvist@est.tech>
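The lazy-initialization pattern from item 1 boils down to an idempotent, guarded allocator plus a NULL-safe free. Here is a minimal sketch under simplified, assumed types — the struct and field names are illustrative, not the exact Valkey definitions, and plain `calloc`/`free` stand in for `zcalloc`/`zfree`:

```c
#include <stdlib.h>

/* Illustrative component struct; the real pub/sub data holds dicts of
 * channels and patterns. */
typedef struct PubSubData {
    void *pubsub_channels;
    void *pubsub_patterns;
} PubSubData;

typedef struct client {
    PubSubData *pubsub_data; /* NULL until the first pub/sub command */
    /* ... other, hot fields ... */
} client;

/* Idempotent guard: cheap to call from every pub/sub code path. */
static void initClientPubSubData(client *c) {
    if (c->pubsub_data) return;
    c->pubsub_data = calloc(1, sizeof(*c->pubsub_data));
}

/* NULL-safe free: disconnect paths need no "was it ever used?" checks. */
static void freeClientPubSubData(client *c) {
    if (!c->pubsub_data) return;
    free(c->pubsub_data);
    c->pubsub_data = NULL;
}
```

Because the guard makes initialization idempotent, every code path that touches a component can call its init function unconditionally — which is exactly how the diffs below use initClientBlockingState(), initClientModuleData(), and initClientMultiState().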
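The padding claim in item 2 can be checked with a toy struct: C compilers insert holes so each member meets its alignment requirement, and ordering members from largest to smallest alignment removes those holes. This is a generic illustration, not the actual client struct; the sizes assume a typical LP64 platform:

```c
#include <stdint.h>
#include <stdio.h>

/* Mixed ordering: 7 bytes of padding after 'a' (so 'b' is 8-aligned)
 * and 7 bytes of tail padding after 'c'. sizeof == 24 on LP64. */
struct scattered {
    uint8_t a;
    uint64_t b;
    uint8_t c;
};

/* Largest-alignment-first ordering packs the small fields together.
 * sizeof == 16 on the same platform. */
struct grouped {
    uint64_t b;
    uint8_t a;
    uint8_t c;
};

int main(void) {
    printf("scattered: %zu bytes, grouped: %zu bytes\n",
           sizeof(struct scattered), sizeof(struct grouped));
    return 0;
}
```

Tools such as pahole report these holes directly, which is the usual way to verify a layout change like this one.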
Parent: dc4628d444
Commit: 6c09eea2bc
```diff
@@ -1960,7 +1960,7 @@ int ACLShouldKillPubsubClient(client *c, list *upcoming) {
     if (getClientType(c) == CLIENT_TYPE_PUBSUB) {
         /* Check for pattern violations. */
-        dictIterator *di = dictGetIterator(c->pubsub_patterns);
+        dictIterator *di = dictGetIterator(c->pubsub_data->pubsub_patterns);
         dictEntry *de;
         while (!kill && ((de = dictNext(di)) != NULL)) {
             o = dictGetKey(de);
@@ -1972,7 +1972,7 @@ int ACLShouldKillPubsubClient(client *c, list *upcoming) {
         /* Check for channel violations. */
         if (!kill) {
             /* Check for global channels violation. */
-            di = dictGetIterator(c->pubsub_channels);
+            di = dictGetIterator(c->pubsub_data->pubsub_channels);

             while (!kill && ((de = dictNext(di)) != NULL)) {
                 o = dictGetKey(de);
@@ -1983,7 +1983,7 @@ int ACLShouldKillPubsubClient(client *c, list *upcoming) {
         }
         if (!kill) {
             /* Check for shard channels violation. */
-            di = dictGetIterator(c->pubsubshard_channels);
+            di = dictGetIterator(c->pubsub_data->pubsubshard_channels);
            while (!kill && ((de = dictNext(di)) != NULL)) {
                o = dictGetKey(de);
                int res = ACLCheckChannelAgainstList(upcoming, o->ptr, sdslen(o->ptr), 0);
@@ -1376,7 +1376,8 @@ struct client *createAOFClient(void) {

     /* We set the fake client as a replica waiting for the synchronization
      * so that the server will not try to send replies to this client. */
-    c->repl_state = REPLICA_STATE_WAIT_BGSAVE_START;
+    initClientReplicationData(c);
+    c->repl_data->repl_state = REPLICA_STATE_WAIT_BGSAVE_START;
     return c;
 }
```
src/blocked.c (127 changed lines)
```diff
@@ -75,16 +75,25 @@ static void moduleUnblockClientOnKey(client *c, robj *key);
 static void releaseBlockedEntry(client *c, dictEntry *de, int remove_key);

 void initClientBlockingState(client *c) {
-    c->bstate.btype = BLOCKED_NONE;
-    c->bstate.timeout = 0;
-    c->bstate.unblock_on_nokey = 0;
-    c->bstate.keys = dictCreate(&objectKeyHeapPointerValueDictType);
-    c->bstate.numreplicas = 0;
-    c->bstate.numlocal = 0;
-    c->bstate.reploffset = 0;
-    c->bstate.generic_blocked_list_node = NULL;
-    c->bstate.module_blocked_handle = NULL;
-    c->bstate.async_rm_call_handle = NULL;
+    if (c->bstate) return;
+    c->bstate = zmalloc(sizeof(blockingState));
+    c->bstate->btype = BLOCKED_NONE;
+    c->bstate->timeout = 0;
+    c->bstate->unblock_on_nokey = 0;
+    c->bstate->keys = dictCreate(&objectKeyHeapPointerValueDictType);
+    c->bstate->numreplicas = 0;
+    c->bstate->numlocal = 0;
+    c->bstate->reploffset = 0;
+    c->bstate->generic_blocked_list_node = NULL;
+    c->bstate->module_blocked_handle = NULL;
+    c->bstate->async_rm_call_handle = NULL;
+}
+
+void freeClientBlockingState(client *c) {
+    if (!c->bstate) return;
+    dictRelease(c->bstate->keys);
+    zfree(c->bstate);
+    c->bstate = NULL;
 }

 /* Block a client for the specific operation type. Once the CLIENT_BLOCKED
@@ -94,8 +103,10 @@ void blockClient(client *c, int btype) {
     /* Primary client should never be blocked unless pause or module */
     serverAssert(!(c->flag.primary && btype != BLOCKED_MODULE && btype != BLOCKED_POSTPONE));

+    initClientBlockingState(c);
+
     c->flag.blocked = 1;
-    c->bstate.btype = btype;
+    c->bstate->btype = btype;
     if (!c->flag.module)
         server.blocked_clients++; /* We count blocked client stats on regular clients and not on module clients */
     server.blocked_clients_by_type[btype]++;
@@ -199,18 +210,18 @@ void queueClientForReprocessing(client *c) {
 /* Unblock a client calling the right function depending on the kind
  * of operation the client is blocking for. */
 void unblockClient(client *c, int queue_for_reprocessing) {
-    if (c->bstate.btype == BLOCKED_LIST || c->bstate.btype == BLOCKED_ZSET || c->bstate.btype == BLOCKED_STREAM) {
+    if (c->bstate->btype == BLOCKED_LIST || c->bstate->btype == BLOCKED_ZSET || c->bstate->btype == BLOCKED_STREAM) {
         unblockClientWaitingData(c);
-    } else if (c->bstate.btype == BLOCKED_WAIT) {
+    } else if (c->bstate->btype == BLOCKED_WAIT) {
         unblockClientWaitingReplicas(c);
-    } else if (c->bstate.btype == BLOCKED_MODULE) {
+    } else if (c->bstate->btype == BLOCKED_MODULE) {
         if (moduleClientIsBlockedOnKeys(c)) unblockClientWaitingData(c);
         unblockClientFromModule(c);
-    } else if (c->bstate.btype == BLOCKED_POSTPONE) {
-        serverAssert(c->bstate.postponed_list_node);
-        listDelNode(server.postponed_clients, c->bstate.postponed_list_node);
-        c->bstate.postponed_list_node = NULL;
-    } else if (c->bstate.btype == BLOCKED_SHUTDOWN) {
+    } else if (c->bstate->btype == BLOCKED_POSTPONE) {
+        serverAssert(c->bstate->postponed_list_node);
+        listDelNode(server.postponed_clients, c->bstate->postponed_list_node);
+        c->bstate->postponed_list_node = NULL;
+    } else if (c->bstate->btype == BLOCKED_SHUTDOWN) {
         /* No special cleanup. */
     } else {
         serverPanic("Unknown btype in unblockClient().");
@@ -218,7 +229,7 @@ void unblockClient(client *c, int queue_for_reprocessing) {

     /* Reset the client for a new query, unless the client has pending command to process
      * or in case a shutdown operation was canceled and we are still in the processCommand sequence */
-    if (!c->flag.pending_command && c->bstate.btype != BLOCKED_SHUTDOWN) {
+    if (!c->flag.pending_command && c->bstate->btype != BLOCKED_SHUTDOWN) {
         /* Clients that are not blocked on keys are not reprocessed so we must
          * call reqresAppendResponse here (for clients blocked on key,
          * unblockClientOnKey is called, which eventually calls processCommand,
@@ -229,12 +240,12 @@ void unblockClient(client *c, int queue_for_reprocessing) {

     /* We count blocked client stats on regular clients and not on module clients */
     if (!c->flag.module) server.blocked_clients--;
-    server.blocked_clients_by_type[c->bstate.btype]--;
+    server.blocked_clients_by_type[c->bstate->btype]--;
     /* Clear the flags, and put the client in the unblocked list so that
      * we'll process new commands in its query buffer ASAP. */
     c->flag.blocked = 0;
-    c->bstate.btype = BLOCKED_NONE;
-    c->bstate.unblock_on_nokey = 0;
+    c->bstate->btype = BLOCKED_NONE;
+    c->bstate->unblock_on_nokey = 0;
     removeClientFromTimeoutTable(c);
     if (queue_for_reprocessing) queueClientForReprocessing(c);
 }
@@ -243,22 +254,22 @@ void unblockClient(client *c, int queue_for_reprocessing) {
  * send it a reply of some kind. After this function is called,
  * unblockClient() will be called with the same client as argument. */
 void replyToBlockedClientTimedOut(client *c) {
-    if (c->bstate.btype == BLOCKED_LIST || c->bstate.btype == BLOCKED_ZSET || c->bstate.btype == BLOCKED_STREAM) {
+    if (c->bstate->btype == BLOCKED_LIST || c->bstate->btype == BLOCKED_ZSET || c->bstate->btype == BLOCKED_STREAM) {
         addReplyNullArray(c);
         updateStatsOnUnblock(c, 0, 0, 0);
-    } else if (c->bstate.btype == BLOCKED_WAIT) {
+    } else if (c->bstate->btype == BLOCKED_WAIT) {
         if (c->cmd->proc == waitCommand) {
-            addReplyLongLong(c, replicationCountAcksByOffset(c->bstate.reploffset));
+            addReplyLongLong(c, replicationCountAcksByOffset(c->bstate->reploffset));
         } else if (c->cmd->proc == waitaofCommand) {
             addReplyArrayLen(c, 2);
-            addReplyLongLong(c, server.fsynced_reploff >= c->bstate.reploffset);
-            addReplyLongLong(c, replicationCountAOFAcksByOffset(c->bstate.reploffset));
+            addReplyLongLong(c, server.fsynced_reploff >= c->bstate->reploffset);
+            addReplyLongLong(c, replicationCountAOFAcksByOffset(c->bstate->reploffset));
         } else if (c->cmd->proc == clusterCommand) {
             addReplyErrorObject(c, shared.noreplicaserr);
         } else {
             serverPanic("Unknown wait command %s in replyToBlockedClientTimedOut().", c->cmd->declared_name);
         }
-    } else if (c->bstate.btype == BLOCKED_MODULE) {
+    } else if (c->bstate->btype == BLOCKED_MODULE) {
         moduleBlockedClientTimedOut(c, 0);
     } else {
         serverPanic("Unknown btype in replyToBlockedClientTimedOut().");
@@ -274,7 +285,7 @@ void replyToClientsBlockedOnShutdown(void) {
     listRewind(server.clients, &li);
     while ((ln = listNext(&li))) {
         client *c = listNodeValue(ln);
-        if (c->flag.blocked && c->bstate.btype == BLOCKED_SHUTDOWN) {
+        if (c->flag.blocked && c->bstate->btype == BLOCKED_SHUTDOWN) {
             addReplyError(c, "Errors trying to SHUTDOWN. Check logs.");
             unblockClient(c, 1);
         }
@@ -301,7 +312,7 @@ void disconnectAllBlockedClients(void) {
          * command processing will start from scratch, and the command will
          * be either executed or rejected. (unlike LIST blocked clients for
          * which the command is already in progress in a way. */
-        if (c->bstate.btype == BLOCKED_POSTPONE) continue;
+        if (c->bstate->btype == BLOCKED_POSTPONE) continue;

        unblockClientOnError(c, "-UNBLOCKED force unblock from blocking operation, "
                                "instance state changed (master -> replica?)");
@@ -386,15 +397,17 @@ void blockForKeys(client *c, int btype, robj **keys, int numkeys, mstime_t timeo
     list *l;
     int j;

+    initClientBlockingState(c);
+
     if (!c->flag.reprocessing_command) {
         /* If the client is re-processing the command, we do not set the timeout
          * because we need to retain the client's original timeout. */
-        c->bstate.timeout = timeout;
+        c->bstate->timeout = timeout;
     }

     for (j = 0; j < numkeys; j++) {
         /* If the key already exists in the dictionary ignore it. */
-        if (!(client_blocked_entry = dictAddRaw(c->bstate.keys, keys[j], NULL))) {
+        if (!(client_blocked_entry = dictAddRaw(c->bstate->keys, keys[j], NULL))) {
             continue;
         }
         incrRefCount(keys[j]);
@@ -411,7 +424,7 @@ void blockForKeys(client *c, int btype, robj **keys, int numkeys, mstime_t timeo
             l = dictGetVal(db_blocked_existing_entry);
         }
         listAddNodeTail(l, c);
-        dictSetVal(c->bstate.keys, client_blocked_entry, listLast(l));
+        dictSetVal(c->bstate->keys, client_blocked_entry, listLast(l));

         /* We need to add the key to blocking_keys_unblock_on_nokey, if the client
          * wants to be awakened if key is deleted (like XREADGROUP) */
@@ -425,7 +438,7 @@ void blockForKeys(client *c, int btype, robj **keys, int numkeys, mstime_t timeo
             }
         }
     }
-    c->bstate.unblock_on_nokey = unblock_on_nokey;
+    c->bstate->unblock_on_nokey = unblock_on_nokey;
     /* Currently we assume key blocking will require reprocessing the command.
      * However in case of modules, they have a different way to handle the reprocessing
      * which does not require setting the pending command flag */
@@ -439,15 +452,15 @@ static void unblockClientWaitingData(client *c) {
     dictEntry *de;
     dictIterator *di;

-    if (dictSize(c->bstate.keys) == 0) return;
+    if (dictSize(c->bstate->keys) == 0) return;

-    di = dictGetIterator(c->bstate.keys);
+    di = dictGetIterator(c->bstate->keys);
     /* The client may wait for multiple keys, so unblock it for every key. */
     while ((de = dictNext(di)) != NULL) {
         releaseBlockedEntry(c, de, 0);
     }
     dictReleaseIterator(di);
-    dictEmpty(c->bstate.keys, NULL);
+    dictEmpty(c->bstate->keys, NULL);
 }

 static blocking_type getBlockedTypeByType(int type) {
@@ -546,7 +559,7 @@ static void releaseBlockedEntry(client *c, dictEntry *de, int remove_key) {
     if (listLength(l) == 0) {
         dictDelete(c->db->blocking_keys, key);
         dictDelete(c->db->blocking_keys_unblock_on_nokey, key);
-    } else if (c->bstate.unblock_on_nokey) {
+    } else if (c->bstate->unblock_on_nokey) {
         unblock_on_nokey_entry = dictFind(c->db->blocking_keys_unblock_on_nokey, key);
         /* it is not possible to have a client blocked on nokey with no matching entry */
         serverAssertWithInfo(c, key, unblock_on_nokey_entry != NULL);
@@ -555,7 +568,7 @@ static void releaseBlockedEntry(client *c, dictEntry *de, int remove_key) {
             dictDelete(c->db->blocking_keys_unblock_on_nokey, key);
         }
     }
-    if (remove_key) dictDelete(c->bstate.keys, key);
+    if (remove_key) dictDelete(c->bstate->keys, key);
 }

 void signalKeyAsReady(serverDb *db, robj *key, int type) {
@@ -593,9 +606,9 @@ static void handleClientsBlockedOnKey(readyList *rl) {
          * module is trying to accomplish right now.
          * 3. In case of XREADGROUP call we will want to unblock on any change in object type
          * or in case the key was deleted, since the group is no longer valid. */
-        if ((o != NULL && (receiver->bstate.btype == getBlockedTypeByType(o->type))) ||
-            (o != NULL && (receiver->bstate.btype == BLOCKED_MODULE)) || (receiver->bstate.unblock_on_nokey)) {
-            if (receiver->bstate.btype != BLOCKED_MODULE)
+        if ((o != NULL && (receiver->bstate->btype == getBlockedTypeByType(o->type))) ||
+            (o != NULL && (receiver->bstate->btype == BLOCKED_MODULE)) || (receiver->bstate->unblock_on_nokey)) {
+            if (receiver->bstate->btype != BLOCKED_MODULE)
                 unblockClientOnKey(receiver, rl->key);
             else
                 moduleUnblockClientOnKey(receiver, rl->key);
@@ -606,16 +619,17 @@ static void handleClientsBlockedOnKey(readyList *rl) {

 /* block a client for replica acknowledgement */
 void blockClientForReplicaAck(client *c, mstime_t timeout, long long offset, long numreplicas, int numlocal) {
-    c->bstate.timeout = timeout;
-    c->bstate.reploffset = offset;
-    c->bstate.numreplicas = numreplicas;
-    c->bstate.numlocal = numlocal;
+    initClientBlockingState(c);
+    c->bstate->timeout = timeout;
+    c->bstate->reploffset = offset;
+    c->bstate->numreplicas = numreplicas;
+    c->bstate->numlocal = numlocal;
     listAddNodeHead(server.clients_waiting_acks, c);
     /* Note that we remember the linked list node where the client is stored,
      * this way removing the client in unblockClientWaitingReplicas() will not
      * require a linear scan, but just a constant time operation. */
-    serverAssert(c->bstate.client_waiting_acks_list_node == NULL);
-    c->bstate.client_waiting_acks_list_node = listFirst(server.clients_waiting_acks);
+    serverAssert(c->bstate->client_waiting_acks_list_node == NULL);
+    c->bstate->client_waiting_acks_list_node = listFirst(server.clients_waiting_acks);
     blockClient(c, BLOCKED_WAIT);
 }
@@ -623,11 +637,12 @@ void blockClientForReplicaAck(client *c, mstime_t timeout, long long offset, lon
  * requesting to avoid processing clients commands which will be processed later
  * when the it is ready to accept them. */
 void blockPostponeClient(client *c) {
-    c->bstate.timeout = 0;
+    initClientBlockingState(c);
+    c->bstate->timeout = 0;
     blockClient(c, BLOCKED_POSTPONE);
     listAddNodeTail(server.postponed_clients, c);
-    serverAssert(c->bstate.postponed_list_node == NULL);
-    c->bstate.postponed_list_node = listLast(server.postponed_clients);
+    serverAssert(c->bstate->postponed_list_node == NULL);
+    c->bstate->postponed_list_node = listLast(server.postponed_clients);
     /* Mark this client to execute its command */
     c->flag.pending_command = 1;
 }
@@ -644,13 +659,13 @@ void blockClientShutdown(client *c) {
 static void unblockClientOnKey(client *c, robj *key) {
     dictEntry *de;

-    de = dictFind(c->bstate.keys, key);
+    de = dictFind(c->bstate->keys, key);
     releaseBlockedEntry(c, de, 1);

     /* Only in case of blocking API calls, we might be blocked on several keys.
        however we should force unblock the entire blocking keys */
-    serverAssert(c->bstate.btype == BLOCKED_STREAM || c->bstate.btype == BLOCKED_LIST ||
-                 c->bstate.btype == BLOCKED_ZSET);
+    serverAssert(c->bstate->btype == BLOCKED_STREAM || c->bstate->btype == BLOCKED_LIST ||
+                 c->bstate->btype == BLOCKED_ZSET);

     /* We need to unblock the client before calling processCommandAndResetClient
      * because it checks the CLIENT_BLOCKED flag */
@@ -712,7 +727,7 @@ static void moduleUnblockClientOnKey(client *c, robj *key) {
  * command with timeout reply. */
 void unblockClientOnTimeout(client *c) {
     /* The client has been unlocked (in the moduleUnblocked list), return ASAP. */
-    if (c->bstate.btype == BLOCKED_MODULE && isModuleClientUnblocked(c)) return;
+    if (c->bstate->btype == BLOCKED_MODULE && isModuleClientUnblocked(c)) return;

     replyToBlockedClientTimedOut(c);
     if (c->flag.pending_command) c->flag.pending_command = 0;
```
```diff
@@ -1006,7 +1006,7 @@ getNodeByQuery(client *c, struct serverCommand *cmd, robj **argv, int argc, int
         /* If CLIENT_MULTI flag is not set EXEC is just going to return an
          * error. */
         if (!c->flag.multi) return myself;
-        ms = &c->mstate;
+        ms = c->mstate;
     } else {
         /* In order to have a single codepath create a fake Multi State
          * structure if the client is not in MULTI/EXEC state, this way
@@ -1023,7 +1023,7 @@ getNodeByQuery(client *c, struct serverCommand *cmd, robj **argv, int argc, int

     /* Only valid for sharded pubsub as regular pubsub can operate on any node and bypasses this layer. */
     int pubsubshard_included =
-        (cmd_flags & CMD_PUBSUB) || (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_PUBSUB));
+        (cmd_flags & CMD_PUBSUB) || (c->cmd->proc == execCommand && (c->mstate->cmd_flags & CMD_PUBSUB));

     /* Check that all the keys are in the same hash slot, and obtain this
      * slot and the node associated. */
@@ -1176,7 +1176,7 @@ getNodeByQuery(client *c, struct serverCommand *cmd, robj **argv, int argc, int
      * node is a replica and the request is about a hash slot our primary
      * is serving, we can reply without redirection. */
     int is_write_command =
-        (cmd_flags & CMD_WRITE) || (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_WRITE));
+        (cmd_flags & CMD_WRITE) || (c->cmd->proc == execCommand && (c->mstate->cmd_flags & CMD_WRITE));
     if ((c->flag.readonly || pubsubshard_included) && !is_write_command && clusterNodeIsReplica(myself) &&
         clusterNodeGetPrimary(myself) == n) {
         return myself;
@@ -1233,14 +1233,14 @@ void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_co
  * returns 1. Otherwise 0 is returned and no operation is performed. */
 int clusterRedirectBlockedClientIfNeeded(client *c) {
     clusterNode *myself = getMyClusterNode();
-    if (c->flag.blocked && (c->bstate.btype == BLOCKED_LIST || c->bstate.btype == BLOCKED_ZSET ||
-                            c->bstate.btype == BLOCKED_STREAM || c->bstate.btype == BLOCKED_MODULE)) {
+    if (c->flag.blocked && (c->bstate->btype == BLOCKED_LIST || c->bstate->btype == BLOCKED_ZSET ||
+                            c->bstate->btype == BLOCKED_STREAM || c->bstate->btype == BLOCKED_MODULE)) {
         dictEntry *de;
         dictIterator *di;

         /* If the client is blocked on module, but not on a specific key,
          * don't unblock it. */
-        if (c->bstate.btype == BLOCKED_MODULE && !moduleClientIsBlockedOnKeys(c)) return 0;
+        if (c->bstate->btype == BLOCKED_MODULE && !moduleClientIsBlockedOnKeys(c)) return 0;

         /* If the cluster is down, unblock the client with the right error.
          * If the cluster is configured to allow reads on cluster down, we
@@ -1252,7 +1252,7 @@ int clusterRedirectBlockedClientIfNeeded(client *c) {
         }

         /* All keys must belong to the same slot, so check first key only. */
-        di = dictGetIterator(c->bstate.keys);
+        di = dictGetIterator(c->bstate->keys);
         if ((de = dictNext(di)) != NULL) {
             robj *key = dictGetKey(de);
             int slot = keyHashSlot((char *)key->ptr, sdslen(key->ptr));
@@ -6574,7 +6574,7 @@ void clusterCommandSetSlot(client *c) {
          * replication, it would also unlikely win the election.
          *
          * And 0x702ff is 7.2.255, we only support new versions in this case. */
-        if (r->repl_state == REPLICA_STATE_ONLINE && r->replica_version > 0x702ff) {
+        if (r->repl_data->repl_state == REPLICA_STATE_ONLINE && r->repl_data->replica_version > 0x702ff) {
            num_eligible_replicas++;
        }
    }
```
src/module.c (100 changed lines)
```diff
@@ -651,6 +651,19 @@ void *VM_PoolAlloc(ValkeyModuleCtx *ctx, size_t bytes) {
  * Helpers for modules API implementation
  * -------------------------------------------------------------------------- */

+static void initClientModuleData(client *c) {
+    if (c->module_data) return;
+    c->module_data = zcalloc(sizeof(ClientModuleData));
+}
+
+void freeClientModuleData(client *c) {
+    if (!c->module_data) return;
+    /* Free the ValkeyModuleBlockedClient held onto for reprocessing if not already freed. */
+    zfree(c->module_data->module_blocked_client);
+    zfree(c->module_data);
+    c->module_data = NULL;
+}
+
 void moduleEnqueueLoadModule(sds path, sds *argv, int argc) {
     int i;
     struct moduleLoadQueueEntry *loadmod;
@@ -721,11 +734,11 @@ void moduleReleaseTempClient(client *c) {
     c->flag.fake = 1;
     c->user = NULL; /* Root user */
     c->cmd = c->lastcmd = c->realcmd = c->io_parsed_cmd = NULL;
-    if (c->bstate.async_rm_call_handle) {
-        ValkeyModuleAsyncRMCallPromise *promise = c->bstate.async_rm_call_handle;
+    if (c->bstate && c->bstate->async_rm_call_handle) {
+        ValkeyModuleAsyncRMCallPromise *promise = c->bstate->async_rm_call_handle;
         promise->c = NULL; /* Remove the client from the promise so it will no longer be possible to abort it. */
         freeValkeyModuleAsyncRMCallPromise(promise);
-        c->bstate.async_rm_call_handle = NULL;
+        c->bstate->async_rm_call_handle = NULL;
     }
     moduleTempClients[moduleTempClientCount++] = c;
 }
@@ -897,7 +910,7 @@ static CallReply *moduleParseReply(client *c, ValkeyModuleCtx *ctx) {

 void moduleCallCommandUnblockedHandler(client *c) {
     ValkeyModuleCtx ctx;
-    ValkeyModuleAsyncRMCallPromise *promise = c->bstate.async_rm_call_handle;
+    ValkeyModuleAsyncRMCallPromise *promise = c->bstate->async_rm_call_handle;
     serverAssert(promise);
     ValkeyModule *module = promise->module;
     if (!promise->on_unblocked) {
@@ -6569,7 +6582,7 @@ ValkeyModuleCallReply *VM_Call(ValkeyModuleCtx *ctx, const char *cmdname, const
             .ctx = (ctx->flags & VALKEYMODULE_CTX_AUTO_MEMORY) ? ctx : NULL,
         };
         reply = callReplyCreatePromise(promise);
-        c->bstate.async_rm_call_handle = promise;
+        c->bstate->async_rm_call_handle = promise;
         if (!(call_flags & CMD_CALL_PROPAGATE_AOF)) {
             /* No need for AOF propagation, set the relevant flags of the client */
             c->flag.module_prevent_aof_prop = 1;
@@ -7679,7 +7692,7 @@ void VM_LatencyAddSample(const char *event, mstime_t latency) {

 /* Returns 1 if the client already in the moduleUnblocked list, 0 otherwise. */
 int isModuleClientUnblocked(client *c) {
-    ValkeyModuleBlockedClient *bc = c->bstate.module_blocked_handle;
+    ValkeyModuleBlockedClient *bc = c->bstate->module_blocked_handle;

     return bc->unblocked == 1;
 }
@@ -7697,7 +7710,7 @@ int isModuleClientUnblocked(client *c) {
  * The structure ValkeyModuleBlockedClient will be always deallocated when
  * running the list of clients blocked by a module that need to be unblocked. */
 void unblockClientFromModule(client *c) {
-    ValkeyModuleBlockedClient *bc = c->bstate.module_blocked_handle;
+    ValkeyModuleBlockedClient *bc = c->bstate->module_blocked_handle;

     /* Call the disconnection callback if any. Note that
      * bc->disconnect_callback is set to NULL if the client gets disconnected
@@ -7765,9 +7778,10 @@ ValkeyModuleBlockedClient *moduleBlockClient(ValkeyModuleCtx *ctx,
     client *c = ctx->client;
     int islua = scriptIsRunning();
     int ismulti = server.in_exec;
+    initClientBlockingState(c);

-    c->bstate.module_blocked_handle = zmalloc(sizeof(ValkeyModuleBlockedClient));
-    ValkeyModuleBlockedClient *bc = c->bstate.module_blocked_handle;
+    c->bstate->module_blocked_handle = zmalloc(sizeof(ValkeyModuleBlockedClient));
+    ValkeyModuleBlockedClient *bc = c->bstate->module_blocked_handle;
     ctx->module->blocked_clients++;

     /* We need to handle the invalid operation of calling modules blocking
@@ -7795,7 +7809,7 @@ ValkeyModuleBlockedClient *moduleBlockClient(ValkeyModuleCtx *ctx,
     if (timeout_ms) {
         mstime_t now = mstime();
         if (timeout_ms > LLONG_MAX - now) {
-            c->bstate.module_blocked_handle = NULL;
+            c->bstate->module_blocked_handle = NULL;
             addReplyError(c, "timeout is out of range"); /* 'timeout_ms+now' would overflow */
             return bc;
         }
@@ -7803,20 +7817,20 @@ ValkeyModuleBlockedClient *moduleBlockClient(ValkeyModuleCtx *ctx,
     }

     if (islua || ismulti) {
-        c->bstate.module_blocked_handle = NULL;
+        c->bstate->module_blocked_handle = NULL;
         addReplyError(c, islua ? "Blocking module command called from Lua script"
                                : "Blocking module command called from transaction");
     } else if (ctx->flags & VALKEYMODULE_CTX_BLOCKED_REPLY) {
-        c->bstate.module_blocked_handle = NULL;
+        c->bstate->module_blocked_handle = NULL;
         addReplyError(c, "Blocking module command called from a Reply callback context");
     } else if (!auth_reply_callback && clientHasModuleAuthInProgress(c)) {
-        c->bstate.module_blocked_handle = NULL;
+        c->bstate->module_blocked_handle = NULL;
         addReplyError(c, "Clients undergoing module based authentication can only be blocked on auth");
     } else {
         if (keys) {
             blockForKeys(c, BLOCKED_MODULE, keys, numkeys, timeout, flags & VALKEYMODULE_BLOCK_UNBLOCK_DELETED);
         } else {
-            c->bstate.timeout = timeout;
+            c->bstate->timeout = timeout;
             blockClient(c, BLOCKED_MODULE);
         }
     }
@@ -7912,7 +7926,7 @@ void moduleUnregisterAuthCBs(ValkeyModule *module) {
 /* Search for & attempt next module auth callback after skipping the ones already attempted.
  * Returns the result of the module auth callback. */
 int attemptNextAuthCb(client *c, robj *username, robj *password, robj **err) {
-    int handle_next_callback = c->module_auth_ctx == NULL;
+    int handle_next_callback = (!c->module_data || c->module_data->module_auth_ctx == NULL);
     ValkeyModuleAuthCtx *cur_auth_ctx = NULL;
     listNode *ln;
     listIter li;
@@ -7922,7 +7936,7 @@ int attemptNextAuthCb(client *c, robj *username, robj *password, robj **err) {
         cur_auth_ctx = listNodeValue(ln);
         /* Skip over the previously attempted auth contexts. */
         if (!handle_next_callback) {
-            handle_next_callback = cur_auth_ctx == c->module_auth_ctx;
+            handle_next_callback = cur_auth_ctx == c->module_data->module_auth_ctx;
             continue;
         }
         /* Remove the module auth complete flag before we attempt the next cb. */
@@ -7931,7 +7945,8 @@ int attemptNextAuthCb(client *c, robj *username, robj *password, robj **err) {
         moduleCreateContext(&ctx, cur_auth_ctx->module, VALKEYMODULE_CTX_NONE);
         ctx.client = c;
         *err = NULL;
-        c->module_auth_ctx = cur_auth_ctx;
+        initClientModuleData(c);
+        c->module_data->module_auth_ctx = cur_auth_ctx;
         result = cur_auth_ctx->auth_cb(&ctx, username, password, err);
         moduleFreeContext(&ctx);
         if (result == VALKEYMODULE_AUTH_HANDLED) break;
@@ -7947,8 +7962,8 @@ int attemptNextAuthCb(client *c, robj *username, robj *password, robj **err) {
  * return the result of the reply callback. */
 int attemptBlockedAuthReplyCallback(client *c, robj *username, robj *password, robj **err) {
     int result = VALKEYMODULE_AUTH_NOT_HANDLED;
-    if (!c->module_blocked_client) return result;
-    ValkeyModuleBlockedClient *bc = (ValkeyModuleBlockedClient *)c->module_blocked_client;
+    if (!c->module_data || !c->module_data->module_blocked_client) return result;
+    ValkeyModuleBlockedClient *bc = (ValkeyModuleBlockedClient *)c->module_data->module_blocked_client;
     bc->client = c;
     if (bc->auth_reply_cb) {
         ValkeyModuleCtx ctx;
@@ -7961,7 +7976,7 @@ int attemptBlockedAuthReplyCallback(client *c, robj *username, robj *password, r
         moduleFreeContext(&ctx);
     }
     moduleInvokeFreePrivDataCallback(c, bc);
-    c->module_blocked_client = NULL;
+    c->module_data->module_blocked_client = NULL;
     c->lastcmd->microseconds += bc->background_duration;
     bc->module->blocked_clients--;
     zfree(bc);
@@ -7989,7 +8004,7 @@ int checkModuleAuthentication(client *c, robj *username, robj *password, robj **
         serverAssert(result == VALKEYMODULE_AUTH_HANDLED);
         return AUTH_BLOCKED;
     }
-    c->module_auth_ctx = NULL;
+    if (c->module_data) c->module_data->module_auth_ctx = NULL;
     if (result == VALKEYMODULE_AUTH_NOT_HANDLED) {
         c->flag.module_auth_has_result = 0;
         return AUTH_NOT_HANDLED;
@@ -8011,7 +8026,7 @@ int checkModuleAuthentication(client *c, robj *username, robj *password, robj **
  * This function returns 1 if client was served (and should be unblocked) */
 int moduleTryServeClientBlockedOnKey(client *c, robj *key) {
     int served = 0;
-    ValkeyModuleBlockedClient *bc = c->bstate.module_blocked_handle;
+    ValkeyModuleBlockedClient *bc = c->bstate->module_blocked_handle;

     /* Protect against re-processing: don't serve clients that are already
      * in the unblocking list for any reason (including VM_UnblockClient()
@@ -8223,14 +8238,14 @@ int moduleUnblockClientByHandle(ValkeyModuleBlockedClient *bc, void *privdata) {
 /* This API is used by the server core to unblock a client that was blocked
  * by a module. */
 void moduleUnblockClient(client *c) {
-    ValkeyModuleBlockedClient *bc = c->bstate.module_blocked_handle;
+    ValkeyModuleBlockedClient *bc = c->bstate->module_blocked_handle;
     moduleUnblockClientByHandle(bc, NULL);
 }

 /* Return true if the client 'c' was blocked by a module using
  * VM_BlockClientOnKeys(). */
 int moduleClientIsBlockedOnKeys(client *c) {
-    ValkeyModuleBlockedClient *bc = c->bstate.module_blocked_handle;
+    ValkeyModuleBlockedClient *bc = c->bstate->module_blocked_handle;
     return bc->blocked_on_keys;
 }
@@ -8340,7 +8355,7 @@ void moduleHandleBlockedClients(void) {
         /* Hold onto the blocked client if module auth is in progress. The reply callback is invoked
          * when the client is reprocessed. */
         if (c && clientHasModuleAuthInProgress(c)) {
-            c->module_blocked_client = bc;
+            c->module_data->module_blocked_client = bc;
         } else {
             /* Free privdata if any. */
             moduleInvokeFreePrivDataCallback(c, bc);
@@ -8402,9 +8417,9 @@ void moduleHandleBlockedClients(void) {
  * moduleBlockedClientTimedOut().
  */
 int moduleBlockedClientMayTimeout(client *c) {
-    if (c->bstate.btype != BLOCKED_MODULE) return 1;
+    if (c->bstate->btype != BLOCKED_MODULE) return 1;

-    ValkeyModuleBlockedClient *bc = c->bstate.module_blocked_handle;
+    ValkeyModuleBlockedClient *bc = c->bstate->module_blocked_handle;
     return (bc && bc->timeout_callback != NULL);
 }
@@ -8420,7 +8435,7 @@ int moduleBlockedClientMayTimeout(client *c) {
  * of the client synchronously. This ensures that we can reply to the client before
  * resetClient() is called. */
 void moduleBlockedClientTimedOut(client *c, int from_module) {
-    ValkeyModuleBlockedClient *bc = c->bstate.module_blocked_handle;
+    ValkeyModuleBlockedClient *bc = c->bstate->module_blocked_handle;

     /* Protect against re-processing: don't serve clients that are already
      * in the unblocking list for any reason (including VM_UnblockClient()
@@ -9559,16 +9574,16 @@ static void eventLoopHandleOneShotEvents(void) {
  * A client's user can be changed through the AUTH command, module
  * authentication, and when a client is freed. */
 void moduleNotifyUserChanged(client *c) {
-    if (c->auth_callback) {
-        c->auth_callback(c->id, c->auth_callback_privdata);
+    if (!c->module_data || !c->module_data->auth_callback) return;

-        /* The callback will fire exactly once, even if the user remains
-         * the same. It is expected to completely clean up the state
-         * so all references are cleared here. */
-        c->auth_callback = NULL;
-        c->auth_callback_privdata = NULL;
-        c->auth_module = NULL;
-    }
+    c->module_data->auth_callback(c->id, c->module_data->auth_callback_privdata);
+
+    /* The callback will fire exactly once, even if the user remains
+     * the same. It is expected to completely clean up the state
+     * so all references are cleared here. */
+    c->module_data->auth_callback = NULL;
+    c->module_data->auth_callback_privdata = NULL;
+    c->module_data->auth_module = NULL;
 }

 void revokeClientAuthentication(client *c) {
@@ -9599,9 +9614,9 @@ static void moduleFreeAuthenticatedClients(ValkeyModule *module) {
     listRewind(server.clients, &li);
     while ((ln = listNext(&li)) != NULL) {
         client *c = listNodeValue(ln);
-        if (!c->auth_module) continue;
+        if (!c->module_data || !c->module_data->auth_module) continue;

-        ValkeyModule *auth_module = (ValkeyModule *)c->auth_module;
+        ValkeyModule *auth_module = (ValkeyModule *)c->module_data->auth_module;
         if (auth_module == module) {
             revokeClientAuthentication(c);
         }
@@ -9909,9 +9924,10 @@ static int authenticateClientWithUser(ValkeyModuleCtx *ctx,
     }

     if (callback) {
-        ctx->client->auth_callback = callback;
-        ctx->client->auth_callback_privdata = privdata;
-        ctx->client->auth_module = ctx->module;
+        initClientModuleData(ctx->client);
+        ctx->client->module_data->auth_callback = callback;
+        ctx->client->module_data->auth_callback_privdata = privdata;
+        ctx->client->module_data->auth_module = ctx->module;
     }

     if (client_id) {
@@ -228,5 +228,6 @@ int moduleLateDefrag(robj *key, robj *value, unsigned long *cursor, monotime end
 void moduleDefragGlobals(void);
 void *moduleGetHandleByName(char *modulename);
 int moduleIsModuleCommand(void *module_handle, struct serverCommand *cmd);
+void freeClientModuleData(client *c);

 #endif /* _MODULE_H_ */
```
src/multi.c (113 changed lines)
```diff
@@ -33,33 +33,42 @@

 /* Client state initialization for MULTI/EXEC */
 void initClientMultiState(client *c) {
-    c->mstate.commands = NULL;
-    c->mstate.count = 0;
-    c->mstate.cmd_flags = 0;
-    c->mstate.cmd_inv_flags = 0;
-    c->mstate.argv_len_sums = 0;
-    c->mstate.alloc_count = 0;
+    if (c->mstate) return;
+    c->mstate = zcalloc(sizeof(multiState));
 }

-/* Release all the resources associated with MULTI/EXEC state */
-void freeClientMultiState(client *c) {
-    int j;
-
-    for (j = 0; j < c->mstate.count; j++) {
+void freeClientMultiStateCmds(client *c) {
+    for (int j = 0; j < c->mstate->count; j++) {
         int i;
-        multiCmd *mc = c->mstate.commands + j;
+        multiCmd *mc = c->mstate->commands + j;

         for (i = 0; i < mc->argc; i++) decrRefCount(mc->argv[i]);
         zfree(mc->argv);
     }
-    zfree(c->mstate.commands);
+
+    zfree(c->mstate->commands);
+    c->mstate->commands = NULL;
+}
+
+/* Release all the resources associated with MULTI/EXEC state */
+void freeClientMultiState(client *c) {
+    if (!c->mstate) return;
+
+    freeClientMultiStateCmds(c);
+    unwatchAllKeys(c);
+    zfree(c->mstate);
+    c->mstate = NULL;
 }

 void resetClientMultiState(client *c) {
-    if (c->mstate.commands) {
-        freeClientMultiState(c);
-        initClientMultiState(c);
-    }
+    if (!c->mstate || !c->mstate->commands) return;
+
+    freeClientMultiStateCmds(c);
+    c->mstate->count = 0;
+    c->mstate->cmd_flags = 0;
+    c->mstate->cmd_inv_flags = 0;
+    c->mstate->argv_len_sums = 0;
+    c->mstate->alloc_count = 0;
 }

 /* Add a new command into the MULTI commands queue */
@@ -71,26 +80,27 @@ void queueMultiCommand(client *c, uint64_t cmd_flags) {
      * bother to read previous responses and didn't notice the multi was already
      * aborted. */
     if (c->flag.dirty_cas || c->flag.dirty_exec) return;
-    if (c->mstate.count == 0) {
+    if (!c->mstate) initClientMultiState(c);
+    if (c->mstate->count == 0) {
         /* If a client is using multi/exec, assuming it is used to execute at least
          * two commands. Hence, creating by default size of 2. */
-        c->mstate.commands = zmalloc(sizeof(multiCmd) * 2);
-        c->mstate.alloc_count = 2;
+        c->mstate->commands = zmalloc(sizeof(multiCmd) * 2);
+        c->mstate->alloc_count = 2;
     }
-    if (c->mstate.count == c->mstate.alloc_count) {
-        c->mstate.alloc_count = c->mstate.alloc_count < INT_MAX / 2 ? c->mstate.alloc_count * 2 : INT_MAX;
-        c->mstate.commands = zrealloc(c->mstate.commands, sizeof(multiCmd) * (c->mstate.alloc_count));
+    if (c->mstate->count == c->mstate->alloc_count) {
+        c->mstate->alloc_count = c->mstate->alloc_count < INT_MAX / 2 ? c->mstate->alloc_count * 2 : INT_MAX;
+        c->mstate->commands = zrealloc(c->mstate->commands, sizeof(multiCmd) * (c->mstate->alloc_count));
     }
-    mc = c->mstate.commands + c->mstate.count;
+    mc = c->mstate->commands + c->mstate->count;
     mc->cmd = c->cmd;
     mc->argc = c->argc;
     mc->argv = c->argv;
     mc->argv_len = c->argv_len;

-    c->mstate.count++;
-    c->mstate.cmd_flags |= cmd_flags;
-    c->mstate.cmd_inv_flags |= ~cmd_flags;
-    c->mstate.argv_len_sums += c->argv_len_sum + sizeof(robj *) * c->argc;
+    c->mstate->count++;
+    c->mstate->cmd_flags |= cmd_flags;
+    c->mstate->cmd_inv_flags |= ~cmd_flags;
+    c->mstate->argv_len_sums += c->argv_len_sum + sizeof(robj *) * c->argc;

     /* Reset the client's args since we copied them into the mstate and shouldn't
      * reference them from c anymore. */
@@ -118,6 +128,7 @@ void flagTransaction(client *c) {
 }

 void multiCommand(client *c) {
+    if (!c->mstate) initClientMultiState(c);
     c->flag.multi = 1;
     addReply(c, shared.ok);
 }
@@ -195,12 +206,12 @@ void execCommand(client *c) {
     orig_argv_len = c->argv_len;
     orig_argc = c->argc;
     orig_cmd = c->cmd;
-    addReplyArrayLen(c, c->mstate.count);
-    for (j = 0; j < c->mstate.count; j++) {
-        c->argc = c->mstate.commands[j].argc;
-        c->argv = c->mstate.commands[j].argv;
-        c->argv_len = c->mstate.commands[j].argv_len;
-        c->cmd = c->realcmd = c->mstate.commands[j].cmd;
+    addReplyArrayLen(c, c->mstate->count);
+    for (j = 0; j < c->mstate->count; j++) {
+        c->argc = c->mstate->commands[j].argc;
+        c->argv = c->mstate->commands[j].argv;
+        c->argv_len = c->mstate->commands[j].argv_len;
+        c->cmd = c->realcmd = c->mstate->commands[j].cmd;

         /* ACL permissions are also checked at the time of execution in case
          * they were changed after the commands were queued. */
@@ -234,10 +245,10 @@ void execCommand(client *c) {
     }

     /* Commands may alter argc/argv, restore mstate. */
-    c->mstate.commands[j].argc = c->argc;
-    c->mstate.commands[j].argv = c->argv;
-    c->mstate.commands[j].argv_len = c->argv_len;
-    c->mstate.commands[j].cmd = c->cmd;
+    c->mstate->commands[j].argc = c->argc;
+    c->mstate->commands[j].argv = c->argv;
+    c->mstate->commands[j].argv_len = c->argv_len;
+    c->mstate->commands[j].cmd = c->cmd;

     /* The original argv has already been processed for slowlog and monitor,
      * so we can safely free it before proceeding to the next command. */
@@ -304,10 +315,10 @@ void watchForKey(client *c, robj *key) {
     listNode *ln;
     watchedKey *wk;

-    if (listLength(c->watched_keys) == 0) server.watching_clients++;
+    if (listLength(&c->mstate->watched_keys) == 0) server.watching_clients++;

     /* Check if we are already watching for this key */
-    listRewind(c->watched_keys, &li);
+    listRewind(&c->mstate->watched_keys, &li);
     while ((ln = listNext(&li))) {
         wk = listNodeValue(ln);
         if (wk->db == c->db && equalStringObjects(key, wk->key)) return; /* Key already watched */
@@ -326,7 +337,7 @@ void watchForKey(client *c, robj *key) {
     wk->db = c->db;
     wk->expired = keyIsExpired(c->db, key);
     incrRefCount(key);
-    listAddNodeTail(c->watched_keys, wk);
+    listAddNodeTail(&c->mstate->watched_keys, wk);
     watchedKeyLinkToClients(clients, wk);
 }
@@ -336,8 +347,8 @@ void unwatchAllKeys(client *c) {
     listIter li;
     listNode *ln;

-    if (listLength(c->watched_keys) == 0) return;
-    listRewind(c->watched_keys, &li);
+    if (!c->mstate || listLength(&c->mstate->watched_keys) == 0) return;
+    listRewind(&c->mstate->watched_keys, &li);
     while ((ln = listNext(&li))) {
         list *clients;
         watchedKey *wk;
@@ -350,7 +361,7 @@ void unwatchAllKeys(client *c) {
         /* Kill the entry at all if this was the only client */
         if (listLength(clients) == 0) dictDelete(wk->db->watched_keys, wk->key);
         /* Remove this watched key from the client->watched list */
-        listDelNode(c->watched_keys, ln);
+        listDelNode(&c->mstate->watched_keys, ln);
         decrRefCount(wk->key);
         zfree(wk);
     }
@@ -363,8 +374,8 @@ int isWatchedKeyExpired(client *c) {
     listIter li;
     listNode *ln;
     watchedKey *wk;
-    if (listLength(c->watched_keys) == 0) return 0;
-    listRewind(c->watched_keys, &li);
+    if (!c->mstate || listLength(&c->mstate->watched_keys) == 0) return 0;
+    listRewind(&c->mstate->watched_keys, &li);
     while ((ln = listNext(&li))) {
         wk = listNodeValue(ln);
         if (wk->expired) continue; /* was expired when WATCH was called */
@@ -474,6 +485,9 @@ void watchCommand(client *c) {
         addReply(c, shared.ok);
         return;
     }
+
+    if (!c->mstate) initClientMultiState(c);
+
     for (j = 1; j < c->argc; j++) watchForKey(c, c->argv[j]);
     addReply(c, shared.ok);
 }
@@ -485,11 +499,12 @@ void unwatchCommand(client *c) {
 }

 size_t multiStateMemOverhead(client *c) {
-    size_t mem = c->mstate.argv_len_sums;
+    if (!c->mstate) return 0;
+    size_t mem = c->mstate->argv_len_sums;
     /* Add watched keys overhead, Note: this doesn't take into account the watched keys themselves, because they aren't
      * managed per-client. */
-    mem += listLength(c->watched_keys) * (sizeof(listNode) + sizeof(watchedKey));
+    mem += listLength(&c->mstate->watched_keys) * (sizeof(listNode) + sizeof(c->mstate->watched_keys));
     /* Reserved memory for queued multi commands. */
-    mem += c->mstate.alloc_count * sizeof(multiCmd);
+    mem += c->mstate->alloc_count * sizeof(multiCmd);
     return mem;
 }
```
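The multi.c hunks above also implement the "statically allocated watched_keys" bullet from the description: the per-client `list *watched_keys` (a separate heap allocation for every client) becomes a list embedded by value inside the lazily created `multiState`, which is why call sites now take its address (`&c->mstate->watched_keys`). A type-level sketch of the difference, with simplified, hypothetical struct names:

```c
/* Before (sketch): every client carried a pointer to a heap-allocated
 * list header, created in createClient() even if WATCH was never used. */
typedef struct listSketch {
    struct listNode *head, *tail;
    unsigned long len;
} listSketch;

typedef struct clientBefore {
    listSketch *watched_keys; /* separate listCreate() per client */
} clientBefore;

/* After (sketch): the list header is embedded by value in the lazily
 * created multiState, so it costs nothing until MULTI/WATCH and needs
 * no separate listCreate()/listRelease() pair. */
typedef struct multiStateSketch {
    listSketch watched_keys; /* call sites use &c->mstate->watched_keys */
    /* ... commands, count, cmd_flags ... */
} multiStateSketch;

typedef struct clientAfter {
    multiStateSketch *mstate; /* NULL until MULTI or WATCH */
} clientAfter;
```

A single zcalloc()/zfree() pair in initClientMultiState()/freeClientMultiState() then covers both the command-queue bookkeeping and the watch-list header.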
src/networking.c (236 changed lines)
@ -119,7 +119,7 @@ int authRequired(client *c) {
|
||||
}
|
||||
|
||||
static inline int isReplicaReadyForReplData(client *replica) {
|
||||
return (replica->repl_state == REPLICA_STATE_ONLINE || replica->repl_state == REPLICA_STATE_BG_RDB_LOAD) &&
|
||||
return (replica->repl_data->repl_state == REPLICA_STATE_ONLINE || replica->repl_data->repl_state == REPLICA_STATE_BG_RDB_LOAD) &&
|
||||
!(replica->flag.close_asap);
|
||||
}
|
||||
|
||||
@ -154,8 +154,6 @@ client *createClient(connection *conn) {
|
||||
c->bufpos = 0;
|
||||
c->buf_peak = c->buf_usable_size;
|
||||
c->buf_peak_last_reset_time = server.unixtime;
|
||||
c->ref_repl_buf_node = NULL;
|
||||
c->ref_block_pos = 0;
|
||||
c->qb_pos = 0;
|
||||
c->querybuf = NULL;
|
||||
c->querybuf_peak = 0;
|
||||
@ -180,55 +178,31 @@ client *createClient(connection *conn) {
|
||||
c->ctime = c->last_interaction = server.unixtime;
|
||||
c->duration = 0;
|
||||
clientSetDefaultAuth(c);
|
||||
c->repl_state = REPL_STATE_NONE;
|
||||
c->repl_start_cmd_stream_on_ack = 0;
|
||||
c->reploff = 0;
|
||||
c->read_reploff = 0;
|
||||
c->repl_applied = 0;
|
||||
c->repl_ack_off = 0;
|
||||
c->repl_ack_time = 0;
|
||||
c->repl_aof_off = 0;
|
||||
c->repl_last_partial_write = 0;
|
||||
c->replica_listening_port = 0;
|
||||
c->replica_addr = NULL;
|
||||
c->replica_version = 0;
|
||||
c->replica_capa = REPLICA_CAPA_NONE;
|
||||
c->replica_req = REPLICA_REQ_NONE;
|
||||
c->associated_rdb_client_id = 0;
|
||||
c->rdb_client_disconnect_time = 0;
|
||||
c->reply = listCreate();
|
||||
c->deferred_reply_errors = NULL;
|
||||
c->reply_bytes = 0;
|
||||
c->obuf_soft_limit_reached_time = 0;
|
||||
listSetFreeMethod(c->reply, freeClientReplyValue);
|
||||
listSetDupMethod(c->reply, dupClientReplyValue);
|
||||
initClientBlockingState(c);
|
||||
c->repl_data = NULL;
|
||||
c->bstate = NULL;
|
||||
c->pubsub_data = NULL;
|
||||
c->module_data = NULL;
|
||||
c->mstate = NULL;
|
||||
c->woff = 0;
|
||||
c->watched_keys = listCreate();
|
||||
c->pubsub_channels = dictCreate(&objectKeyPointerValueDictType);
|
||||
c->pubsub_patterns = dictCreate(&objectKeyPointerValueDictType);
|
||||
c->pubsubshard_channels = dictCreate(&objectKeyPointerValueDictType);
|
||||
c->peerid = NULL;
|
||||
c->sockname = NULL;
|
||||
c->client_list_node = NULL;
|
||||
c->io_read_state = CLIENT_IDLE;
|
||||
c->io_write_state = CLIENT_IDLE;
|
||||
c->nwritten = 0;
|
||||
c->client_tracking_redirection = 0;
|
||||
c->client_tracking_prefixes = NULL;
|
||||
c->last_memory_usage = 0;
|
||||
c->last_memory_type = CLIENT_TYPE_NORMAL;
|
||||
c->module_blocked_client = NULL;
|
||||
c->module_auth_ctx = NULL;
|
||||
c->auth_callback = NULL;
|
||||
c->auth_callback_privdata = NULL;
|
||||
c->auth_module = NULL;
|
||||
listInitNode(&c->clients_pending_write_node, c);
|
||||
listInitNode(&c->pending_read_list_node, c);
|
||||
c->mem_usage_bucket = NULL;
|
||||
c->mem_usage_bucket_node = NULL;
|
||||
if (conn) linkClient(c);
|
||||
initClientMultiState(c);
|
||||
c->net_input_bytes = 0;
|
||||
c->net_input_bytes_curr_cmd = 0;
|
||||
c->net_output_bytes = 0;
|
||||
@ -266,7 +240,9 @@ void putClientInPendingWriteQueue(client *c) {
|
||||
* if not already done and, for replicas, if the replica can actually receive
|
||||
* writes at this stage. */
|
||||
if (!c->flag.pending_write &&
|
||||
(c->repl_state == REPL_STATE_NONE || (isReplicaReadyForReplData(c) && !c->repl_start_cmd_stream_on_ack))) {
|
||||
(!c->repl_data ||
|
||||
c->repl_data->repl_state == REPL_STATE_NONE ||
|
||||
(isReplicaReadyForReplData(c) && !c->repl_data->repl_start_cmd_stream_on_ack))) {
|
||||
/* Here instead of installing the write handler, we just flag the
|
||||
* client and put it into a list of clients that have something
|
||||
* to write to the socket. This way before re-entering the event
|
||||
@ -1340,10 +1316,10 @@ void deferredAfterErrorReply(client *c, list *errors) {
|
||||
void copyReplicaOutputBuffer(client *dst, client *src) {
|
||||
serverAssert(src->bufpos == 0 && listLength(src->reply) == 0);
|
||||
|
||||
if (src->ref_repl_buf_node == NULL) return;
|
||||
dst->ref_repl_buf_node = src->ref_repl_buf_node;
|
||||
dst->ref_block_pos = src->ref_block_pos;
|
||||
((replBufBlock *)listNodeValue(dst->ref_repl_buf_node))->refcount++;
|
||||
if (src->repl_data->ref_repl_buf_node == NULL) return;
|
||||
dst->repl_data->ref_repl_buf_node = src->repl_data->ref_repl_buf_node;
|
||||
dst->repl_data->ref_block_pos = src->repl_data->ref_block_pos;
|
||||
((replBufBlock *)listNodeValue(dst->repl_data->ref_repl_buf_node))->refcount++;
|
||||
}
|
||||
|
||||
/* Return true if the specified client has pending reply buffers to write to
|
||||
@ -1353,13 +1329,13 @@ int clientHasPendingReplies(client *c) {
|
||||
     /* Replicas use global shared replication buffer instead of
      * private output buffer. */
     serverAssert(c->bufpos == 0 && listLength(c->reply) == 0);
-    if (c->ref_repl_buf_node == NULL) return 0;
+    if (c->repl_data->ref_repl_buf_node == NULL) return 0;
 
     /* If the last replication buffer block content is totally sent,
      * we have nothing to send. */
     listNode *ln = listLast(server.repl_buffer_blocks);
     replBufBlock *tail = listNodeValue(ln);
-    if (ln == c->ref_repl_buf_node && c->ref_block_pos == tail->used) return 0;
+    if (ln == c->repl_data->ref_repl_buf_node && c->repl_data->ref_block_pos == tail->used) return 0;
 
     return 1;
 } else {
@@ -1526,23 +1502,6 @@ void disconnectReplicas(void) {
     }
 }
 
-/* Check if there is any other replica waiting dumping RDB finished expect me.
- * This function is useful to judge current dumping RDB can be used for full
- * synchronization or not. */
-int anyOtherReplicaWaitRdb(client *except_me) {
-    listIter li;
-    listNode *ln;
-
-    listRewind(server.replicas, &li);
-    while ((ln = listNext(&li))) {
-        client *replica = ln->value;
-        if (replica != except_me && replica->repl_state == REPLICA_STATE_WAIT_BGSAVE_END) {
-            return 1;
-        }
-    }
-    return 0;
-}
-
 /* Remove the specified client from global lists where the client could
  * be referenced, not including the Pub/Sub channels.
  * This is used by freeClient() and replicationCachePrimary(). */
@@ -1567,7 +1526,7 @@ void unlinkClient(client *c) {
 
     /* Check if this is a replica waiting for diskless replication (rdb pipe),
      * in which case it needs to be cleaned from that list */
-    if (c->flag.replica && c->repl_state == REPLICA_STATE_WAIT_BGSAVE_END && server.rdb_pipe_conns) {
+    if (c->repl_data && c->flag.replica && c->repl_data->repl_state == REPLICA_STATE_WAIT_BGSAVE_END && server.rdb_pipe_conns) {
         int i;
         int still_alive = 0;
         for (i = 0; i < server.rdb_pipe_numconns; i++) {
@@ -1653,11 +1612,7 @@ void clearClientConnectionState(client *c) {
     clientSetDefaultAuth(c);
     moduleNotifyUserChanged(c);
     discardTransaction(c);
 
-    pubsubUnsubscribeAllChannels(c, 0);
-    pubsubUnsubscribeShardAllChannels(c, 0);
-    pubsubUnsubscribeAllPatterns(c, 0);
-    unmarkClientAsPubSub(c);
+    freeClientPubSubData(c);
 
     if (c->name) {
         decrRefCount(c->name);
@@ -1696,9 +1651,7 @@ void freeClient(client *c) {
 
     /* Notify module system that this client auth status changed. */
     moduleNotifyUserChanged(c);
 
-    /* Free the RedisModuleBlockedClient held onto for reprocessing if not already freed. */
-    zfree(c->module_blocked_client);
+    freeClientModuleData(c);
 
     /* If this client was scheduled for async freeing we need to remove it
      * from the queue. Note that we need to do this here, because later
@@ -1745,31 +1698,16 @@ void freeClient(client *c) {
     /* If there is any in-flight command, we don't record their duration. */
     c->duration = 0;
     if (c->flag.blocked) unblockClient(c, 1);
-    dictRelease(c->bstate.keys);
-
-    /* UNWATCH all the keys */
-    unwatchAllKeys(c);
-    listRelease(c->watched_keys);
-    c->watched_keys = NULL;
-
-    /* Unsubscribe from all the pubsub channels */
-    pubsubUnsubscribeAllChannels(c, 0);
-    pubsubUnsubscribeShardAllChannels(c, 0);
-    pubsubUnsubscribeAllPatterns(c, 0);
-    unmarkClientAsPubSub(c);
-    dictRelease(c->pubsub_channels);
-    c->pubsub_channels = NULL;
-    dictRelease(c->pubsub_patterns);
-    c->pubsub_patterns = NULL;
-    dictRelease(c->pubsubshard_channels);
-    c->pubsubshard_channels = NULL;
+    freeClientBlockingState(c);
+    freeClientPubSubData(c);
 
     /* Free data structures. */
     listRelease(c->reply);
     c->reply = NULL;
     zfree_with_size(c->buf, c->buf_usable_size);
     c->buf = NULL;
     freeReplicaReferencedReplBuffer(c);
 
     freeClientArgv(c);
     freeClientOriginalArgv(c);
     if (c->deferred_reply_errors) listRelease(c->deferred_reply_errors);
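The hunks above collapse freeClient()'s inline module, blocking, and pub/sub teardown into per-component destructors. The contract that makes this safe is that each destructor is a no-op when its component was never allocated, so freeClient() can call them unconditionally. A minimal standalone sketch of that contract, with hypothetical simplified types (the real `ClientPubSubData` holds three dicts plus the tracking state):

```c
#include <stdio.h>
#include <stdlib.h>

typedef struct PubSubData { int n_channels; } PubSubData;  /* stand-in for the real dicts */
typedef struct Client { PubSubData *pubsub_data; } Client; /* NULL until first SUBSCRIBE */

static void freePubSubDataSketch(Client *c) {
    if (!c->pubsub_data) return; /* client never subscribed: nothing to free */
    free(c->pubsub_data);
    c->pubsub_data = NULL;       /* idempotent: a second call is a no-op */
}

int main(void) {
    Client c = {0};
    freePubSubDataSketch(&c); /* safe on a basic client */
    c.pubsub_data = calloc(1, sizeof(PubSubData));
    freePubSubDataSketch(&c); /* releases the lazily created component */
    printf("pubsub_data=%p\n", (void *)c.pubsub_data);
    return 0;
}
```

The same property is what lets clearClientConnectionState() reuse freeClientPubSubData() instead of unsubscribing field by field.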
@@ -1787,45 +1725,7 @@ void freeClient(client *c) {
      * places where active clients may be referenced. */
     unlinkClient(c);
 
-    /* Primary/replica cleanup Case 1:
-     * we lost the connection with a replica. */
-    if (c->flag.replica) {
-        /* If there is no any other replica waiting dumping RDB finished, the
-         * current child process need not continue to dump RDB, then we kill it.
-         * So child process won't use more memory, and we also can fork a new
-         * child process asap to dump rdb for next full synchronization or bgsave.
-         * But we also need to check if users enable 'save' RDB, if enable, we
-         * should not remove directly since that means RDB is important for users
-         * to keep data safe and we may delay configured 'save' for full sync. */
-        if (server.saveparamslen == 0 && c->repl_state == REPLICA_STATE_WAIT_BGSAVE_END &&
-            server.child_type == CHILD_TYPE_RDB && server.rdb_child_type == RDB_CHILD_TYPE_DISK &&
-            anyOtherReplicaWaitRdb(c) == 0) {
-            serverLog(LL_NOTICE, "Background saving, persistence disabled, last replica dropped, killing fork child.");
-            killRDBChild();
-        }
-        if (c->repl_state == REPLICA_STATE_SEND_BULK) {
-            if (c->repldbfd != -1) close(c->repldbfd);
-            if (c->replpreamble) sdsfree(c->replpreamble);
-        }
-        list *l = (c->flag.monitor) ? server.monitors : server.replicas;
-        ln = listSearchKey(l, c);
-        serverAssert(ln != NULL);
-        listDelNode(l, ln);
-        /* We need to remember the time when we started to have zero
-         * attached replicas, as after some time we'll free the replication
-         * backlog. */
-        if (getClientType(c) == CLIENT_TYPE_REPLICA && listLength(server.replicas) == 0)
-            server.repl_no_replicas_since = server.unixtime;
-        refreshGoodReplicasCount();
-        /* Fire the replica change modules event. */
-        if (c->repl_state == REPLICA_STATE_ONLINE)
-            moduleFireServerEvent(VALKEYMODULE_EVENT_REPLICA_CHANGE, VALKEYMODULE_SUBEVENT_REPLICA_CHANGE_OFFLINE,
-                                  NULL);
-    }
-
-    /* Primary/replica cleanup Case 2:
-     * we lost the connection with the primary. */
-    if (c->flag.primary) replicationHandlePrimaryDisconnection();
+    freeClientReplicationData(c);
 
     /* Remove client from memory usage buckets */
     if (c->mem_usage_bucket) {
@@ -1841,7 +1741,6 @@ void freeClient(client *c) {
     freeClientMultiState(c);
     sdsfree(c->peerid);
     sdsfree(c->sockname);
-    sdsfree(c->replica_addr);
     zfree(c);
 }
 
@@ -1932,10 +1831,10 @@ void beforeNextClient(client *c) {
          * In these scenarios, qb_pos points to the part of the current command
          * or the beginning of next command, and the current command is not applied yet,
         * so the repl_applied is not equal to qb_pos. */
-        if (c->repl_applied) {
-            sdsrange(c->querybuf, c->repl_applied, -1);
-            c->qb_pos -= c->repl_applied;
-            c->repl_applied = 0;
+        if (c->repl_data->repl_applied) {
+            sdsrange(c->querybuf, c->repl_data->repl_applied, -1);
+            c->qb_pos -= c->repl_data->repl_applied;
+            c->repl_data->repl_applied = 0;
         }
     } else {
         trimClientQueryBuffer(c);
@@ -1974,18 +1873,18 @@ int freeClientsInAsyncFreeQueue(void) {
          * The primary gives a grace period before freeing this client because
          * it serves as a reference to the first required replication data block for
          * this replica */
-        if (!c->rdb_client_disconnect_time) {
+        if (!c->repl_data->rdb_client_disconnect_time) {
            if (c->conn) connSetReadHandler(c->conn, NULL);
-            c->rdb_client_disconnect_time = server.unixtime;
+            c->repl_data->rdb_client_disconnect_time = server.unixtime;
            dualChannelServerLog(LL_VERBOSE, "Postpone RDB client id=%llu (%s) free for %d seconds",
                                 (unsigned long long)c->id, replicationGetReplicaName(c), server.wait_before_rdb_client_free);
        }
-        if (server.unixtime - c->rdb_client_disconnect_time <= server.wait_before_rdb_client_free) continue;
+        if (server.unixtime - c->repl_data->rdb_client_disconnect_time <= server.wait_before_rdb_client_free) continue;
        dualChannelServerLog(
            LL_NOTICE,
            "Replica main channel failed to establish PSYNC within the grace period (%ld seconds). "
            "Freeing RDB client %llu.",
-            (long int)(server.unixtime - c->rdb_client_disconnect_time), (unsigned long long)c->id);
+            (long int)(server.unixtime - c->repl_data->rdb_client_disconnect_time), (unsigned long long)c->id);
        c->flag.protected_rdb_channel = 0;
    }
 
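The hunk above only rehomes `rdb_client_disconnect_time` into `repl_data`; the grace-period logic itself is unchanged. In condensed, hypothetical form the check works like this:

```c
#include <time.h>

typedef struct ReplDataSketch {
    time_t rdb_client_disconnect_time; /* 0 = grace period not started */
} ReplDataSketch;

/* Returns 1 once the client has been disconnected longer than wait_secs.
 * The first call only records the disconnect time and reports "keep". */
static int rdbClientGraceExpired(ReplDataSketch *r, time_t now, int wait_secs) {
    if (!r->rdb_client_disconnect_time) {
        r->rdb_client_disconnect_time = now;
        return 0;
    }
    return (now - r->rdb_client_disconnect_time) > wait_secs;
}
```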
@@ -2015,27 +1914,27 @@ void writeToReplica(client *c) {
     int nwritten = 0;
     serverAssert(c->bufpos == 0 && listLength(c->reply) == 0);
     while (clientHasPendingReplies(c)) {
-        replBufBlock *o = listNodeValue(c->ref_repl_buf_node);
-        serverAssert(o->used >= c->ref_block_pos);
+        replBufBlock *o = listNodeValue(c->repl_data->ref_repl_buf_node);
+        serverAssert(o->used >= c->repl_data->ref_block_pos);
 
         /* Send current block if it is not fully sent. */
-        if (o->used > c->ref_block_pos) {
-            nwritten = connWrite(c->conn, o->buf + c->ref_block_pos, o->used - c->ref_block_pos);
+        if (o->used > c->repl_data->ref_block_pos) {
+            nwritten = connWrite(c->conn, o->buf + c->repl_data->ref_block_pos, o->used - c->repl_data->ref_block_pos);
             if (nwritten <= 0) {
                 c->write_flags |= WRITE_FLAGS_WRITE_ERROR;
                 return;
             }
             c->nwritten += nwritten;
-            c->ref_block_pos += nwritten;
+            c->repl_data->ref_block_pos += nwritten;
         }
 
         /* If we fully sent the object on head, go to the next one. */
-        listNode *next = listNextNode(c->ref_repl_buf_node);
-        if (next && c->ref_block_pos == o->used) {
+        listNode *next = listNextNode(c->repl_data->ref_repl_buf_node);
+        if (next && c->repl_data->ref_block_pos == o->used) {
             o->refcount--;
             ((replBufBlock *)(listNodeValue(next)))->refcount++;
-            c->ref_repl_buf_node = next;
-            c->ref_block_pos = 0;
+            c->repl_data->ref_repl_buf_node = next;
+            c->repl_data->ref_block_pos = 0;
             incrementalTrimReplicationBacklog(REPL_BACKLOG_TRIM_BLOCKS_PER_CALL);
         }
     }
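writeToReplica() keeps a per-replica cursor into the shared replication buffer: `ref_repl_buf_node` points at the current block and `ref_block_pos` at the first unsent byte within it; this PR only moves both fields into `repl_data`. A standalone sketch of the cursor mechanics under simplified, hypothetical types (the real blocks are refcounted listNodes):

```c
#include <stddef.h>
#include <string.h>

typedef struct Block {
    struct Block *next;
    size_t used;
    char buf[64];
} Block;

typedef struct ReplCursor {
    Block *node;  /* plays the role of repl_data->ref_repl_buf_node */
    size_t pos;   /* plays the role of repl_data->ref_block_pos */
} ReplCursor;

/* Copy up to n pending bytes into dst and advance the cursor, mirroring the
 * send loop above: finish the current block, then step to the next one. */
static size_t replCursorRead(ReplCursor *cur, char *dst, size_t n) {
    size_t copied = 0;
    while (cur->node && copied < n) {
        size_t avail = cur->node->used - cur->pos;
        if (avail == 0) {
            if (!cur->node->next) break;  /* fully caught up */
            cur->node = cur->node->next;  /* block fully sent: move on */
            cur->pos = 0;
            continue;
        }
        size_t take = avail < n - copied ? avail : n - copied;
        memcpy(dst + copied, cur->node->buf + cur->pos, take);
        cur->pos += take;
        copied += take;
    }
    return copied;
}
```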
@@ -2338,7 +2237,7 @@ int handleReadResult(client *c) {
     c->last_interaction = server.unixtime;
     c->net_input_bytes += c->nread;
     if (c->flag.primary) {
-        c->read_reploff += c->nread;
+        c->repl_data->read_reploff += c->nread;
         server.stat_net_repl_input_bytes += c->nread;
     } else {
         server.stat_net_input_bytes += c->nread;
@@ -2409,7 +2308,7 @@ parseResult handleParseResults(client *c) {
     }
 
     if (c->read_flags & READ_FLAGS_INLINE_ZERO_QUERY_LEN && getClientType(c) == CLIENT_TYPE_REPLICA) {
-        c->repl_ack_time = server.unixtime;
+        c->repl_data->repl_ack_time = server.unixtime;
     }
 
     if (c->read_flags & READ_FLAGS_INLINE_ZERO_QUERY_LEN) {
@@ -2993,10 +2892,12 @@ void commandProcessed(client *c) {
     clusterSlotStatsAddNetworkBytesInForUserClient(c);
     resetClient(c);
 
-    long long prev_offset = c->reploff;
+    if (!c->repl_data) return;
+
+    long long prev_offset = c->repl_data->reploff;
     if (c->flag.primary && !c->flag.multi) {
         /* Update the applied replication offset of our primary. */
-        c->reploff = c->read_reploff - sdslen(c->querybuf) + c->qb_pos;
+        c->repl_data->reploff = c->repl_data->read_reploff - sdslen(c->querybuf) + c->qb_pos;
     }
 
     /* If the client is a primary we need to compute the difference
@@ -3006,10 +2907,10 @@ void commandProcessed(client *c) {
      * part of the replication stream, will be propagated to the
      * sub-replicas and to the replication backlog. */
     if (c->flag.primary) {
-        long long applied = c->reploff - prev_offset;
+        long long applied = c->repl_data->reploff - prev_offset;
         if (applied) {
-            replicationFeedStreamFromPrimaryStream(c->querybuf + c->repl_applied, applied);
-            c->repl_applied += applied;
+            replicationFeedStreamFromPrimaryStream(c->querybuf + c->repl_data->repl_applied, applied);
+            c->repl_data->repl_applied += applied;
         }
     }
 }
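The new `if (!c->repl_data) return;` in commandProcessed() is the hot-path payoff of lazy initialization: ordinary clients skip all replication-offset bookkeeping with a single NULL test per command. A condensed, hypothetical illustration of the shape of that guard:

```c
#include <stdio.h>

typedef struct ReplDataSketch { long long reploff, read_reploff; } ReplDataSketch;
typedef struct ClientSketch {
    ReplDataSketch *repl_data; /* NULL for ordinary clients */
    int is_primary;
} ClientSketch;

/* 'unapplied' stands in for sdslen(querybuf) - qb_pos: bytes read from the
 * primary but not yet consumed by the parser. */
static void afterCommand(ClientSketch *c, long long unapplied) {
    if (!c->repl_data) return; /* the common case: one branch, no bookkeeping */
    if (c->is_primary) c->repl_data->reploff = c->repl_data->read_reploff - unapplied;
}

int main(void) {
    ClientSketch plain = {0};
    afterCommand(&plain, 0); /* safe even though repl_data was never allocated */
    printf("done\n");
    return 0;
}
```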
@@ -3241,7 +3142,7 @@ void readToQueryBuf(client *c) {
      * so they are also considered a part of the query buffer in a broader sense.
      *
      * For unauthenticated clients, the query buffer cannot exceed 1MB at most. */
-    size_t qb_memory = sdslen(c->querybuf) + c->mstate.argv_len_sums;
+    size_t qb_memory = sdslen(c->querybuf) + (c->mstate ? c->mstate->argv_len_sums : 0);
     if (qb_memory > server.client_max_querybuf_len ||
         (qb_memory > 1024 * 1024 && (c->read_flags & READ_FLAGS_AUTH_REQUIRED))) {
         c->read_flags |= READ_FLAGS_QB_LIMIT_REACHED;
@@ -3369,9 +3270,9 @@ sds catClientInfoString(sds s, client *client, int hide_user_data) {
     size_t obufmem, total_mem = getClientMemoryUsage(client, &obufmem);
 
     size_t used_blocks_of_repl_buf = 0;
-    if (client->ref_repl_buf_node) {
+    if (client->repl_data && client->repl_data->ref_repl_buf_node) {
         replBufBlock *last = listNodeValue(listLast(server.repl_buffer_blocks));
-        replBufBlock *cur = listNodeValue(client->ref_repl_buf_node);
+        replBufBlock *cur = listNodeValue(client->repl_data->ref_repl_buf_node);
         used_blocks_of_repl_buf = last->id - cur->id + 1;
     }
     sds ret = sdscatfmt(
@@ -3386,15 +3287,15 @@ sds catClientInfoString(sds s, client *client, int hide_user_data) {
         " idle=%I", (long long)(server.unixtime - client->last_interaction),
         " flags=%s", flags,
         " db=%i", client->db->id,
-        " sub=%i", (int)dictSize(client->pubsub_channels),
-        " psub=%i", (int)dictSize(client->pubsub_patterns),
-        " ssub=%i", (int)dictSize(client->pubsubshard_channels),
-        " multi=%i", (client->flag.multi) ? client->mstate.count : -1,
-        " watch=%i", (int)listLength(client->watched_keys),
+        " sub=%i", client->pubsub_data ? (int)dictSize(client->pubsub_data->pubsub_channels) : 0,
+        " psub=%i", client->pubsub_data ? (int)dictSize(client->pubsub_data->pubsub_patterns) : 0,
+        " ssub=%i", client->pubsub_data ? (int)dictSize(client->pubsub_data->pubsubshard_channels) : 0,
+        " multi=%i", client->mstate ? client->mstate->count : -1,
+        " watch=%i", client->mstate ? (int)listLength(&client->mstate->watched_keys) : 0,
         " qbuf=%U", client->querybuf ? (unsigned long long)sdslen(client->querybuf) : 0,
         " qbuf-free=%U", client->querybuf ? (unsigned long long)sdsavail(client->querybuf) : 0,
         " argv-mem=%U", (unsigned long long)client->argv_len_sum,
-        " multi-mem=%U", (unsigned long long)client->mstate.argv_len_sums,
+        " multi-mem=%U", client->mstate ? (unsigned long long)client->mstate->argv_len_sums : 0,
         " rbs=%U", (unsigned long long)client->buf_usable_size,
         " rbp=%U", (unsigned long long)client->buf_peak,
         " obl=%U", (unsigned long long)client->bufpos,
@@ -3404,7 +3305,7 @@ sds catClientInfoString(sds s, client *client, int hide_user_data) {
         " events=%s", events,
         " cmd=%s", client->lastcmd ? client->lastcmd->fullname : "NULL",
         " user=%s", hide_user_data ? "*redacted*" : (client->user ? client->user->name : "(superuser)"),
-        " redir=%I", (client->flag.tracking) ? (long long)client->client_tracking_redirection : -1,
+        " redir=%I", (client->flag.tracking) ? (long long)client->pubsub_data->client_tracking_redirection : -1,
         " resp=%i", client->resp,
         " lib-name=%s", client->lib_name ? (char *)client->lib_name->ptr : "",
         " lib-ver=%s", client->lib_ver ? (char *)client->lib_ver->ptr : "",
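Every CLIENT INFO field that now lives behind a component pointer gains a NULL guard, and an absent component reports the value the feature would have when unused (0 subscriptions, -1 for "not in MULTI"). The `redir=` field instead relies on `flag.tracking`, which is safe because CLIENT TRACKING initializes the pub/sub component up front (the `initClientPubSubData(c)` call added in the next hunk). A minimal standalone example of the convention, with hypothetical types:

```c
#include <stdio.h>

typedef struct MultiStateSketch { int count; } MultiStateSketch;
typedef struct ClientSketch { MultiStateSketch *mstate; } ClientSketch;

/* Mirrors the guard added to catClientInfoString():
 * no component => "feature unused" sentinel, never a NULL dereference. */
static int clientMultiCount(const ClientSketch *c) {
    return c->mstate ? c->mstate->count : -1;
}

int main(void) {
    ClientSketch c = {0};
    printf("multi=%i\n", clientMultiCount(&c)); /* prints multi=-1 */
    return 0;
}
```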
@@ -3892,6 +3793,7 @@ void clientCommand(client *c) {
         struct ClientFlags options = {0};
         robj **prefix = NULL;
         size_t numprefix = 0;
+        initClientPubSubData(c);
 
         /* Parse the options. */
         for (int j = 3; j < c->argc; j++) {
@@ -4031,7 +3933,7 @@ void clientCommand(client *c) {
     } else if (!strcasecmp(c->argv[1]->ptr, "getredir") && c->argc == 2) {
         /* CLIENT GETREDIR */
         if (c->flag.tracking) {
-            addReplyLongLong(c, c->client_tracking_redirection);
+            addReplyLongLong(c, c->pubsub_data->client_tracking_redirection);
         } else {
             addReplyLongLong(c, -1);
         }
@@ -4077,17 +3979,17 @@ void clientCommand(client *c) {
         /* Redirect */
         addReplyBulkCString(c, "redirect");
         if (c->flag.tracking) {
-            addReplyLongLong(c, c->client_tracking_redirection);
+            addReplyLongLong(c, c->pubsub_data->client_tracking_redirection);
         } else {
             addReplyLongLong(c, -1);
         }
 
         /* Prefixes */
         addReplyBulkCString(c, "prefixes");
-        if (c->client_tracking_prefixes) {
-            addReplyArrayLen(c, raxSize(c->client_tracking_prefixes));
+        if (c->pubsub_data->client_tracking_prefixes) {
+            addReplyArrayLen(c, raxSize(c->pubsub_data->client_tracking_prefixes));
             raxIterator ri;
-            raxStart(&ri, c->client_tracking_prefixes);
+            raxStart(&ri, c->pubsub_data->client_tracking_prefixes);
             raxSeek(&ri, "^", NULL, 0);
             while (raxNext(&ri)) {
                 addReplyBulkCBuffer(c, ri.key, ri.key_len);
@@ -4410,9 +4312,9 @@ size_t getClientOutputBufferMemoryUsage(client *c) {
         size_t repl_buf_size = 0;
         size_t repl_node_num = 0;
         size_t repl_node_size = sizeof(listNode) + sizeof(replBufBlock);
-        if (c->ref_repl_buf_node) {
+        if (c->repl_data->ref_repl_buf_node) {
             replBufBlock *last = listNodeValue(listLast(server.repl_buffer_blocks));
-            replBufBlock *cur = listNodeValue(c->ref_repl_buf_node);
+            replBufBlock *cur = listNodeValue(c->repl_data->ref_repl_buf_node);
             repl_buf_size = last->repl_offset + last->size - cur->repl_offset;
             repl_node_num = last->id - cur->id + 1;
         }
@@ -4445,8 +4347,8 @@ size_t getClientMemoryUsage(client *c, size_t *output_buffer_mem_usage) {
 
     /* Add memory overhead of the tracking prefixes, this is an underestimation so we don't need to traverse the entire
      * rax */
-    if (c->client_tracking_prefixes)
-        mem += c->client_tracking_prefixes->numnodes * (sizeof(raxNode) * sizeof(raxNode *));
+    if (c->pubsub_data && c->pubsub_data->client_tracking_prefixes)
+        mem += c->pubsub_data->client_tracking_prefixes->numnodes * (sizeof(raxNode) * sizeof(raxNode *));
 
     return mem;
 }
@@ -4612,7 +4514,7 @@ void flushReplicasOutputBuffers(void) {
      * 3. Obviously if the replica is not ONLINE.
      */
     if (isReplicaReadyForReplData(replica) && !(replica->flag.close_asap) && can_receive_writes &&
-        !replica->repl_start_cmd_stream_on_ack && clientHasPendingReplies(replica)) {
+        !replica->repl_data->repl_start_cmd_stream_on_ack && clientHasPendingReplies(replica)) {
         writeToClient(replica);
     }
 }

src/pubsub.c (66 changes)

@@ -219,20 +219,20 @@ int serverPubsubShardSubscriptionCount(void) {
 
 /* Return the number of channels + patterns a client is subscribed to. */
 int clientSubscriptionsCount(client *c) {
-    return dictSize(c->pubsub_channels) + dictSize(c->pubsub_patterns);
+    return dictSize(c->pubsub_data->pubsub_channels) + dictSize(c->pubsub_data->pubsub_patterns);
 }
 
 /* Return the number of shard level channels a client is subscribed to. */
 int clientShardSubscriptionsCount(client *c) {
-    return dictSize(c->pubsubshard_channels);
+    return dictSize(c->pubsub_data->pubsubshard_channels);
 }
 
 dict *getClientPubSubChannels(client *c) {
-    return c->pubsub_channels;
+    return c->pubsub_data->pubsub_channels;
 }
 
 dict *getClientPubSubShardChannels(client *c) {
-    return c->pubsubshard_channels;
+    return c->pubsub_data->pubsubshard_channels;
 }
 
 /* Return the number of pubsub + pubsub shard level channels
@@ -255,6 +255,36 @@ void unmarkClientAsPubSub(client *c) {
     }
 }
 
+void initClientPubSubData(client *c) {
+    if (c->pubsub_data) return;
+    c->pubsub_data = zmalloc(sizeof(ClientPubSubData));
+    c->pubsub_data->pubsub_channels = dictCreate(&objectKeyPointerValueDictType);
+    c->pubsub_data->pubsub_patterns = dictCreate(&objectKeyPointerValueDictType);
+    c->pubsub_data->pubsubshard_channels = dictCreate(&objectKeyPointerValueDictType);
+    c->pubsub_data->client_tracking_redirection = 0;
+    c->pubsub_data->client_tracking_prefixes = NULL;
+}
+
+void freeClientPubSubData(client *c) {
+    if (!c->pubsub_data) return;
+    /* Unsubscribe from all the pubsub channels */
+    pubsubUnsubscribeAllChannels(c, 0);
+    pubsubUnsubscribeShardAllChannels(c, 0);
+    pubsubUnsubscribeAllPatterns(c, 0);
+    unmarkClientAsPubSub(c);
+    dictRelease(c->pubsub_data->pubsub_channels);
+    c->pubsub_data->pubsub_channels = NULL;
+    dictRelease(c->pubsub_data->pubsub_patterns);
+    c->pubsub_data->pubsub_patterns = NULL;
+    dictRelease(c->pubsub_data->pubsubshard_channels);
+    c->pubsub_data->pubsubshard_channels = NULL;
+    if (c->pubsub_data->client_tracking_prefixes) {
+        disableTracking(c);
+    }
+    zfree(c->pubsub_data);
+    c->pubsub_data = NULL;
+}
+
 /* Subscribe a client to a channel. Returns 1 if the operation succeeded, or
  * 0 if the client was already subscribed to that channel. */
 int pubsubSubscribeChannel(client *c, robj *channel, pubsubtype type) {
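The hunks that follow all share one idiom: every pub/sub entry point that may touch the component first runs `if (!c->pubsub_data) initClientPubSubData(c);`, so the allocation happens on first use and nowhere else. A standalone sketch of the idiom, with a hypothetical simplified component (the real one creates three dicts):

```c
#include <stdlib.h>

typedef struct PubSubDataSketch { int channels; } PubSubDataSketch;
typedef struct ClientSketch { PubSubDataSketch *pubsub_data; } ClientSketch;

static void initPubSubDataSketch(ClientSketch *c) {
    if (c->pubsub_data) return;                           /* already initialized */
    c->pubsub_data = calloc(1, sizeof(PubSubDataSketch)); /* first use pays the cost */
}

/* Every entry point guards before dereferencing, as in pubsubSubscribeChannel(). */
static void subscribeSketch(ClientSketch *c) {
    if (!c->pubsub_data) initPubSubDataSketch(c);
    c->pubsub_data->channels++;
}
```

Note that even the unsubscribe paths use the same guard rather than returning early; initializing on the spot keeps the downstream dict code free of NULL checks.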
@@ -262,6 +292,8 @@ int pubsubSubscribeChannel(client *c, robj *channel, pubsubtype type) {
     int retval = 0;
     unsigned int slot = 0;
 
+    if (!c->pubsub_data) initClientPubSubData(c);
+
     /* Add the channel to the client -> channels hash table */
     void *position = dictFindPositionForInsert(type.clientPubSubChannels(c), channel, NULL);
     if (position) { /* Not yet subscribed to this channel */
@@ -344,7 +376,7 @@ void pubsubShardUnsubscribeAllChannelsInSlot(unsigned int slot) {
     dictEntry *entry;
     while ((entry = dictNext(iter)) != NULL) {
         client *c = dictGetKey(entry);
-        int retval = dictDelete(c->pubsubshard_channels, channel);
+        int retval = dictDelete(c->pubsub_data->pubsubshard_channels, channel);
         serverAssertWithInfo(c, channel, retval == DICT_OK);
         addReplyPubsubUnsubscribed(c, channel, pubSubShardType);
         /* If the client has no other pubsub subscription,
@@ -366,7 +398,9 @@ int pubsubSubscribePattern(client *c, robj *pattern) {
     dict *clients;
     int retval = 0;
 
+    if (!c->pubsub_data) initClientPubSubData(c);
+
-    if (dictAdd(c->pubsub_patterns, pattern, NULL) == DICT_OK) {
+    if (dictAdd(c->pubsub_data->pubsub_patterns, pattern, NULL) == DICT_OK) {
         retval = 1;
         incrRefCount(pattern);
         /* Add the client to the pattern -> list of clients hash table */
@@ -392,8 +426,10 @@ int pubsubUnsubscribePattern(client *c, robj *pattern, int notify) {
     dict *clients;
     int retval = 0;
 
+    if (!c->pubsub_data) initClientPubSubData(c);
+
     incrRefCount(pattern); /* Protect the object. May be the same we remove */
-    if (dictDelete(c->pubsub_patterns, pattern) == DICT_OK) {
+    if (dictDelete(c->pubsub_data->pubsub_patterns, pattern) == DICT_OK) {
         retval = 1;
         /* Remove the client from the pattern -> clients list hash table */
         de = dictFind(server.pubsub_patterns, pattern);
@@ -454,9 +490,10 @@ int pubsubUnsubscribeShardAllChannels(client *c, int notify) {
  * client was subscribed from. */
 int pubsubUnsubscribeAllPatterns(client *c, int notify) {
     int count = 0;
+    if (!c->pubsub_data) initClientPubSubData(c);
 
-    if (dictSize(c->pubsub_patterns) > 0) {
-        dictIterator *di = dictGetSafeIterator(c->pubsub_patterns);
+    if (dictSize(c->pubsub_data->pubsub_patterns) > 0) {
+        dictIterator *di = dictGetSafeIterator(c->pubsub_data->pubsub_patterns);
         dictEntry *de;
 
         while ((de = dictNext(di)) != NULL) {
@@ -560,6 +597,8 @@ void subscribeCommand(client *c) {
 
 /* UNSUBSCRIBE [channel ...] */
 void unsubscribeCommand(client *c) {
+    if (!c->pubsub_data) initClientPubSubData(c);
+
     if (c->argc == 1) {
         pubsubUnsubscribeAllChannels(c, 1);
     } else {
@@ -732,6 +771,8 @@ void ssubscribeCommand(client *c) {
 
 /* SUNSUBSCRIBE [shardchannel [shardchannel ...]] */
 void sunsubscribeCommand(client *c) {
+    if (!c->pubsub_data) initClientPubSubData(c);
+
     if (c->argc == 1) {
         pubsubUnsubscribeShardAllChannels(c, 1);
     } else {
@@ -745,12 +786,13 @@ void sunsubscribeCommand(client *c) {
 }
 
 size_t pubsubMemOverhead(client *c) {
+    if (!c->pubsub_data) return 0;
     /* PubSub patterns */
-    size_t mem = dictMemUsage(c->pubsub_patterns);
+    size_t mem = dictMemUsage(c->pubsub_data->pubsub_patterns);
     /* Global PubSub channels */
-    mem += dictMemUsage(c->pubsub_channels);
+    mem += dictMemUsage(c->pubsub_data->pubsub_channels);
     /* Sharded PubSub channels */
-    mem += dictMemUsage(c->pubsubshard_channels);
+    mem += dictMemUsage(c->pubsub_data->pubsubshard_channels);
     return mem;
 }

src/rdb.c
@@ -3573,9 +3573,9 @@ int rdbSaveToReplicasSockets(int req, rdbSaveInfo *rsi) {
     listRewind(server.replicas, &li);
     while ((ln = listNext(&li))) {
         client *replica = ln->value;
-        if (replica->repl_state == REPLICA_STATE_WAIT_BGSAVE_START) {
+        if (replica->repl_data->repl_state == REPLICA_STATE_WAIT_BGSAVE_START) {
             /* Check replica has the exact requirements */
-            if (replica->replica_req != req) continue;
+            if (replica->repl_data->replica_req != req) continue;
 
             conns[connsnum++] = replica->conn;
             if (dual_channel) {
@@ -3646,8 +3646,8 @@ int rdbSaveToReplicasSockets(int req, rdbSaveInfo *rsi) {
         listRewind(server.replicas, &li);
         while ((ln = listNext(&li))) {
             client *replica = ln->value;
-            if (replica->repl_state == REPLICA_STATE_WAIT_BGSAVE_END) {
-                replica->repl_state = REPLICA_STATE_WAIT_BGSAVE_START;
+            if (replica->repl_data->repl_state == REPLICA_STATE_WAIT_BGSAVE_END) {
+                replica->repl_data->repl_state = REPLICA_STATE_WAIT_BGSAVE_START;
             }
         }
         if (!dual_channel) {

src/replication.c
@@ -82,10 +82,10 @@ char *replicationGetReplicaName(client *c) {
 
     ip[0] = '\0';
     buf[0] = '\0';
-    if (c->replica_addr || connAddrPeerName(c->conn, ip, sizeof(ip), NULL) != -1) {
-        char *addr = c->replica_addr ? c->replica_addr : ip;
-        if (c->replica_listening_port)
-            formatAddr(buf, sizeof(buf), addr, c->replica_listening_port);
+    if (c->repl_data->replica_addr || connAddrPeerName(c->conn, ip, sizeof(ip), NULL) != -1) {
+        char *addr = c->repl_data->replica_addr ? c->repl_data->replica_addr : ip;
+        if (c->repl_data->replica_listening_port)
+            formatAddr(buf, sizeof(buf), addr, c->repl_data->replica_listening_port);
         else
             snprintf(buf, sizeof(buf), "%s:<unknown-replica-port>", addr);
     } else {
@@ -231,7 +231,7 @@ void addRdbReplicaToPsyncWait(client *replica_rdb_client) {
     dualChannelServerLog(LL_DEBUG, "Add rdb replica %s to waiting psync, with cid %llu, %s ",
                          replicationGetReplicaName(replica_rdb_client), (unsigned long long)replica_rdb_client->id,
                          tail ? "tracking repl-backlog tail" : "no repl-backlog to track");
-    replica_rdb_client->ref_repl_buf_node = tail ? ln : NULL;
+    replica_rdb_client->repl_data->ref_repl_buf_node = tail ? ln : NULL;
     /* Prevent rdb client from being freed before psync is established. */
     replica_rdb_client->flag.protected_rdb_channel = 1;
     uint64_t id = htonu64(replica_rdb_client->id);
@@ -250,8 +250,8 @@ void backfillRdbReplicasToPsyncWait(void) {
     raxSeek(&iter, "^", NULL, 0);
     while (raxNext(&iter)) {
         client *replica_rdb_client = iter.data;
-        if (replica_rdb_client->ref_repl_buf_node) continue;
-        replica_rdb_client->ref_repl_buf_node = ln;
+        if (replica_rdb_client->repl_data->ref_repl_buf_node) continue;
+        replica_rdb_client->repl_data->ref_repl_buf_node = ln;
         head->refcount++;
         dualChannelServerLog(LL_DEBUG, "Attach replica rdb client %llu to repl buf block",
                              (long long unsigned int)replica_rdb_client->id);
@@ -263,18 +263,18 @@ void removeReplicaFromPsyncWait(client *replica_main_client) {
     listNode *ln;
     replBufBlock *o;
     /* Get replBufBlock pointed by this replica */
-    client *replica_rdb_client = lookupRdbClientByID(replica_main_client->associated_rdb_client_id);
-    ln = replica_rdb_client->ref_repl_buf_node;
+    client *replica_rdb_client = lookupRdbClientByID(replica_main_client->repl_data->associated_rdb_client_id);
+    ln = replica_rdb_client->repl_data->ref_repl_buf_node;
     o = ln ? listNodeValue(ln) : NULL;
     if (o != NULL) {
         serverAssert(o->refcount > 0);
         o->refcount--;
     }
-    replica_rdb_client->ref_repl_buf_node = NULL;
+    replica_rdb_client->repl_data->ref_repl_buf_node = NULL;
     replica_rdb_client->flag.protected_rdb_channel = 0;
     dualChannelServerLog(LL_DEBUG, "Remove psync waiting replica %s with cid %llu, repl buffer block %s",
                          replicationGetReplicaName(replica_main_client),
-                         (long long unsigned int)replica_main_client->associated_rdb_client_id,
+                         (long long unsigned int)replica_main_client->repl_data->associated_rdb_client_id,
                          o ? "ref count decreased" : "doesn't exist");
     uint64_t id = htonu64(replica_rdb_client->id);
     raxRemove(server.replicas_waiting_psync, (unsigned char *)&id, sizeof(id), NULL);
@@ -291,7 +291,7 @@ int canFeedReplicaReplBuffer(client *replica) {
     if (replica->flag.repl_rdbonly) return 0;
 
     /* Don't feed replicas that are still waiting for BGSAVE to start. */
-    if (replica->repl_state == REPLICA_STATE_WAIT_BGSAVE_START) return 0;
+    if (replica->repl_data->repl_state == REPLICA_STATE_WAIT_BGSAVE_START) return 0;
 
     return 1;
 }
@@ -396,15 +396,15 @@ void freeReplicaReferencedReplBuffer(client *replica) {
                              replicationGetReplicaName(replica), (long long unsigned int)replica->id);
         }
     }
-    if (replica->ref_repl_buf_node != NULL) {
+    if (replica->repl_data->ref_repl_buf_node != NULL) {
         /* Decrease the start buffer node reference count. */
-        replBufBlock *o = listNodeValue(replica->ref_repl_buf_node);
+        replBufBlock *o = listNodeValue(replica->repl_data->ref_repl_buf_node);
         serverAssert(o->refcount > 0);
         o->refcount--;
         incrementalTrimReplicationBacklog(REPL_BACKLOG_TRIM_BLOCKS_PER_CALL);
     }
-    replica->ref_repl_buf_node = NULL;
-    replica->ref_block_pos = 0;
+    replica->repl_data->ref_repl_buf_node = NULL;
+    replica->repl_data->ref_block_pos = 0;
 }
 
 /* Replication: Primary side.
@@ -486,9 +486,9 @@ void feedReplicationBuffer(char *s, size_t len) {
         client *replica = ln->value;
         if (!canFeedReplicaReplBuffer(replica) && !(replica->flag.protected_rdb_channel)) continue;
         /* Update shared replication buffer start position. */
-        if (replica->ref_repl_buf_node == NULL) {
-            replica->ref_repl_buf_node = start_node;
-            replica->ref_block_pos = start_pos;
+        if (replica->repl_data->ref_repl_buf_node == NULL) {
+            replica->repl_data->ref_repl_buf_node = start_node;
+            replica->repl_data->ref_block_pos = start_pos;
             /* Only increase the start block reference count. */
             ((replBufBlock *)listNodeValue(start_node))->refcount++;
         }
@@ -771,8 +771,8 @@ long long addReplyReplicationBacklog(client *c, long long offset) {
     /* Setting output buffer of the replica. */
     replBufBlock *o = listNodeValue(node);
     o->refcount++;
-    c->ref_repl_buf_node = node;
-    c->ref_block_pos = offset - o->repl_offset;
+    c->repl_data->ref_repl_buf_node = node;
+    c->repl_data->ref_block_pos = offset - o->repl_offset;
 
     return server.repl_backlog->histlen - skip;
 }
@@ -805,8 +805,8 @@ int replicationSetupReplicaForFullResync(client *replica, long long offset) {
     char buf[128];
     int buflen;
 
-    replica->psync_initial_offset = offset;
-    replica->repl_state = REPLICA_STATE_WAIT_BGSAVE_END;
+    replica->repl_data->psync_initial_offset = offset;
+    replica->repl_data->repl_state = REPLICA_STATE_WAIT_BGSAVE_END;
     /* We are going to accumulate the incremental changes for this
      * replica as well. Set replicas_eldb to -1 in order to force to re-emit
      * a SELECT statement in the replication stream. */
@@ -889,19 +889,19 @@ int primaryTryPartialResynchronization(client *c, long long psync_offset) {
      * 4) Send the backlog data (from the offset to the end) to the replica. */
     waitForClientIO(c);
     c->flag.replica = 1;
-    if (c->associated_rdb_client_id && lookupRdbClientByID(c->associated_rdb_client_id)) {
-        c->repl_state = REPLICA_STATE_BG_RDB_LOAD;
+    if (c->repl_data->associated_rdb_client_id && lookupRdbClientByID(c->repl_data->associated_rdb_client_id)) {
+        c->repl_data->repl_state = REPLICA_STATE_BG_RDB_LOAD;
         removeReplicaFromPsyncWait(c);
     } else {
-        c->repl_state = REPLICA_STATE_ONLINE;
+        c->repl_data->repl_state = REPLICA_STATE_ONLINE;
     }
-    c->repl_ack_time = server.unixtime;
-    c->repl_start_cmd_stream_on_ack = 0;
+    c->repl_data->repl_ack_time = server.unixtime;
+    c->repl_data->repl_start_cmd_stream_on_ack = 0;
     listAddNodeTail(server.replicas, c);
     /* We can't use the connection buffers since they are used to accumulate
      * new commands at this stage. But we are sure the socket send buffer is
      * empty so this write will never fail actually. */
-    if (c->replica_capa & REPLICA_CAPA_PSYNC2) {
+    if (c->repl_data->replica_capa & REPLICA_CAPA_PSYNC2) {
         buflen = snprintf(buf, sizeof(buf), "+CONTINUE %s\r\n", server.replid);
     } else {
         buflen = snprintf(buf, sizeof(buf), "+CONTINUE\r\n");
@@ -1003,8 +1003,8 @@ int startBgsaveForReplication(int mincapa, int req) {
         while ((ln = listNext(&li))) {
             client *replica = ln->value;
 
-            if (replica->repl_state == REPLICA_STATE_WAIT_BGSAVE_START) {
-                replica->repl_state = REPL_STATE_NONE;
+            if (replica->repl_data->repl_state == REPLICA_STATE_WAIT_BGSAVE_START) {
+                replica->repl_data->repl_state = REPL_STATE_NONE;
                 replica->flag.replica = 0;
                 listDelNode(server.replicas, ln);
                 addReplyError(replica, "BGSAVE failed, replication can't continue");
@@ -1021,9 +1021,9 @@ int startBgsaveForReplication(int mincapa, int req) {
         while ((ln = listNext(&li))) {
             client *replica = ln->value;
 
-            if (replica->repl_state == REPLICA_STATE_WAIT_BGSAVE_START) {
+            if (replica->repl_data->repl_state == REPLICA_STATE_WAIT_BGSAVE_START) {
                 /* Check replica has the exact requirements */
-                if (replica->replica_req != req) continue;
+                if (replica->repl_data->replica_req != req) continue;
                 replicationSetupReplicaForFullResync(replica, getPsyncInitialOffset());
             }
         }
@@ -1037,6 +1037,8 @@ void syncCommand(client *c) {
     /* ignore SYNC if already replica or in monitor mode */
     if (c->flag.replica) return;
 
+    initClientReplicationData(c);
+
     /* Wait for any IO pending operation to finish before changing the client state to replica */
     waitForClientIO(c);
 
@@ -1089,7 +1091,7 @@ void syncCommand(client *c) {
     /* Fail sync if replica doesn't support EOF capability but wants a filtered RDB. This is because we force filtered
      * RDB's to be generated over a socket and not through a file to avoid conflicts with the snapshot files. Forcing
      * use of a socket is handled, if needed, in `startBgsaveForReplication`. */
-    if (c->replica_req & REPLICA_REQ_RDB_MASK && !(c->replica_capa & REPLICA_CAPA_EOF)) {
+    if (c->repl_data->replica_req & REPLICA_REQ_RDB_MASK && !(c->repl_data->replica_capa & REPLICA_CAPA_EOF)) {
         addReplyError(c, "Filtered replica requires EOF capability");
         return;
     }
@@ -1124,7 +1126,7 @@ void syncCommand(client *c) {
          * resync on purpose when they are not able to partially
         * resync. */
        if (primary_replid[0] != '?') server.stat_sync_partial_err++;
-        if (c->replica_capa & REPLICA_CAPA_DUAL_CHANNEL) {
+        if (c->repl_data->replica_capa & REPLICA_CAPA_DUAL_CHANNEL) {
            dualChannelServerLog(LL_NOTICE,
                                 "Replica %s is capable of dual channel synchronization, and partial sync "
                                 "isn't possible. "
@@ -1149,9 +1151,9 @@ void syncCommand(client *c) {
 
     /* Setup the replica as one waiting for BGSAVE to start. The following code
      * paths will change the state if we handle the replica differently. */
-    c->repl_state = REPLICA_STATE_WAIT_BGSAVE_START;
+    c->repl_data->repl_state = REPLICA_STATE_WAIT_BGSAVE_START;
     if (server.repl_disable_tcp_nodelay) connDisableTcpNoDelay(c->conn); /* Non critical if it fails. */
-    c->repldbfd = -1;
+    c->repl_data->repldbfd = -1;
     c->flag.replica = 1;
     listAddNodeTail(server.replicas, c);
 
@@ -1183,20 +1185,20 @@ void syncCommand(client *c) {
             replica = ln->value;
             /* If the client needs a buffer of commands, we can't use
              * a replica without replication buffer. */
-            if (replica->repl_state == REPLICA_STATE_WAIT_BGSAVE_END &&
+            if (replica->repl_data->repl_state == REPLICA_STATE_WAIT_BGSAVE_END &&
                 (!(replica->flag.repl_rdbonly) || (c->flag.repl_rdbonly)))
                 break;
         }
         /* To attach this replica, we check that it has at least all the
          * capabilities of the replica that triggered the current BGSAVE
         * and its exact requirements. */
-        if (ln && ((c->replica_capa & replica->replica_capa) == replica->replica_capa) &&
-            c->replica_req == replica->replica_req) {
+        if (ln && ((c->repl_data->replica_capa & replica->repl_data->replica_capa) == replica->repl_data->replica_capa) &&
+            c->repl_data->replica_req == replica->repl_data->replica_req) {
            /* Perfect, the server is already registering differences for
             * another replica. Set the right state, and copy the buffer.
             * We don't copy buffer if clients don't want. */
            if (!c->flag.repl_rdbonly) copyReplicaOutputBuffer(c, replica);
-            replicationSetupReplicaForFullResync(c, replica->psync_initial_offset);
+            replicationSetupReplicaForFullResync(c, replica->repl_data->psync_initial_offset);
            serverLog(LL_NOTICE, "Waiting for end of BGSAVE for SYNC");
        } else {
            /* No way, we need to wait for the next BGSAVE in order to
@@ -1213,7 +1215,7 @@ void syncCommand(client *c) {
 
     /* CASE 3: There is no BGSAVE is in progress. */
     } else {
-        if (server.repl_diskless_sync && (c->replica_capa & REPLICA_CAPA_EOF) && server.repl_diskless_sync_delay) {
+        if (server.repl_diskless_sync && (c->repl_data->replica_capa & REPLICA_CAPA_EOF) && server.repl_diskless_sync_delay) {
             /* Diskless replication RDB child is created inside
              * replicationCron() since we want to delay its start a
              * few seconds to wait for more replicas to arrive. */
@@ -1222,7 +1224,7 @@ void syncCommand(client *c) {
         /* We don't have a BGSAVE in progress, let's start one. Diskless
          * or disk-based mode is determined by replica's capacity. */
         if (!hasActiveChildProcess()) {
-            startBgsaveForReplication(c->replica_capa, c->replica_req);
+            startBgsaveForReplication(c->repl_data->replica_capa, c->repl_data->replica_req);
         } else {
             serverLog(LL_NOTICE, "No BGSAVE in progress, but another BG operation is active. "
                                  "BGSAVE for replication delayed");
@@ -1232,6 +1234,72 @@ void syncCommand(client *c) {
     return;
 }
 
+/* Check if there is any other replica waiting dumping RDB finished expect me.
+ * This function is useful to judge current dumping RDB can be used for full
+ * synchronization or not. */
+int anyOtherReplicaWaitRdb(client *except_me) {
+    listIter li;
+    listNode *ln;
+
+    listRewind(server.replicas, &li);
+    while ((ln = listNext(&li))) {
+        client *replica = ln->value;
+        if (replica != except_me && replica->repl_data->repl_state == REPLICA_STATE_WAIT_BGSAVE_END) {
+            return 1;
+        }
+    }
+    return 0;
+}
+
+void initClientReplicationData(client *c) {
+    if (c->repl_data) return;
+    c->repl_data = (ClientReplicationData *)zcalloc(sizeof(ClientReplicationData));
+}
+
+void freeClientReplicationData(client *c) {
+    if (!c->repl_data) return;
+    freeReplicaReferencedReplBuffer(c);
+    /* Primary/replica cleanup Case 1:
+     * we lost the connection with a replica. */
+    if (c->flag.replica) {
+        /* If there is no any other replica waiting dumping RDB finished, the
+         * current child process need not continue to dump RDB, then we kill it.
+         * So child process won't use more memory, and we also can fork a new
+         * child process asap to dump rdb for next full synchronization or bgsave.
+         * But we also need to check if users enable 'save' RDB, if enable, we
+         * should not remove directly since that means RDB is important for users
+         * to keep data safe and we may delay configured 'save' for full sync. */
+        if (server.saveparamslen == 0 && c->repl_data->repl_state == REPLICA_STATE_WAIT_BGSAVE_END &&
+            server.child_type == CHILD_TYPE_RDB && server.rdb_child_type == RDB_CHILD_TYPE_DISK &&
+            anyOtherReplicaWaitRdb(c) == 0) {
+            serverLog(LL_NOTICE, "Background saving, persistence disabled, last replica dropped, killing fork child.");
+            killRDBChild();
+        }
+        if (c->repl_data->repl_state == REPLICA_STATE_SEND_BULK) {
+            if (c->repl_data->repldbfd != -1) close(c->repl_data->repldbfd);
+            if (c->repl_data->replpreamble) sdsfree(c->repl_data->replpreamble);
+        }
+        list *l = (c->flag.monitor) ? server.monitors : server.replicas;
+        listNode *ln = listSearchKey(l, c);
+        serverAssert(ln != NULL);
+        listDelNode(l, ln);
+        /* We need to remember the time when we started to have zero
+         * attached replicas, as after some time we'll free the replication
+         * backlog. */
+        if (getClientType(c) == CLIENT_TYPE_REPLICA && listLength(server.replicas) == 0)
+            server.repl_no_replicas_since = server.unixtime;
+        refreshGoodReplicasCount();
+        /* Fire the replica change modules event. */
+        if (c->repl_data->repl_state == REPLICA_STATE_ONLINE)
+            moduleFireServerEvent(VALKEYMODULE_EVENT_REPLICA_CHANGE, VALKEYMODULE_SUBEVENT_REPLICA_CHANGE_OFFLINE,
+                                  NULL);
+    }
+    if (c->flag.primary) replicationHandlePrimaryDisconnection();
+    sdsfree(c->repl_data->replica_addr);
+    zfree(c->repl_data);
+    c->repl_data = NULL;
+}
+
 /* REPLCONF <option> <value> <option> <value> ...
  * This command is used by a replica in order to configure the replication
  * process before starting it with the SYNC command.
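initClientReplicationData() uses zcalloc, so every field of a fresh component starts zeroed; that is what lets REPLCONF in the next hunk call it once before the option loop and then write fields directly. A standalone sketch of the paired lifecycle, with a hypothetical simplified struct (the real `ClientReplicationData` lives in server.h):

```c
#include <stdlib.h>

typedef struct ReplicationDataSketch {
    long long reploff;  /* calloc zeroes everything, as zcalloc does upstream */
    int repl_state;
    char *replica_addr; /* owned string, released with the component */
} ReplicationDataSketch;

typedef struct ClientSketch { ReplicationDataSketch *repl_data; } ClientSketch;

static void initReplicationDataSketch(ClientSketch *c) {
    if (c->repl_data) return; /* SYNC and REPLCONF may both call this */
    c->repl_data = calloc(1, sizeof(ReplicationDataSketch));
}

static void freeReplicationDataSketch(ClientSketch *c) {
    if (!c->repl_data) return; /* ordinary clients never allocated it */
    free(c->repl_data->replica_addr);
    free(c->repl_data);
    c->repl_data = NULL;
}
```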
@@ -1286,18 +1354,20 @@ void replconfCommand(client *c) {
         return;
     }
 
+    initClientReplicationData(c);
+
     /* Process every option-value pair. */
     for (j = 1; j < c->argc; j += 2) {
         if (!strcasecmp(c->argv[j]->ptr, "listening-port")) {
             long port;
 
             if ((getLongFromObjectOrReply(c, c->argv[j + 1], &port, NULL) != C_OK)) return;
-            c->replica_listening_port = port;
+            c->repl_data->replica_listening_port = port;
         } else if (!strcasecmp(c->argv[j]->ptr, "ip-address")) {
             sds addr = c->argv[j + 1]->ptr;
             if (sdslen(addr) < NET_HOST_STR_LEN) {
-                if (c->replica_addr) sdsfree(c->replica_addr);
-                c->replica_addr = sdsdup(addr);
+                if (c->repl_data->replica_addr) sdsfree(c->repl_data->replica_addr);
+                c->repl_data->replica_addr = sdsdup(addr);
             } else {
                 addReplyErrorFormat(c,
                                     "REPLCONF ip-address provided by "
@@ -1308,14 +1378,14 @@ void replconfCommand(client *c) {
         } else if (!strcasecmp(c->argv[j]->ptr, "capa")) {
             /* Ignore capabilities not understood by this primary. */
             if (!strcasecmp(c->argv[j + 1]->ptr, "eof"))
-                c->replica_capa |= REPLICA_CAPA_EOF;
+                c->repl_data->replica_capa |= REPLICA_CAPA_EOF;
             else if (!strcasecmp(c->argv[j + 1]->ptr, "psync2"))
-                c->replica_capa |= REPLICA_CAPA_PSYNC2;
+                c->repl_data->replica_capa |= REPLICA_CAPA_PSYNC2;
             else if (!strcasecmp(c->argv[j + 1]->ptr, "dual-channel") && server.dual_channel_replication &&
                      server.repl_diskless_sync) {
                 /* If dual-channel is disable on this primary, treat this command as unrecognized
                  * replconf option. */
-                c->replica_capa |= REPLICA_CAPA_DUAL_CHANNEL;
+                c->repl_data->replica_capa |= REPLICA_CAPA_DUAL_CHANNEL;
             }
         } else if (!strcasecmp(c->argv[j]->ptr, "ack")) {
             /* REPLCONF ACK is used by replica to inform the primary the amount
@@ -1325,12 +1395,12 @@ void replconfCommand(client *c) {
 
             if (!c->flag.replica) return;
             if ((getLongLongFromObject(c->argv[j + 1], &offset) != C_OK)) return;
-            if (offset > c->repl_ack_off) c->repl_ack_off = offset;
+            if (offset > c->repl_data->repl_ack_off) c->repl_data->repl_ack_off = offset;
             if (c->argc > j + 3 && !strcasecmp(c->argv[j + 2]->ptr, "fack")) {
                 if ((getLongLongFromObject(c->argv[j + 3], &offset) != C_OK)) return;
-                if (offset > c->repl_aof_off) c->repl_aof_off = offset;
+                if (offset > c->repl_data->repl_aof_off) c->repl_data->repl_aof_off = offset;
             }
-            c->repl_ack_time = server.unixtime;
+            c->repl_data->repl_ack_time = server.unixtime;
             /* If this was a diskless replication, we need to really put
              * the replica online when the first ACK is received (which
              * confirms replica is online and ready to get more data). This
@@ -1339,10 +1409,10 @@ void replconfCommand(client *c) {
              * There's a chance the ACK got to us before we detected that the
              * bgsave is done (since that depends on cron ticks), so run a
              * quick check first (instead of waiting for the next ACK. */
-            if (server.child_type == CHILD_TYPE_RDB && c->repl_state == REPLICA_STATE_WAIT_BGSAVE_END)
+            if (server.child_type == CHILD_TYPE_RDB && c->repl_data->repl_state == REPLICA_STATE_WAIT_BGSAVE_END)
                 checkChildrenDone();
-            if (c->repl_start_cmd_stream_on_ack && c->repl_state == REPLICA_STATE_ONLINE) replicaStartCommandStream(c);
-            if (c->repl_state == REPLICA_STATE_BG_RDB_LOAD) {
+            if (c->repl_data->repl_start_cmd_stream_on_ack && c->repl_data->repl_state == REPLICA_STATE_ONLINE) replicaStartCommandStream(c);
+            if (c->repl_data->repl_state == REPLICA_STATE_BG_RDB_LOAD) {
                 replicaPutOnline(c);
             }
             /* Note: this command does not reply anything! */
@@ -1376,11 +1446,11 @@ void replconfCommand(client *c) {
                 return;
             }
             /* By default filter out all parts of the rdb */
-            c->replica_req |= REPLICA_REQ_RDB_EXCLUDE_DATA;
-            c->replica_req |= REPLICA_REQ_RDB_EXCLUDE_FUNCTIONS;
+            c->repl_data->replica_req |= REPLICA_REQ_RDB_EXCLUDE_DATA;
+            c->repl_data->replica_req |= REPLICA_REQ_RDB_EXCLUDE_FUNCTIONS;
             for (i = 0; i < filter_count; i++) {
                 if (!strcasecmp(filters[i], "functions"))
-                    c->replica_req &= ~REPLICA_REQ_RDB_EXCLUDE_FUNCTIONS;
+                    c->repl_data->replica_req &= ~REPLICA_REQ_RDB_EXCLUDE_FUNCTIONS;
                 else {
                     addReplyErrorFormat(c, "Unsupported rdb-filter-only option: %s", (char *)filters[i]);
                     sdsfreesplitres(filters, filter_count);
@@ -1392,7 +1462,7 @@ void replconfCommand(client *c) {
             /* REPLCONF VERSION x.y.z */
             int version = version2num(c->argv[j + 1]->ptr);
             if (version >= 0) {
-                c->replica_version = version;
+                c->repl_data->replica_version = version;
             } else {
                 addReplyErrorFormat(c, "Unrecognized version format: %s", (char *)c->argv[j + 1]->ptr);
                 return;
@@ -1404,10 +1474,10 @@ void replconfCommand(client *c) {
             }
             if (start_with_offset == 1) {
                 c->flag.repl_rdb_channel = 1;
-                c->replica_req |= REPLICA_REQ_RDB_CHANNEL;
+                c->repl_data->replica_req |= REPLICA_REQ_RDB_CHANNEL;
             } else {
                 c->flag.repl_rdb_channel = 0;
-                c->replica_req &= ~REPLICA_REQ_RDB_CHANNEL;
+                c->repl_data->replica_req &= ~REPLICA_REQ_RDB_CHANNEL;
             }
         } else if (!strcasecmp(c->argv[j]->ptr, "set-rdb-client-id")) {
             /* REPLCONF identify <client-id> is used to identify the current replica main channel with existing
@@ -1420,7 +1490,7 @@ void replconfCommand(client *c) {
                 addReplyErrorFormat(c, "Unrecognized RDB client id %lld", client_id);
                 return;
             }
-            c->associated_rdb_client_id = (uint64_t)client_id;
+            c->repl_data->associated_rdb_client_id = (uint64_t)client_id;
         } else {
             addReplyErrorFormat(c, "Unrecognized REPLCONF option: %s", (char *)c->argv[j]->ptr);
             return;
@@ -1441,14 +1511,14 @@ void replconfCommand(client *c) {
  * */
 int replicaPutOnline(client *replica) {
     if (replica->flag.repl_rdbonly) {
-        replica->repl_state = REPLICA_STATE_RDB_TRANSMITTED;
+        replica->repl_data->repl_state = REPLICA_STATE_RDB_TRANSMITTED;
         /* The client asked for RDB only so we should close it ASAP */
         serverLog(LL_NOTICE, "RDB transfer completed, rdb only replica (%s) should be disconnected asap",
                   replicationGetReplicaName(replica));
         return 0;
     }
-    replica->repl_state = REPLICA_STATE_ONLINE;
-    replica->repl_ack_time = server.unixtime; /* Prevent false timeout. */
+    replica->repl_data->repl_state = REPLICA_STATE_ONLINE;
+    replica->repl_data->repl_ack_time = server.unixtime; /* Prevent false timeout. */
 
     refreshGoodReplicasCount();
     /* Fire the replica change modules event. */
@@ -1471,7 +1541,7 @@ int replicaPutOnline(client *replica) {
  * won't get mixed with the RDB stream. */
 void replicaStartCommandStream(client *replica) {
     serverAssert(!(replica->flag.repl_rdbonly));
-    replica->repl_start_cmd_stream_on_ack = 0;
+    replica->repl_data->repl_start_cmd_stream_on_ack = 0;
 
     putClientInPendingWriteQueue(replica);
 }
@@ -1502,9 +1572,9 @@ void removeRDBUsedToSyncReplicas(void) {
         listRewind(server.replicas, &li);
         while ((ln = listNext(&li))) {
             replica = ln->value;
-            if (replica->repl_state == REPLICA_STATE_WAIT_BGSAVE_START ||
-                replica->repl_state == REPLICA_STATE_WAIT_BGSAVE_END ||
-                replica->repl_state == REPLICA_STATE_SEND_BULK) {
+            if (replica->repl_data->repl_state == REPLICA_STATE_WAIT_BGSAVE_START ||
+                replica->repl_data->repl_state == REPLICA_STATE_WAIT_BGSAVE_END ||
+                replica->repl_data->repl_state == REPLICA_STATE_SEND_BULK) {
                 delrdb = 0;
                 break; /* No need to check the other replicas. */
             }
@@ -1530,18 +1600,18 @@ void closeRepldbfd(client *myself) {
     listRewind(server.replicas, &li);
     while ((ln = listNext(&li))) {
         client *replica = ln->value;
-        if (replica != myself && replica->repl_state == REPLICA_STATE_SEND_BULK) {
+        if (replica != myself && replica->repl_data->repl_state == REPLICA_STATE_SEND_BULK) {
             reclaim = 0;
             break;
         }
     }
 
     if (reclaim) {
-        bioCreateCloseJob(myself->repldbfd, 0, 1);
+        bioCreateCloseJob(myself->repl_data->repldbfd, 0, 1);
     } else {
-        close(myself->repldbfd);
+        close(myself->repl_data->repldbfd);
    }
-    myself->repldbfd = -1;
+    myself->repl_data->repldbfd = -1;
 }
 
 void sendBulkToReplica(connection *conn) {
@@ -1552,18 +1622,18 @@ void sendBulkToReplica(connection *conn) {
     /* Before sending the RDB file, we send the preamble as configured by the
      * replication process. Currently the preamble is just the bulk count of
      * the file in the form "$<length>\r\n". */
-    if (replica->replpreamble) {
-        nwritten = connWrite(conn, replica->replpreamble, sdslen(replica->replpreamble));
+    if (replica->repl_data->replpreamble) {
+        nwritten = connWrite(conn, replica->repl_data->replpreamble, sdslen(replica->repl_data->replpreamble));
         if (nwritten == -1) {
             serverLog(LL_WARNING, "Write error sending RDB preamble to replica: %s", connGetLastError(conn));
             freeClient(replica);
             return;
         }
         server.stat_net_repl_output_bytes += nwritten;
-        sdsrange(replica->replpreamble, nwritten, -1);
-        if (sdslen(replica->replpreamble) == 0) {
-            sdsfree(replica->replpreamble);
-            replica->replpreamble = NULL;
+        sdsrange(replica->repl_data->replpreamble, nwritten, -1);
+        if (sdslen(replica->repl_data->replpreamble) == 0) {
+            sdsfree(replica->repl_data->replpreamble);
+            replica->repl_data->replpreamble = NULL;
             /* fall through sending data. */
         } else {
             return;
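The preamble rewritten above is plain RESP bulk framing: `$<length>\r\n` announces how many raw RDB bytes follow. A tiny self-contained demonstration of the format (illustrative values only):

```c
#include <stdio.h>

int main(void) {
    long long rdb_size = 1048576; /* hypothetical RDB payload size in bytes */
    char preamble[32];
    int n = snprintf(preamble, sizeof(preamble), "$%lld\r\n", rdb_size);
    /* The replica reads this line, then expects exactly rdb_size raw bytes. */
    printf("preamble is %d bytes: %s", n, preamble);
    return 0;
}
```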
@ -1571,8 +1641,8 @@ void sendBulkToReplica(connection *conn) {
|
||||
}
|
||||
|
||||
/* If the preamble was already transferred, send the RDB bulk data. */
|
||||
lseek(replica->repldbfd, replica->repldboff, SEEK_SET);
|
||||
buflen = read(replica->repldbfd, buf, PROTO_IOBUF_LEN);
|
||||
lseek(replica->repl_data->repldbfd, replica->repl_data->repldboff, SEEK_SET);
|
||||
buflen = read(replica->repl_data->repldbfd, buf, PROTO_IOBUF_LEN);
|
||||
if (buflen <= 0) {
|
||||
serverLog(LL_WARNING, "Read error sending DB to replica: %s",
|
||||
(buflen == 0) ? "premature EOF" : strerror(errno));
|
||||
@ -1586,9 +1656,9 @@ void sendBulkToReplica(connection *conn) {
|
||||
}
|
||||
return;
|
||||
}
|
||||
replica->repldboff += nwritten;
|
||||
replica->repl_data->repldboff += nwritten;
|
||||
server.stat_net_repl_output_bytes += nwritten;
|
||||
if (replica->repldboff == replica->repldbsize) {
|
||||
if (replica->repl_data->repldboff == replica->repl_data->repldbsize) {
|
||||
closeRepldbfd(replica);
|
||||
connSetWriteHandler(replica->conn, NULL);
|
||||
if (!replicaPutOnline(replica)) {
|
||||
@ -1605,7 +1675,7 @@ void rdbPipeWriteHandlerConnRemoved(struct connection *conn) {
|
||||
if (!connHasWriteHandler(conn)) return;
|
||||
connSetWriteHandler(conn, NULL);
|
||||
client *replica = connGetPrivateData(conn);
|
||||
replica->repl_last_partial_write = 0;
|
||||
replica->repl_data->repl_last_partial_write = 0;
|
||||
server.rdb_pipe_numconns_writing--;
|
||||
/* if there are no more writes for now for this conn, or write error: */
|
||||
if (server.rdb_pipe_numconns_writing == 0) {
|
||||
@ -1621,17 +1691,17 @@ void rdbPipeWriteHandler(struct connection *conn) {
|
||||
serverAssert(server.rdb_pipe_bufflen > 0);
|
||||
client *replica = connGetPrivateData(conn);
|
||||
ssize_t nwritten;
|
||||
if ((nwritten = connWrite(conn, server.rdb_pipe_buff + replica->repldboff,
|
||||
server.rdb_pipe_bufflen - replica->repldboff)) == -1) {
|
||||
if ((nwritten = connWrite(conn, server.rdb_pipe_buff + replica->repl_data->repldboff,
|
||||
server.rdb_pipe_bufflen - replica->repl_data->repldboff)) == -1) {
|
||||
if (connGetState(conn) == CONN_STATE_CONNECTED) return; /* equivalent to EAGAIN */
|
||||
 serverLog(LL_WARNING, "Write error sending DB to replica: %s", connGetLastError(conn));
 freeClient(replica);
 return;
 } else {
-replica->repldboff += nwritten;
+replica->repl_data->repldboff += nwritten;
 server.stat_net_repl_output_bytes += nwritten;
-if (replica->repldboff < server.rdb_pipe_bufflen) {
-replica->repl_last_partial_write = server.unixtime;
+if (replica->repl_data->repldboff < server.rdb_pipe_bufflen) {
+replica->repl_data->repl_last_partial_write = server.unixtime;
 return; /* more data to write.. */
 }
 }
@@ -1698,17 +1768,17 @@ void rdbPipeReadHandler(struct aeEventLoop *eventLoop, int fd, void *clientData,
 continue;
 }
 /* An error and still in connected state, is equivalent to EAGAIN */
-replica->repldboff = 0;
+replica->repl_data->repldboff = 0;
 } else {
 /* Note: when use diskless replication, 'repldboff' is the offset
 * of 'rdb_pipe_buff' sent rather than the offset of entire RDB. */
-replica->repldboff = nwritten;
+replica->repl_data->repldboff = nwritten;
 server.stat_net_repl_output_bytes += nwritten;
 }
 /* If we were unable to write all the data to one of the replicas,
 * setup write handler (and disable pipe read handler, below) */
 if (nwritten != server.rdb_pipe_bufflen) {
-replica->repl_last_partial_write = server.unixtime;
+replica->repl_data->repl_last_partial_write = server.unixtime;
 server.rdb_pipe_numconns_writing++;
 connSetWriteHandler(conn, rdbPipeWriteHandler);
 }
@@ -1739,7 +1809,7 @@ void updateReplicasWaitingBgsave(int bgsaveerr, int type) {
 while ((ln = listNext(&li))) {
 client *replica = ln->value;

-if (replica->repl_state == REPLICA_STATE_WAIT_BGSAVE_END) {
+if (replica->repl_data->repl_state == REPLICA_STATE_WAIT_BGSAVE_END) {
 int repldbfd;
 struct valkey_stat buf;

@@ -1790,7 +1860,7 @@ void updateReplicasWaitingBgsave(int bgsaveerr, int type) {
 freeClientAsync(replica);
 continue;
 }
-replica->repl_start_cmd_stream_on_ack = 1;
+replica->repl_data->repl_start_cmd_stream_on_ack = 1;
 } else {
 repldbfd = open(server.rdb_filename, O_RDONLY);
 if (repldbfd == -1) {
@@ -1804,11 +1874,11 @@ void updateReplicasWaitingBgsave(int bgsaveerr, int type) {
 close(repldbfd);
 continue;
 }
-replica->repldbfd = repldbfd;
-replica->repldboff = 0;
-replica->repldbsize = buf.st_size;
-replica->repl_state = REPLICA_STATE_SEND_BULK;
-replica->replpreamble = sdscatprintf(sdsempty(), "$%lld\r\n", (unsigned long long)replica->repldbsize);
+replica->repl_data->repldbfd = repldbfd;
+replica->repl_data->repldboff = 0;
+replica->repl_data->repldbsize = buf.st_size;
+replica->repl_data->repl_state = REPLICA_STATE_SEND_BULK;
+replica->repl_data->replpreamble = sdscatprintf(sdsempty(), "$%lld\r\n", (unsigned long long)replica->repl_data->repldbsize);

 /* When repl_state changes to REPLICA_STATE_SEND_BULK, we will release
 * the resources in freeClient. */
@@ -1917,13 +1987,14 @@ void replicationCreatePrimaryClientWithHandler(connection *conn, int dbid, Conne
 /* Allocate a private query buffer for the primary client instead of using the shared query buffer.
 * This is done because the primary's query buffer data needs to be preserved for my sub-replicas to use. */
 server.primary->querybuf = sdsempty();
-server.primary->reploff = server.primary_initial_offset;
-server.primary->read_reploff = server.primary->reploff;
+initClientReplicationData(server.primary);
+server.primary->repl_data->reploff = server.primary_initial_offset;
+server.primary->repl_data->read_reploff = server.primary->repl_data->reploff;
 server.primary->user = NULL; /* This client can do everything. */
-memcpy(server.primary->replid, server.primary_replid, sizeof(server.primary_replid));
+memcpy(server.primary->repl_data->replid, server.primary_replid, sizeof(server.primary_replid));
 /* If primary offset is set to -1, this primary is old and is not
 * PSYNC capable, so we flag it accordingly. */
-if (server.primary->reploff == -1) server.primary->flag.pre_psync = 1;
+if (server.primary->repl_data->reploff == -1) server.primary->flag.pre_psync = 1;
 if (dbid != -1) selectDb(server.primary, dbid);
 }

@@ -2418,8 +2489,8 @@ void readSyncBulkPayload(connection *conn) {
 /* After a full resynchronization we use the replication ID and
 * offset of the primary. The secondary ID / offset are cleared since
 * we are starting a new history. */
-memcpy(server.replid, server.primary->replid, sizeof(server.replid));
-server.primary_repl_offset = server.primary->reploff;
+memcpy(server.replid, server.primary->repl_data->replid, sizeof(server.replid));
+server.primary_repl_offset = server.primary->repl_data->reploff;
 }
 clearReplicationId2();

@@ -2914,7 +2985,7 @@ int streamReplDataBufToDb(client *c) {
 replDataBufBlock *o = listNodeValue(cur);
 used = o->used;
 c->querybuf = sdscatlen(c->querybuf, o->buf, used);
-c->read_reploff += used;
+c->repl_data->read_reploff += used;
 processInputBuffer(c);
 server.pending_repl_data.len -= used;
 offset += used;
@@ -3072,8 +3143,8 @@ int replicaTryPartialResynchronization(connection *conn, int read_reply) {
 "Trying a partial resynchronization using main channel (request %s:%s).",
 psync_replid, psync_offset);
 } else if (server.cached_primary) {
-psync_replid = server.cached_primary->replid;
-snprintf(psync_offset, sizeof(psync_offset), "%lld", server.cached_primary->reploff + 1);
+psync_replid = server.cached_primary->repl_data->replid;
+snprintf(psync_offset, sizeof(psync_offset), "%lld", server.cached_primary->repl_data->reploff + 1);
 serverLog(LL_NOTICE, "Trying a partial resynchronization (request %s:%s).", psync_replid, psync_offset);
 } else {
 serverLog(LL_NOTICE, "Partial resynchronization not possible (no cached primary)");
@@ -3168,18 +3239,18 @@ int replicaTryPartialResynchronization(connection *conn, int read_reply) {
 memcpy(new, start, CONFIG_RUN_ID_SIZE);
 new[CONFIG_RUN_ID_SIZE] = '\0';

-if (strcmp(new, server.cached_primary->replid)) {
+if (strcmp(new, server.cached_primary->repl_data->replid)) {
 /* Primary ID changed. */
 serverLog(LL_NOTICE, "Primary replication ID changed to %s", new);

 /* Set the old ID as our ID2, up to the current offset+1. */
-memcpy(server.replid2, server.cached_primary->replid, sizeof(server.replid2));
+memcpy(server.replid2, server.cached_primary->repl_data->replid, sizeof(server.replid2));
 server.second_replid_offset = server.primary_repl_offset + 1;

 /* Update the cached primary ID and our own primary ID to the
 * new one. */
 memcpy(server.replid, new, sizeof(server.replid));
-memcpy(server.cached_primary->replid, new, sizeof(server.replid));
+memcpy(server.cached_primary->repl_data->replid, new, sizeof(server.replid));

 /* Disconnect all the sub-replicas: they need to be notified. */
 disconnectReplicas();
@@ -4048,17 +4119,17 @@ void roleCommand(client *c) {
 listRewind(server.replicas, &li);
 while ((ln = listNext(&li))) {
 client *replica = ln->value;
-char ip[NET_IP_STR_LEN], *replica_addr = replica->replica_addr;
+char ip[NET_IP_STR_LEN], *replica_addr = replica->repl_data->replica_addr;

 if (!replica_addr) {
 if (connAddrPeerName(replica->conn, ip, sizeof(ip), NULL) == -1) continue;
 replica_addr = ip;
 }
-if (replica->repl_state != REPLICA_STATE_ONLINE) continue;
+if (replica->repl_data->repl_state != REPLICA_STATE_ONLINE) continue;
 addReplyArrayLen(c, 3);
 addReplyBulkCString(c, replica_addr);
-addReplyBulkLongLong(c, replica->replica_listening_port);
-addReplyBulkLongLong(c, replica->repl_ack_off);
+addReplyBulkLongLong(c, replica->repl_data->replica_listening_port);
+addReplyBulkLongLong(c, replica->repl_data->repl_ack_off);
 replicas++;
 }
 setDeferredArrayLen(c, mbcount, replicas);
@@ -4082,7 +4153,7 @@ void roleCommand(client *c) {
 }
 }
 addReplyBulkCString(c, replica_state);
-addReplyLongLong(c, server.primary ? server.primary->reploff : -1);
+addReplyLongLong(c, server.primary ? server.primary->repl_data->reploff : -1);
 }
 }

@@ -4098,7 +4169,7 @@ void replicationSendAck(void) {
 addReplyArrayLen(c, send_fack ? 5 : 3);
 addReplyBulkCString(c, "REPLCONF");
 addReplyBulkCString(c, "ACK");
-addReplyBulkLongLong(c, c->reploff);
+addReplyBulkLongLong(c, c->repl_data->reploff);
 if (send_fack) {
 addReplyBulkCString(c, "FACK");
 addReplyBulkLongLong(c, server.fsynced_reploff);
@@ -4146,8 +4217,8 @@ void replicationCachePrimary(client *c) {
 * pending outputs to the primary. */
 sdsclear(server.primary->querybuf);
 server.primary->qb_pos = 0;
-server.primary->repl_applied = 0;
-server.primary->read_reploff = server.primary->reploff;
+server.primary->repl_data->repl_applied = 0;
+server.primary->repl_data->read_reploff = server.primary->repl_data->reploff;
 if (c->flag.multi) discardTransaction(c);
 listEmpty(c->reply);
 c->sentlen = 0;
@@ -4191,7 +4262,7 @@ void replicationCachePrimaryUsingMyself(void) {
 "to synthesize a cached primary: I may be able to synchronize with "
 "the new primary with just a partial transfer.");

-/* This will be used to populate the field server.primary->reploff
+/* This will be used to populate the field server.primary->repl_data->reploff
 * by replicationCreatePrimaryClient(). We'll later set the created
 * primary as server.cached_primary, so the replica will use such
 * offset for PSYNC. */
@@ -4202,7 +4273,7 @@ void replicationCachePrimaryUsingMyself(void) {
 replicationCreatePrimaryClient(NULL, -1);

 /* Use our own ID / offset. */
-memcpy(server.primary->replid, server.replid, sizeof(server.replid));
+memcpy(server.primary->repl_data->replid, server.replid, sizeof(server.replid));

 /* Set as cached primary. */
 unlinkClient(server.primary);
@@ -4283,11 +4354,11 @@ void replicationResurrectProvisionalPrimary(void) {
 /* Create a primary client, but do not initialize the read handler yet, as this replica still has a local buffer to
 * drain. */
 replicationCreatePrimaryClientWithHandler(server.repl_transfer_s, server.repl_provisional_primary.dbid, NULL);
-memcpy(server.primary->replid, server.repl_provisional_primary.replid, sizeof(server.repl_provisional_primary.replid));
-server.primary->reploff = server.repl_provisional_primary.reploff;
-server.primary->read_reploff = server.repl_provisional_primary.read_reploff;
-server.primary_repl_offset = server.primary->reploff;
-memcpy(server.replid, server.primary->replid, sizeof(server.primary->replid));
+memcpy(server.primary->repl_data->replid, server.repl_provisional_primary.replid, sizeof(server.repl_provisional_primary.replid));
+server.primary->repl_data->reploff = server.repl_provisional_primary.reploff;
+server.primary->repl_data->read_reploff = server.repl_provisional_primary.read_reploff;
+server.primary_repl_offset = server.primary->repl_data->reploff;
+memcpy(server.replid, server.primary->repl_data->replid, sizeof(server.primary->repl_data->replid));
 establishPrimaryConnection();
 }

@@ -4306,9 +4377,9 @@ void refreshGoodReplicasCount(void) {
 listRewind(server.replicas, &li);
 while ((ln = listNext(&li))) {
 client *replica = ln->value;
-time_t lag = server.unixtime - replica->repl_ack_time;
+time_t lag = server.unixtime - replica->repl_data->repl_ack_time;

-if (replica->repl_state == REPLICA_STATE_ONLINE && lag <= server.repl_min_replicas_max_lag) good++;
+if (replica->repl_data->repl_state == REPLICA_STATE_ONLINE && lag <= server.repl_min_replicas_max_lag) good++;
 }
 server.repl_good_replicas_count = good;
 }
@@ -4378,8 +4449,8 @@ int replicationCountAcksByOffset(long long offset) {
 while ((ln = listNext(&li))) {
 client *replica = ln->value;

-if (replica->repl_state != REPLICA_STATE_ONLINE) continue;
-if (replica->repl_ack_off >= offset) count++;
+if (replica->repl_data->repl_state != REPLICA_STATE_ONLINE) continue;
+if (replica->repl_data->repl_ack_off >= offset) count++;
 }
 return count;
 }
@@ -4395,8 +4466,8 @@ int replicationCountAOFAcksByOffset(long long offset) {
 while ((ln = listNext(&li))) {
 client *replica = ln->value;

-if (replica->repl_state != REPLICA_STATE_ONLINE) continue;
-if (replica->repl_aof_off >= offset) count++;
+if (replica->repl_data->repl_state != REPLICA_STATE_ONLINE) continue;
+if (replica->repl_data->repl_aof_off >= offset) count++;
 }
 return count;
 }
@@ -4482,9 +4553,9 @@ void waitaofCommand(client *c) {
 * waiting for replica acks. Never call it directly, call unblockClient()
 * instead. */
 void unblockClientWaitingReplicas(client *c) {
-serverAssert(c->bstate.client_waiting_acks_list_node);
-listDelNode(server.clients_waiting_acks, c->bstate.client_waiting_acks_list_node);
-c->bstate.client_waiting_acks_list_node = NULL;
+serverAssert(c->bstate->client_waiting_acks_list_node);
+listDelNode(server.clients_waiting_acks, c->bstate->client_waiting_acks_list_node);
+c->bstate->client_waiting_acks_list_node = NULL;
 updateStatsOnUnblock(c, 0, 0, 0);
 }

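With `bstate` now a pointer, every field access above goes through `c->bstate->`. The teardown half of the lazy pair, `freeClientBlockingState()` (declared in the src/server.h hunks further down), plausibly reduces to a guarded free. The sketch below is an assumed shape, not the PR's exact code:

```c
/* Hedged sketch of freeClientBlockingState(); the real implementation
 * likely also releases keys held by the blocking state. */
void freeClientBlockingState(client *c) {
    if (!c->bstate) return; /* never blocked: nothing was allocated */
    zfree(c->bstate);
    c->bstate = NULL;       /* the next blocking command re-allocates */
}
```
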
@@ -4507,7 +4578,7 @@ void processClientsWaitingReplicas(void) {
 client *c = ln->value;
 int is_wait_aof = c->cmd->proc == waitaofCommand;

-if (is_wait_aof && c->bstate.numlocal && !server.aof_enabled) {
+if (is_wait_aof && c->bstate->numlocal && !server.aof_enabled) {
 addReplyError(c, "WAITAOF cannot be used when numlocal is set but appendonly is disabled.");
 unblockClient(c, 1);
 continue;
@@ -4518,32 +4589,32 @@ void processClientsWaitingReplicas(void) {
 * may be unblocked without calling replicationCountAcksByOffset()
 * or calling replicationCountAOFAcksByOffset()
 * if the requested offset / replicas were equal or less. */
-if (!is_wait_aof && last_offset && last_offset >= c->bstate.reploffset &&
-last_numreplicas >= c->bstate.numreplicas) {
+if (!is_wait_aof && last_offset && last_offset >= c->bstate->reploffset &&
+last_numreplicas >= c->bstate->numreplicas) {
 numreplicas = last_numreplicas;
-} else if (is_wait_aof && last_aof_offset && last_aof_offset >= c->bstate.reploffset &&
-last_aof_numreplicas >= c->bstate.numreplicas) {
+} else if (is_wait_aof && last_aof_offset && last_aof_offset >= c->bstate->reploffset &&
+last_aof_numreplicas >= c->bstate->numreplicas) {
 numreplicas = last_aof_numreplicas;
 } else {
-numreplicas = is_wait_aof ? replicationCountAOFAcksByOffset(c->bstate.reploffset)
-: replicationCountAcksByOffset(c->bstate.reploffset);
+numreplicas = is_wait_aof ? replicationCountAOFAcksByOffset(c->bstate->reploffset)
+: replicationCountAcksByOffset(c->bstate->reploffset);

 /* Check if the number of replicas is satisfied. */
-if (numreplicas < c->bstate.numreplicas) continue;
+if (numreplicas < c->bstate->numreplicas) continue;

 if (is_wait_aof) {
-last_aof_offset = c->bstate.reploffset;
+last_aof_offset = c->bstate->reploffset;
 last_aof_numreplicas = numreplicas;
 } else {
-last_offset = c->bstate.reploffset;
+last_offset = c->bstate->reploffset;
 last_numreplicas = numreplicas;
 }
 }

 /* Check if the local constraint of WAITAOF is served */
 if (is_wait_aof) {
-numlocal = server.fsynced_reploff >= c->bstate.reploffset;
-if (numlocal < c->bstate.numlocal) continue;
+numlocal = server.fsynced_reploff >= c->bstate->reploffset;
+if (numlocal < c->bstate->numlocal) continue;
 }

 /* Reply before unblocking, because unblock client calls reqresAppendResponse */
@@ -4569,9 +4640,9 @@ long long replicationGetReplicaOffset(void) {

 if (server.primary_host != NULL) {
 if (server.primary) {
-offset = server.primary->reploff;
+offset = server.primary->repl_data->reploff;
 } else if (server.cached_primary) {
-offset = server.cached_primary->reploff;
+offset = server.cached_primary->repl_data->reploff;
 }
 }
 /* offset may be -1 when the primary does not support it at all, however
@@ -4668,8 +4739,8 @@ void replicationCron(void) {
 client *replica = ln->value;

 int is_presync =
-(replica->repl_state == REPLICA_STATE_WAIT_BGSAVE_START ||
-(replica->repl_state == REPLICA_STATE_WAIT_BGSAVE_END && server.rdb_child_type != RDB_CHILD_TYPE_SOCKET));
+(replica->repl_data->repl_state == REPLICA_STATE_WAIT_BGSAVE_START ||
+(replica->repl_data->repl_state == REPLICA_STATE_WAIT_BGSAVE_END && server.rdb_child_type != RDB_CHILD_TYPE_SOCKET));

 if (is_presync) {
 connWrite(replica->conn, "\n", 1);
@@ -4685,9 +4756,9 @@ void replicationCron(void) {
 while ((ln = listNext(&li))) {
 client *replica = ln->value;

-if (replica->repl_state == REPLICA_STATE_ONLINE) {
+if (replica->repl_data->repl_state == REPLICA_STATE_ONLINE) {
 if (replica->flag.pre_psync) continue;
-if ((server.unixtime - replica->repl_ack_time) > server.repl_timeout) {
+if ((server.unixtime - replica->repl_data->repl_ack_time) > server.repl_timeout) {
 serverLog(LL_WARNING, "Disconnecting timedout replica (streaming sync): %s",
 replicationGetReplicaName(replica));
 freeClient(replica);
@@ -4697,10 +4768,10 @@ void replicationCron(void) {
 /* We consider disconnecting only diskless replicas because disk-based replicas aren't fed
 * by the fork child so if a disk-based replica is stuck it doesn't prevent the fork child
 * from terminating. */
-if (replica->repl_state == REPLICA_STATE_WAIT_BGSAVE_END &&
+if (replica->repl_data->repl_state == REPLICA_STATE_WAIT_BGSAVE_END &&
 server.rdb_child_type == RDB_CHILD_TYPE_SOCKET) {
-if (replica->repl_last_partial_write != 0 &&
-(server.unixtime - replica->repl_last_partial_write) > server.repl_timeout) {
+if (replica->repl_data->repl_last_partial_write != 0 &&
+(server.unixtime - replica->repl_data->repl_last_partial_write) > server.repl_timeout) {
 serverLog(LL_WARNING, "Disconnecting timedout replica (full sync): %s",
 replicationGetReplicaName(replica));
 freeClient(replica);
@@ -4786,18 +4857,18 @@ int shouldStartChildReplication(int *mincapa_out, int *req_out) {
 listRewind(server.replicas, &li);
 while ((ln = listNext(&li))) {
 client *replica = ln->value;
-if (replica->repl_state == REPLICA_STATE_WAIT_BGSAVE_START) {
+if (replica->repl_data->repl_state == REPLICA_STATE_WAIT_BGSAVE_START) {
 if (first) {
 /* Get first replica's requirements */
-req = replica->replica_req;
-} else if (req != replica->replica_req) {
+req = replica->repl_data->replica_req;
+} else if (req != replica->repl_data->replica_req) {
 /* Skip replicas that don't match */
 continue;
 }
 idle = server.unixtime - replica->last_interaction;
 if (idle > max_idle) max_idle = idle;
 replicas_waiting++;
-mincapa = first ? replica->replica_capa : (mincapa & replica->replica_capa);
+mincapa = first ? replica->repl_data->replica_capa : (mincapa & replica->repl_data->replica_capa);
 first = 0;
 }
 }
@@ -4836,14 +4907,14 @@ static client *findReplica(char *host, int port) {
 listRewind(server.replicas, &li);
 while ((ln = listNext(&li))) {
 replica = ln->value;
-char ip[NET_IP_STR_LEN], *replicaip = replica->replica_addr;
+char ip[NET_IP_STR_LEN], *replicaip = replica->repl_data->replica_addr;

 if (!replicaip) {
 if (connAddrPeerName(replica->conn, ip, sizeof(ip), NULL) == -1) continue;
 replicaip = ip;
 }

-if (!strcasecmp(host, replicaip) && (port == replica->replica_listening_port)) return replica;
+if (!strcasecmp(host, replicaip) && (port == replica->repl_data->replica_listening_port)) return replica;
 }

 return NULL;
@@ -4989,7 +5060,7 @@ void failoverCommand(client *c) {
 }

 /* Check if requested replica is online */
-if (replica->repl_state != REPLICA_STATE_ONLINE) {
+if (replica->repl_data->repl_state != REPLICA_STATE_ONLINE) {
 addReplyError(c, "FAILOVER target replica is not online.");
 return;
 }
@@ -5052,8 +5123,8 @@ void updateFailoverStatus(void) {
 /* Find any replica that has matched our repl_offset */
 while ((ln = listNext(&li))) {
 replica = ln->value;
-if (replica->repl_ack_off == server.primary_repl_offset) {
-char ip[NET_IP_STR_LEN], *replicaaddr = replica->replica_addr;
+if (replica->repl_data->repl_ack_off == server.primary_repl_offset) {
+char ip[NET_IP_STR_LEN], *replicaaddr = replica->repl_data->replica_addr;

 if (!replicaaddr) {
 if (connAddrPeerName(replica->conn, ip, sizeof(ip), NULL) == -1) continue;
@@ -5062,14 +5133,14 @@ void updateFailoverStatus(void) {

 /* We are now failing over to this specific node */
 server.target_replica_host = zstrdup(replicaaddr);
-server.target_replica_port = replica->replica_listening_port;
+server.target_replica_port = replica->repl_data->replica_listening_port;
 break;
 }
 }
 }

 /* We've found a replica that is caught up */
-if (replica && (replica->repl_ack_off == server.primary_repl_offset)) {
+if (replica && (replica->repl_data->repl_ack_off == server.primary_repl_offset)) {
 server.failover_state = FAILOVER_IN_PROGRESS;
 serverLog(LL_NOTICE, "Failover target %s:%d is synced, failing over.", server.target_replica_host,
 server.target_replica_port);
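All of the `replica->repl_data->...` accesses above rely on the component having been allocated when the connection declared itself a replica, or explicitly, as in `replicationCreatePrimaryClientWithHandler()` above and `monitorCommand()` below. A minimal sketch of the lazy constructor, assuming zeroed defaults from `zcalloc()`; the `-1` file-descriptor sentinel is an illustration, not confirmed from the PR:

```c
/* Minimal sketch of the lazy constructor (assumed shape). */
void initClientReplicationData(client *c) {
    if (c->repl_data) return; /* idempotent: safe to call on every path */
    c->repl_data = zcalloc(sizeof(ClientReplicationData));
    c->repl_data->repldbfd = -1; /* assumption: no RDB file attached yet */
}
```
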
@@ -228,6 +228,7 @@ int scriptPrepareForRun(scriptRunCtx *run_ctx,
 /* If we are in MULTI context, flag Lua client as CLIENT_MULTI. */
 if (curr_client->flag.multi) {
 script_client->flag.multi = 1;
+initClientMultiState(script_client);
 }

 run_ctx->start_time = getMonotonicUs();
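`scriptPrepareForRun()` must now materialize the MULTI state before the script client can be treated as in-transaction. Combined with `watched_keys` becoming an embedded `list` inside `multiState` (see the src/server.h hunk below), the constructor could look like this sketch; `listInit()` is an assumed in-place initializer, not a confirmed adlist API:

```c
/* Hedged sketch of initClientMultiState(). */
void initClientMultiState(client *c) {
    if (c->mstate) return;              /* already inside MULTI */
    c->mstate = zcalloc(sizeof(multiState));
    listInit(&c->mstate->watched_keys); /* assumed helper; an explicit
                                         * head/tail/len reset would do */
}
```
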
src/server.c

@@ -4027,21 +4027,20 @@ int processCommand(client *c) {

 uint64_t cmd_flags = getCommandFlags(c);

-int is_read_command =
-(cmd_flags & CMD_READONLY) || (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_READONLY));
-int is_write_command =
-(cmd_flags & CMD_WRITE) || (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_WRITE));
-int is_denyoom_command =
-(cmd_flags & CMD_DENYOOM) || (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_DENYOOM));
-int is_denystale_command =
-!(cmd_flags & CMD_STALE) || (c->cmd->proc == execCommand && (c->mstate.cmd_inv_flags & CMD_STALE));
-int is_denyloading_command =
-!(cmd_flags & CMD_LOADING) || (c->cmd->proc == execCommand && (c->mstate.cmd_inv_flags & CMD_LOADING));
-int is_may_replicate_command =
-(cmd_flags & (CMD_WRITE | CMD_MAY_REPLICATE)) ||
-(c->cmd->proc == execCommand && (c->mstate.cmd_flags & (CMD_WRITE | CMD_MAY_REPLICATE)));
-int is_deny_async_loading_command = (cmd_flags & CMD_NO_ASYNC_LOADING) ||
-(c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_NO_ASYNC_LOADING));
+int is_exec = (c->mstate && c->cmd->proc == execCommand);
+int ms_flags = is_exec ? c->mstate->cmd_flags : 0;
+int ms_inv_flags = is_exec ? c->mstate->cmd_inv_flags : 0;
+int combined_flags = cmd_flags | ms_flags;
+int combined_inv_flags = (~cmd_flags | ms_inv_flags);
+
+int is_read_command = (combined_flags & CMD_READONLY);
+int is_write_command = (combined_flags & CMD_WRITE);
+int is_denyoom_command = (combined_flags & CMD_DENYOOM);
+int is_denystale_command = (combined_inv_flags & CMD_STALE);
+int is_denyloading_command = (combined_inv_flags & CMD_LOADING);
+int is_may_replicate_command = (combined_flags & (CMD_WRITE | CMD_MAY_REPLICATE));
+int is_deny_async_loading_command = (combined_flags & CMD_NO_ASYNC_LOADING);

 const int obey_client = mustObeyClient(c);

 if (authRequired(c)) {
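Note that `is_exec` now checks `c->mstate` before `c->cmd->proc`, since a client that never ran MULTI has a NULL `mstate` under lazy allocation. The rewrite also collapses eight `proc == execCommand` checks into two precomputed masks. For a positive flag F, `(cmd_flags | ms_flags) & F` is non-zero exactly when the old `(cmd_flags & F) || (is_exec && (mstate.cmd_flags & F))` held; for an inverted flag, `(~cmd_flags | ms_inv_flags) & F` matches the old `!(cmd_flags & F) || (is_exec && (mstate.cmd_inv_flags & F))`. A standalone truth-table check (illustrative only; `CMD_STALE` stands in for any inverted flag):

```c
#include <assert.h>
#include <stdint.h>

#define CMD_STALE (1ULL << 0) /* stand-in bit for any inverted flag */

int main(void) {
    for (int has_flag = 0; has_flag <= 1; has_flag++)
        for (int is_exec = 0; is_exec <= 1; is_exec++)
            for (int ms_inv = 0; ms_inv <= 1; ms_inv++) {
                uint64_t cmd_flags = has_flag ? CMD_STALE : 0;
                uint64_t ms_inv_flags = (is_exec && ms_inv) ? CMD_STALE : 0;
                /* old formulation */
                int old_way = !(cmd_flags & CMD_STALE) ||
                              (is_exec && (ms_inv_flags & CMD_STALE));
                /* new formulation */
                int new_way = ((~cmd_flags | ms_inv_flags) & CMD_STALE) != 0;
                assert(old_way == new_way);
            }
    return 0;
}
```
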
@@ -4414,7 +4413,7 @@ int isReadyToShutdown(void) {
 listRewind(server.replicas, &li);
 while ((ln = listNext(&li)) != NULL) {
 client *replica = listNodeValue(ln);
-if (replica->repl_ack_off != server.primary_repl_offset) return 0;
+if (replica->repl_data->repl_ack_off != server.primary_repl_offset) return 0;
 }
 return 1;
 }
@@ -4460,12 +4459,12 @@ int finishShutdown(void) {
 while ((replicas_list_node = listNext(&replicas_iter)) != NULL) {
 client *replica = listNodeValue(replicas_list_node);
 num_replicas++;
-if (replica->repl_ack_off != server.primary_repl_offset) {
+if (replica->repl_data->repl_ack_off != server.primary_repl_offset) {
 num_lagging_replicas++;
-long lag = replica->repl_state == REPLICA_STATE_ONLINE ? time(NULL) - replica->repl_ack_time : 0;
+long lag = replica->repl_data->repl_state == REPLICA_STATE_ONLINE ? time(NULL) - replica->repl_data->repl_ack_time : 0;
 serverLog(LL_NOTICE, "Lagging replica %s reported offset %lld behind master, lag=%ld, state=%s.",
-replicationGetReplicaName(replica), server.primary_repl_offset - replica->repl_ack_off, lag,
-replstateToString(replica->repl_state));
+replicationGetReplicaName(replica), server.primary_repl_offset - replica->repl_data->repl_ack_off, lag,
+replstateToString(replica->repl_data->repl_state));
 }
 }
 if (num_replicas > 0) {
@@ -5946,11 +5945,11 @@ sds genValkeyInfoString(dict *section_dict, int all_sections, int everything) {
 long long replica_read_repl_offset = 1;

 if (server.primary) {
-replica_repl_offset = server.primary->reploff;
-replica_read_repl_offset = server.primary->read_reploff;
+replica_repl_offset = server.primary->repl_data->reploff;
+replica_read_repl_offset = server.primary->repl_data->read_reploff;
 } else if (server.cached_primary) {
-replica_repl_offset = server.cached_primary->reploff;
-replica_read_repl_offset = server.cached_primary->read_reploff;
+replica_repl_offset = server.cached_primary->repl_data->reploff;
+replica_read_repl_offset = server.cached_primary->repl_data->read_reploff;
 }

 info = sdscatprintf(
@@ -6009,7 +6008,7 @@ sds genValkeyInfoString(dict *section_dict, int all_sections, int everything) {
 listRewind(server.replicas, &li);
 while ((ln = listNext(&li))) {
 client *replica = listNodeValue(ln);
-char ip[NET_IP_STR_LEN], *replica_ip = replica->replica_addr;
+char ip[NET_IP_STR_LEN], *replica_ip = replica->repl_data->replica_addr;
 int port;
 long lag = 0;

@@ -6017,18 +6016,18 @@ sds genValkeyInfoString(dict *section_dict, int all_sections, int everything) {
 if (connAddrPeerName(replica->conn, ip, sizeof(ip), &port) == -1) continue;
 replica_ip = ip;
 }
-const char *state = replstateToString(replica->repl_state);
+const char *state = replstateToString(replica->repl_data->repl_state);
 if (state[0] == '\0') continue;
-if (replica->repl_state == REPLICA_STATE_ONLINE) lag = time(NULL) - replica->repl_ack_time;
+if (replica->repl_data->repl_state == REPLICA_STATE_ONLINE) lag = time(NULL) - replica->repl_data->repl_ack_time;

 info = sdscatprintf(info,
 "slave%d:ip=%s,port=%d,state=%s,"
 "offset=%lld,lag=%ld,type=%s\r\n",
-replica_id, replica_ip, replica->replica_listening_port, state,
-replica->repl_ack_off, lag,
-replica->flag.repl_rdb_channel ? "rdb-channel"
-: replica->repl_state == REPLICA_STATE_BG_RDB_LOAD ? "main-channel"
-: "replica");
+replica_id, replica_ip, replica->repl_data->replica_listening_port, state,
+replica->repl_data->repl_ack_off, lag,
+replica->flag.repl_rdb_channel ? "rdb-channel"
+: replica->repl_data->repl_state == REPLICA_STATE_BG_RDB_LOAD ? "main-channel"
+: "replica");
 replica_id++;
 }
 }
@@ -6195,6 +6194,8 @@ void monitorCommand(client *c) {
 /* ignore MONITOR if already replica or in monitor mode */
 if (c->flag.replica) return;

+initClientReplicationData(c);
+
 c->flag.replica = 1;
 c->flag.monitor = 1;
 listAddNodeTail(server.monitors, c);
src/server.h

@@ -704,7 +704,7 @@ typedef enum {
 typedef struct ValkeyModuleType moduleType;

 /* Macro to check if the client is in the middle of module based authentication. */
-#define clientHasModuleAuthInProgress(c) ((c)->module_auth_ctx != NULL)
+#define clientHasModuleAuthInProgress(c) (((c)->module_data && (c)->module_data->module_auth_ctx != NULL))

 /* Objects encoding. Some kind of objects like Strings and Hashes can be
 * internally represented in multiple ways. The 'encoding' field of the object
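Since `ClientModuleData` is now allocated lazily, the macro must NULL-check the component before dereferencing `module_auth_ctx`. The same semantics written as a function, for readability:

```c
/* Function-style equivalent of the updated macro (same short-circuit). */
static inline int clientHasModuleAuthInProgressFn(client *c) {
    return c->module_data != NULL && c->module_data->module_auth_ctx != NULL;
}
```
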
@@ -850,6 +850,7 @@ typedef struct multiState {
 certain flag. */
 size_t argv_len_sums; /* mem used by all commands arguments */
 int alloc_count; /* total number of multiCmd struct memory reserved. */
+list watched_keys;
 } multiState;

 /* This structure holds the blocking operation state for a client.
@@ -1090,93 +1091,52 @@ typedef struct ClientFlags {
 uint64_t reserved : 4; /* Reserved for future use */
 } ClientFlags;

-typedef struct client {
-uint64_t id; /* Client incremental unique ID. */
-union {
-uint64_t raw_flag;
-struct ClientFlags flag;
-};
-connection *conn;
-int resp; /* RESP protocol version. Can be 2 or 3. */
-uint32_t capa; /* Client capabilities: CLIENT_CAPA* macros. */
-serverDb *db; /* Pointer to currently SELECTed DB. */
-robj *name; /* As set by CLIENT SETNAME. */
-robj *lib_name; /* The client library name as set by CLIENT SETINFO. */
-robj *lib_ver; /* The client library version as set by CLIENT SETINFO. */
-sds querybuf; /* Buffer we use to accumulate client queries. */
-size_t qb_pos; /* The position we have read in querybuf. */
-size_t querybuf_peak; /* Recent (100ms or more) peak of querybuf size. */
-int argc; /* Num of arguments of current command. */
-robj **argv; /* Arguments of current command. */
-int argv_len; /* Size of argv array (may be more than argc) */
-int original_argc; /* Num of arguments of original command if arguments were rewritten. */
-robj **original_argv; /* Arguments of original command if arguments were rewritten. */
-size_t argv_len_sum; /* Sum of lengths of objects in argv list. */
-volatile uint8_t io_read_state; /* Indicate the IO read state of the client */
-volatile uint8_t io_write_state; /* Indicate the IO write state of the client */
-uint8_t cur_tid; /* ID of IO thread currently performing IO for this client */
-int nread; /* Number of bytes of the last read. */
-int nwritten; /* Number of bytes of the last write. */
-int read_flags; /* Client Read flags - used to communicate the client read state. */
-uint16_t write_flags; /* Client Write flags - used to communicate the client write state. */
-struct serverCommand *cmd, *lastcmd; /* Last command executed. */
-struct serverCommand *realcmd; /* The original command that was executed by the client,
-Used to update error stats in case the c->cmd was modified
-during the command invocation (like on GEOADD for example). */
-struct serverCommand *io_parsed_cmd; /* The command that was parsed by the IO thread. */
-user *user; /* User associated with this connection. If the
-user is set to NULL the connection can do
-anything (admin). */
-int reqtype; /* Request protocol type: PROTO_REQ_* */
-int multibulklen; /* Number of multi bulk arguments left to read. */
-long bulklen; /* Length of bulk argument in multi bulk request. */
-list *reply; /* List of reply objects to send to the client. */
-listNode *io_last_reply_block; /* Last client reply block when sent to IO thread */
-unsigned long long reply_bytes; /* Tot bytes of objects in reply list. */
-list *deferred_reply_errors; /* Used for module thread safe contexts. */
-size_t sentlen; /* Amount of bytes already sent in the current
-buffer or object being sent. */
-time_t ctime; /* Client creation time. */
-long duration; /* Current command duration. Used for measuring latency of blocking/non-blocking cmds */
-int slot; /* The slot the client is executing against. Set to -1 if no slot is being used */
-dictEntry *cur_script; /* Cached pointer to the dictEntry of the script being executed. */
-time_t last_interaction; /* Time of the last interaction, used for timeout */
-time_t obuf_soft_limit_reached_time;
-int repl_state; /* Replication state if this is a replica. */
-int repl_start_cmd_stream_on_ack; /* Install replica write handler on first ACK. */
-int repldbfd; /* Replication DB file descriptor. */
-off_t repldboff; /* Replication DB file offset. */
-off_t repldbsize; /* Replication DB file size. */
-sds replpreamble; /* Replication DB preamble. */
-long long read_reploff; /* Read replication offset if this is a primary. */
-long long reploff; /* Applied replication offset if this is a primary. */
-long long repl_applied; /* Applied replication data count in querybuf, if this is a replica. */
-long long repl_ack_off; /* Replication ack offset, if this is a replica. */
-long long repl_aof_off; /* Replication AOF fsync ack offset, if this is a replica. */
-long long repl_ack_time; /* Replication ack time, if this is a replica. */
-long long repl_last_partial_write; /* The last time the server did a partial write from the RDB child pipe to this
-replica */
-long long psync_initial_offset; /* FULLRESYNC reply offset other replicas
-copying this replica output buffer
-should use. */
-char replid[CONFIG_RUN_ID_SIZE + 1]; /* primary replication ID (if primary). */
-int replica_listening_port; /* As configured with: REPLCONF listening-port */
-char *replica_addr; /* Optionally given by REPLCONF ip-address */
-int replica_version; /* Version on the form 0xMMmmpp. */
-short replica_capa; /* Replica capabilities: REPLICA_CAPA_* bitwise OR. */
-short replica_req; /* Replica requirements: REPLICA_REQ_* */
-uint64_t associated_rdb_client_id; /* The client id of this replica's rdb connection */
-time_t rdb_client_disconnect_time; /* Time of the first freeClient call on this client. Used for delaying free. */
-multiState mstate; /* MULTI/EXEC state */
-blockingState bstate; /* blocking state */
-long long woff; /* Last write global replication offset. */
-list *watched_keys; /* Keys WATCHED for MULTI/EXEC CAS */
-dict *pubsub_channels; /* channels a client is interested in (SUBSCRIBE) */
-dict *pubsub_patterns; /* patterns a client is interested in (PSUBSCRIBE) */
-dict *pubsubshard_channels; /* shard level channels a client is interested in (SSUBSCRIBE) */
-sds peerid; /* Cached peer ID. */
-sds sockname; /* Cached connection target address. */
-listNode *client_list_node; /* list node in client list */
+typedef struct ClientPubSubData {
+dict *pubsub_channels; /* channels a client is interested in (SUBSCRIBE) */
+dict *pubsub_patterns; /* patterns a client is interested in (PSUBSCRIBE) */
+dict *pubsubshard_channels; /* shard level channels a client is interested in (SSUBSCRIBE) */
+/* If this client is in tracking mode and this field is non zero,
+* invalidation messages for keys fetched by this client will be sent to
+* the specified client ID. */
+uint64_t client_tracking_redirection;
+rax *client_tracking_prefixes; /* A dictionary of prefixes we are already
+subscribed to in BCAST mode, in the
+context of client side caching. */
+} ClientPubSubData;
+
+typedef struct ClientReplicationData {
+int repl_state; /* Replication state if this is a replica. */
+int repl_start_cmd_stream_on_ack; /* Install replica write handler on first ACK. */
+int repldbfd; /* Replication DB file descriptor. */
+off_t repldboff; /* Replication DB file offset. */
+off_t repldbsize; /* Replication DB file size. */
+sds replpreamble; /* Replication DB preamble. */
+long long read_reploff; /* Read replication offset if this is a primary. */
+long long reploff; /* Applied replication offset if this is a primary. */
+long long repl_applied; /* Applied replication data count in querybuf, if this is a replica. */
+long long repl_ack_off; /* Replication ack offset, if this is a replica. */
+long long repl_aof_off; /* Replication AOF fsync ack offset, if this is a replica. */
+long long repl_ack_time; /* Replication ack time, if this is a replica. */
+long long repl_last_partial_write; /* The last time the server did a partial write from the RDB child pipe to this
+replica */
+long long psync_initial_offset; /* FULLRESYNC reply offset other replicas
+copying this replica output buffer
+should use. */
+char replid[CONFIG_RUN_ID_SIZE + 1]; /* primary replication ID (if primary). */
+int replica_listening_port; /* As configured with: REPLCONF listening-port */
+char *replica_addr; /* Optionally given by REPLCONF ip-address */
+int replica_version; /* Version on the form 0xMMmmpp. */
+short replica_capa; /* Replica capabilities: REPLICA_CAPA_* bitwise OR. */
+short replica_req; /* Replica requirements: REPLICA_REQ_* */
+uint64_t associated_rdb_client_id; /* The client id of this replica's rdb connection */
+time_t rdb_client_disconnect_time; /* Time of the first freeClient call on this client. Used for delaying free. */
+listNode *ref_repl_buf_node; /* Referenced node of replication buffer blocks,
+see the definition of replBufBlock. */
+size_t ref_block_pos; /* Access position of referenced buffer block,
+i.e. the next offset to send. */
+} ClientReplicationData;
+
+typedef struct ClientModuleData {
+void *module_blocked_client; /* Pointer to the ValkeyModuleBlockedClient associated with this
+* client. This is set in case of module authentication before the
+* unblocked client is reprocessed to handle reply callbacks. */
@@ -1192,50 +1152,103 @@ typedef struct client {
 void *auth_module; /* The module that owns the callback, which is used
 * to disconnect the client if the module is
 * unloaded for cleanup. Opaque for the Server Core.*/
+} ClientModuleData;

-/* If this client is in tracking mode and this field is non zero,
-* invalidation messages for keys fetched by this client will be sent to
-* the specified client ID. */
-uint64_t client_tracking_redirection;
-rax *client_tracking_prefixes; /* A dictionary of prefixes we are already
-subscribed to in BCAST mode, in the
-context of client side caching. */
+typedef struct client {
+/* Basic client information and connection. */
+uint64_t id; /* Client incremental unique ID. */
+connection *conn;
+/* Input buffer and command parsing fields */
+sds querybuf; /* Buffer we use to accumulate client queries. */
+size_t qb_pos; /* The position we have read in querybuf. */
+robj **argv; /* Arguments of current command. */
+int argc; /* Num of arguments of current command. */
+int argv_len; /* Size of argv array (may be more than argc) */
+size_t argv_len_sum; /* Sum of lengths of objects in argv list. */
+int reqtype; /* Request protocol type: PROTO_REQ_* */
+int multibulklen; /* Number of multi bulk arguments left to read. */
+long bulklen; /* Length of bulk argument in multi bulk request. */
+long long woff; /* Last write global replication offset. */
+/* Command execution state and command information */
+struct serverCommand *cmd; /* Current command. */
+struct serverCommand *lastcmd; /* Last command executed. */
+struct serverCommand *realcmd; /* The original command that was executed by the client */
+struct serverCommand *io_parsed_cmd; /* The command that was parsed by the IO thread. */
+time_t last_interaction; /* Time of the last interaction, used for timeout */
+serverDb *db; /* Pointer to currently SELECTed DB. */
+/* Client state structs. */
+ClientPubSubData *pubsub_data; /* Required for: pubsub commands and tracking. lazily initialized when first needed */
+ClientReplicationData *repl_data; /* Required for Replication operations. lazily initialized when first needed */
+ClientModuleData *module_data; /* Required for Module operations. lazily initialized when first needed */
+multiState *mstate; /* MULTI/EXEC state, lazily initialized when first needed */
+blockingState *bstate; /* Blocking state, lazily initialized when first needed */
+/* Output buffer and reply handling */
+long duration; /* Current command duration. Used for measuring latency of blocking/non-blocking cmds */
+char *buf; /* Output buffer */
+size_t buf_usable_size; /* Usable size of buffer. */
+list *reply; /* List of reply objects to send to the client. */
+listNode *io_last_reply_block; /* Last client reply block when sent to IO thread */
+size_t io_last_bufpos; /* The client's bufpos at the time it was sent to the IO thread */
+unsigned long long reply_bytes; /* Tot bytes of objects in reply list. */
+size_t sentlen; /* Amount of bytes already sent in the current buffer or object being sent. */
+listNode clients_pending_write_node; /* list node in clients_pending_write or in clients_pending_io_write list */
+int bufpos;
+int original_argc; /* Num of arguments of original command if arguments were rewritten. */
+robj **original_argv; /* Arguments of original command if arguments were rewritten. */
+/* Client flags and state indicators */
+union {
+uint64_t raw_flag;
+struct ClientFlags flag;
+};
+uint16_t write_flags; /* Client Write flags - used to communicate the client write state. */
+volatile uint8_t io_read_state; /* Indicate the IO read state of the client */
+volatile uint8_t io_write_state; /* Indicate the IO write state of the client */
+uint8_t resp; /* RESP protocol version. Can be 2 or 3. */
+uint8_t cur_tid; /* ID of IO thread currently performing IO for this client */
+/* In updateClientMemoryUsage() we track the memory usage of
+* each client and add it to the sum of all the clients of a given type,
+* however we need to remember what was the old contribution of each
+* client, and in which category the client was, in order to remove it
+* before adding it the new value. */
+uint8_t last_memory_type;
+uint8_t capa; /* Client capabilities: CLIENT_CAPA* macros. */
+listNode pending_read_list_node; /* IO thread only ?*/
+/* Statistics and metrics */
+unsigned long long net_input_bytes; /* Total network input bytes read from this client. */
+unsigned long long net_input_bytes_curr_cmd; /* Total network input bytes read for the execution of this client's current command. */
+unsigned long long net_output_bytes; /* Total network output bytes sent to this client. */
+unsigned long long commands_processed; /* Total count of commands this client executed. */
+unsigned long long net_output_bytes_curr_cmd; /* Total network output bytes sent to this client, by the current command. */
+size_t buf_peak; /* Peak used size of buffer in last 5 sec interval. */
+int nwritten; /* Number of bytes of the last write. */
+int nread; /* Number of bytes of the last read. */
+int read_flags; /* Client Read flags - used to communicate the client read state. */
+int slot; /* The slot the client is executing against. Set to -1 if no slot is being used */
+listNode *mem_usage_bucket_node;
+clientMemUsageBucket *mem_usage_bucket;
-/* In updateClientMemoryUsage() we track the memory usage of
-* each client and add it to the sum of all the clients of a given type,
-* however we need to remember what was the old contribution of each
-* client, and in which category the client was, in order to remove it
-* before adding it the new value. */
 size_t last_memory_usage;
-int last_memory_type;
-
-listNode *mem_usage_bucket_node;
-clientMemUsageBucket *mem_usage_bucket;
-
-listNode *ref_repl_buf_node; /* Referenced node of replication buffer blocks,
-* see the definition of replBufBlock. */
-size_t ref_block_pos; /* Access position of referenced buffer block,
-* i.e. the next offset to send. */
-
-/* list node in clients_pending_write or in clients_pending_io_write list */
-listNode clients_pending_write_node;
-listNode pending_read_list_node; /* list node in clients_pending_io_read list */
-/* Response buffer */
-size_t buf_peak; /* Peak used size of buffer in last 5 sec interval. */
+/* Fields after this point are less frequently used */
+listNode *client_list_node; /* list node in client list */
 mstime_t buf_peak_last_reset_time; /* keeps the last time the buffer peak value was reset */
-int bufpos;
-size_t io_last_bufpos; /* The client's bufpos at the time it was sent to the IO thread */
-size_t buf_usable_size; /* Usable size of buffer. */
-char *buf;
+size_t querybuf_peak; /* Recent (100ms or more) peak of querybuf size. */
+dictEntry *cur_script; /* Cached pointer to the dictEntry of the script being executed. */
+user *user; /* User associated with this connection */
+time_t obuf_soft_limit_reached_time;
+list *deferred_reply_errors; /* Used for module thread safe contexts. */
+robj *name; /* As set by CLIENT SETNAME. */
+robj *lib_name; /* The client library name as set by CLIENT SETINFO. */
+robj *lib_ver; /* The client library version as set by CLIENT SETINFO. */
+sds peerid; /* Cached peer ID. */
+sds sockname; /* Cached connection target address. */
+time_t ctime; /* Client creation time. */
 #ifdef LOG_REQ_RES
 clientReqResInfo reqres;
 #endif
-unsigned long long net_input_bytes; /* Total network input bytes read from this client. */
-unsigned long long net_input_bytes_curr_cmd; /* Total network input bytes read for the
-* execution of this client's current command. */
-unsigned long long net_output_bytes; /* Total network output bytes sent to this client. */
-unsigned long long commands_processed; /* Total count of commands this client executed. */
-unsigned long long net_output_bytes_curr_cmd; /* Total network output bytes sent to this client, by the current command. */
 } client;

 /* When a command generates a lot of discrete elements to the client output buffer, it is much faster to
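The regrouping above, frequently accessed fields first, lazy components as five pointers, rarely used fields at the end, is what produces the 528-byte basic client reported in the PR description. A quick way to check locally (sizes vary with compiler, architecture, and build flags):

```c
#include <stdio.h>

/* Print struct sizes to verify the layout win on your own build. */
void logClientStructSizes(void) {
    printf("sizeof(client) = %zu\n", sizeof(client));
    printf("sizeof(ClientReplicationData) = %zu\n", sizeof(ClientReplicationData));
    printf("sizeof(ClientPubSubData) = %zu\n", sizeof(ClientPubSubData));
}
```
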
@@ -2919,6 +2932,8 @@ void abortFailover(const char *err);
 const char *getFailoverStateString(void);
 int sendCurrentOffsetToReplica(client *replica);
 void addRdbReplicaToPsyncWait(client *replica);
+void initClientReplicationData(client *c);
+void freeClientReplicationData(client *c);

 /* Generic persistence functions */
 void startLoadingFile(size_t size, char *filename, int rdbflags);
@@ -3257,6 +3272,8 @@ void unmarkClientAsPubSub(client *c);
 int pubsubTotalSubscriptions(void);
 dict *getClientPubSubChannels(client *c);
 dict *getClientPubSubShardChannels(client *c);
+void initClientPubSubData(client *c);
+void freeClientPubSubData(client *c);

 /* Keyspace events notification */
 void notifyKeyspaceEvent(int type, char *event, robj *key, int dbid);
@@ -3499,6 +3516,7 @@ typedef struct luaScript {
 /* Blocked clients API */
 void processUnblockedClients(void);
+void initClientBlockingState(client *c);
+void freeClientBlockingState(client *c);
 void blockClient(client *c, int btype);
 void unblockClient(client *c, int queue_for_reprocessing);
 void unblockClientOnTimeout(client *c);
@@ -37,7 +37,7 @@
 * not blocked right now). If so send a reply, unblock it, and return 1.
 * Otherwise 0 is returned and no operation is performed. */
 int checkBlockedClientTimeout(client *c, mstime_t now) {
-if (c->flag.blocked && c->bstate.timeout != 0 && c->bstate.timeout < now) {
+if (c->flag.blocked && c->bstate->timeout != 0 && c->bstate->timeout < now) {
 /* Handle blocking operation specific timeout. */
 unblockClientOnTimeout(c);
 return 1;
@@ -108,8 +108,8 @@ void decodeTimeoutKey(unsigned char *buf, uint64_t *toptr, client **cptr) {
 * to handle blocked clients timeouts. The client is not added to the list
 * if its timeout is zero (block forever). */
 void addClientToTimeoutTable(client *c) {
-if (c->bstate.timeout == 0) return;
-uint64_t timeout = c->bstate.timeout;
+if (c->bstate->timeout == 0) return;
+uint64_t timeout = c->bstate->timeout;
 unsigned char buf[CLIENT_ST_KEYLEN];
 encodeTimeoutKey(buf, timeout, c);
 if (raxTryInsert(server.clients_timeout_table, buf, sizeof(buf), NULL, NULL)) c->flag.in_to_table = 1;
@@ -120,7 +120,7 @@ void addClientToTimeoutTable(client *c) {
 void removeClientFromTimeoutTable(client *c) {
 if (!c->flag.in_to_table) return;
 c->flag.in_to_table = 0;
-uint64_t timeout = c->bstate.timeout;
+uint64_t timeout = c->bstate->timeout;
 unsigned char buf[CLIENT_ST_KEYLEN];
 encodeTimeoutKey(buf, timeout, c);
 raxRemove(server.clients_timeout_table, buf, sizeof(buf), NULL);
@@ -69,7 +69,7 @@ void disableTracking(client *c) {
 * from all the prefixes it is registered to. */
 if (c->flag.tracking_bcast) {
 raxIterator ri;
-raxStart(&ri, c->client_tracking_prefixes);
+raxStart(&ri, c->pubsub_data->client_tracking_prefixes);
 raxSeek(&ri, "^", NULL, 0);
 while (raxNext(&ri)) {
 void *result;
@@ -87,8 +87,8 @@ void disableTracking(client *c) {
 }
 }
 raxStop(&ri);
-raxFree(c->client_tracking_prefixes);
-c->client_tracking_prefixes = NULL;
+raxFree(c->pubsub_data->client_tracking_prefixes);
+c->pubsub_data->client_tracking_prefixes = NULL;
 }

 /* Clear flags and adjust the count. */
@@ -117,9 +117,9 @@ static int stringCheckPrefix(unsigned char *s1, size_t s1_len, unsigned char *s2
 int checkPrefixCollisionsOrReply(client *c, robj **prefixes, size_t numprefix) {
 for (size_t i = 0; i < numprefix; i++) {
 /* Check input list has no overlap with existing prefixes. */
-if (c->client_tracking_prefixes) {
+if (c->pubsub_data->client_tracking_prefixes) {
 raxIterator ri;
-raxStart(&ri, c->client_tracking_prefixes);
+raxStart(&ri, c->pubsub_data->client_tracking_prefixes);
 raxSeek(&ri, "^", NULL, 0);
 while (raxNext(&ri)) {
 if (stringCheckPrefix(ri.key, ri.key_len, prefixes[i]->ptr, sdslen(prefixes[i]->ptr))) {
@@ -166,8 +166,8 @@ void enableBcastTrackingForPrefix(client *c, char *prefix, size_t plen) {
 bs = result;
 }
 if (raxTryInsert(bs->clients, (unsigned char *)&c, sizeof(c), NULL, NULL)) {
-if (c->client_tracking_prefixes == NULL) c->client_tracking_prefixes = raxNew();
-raxInsert(c->client_tracking_prefixes, (unsigned char *)prefix, plen, NULL, NULL);
+if (c->pubsub_data->client_tracking_prefixes == NULL) c->pubsub_data->client_tracking_prefixes = raxNew();
+raxInsert(c->pubsub_data->client_tracking_prefixes, (unsigned char *)prefix, plen, NULL, NULL);
 }
 }

@@ -186,7 +186,8 @@ void enableTracking(client *c, uint64_t redirect_to, struct ClientFlags options,
 c->flag.tracking_optin = 0;
 c->flag.tracking_optout = 0;
 c->flag.tracking_noloop = 0;
-c->client_tracking_redirection = redirect_to;
+initClientPubSubData(c);
+c->pubsub_data->client_tracking_redirection = redirect_to;

 /* This may be the first client we ever enable. Create the tracking
 * table if it does not exist. */
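Client-side caching is a tracking feature, but its state lives in `ClientPubSubData`, so `enableTracking()` has to create the component before touching `client_tracking_redirection`. A sketch of the constructor; whether the pubsub dicts are created here or deferred to the first SUBSCRIBE is an assumption:

```c
/* Hedged sketch of initClientPubSubData(). */
void initClientPubSubData(client *c) {
    if (c->pubsub_data) return;
    c->pubsub_data = zcalloc(sizeof(ClientPubSubData));
    /* pubsub_channels / pubsub_patterns / pubsubshard_channels and
     * client_tracking_prefixes remain NULL until first actual use,
     * consistent with the NULL checks in the tracking code above. */
}
```
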
@@ -277,8 +278,8 @@ void sendTrackingMessage(client *c, char *keyname, size_t keylen, int proto) {
 c->flag.pushing = 1;

 int using_redirection = 0;
-if (c->client_tracking_redirection) {
-client *redir = lookupClientByID(c->client_tracking_redirection);
+if (c->pubsub_data->client_tracking_redirection) {
+client *redir = lookupClientByID(c->pubsub_data->client_tracking_redirection);
 if (!redir) {
 c->flag.tracking_broken_redir = 1;
 /* We need to signal to the original connection that we
@@ -287,7 +288,7 @@ void sendTrackingMessage(client *c, char *keyname, size_t keylen, int proto) {
 if (c->resp > 2) {
 addReplyPushLen(c, 2);
 addReplyBulkCBuffer(c, "tracking-redir-broken", 21);
-addReplyLongLong(c, c->client_tracking_redirection);
+addReplyLongLong(c, c->pubsub_data->client_tracking_redirection);
 }
 if (!old_flags.pushing) c->flag.pushing = 0;
 return;