Remove master
and slave
from source code (#591)
External facing interfaces are not affected. --------- Signed-off-by: Ping Xie <pingxie@google.com>
This commit is contained in:
parent
bce240eab7
commit
54c9747935
2
.github/workflows/clang-format.yml
vendored
2
.github/workflows/clang-format.yml
vendored
@ -41,7 +41,7 @@ jobs:
|
||||
- name: Check for formatting changes
|
||||
if: ${{ steps.clang-format.outputs.diff }}
|
||||
run: |
|
||||
echo "Code is not formatted correctly. Here is the diff:"
|
||||
echo "ERROR: Code is not formatted correctly. Here is the diff:"
|
||||
# Decode the Base64 diff to display it
|
||||
echo "${{ steps.clang-format.outputs.diff }}" | base64 --decode
|
||||
exit 1
|
||||
|
23
src/aof.c
23
src/aof.c
@ -904,12 +904,12 @@ int aofFsyncInProgress(void) {
|
||||
/* Starts a background task that performs fsync() against the specified
|
||||
* file descriptor (the one of the AOF file) in another thread. */
|
||||
void aof_background_fsync(int fd) {
|
||||
bioCreateFsyncJob(fd, server.master_repl_offset, 1);
|
||||
bioCreateFsyncJob(fd, server.primary_repl_offset, 1);
|
||||
}
|
||||
|
||||
/* Close the fd on the basis of aof_background_fsync. */
|
||||
void aof_background_fsync_and_close(int fd) {
|
||||
bioCreateCloseAofJob(fd, server.master_repl_offset, 1);
|
||||
bioCreateCloseAofJob(fd, server.primary_repl_offset, 1);
|
||||
}
|
||||
|
||||
/* Kills an AOFRW child process if exists */
|
||||
@ -1069,11 +1069,12 @@ void flushAppendOnlyFile(int force) {
|
||||
} else {
|
||||
/* All data is fsync'd already: Update fsynced_reploff_pending just in case.
|
||||
* This is needed to avoid a WAITAOF hang in case a module used RM_Call with the NO_AOF flag,
|
||||
* in which case master_repl_offset will increase but fsynced_reploff_pending won't be updated
|
||||
* in which case primary_repl_offset will increase but fsynced_reploff_pending won't be updated
|
||||
* (because there's no reason, from the AOF POV, to call fsync) and then WAITAOF may wait on
|
||||
* the higher offset (which contains data that was only propagated to replicas, and not to AOF) */
|
||||
if (!sync_in_progress && server.aof_fsync != AOF_FSYNC_NO)
|
||||
atomic_store_explicit(&server.fsynced_reploff_pending, server.master_repl_offset, memory_order_relaxed);
|
||||
atomic_store_explicit(&server.fsynced_reploff_pending, server.primary_repl_offset,
|
||||
memory_order_relaxed);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -1243,7 +1244,7 @@ try_fsync:
|
||||
latencyAddSampleIfNeeded("aof-fsync-always", latency);
|
||||
server.aof_last_incr_fsync_offset = server.aof_last_incr_size;
|
||||
server.aof_last_fsync = server.mstime;
|
||||
atomic_store_explicit(&server.fsynced_reploff_pending, server.master_repl_offset, memory_order_relaxed);
|
||||
atomic_store_explicit(&server.fsynced_reploff_pending, server.primary_repl_offset, memory_order_relaxed);
|
||||
} else if (server.aof_fsync == AOF_FSYNC_EVERYSEC && server.mstime - server.aof_last_fsync >= 1000) {
|
||||
if (!sync_in_progress) {
|
||||
aof_background_fsync(server.aof_fd);
|
||||
@ -1355,7 +1356,7 @@ struct client *createAOFClient(void) {
|
||||
c->id = CLIENT_ID_AOF; /* So modules can identify it's the AOF client. */
|
||||
|
||||
/*
|
||||
* The AOF client should never be blocked (unlike master
|
||||
* The AOF client should never be blocked (unlike primary
|
||||
* replication connection).
|
||||
* This is because blocking the AOF client might cause
|
||||
* deadlock (because potentially no one will unblock it).
|
||||
@ -1365,9 +1366,9 @@ struct client *createAOFClient(void) {
|
||||
*/
|
||||
c->flags = CLIENT_DENY_BLOCKING;
|
||||
|
||||
/* We set the fake client as a slave waiting for the synchronization
|
||||
/* We set the fake client as a replica waiting for the synchronization
|
||||
* so that the server will not try to send replies to this client. */
|
||||
c->replstate = SLAVE_STATE_WAIT_BGSAVE_START;
|
||||
c->repl_state = REPLICA_STATE_WAIT_BGSAVE_START;
|
||||
return c;
|
||||
}
|
||||
|
||||
@ -2320,7 +2321,7 @@ int rewriteAppendOnlyFile(char *filename) {
|
||||
|
||||
if (server.aof_use_rdb_preamble) {
|
||||
int error;
|
||||
if (rdbSaveRio(SLAVE_REQ_NONE, &aof, &error, RDBFLAGS_AOF_PREAMBLE, NULL) == C_ERR) {
|
||||
if (rdbSaveRio(REPLICA_REQ_NONE, &aof, &error, RDBFLAGS_AOF_PREAMBLE, NULL) == C_ERR) {
|
||||
errno = error;
|
||||
goto werr;
|
||||
}
|
||||
@ -2403,12 +2404,12 @@ int rewriteAppendOnlyFileBackground(void) {
|
||||
* between updates to `fsynced_reploff_pending` of the worker thread, belonging
|
||||
* to the previous AOF, and the new one. This concern is specific for a full
|
||||
* sync scenario where we don't wanna risk the ACKed replication offset
|
||||
* jumping backwards or forward when switching to a different master. */
|
||||
* jumping backwards or forward when switching to a different primary. */
|
||||
bioDrainWorker(BIO_AOF_FSYNC);
|
||||
|
||||
/* Set the initial repl_offset, which will be applied to fsynced_reploff
|
||||
* when AOFRW finishes (after possibly being updated by a bio thread) */
|
||||
atomic_store_explicit(&server.fsynced_reploff_pending, server.master_repl_offset, memory_order_relaxed);
|
||||
atomic_store_explicit(&server.fsynced_reploff_pending, server.primary_repl_offset, memory_order_relaxed);
|
||||
server.fsynced_reploff = 0;
|
||||
}
|
||||
|
||||
|
@ -87,7 +87,7 @@ void initClientBlockingState(client *c) {
|
||||
* and will be processed when the client is unblocked. */
|
||||
void blockClient(client *c, int btype) {
|
||||
/* Primary client should never be blocked unless pause or module */
|
||||
serverAssert(!(c->flags & CLIENT_MASTER && btype != BLOCKED_MODULE && btype != BLOCKED_POSTPONE));
|
||||
serverAssert(!(c->flags & CLIENT_PRIMARY && btype != BLOCKED_MODULE && btype != BLOCKED_POSTPONE));
|
||||
|
||||
c->flags |= CLIENT_BLOCKED;
|
||||
c->bstate.btype = btype;
|
||||
@ -265,8 +265,8 @@ void replyToClientsBlockedOnShutdown(void) {
|
||||
|
||||
/* Mass-unblock clients because something changed in the instance that makes
|
||||
* blocking no longer safe. For example clients blocked in list operations
|
||||
* in an instance which turns from master to slave is unsafe, so this function
|
||||
* is called when a master turns into a slave.
|
||||
* in an instance which turns from primary to replica is unsafe, so this function
|
||||
* is called when a primary turns into a replica.
|
||||
*
|
||||
* The semantics is to send an -UNBLOCKED error to the client, disconnecting
|
||||
* it at the same time. */
|
||||
|
@ -900,7 +900,6 @@ void clusterCommand(client *c) {
|
||||
}
|
||||
kvstoreReleaseDictIterator(kvs_di);
|
||||
} else if ((!strcasecmp(c->argv[1]->ptr, "slaves") || !strcasecmp(c->argv[1]->ptr, "replicas")) && c->argc == 3) {
|
||||
/* CLUSTER SLAVES <NODE ID> */
|
||||
/* CLUSTER REPLICAS <NODE ID> */
|
||||
clusterNode *n = clusterLookupNode(c->argv[2]->ptr, sdslen(c->argv[2]->ptr));
|
||||
int j;
|
||||
@ -911,15 +910,15 @@ void clusterCommand(client *c) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (clusterNodeIsSlave(n)) {
|
||||
if (clusterNodeIsReplica(n)) {
|
||||
addReplyError(c, "The specified node is not a master");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Report TLS ports to TLS client, and report non-TLS port to non-TLS client. */
|
||||
addReplyArrayLen(c, clusterNodeNumSlaves(n));
|
||||
for (j = 0; j < clusterNodeNumSlaves(n); j++) {
|
||||
sds ni = clusterGenNodeDescription(c, clusterNodeGetSlave(n, j), shouldReturnTlsInfo());
|
||||
addReplyArrayLen(c, clusterNodeNumReplicas(n));
|
||||
for (j = 0; j < clusterNodeNumReplicas(n); j++) {
|
||||
sds ni = clusterGenNodeDescription(c, clusterNodeGetReplica(n, j), shouldReturnTlsInfo());
|
||||
addReplyBulkCString(c, ni);
|
||||
sdsfree(ni);
|
||||
}
|
||||
@ -1048,8 +1047,8 @@ getNodeByQuery(client *c, struct serverCommand *cmd, robj **argv, int argc, int
|
||||
* can safely serve the request, otherwise we return a TRYAGAIN
|
||||
* error). To do so we set the importing/migrating state and
|
||||
* increment a counter for every missing key. */
|
||||
if (clusterNodeIsMaster(myself) || c->flags & CLIENT_READONLY) {
|
||||
if (n == clusterNodeGetMaster(myself) && getMigratingSlotDest(slot) != NULL) {
|
||||
if (clusterNodeIsPrimary(myself) || c->flags & CLIENT_READONLY) {
|
||||
if (n == clusterNodeGetPrimary(myself) && getMigratingSlotDest(slot) != NULL) {
|
||||
migrating_slot = 1;
|
||||
} else if (getImportingSlotSource(slot) != NULL) {
|
||||
importing_slot = 1;
|
||||
@ -1122,7 +1121,7 @@ getNodeByQuery(client *c, struct serverCommand *cmd, robj **argv, int argc, int
|
||||
/* MIGRATE always works in the context of the local node if the slot
|
||||
* is open (migrating or importing state). We need to be able to freely
|
||||
* move keys among instances in this case. */
|
||||
if ((migrating_slot || importing_slot) && cmd->proc == migrateCommand && clusterNodeIsMaster(myself)) {
|
||||
if ((migrating_slot || importing_slot) && cmd->proc == migrateCommand && clusterNodeIsPrimary(myself)) {
|
||||
return myself;
|
||||
}
|
||||
|
||||
@ -1152,13 +1151,13 @@ getNodeByQuery(client *c, struct serverCommand *cmd, robj **argv, int argc, int
|
||||
}
|
||||
}
|
||||
|
||||
/* Handle the read-only client case reading from a slave: if this
|
||||
* node is a slave and the request is about a hash slot our master
|
||||
/* Handle the read-only client case reading from a replica: if this
|
||||
* node is a replica and the request is about a hash slot our primary
|
||||
* is serving, we can reply without redirection. */
|
||||
int is_write_command =
|
||||
(cmd_flags & CMD_WRITE) || (c->cmd->proc == execCommand && (c->mstate.cmd_flags & CMD_WRITE));
|
||||
if (((c->flags & CLIENT_READONLY) || pubsubshard_included) && !is_write_command && clusterNodeIsSlave(myself) &&
|
||||
clusterNodeGetMaster(myself) == n) {
|
||||
if (((c->flags & CLIENT_READONLY) || pubsubshard_included) && !is_write_command && clusterNodeIsReplica(myself) &&
|
||||
clusterNodeGetPrimary(myself) == n) {
|
||||
return myself;
|
||||
}
|
||||
|
||||
@ -1204,7 +1203,7 @@ void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_co
|
||||
* to detect timeouts, in order to handle the following case:
|
||||
*
|
||||
* 1) A client blocks with BLPOP or similar blocking operation.
|
||||
* 2) The master migrates the hash slot elsewhere or turns into a slave.
|
||||
* 2) The primary migrates the hash slot elsewhere or turns into a replica.
|
||||
* 3) The client may remain blocked forever (or up to the max timeout time)
|
||||
* waiting for a key change that will never happen.
|
||||
*
|
||||
@ -1240,8 +1239,8 @@ int clusterRedirectBlockedClientIfNeeded(client *c) {
|
||||
|
||||
/* if the client is read-only and attempting to access key that our
|
||||
* replica can handle, allow it. */
|
||||
if ((c->flags & CLIENT_READONLY) && !(c->lastcmd->flags & CMD_WRITE) && clusterNodeIsSlave(myself) &&
|
||||
clusterNodeGetMaster(myself) == node) {
|
||||
if ((c->flags & CLIENT_READONLY) && !(c->lastcmd->flags & CMD_WRITE) && clusterNodeIsReplica(myself) &&
|
||||
clusterNodeGetPrimary(myself) == node) {
|
||||
node = myself;
|
||||
}
|
||||
|
||||
@ -1331,9 +1330,9 @@ int isNodeAvailable(clusterNode *node) {
|
||||
}
|
||||
|
||||
void addNodeReplyForClusterSlot(client *c, clusterNode *node, int start_slot, int end_slot) {
|
||||
int i, nested_elements = 3; /* slots (2) + master addr (1) */
|
||||
for (i = 0; i < clusterNodeNumSlaves(node); i++) {
|
||||
if (!isNodeAvailable(clusterNodeGetSlave(node, i))) continue;
|
||||
int i, nested_elements = 3; /* slots (2) + primary addr (1) */
|
||||
for (i = 0; i < clusterNodeNumReplicas(node); i++) {
|
||||
if (!isNodeAvailable(clusterNodeGetReplica(node, i))) continue;
|
||||
nested_elements++;
|
||||
}
|
||||
addReplyArrayLen(c, nested_elements);
|
||||
@ -1342,11 +1341,11 @@ void addNodeReplyForClusterSlot(client *c, clusterNode *node, int start_slot, in
|
||||
addNodeToNodeReply(c, node);
|
||||
|
||||
/* Remaining nodes in reply are replicas for slot range */
|
||||
for (i = 0; i < clusterNodeNumSlaves(node); i++) {
|
||||
for (i = 0; i < clusterNodeNumReplicas(node); i++) {
|
||||
/* This loop is copy/pasted from clusterGenNodeDescription()
|
||||
* with modifications for per-slot node aggregation. */
|
||||
if (!isNodeAvailable(clusterNodeGetSlave(node, i))) continue;
|
||||
addNodeToNodeReply(c, clusterNodeGetSlave(node, i));
|
||||
if (!isNodeAvailable(clusterNodeGetReplica(node, i))) continue;
|
||||
addNodeToNodeReply(c, clusterNodeGetReplica(node, i));
|
||||
nested_elements--;
|
||||
}
|
||||
serverAssert(nested_elements == 3); /* Original 3 elements */
|
||||
@ -1364,7 +1363,7 @@ void clearCachedClusterSlotsResponse(void) {
|
||||
sds generateClusterSlotResponse(void) {
|
||||
client *recording_client = createCachedResponseClient();
|
||||
clusterNode *n = NULL;
|
||||
int num_masters = 0, start = -1;
|
||||
int num_primaries = 0, start = -1;
|
||||
void *slot_replylen = addReplyDeferredLen(recording_client);
|
||||
|
||||
for (int i = 0; i <= CLUSTER_SLOTS; i++) {
|
||||
@ -1380,13 +1379,13 @@ sds generateClusterSlotResponse(void) {
|
||||
* or end of slot. */
|
||||
if (i == CLUSTER_SLOTS || n != getNodeBySlot(i)) {
|
||||
addNodeReplyForClusterSlot(recording_client, n, start, i - 1);
|
||||
num_masters++;
|
||||
num_primaries++;
|
||||
if (i == CLUSTER_SLOTS) break;
|
||||
n = getNodeBySlot(i);
|
||||
start = i;
|
||||
}
|
||||
}
|
||||
setDeferredArrayLen(recording_client, slot_replylen, num_masters);
|
||||
setDeferredArrayLen(recording_client, slot_replylen, num_primaries);
|
||||
sds cluster_slot_response = aggregateClientOutputBuffer(recording_client);
|
||||
deleteCachedResponseClient(recording_client);
|
||||
return cluster_slot_response;
|
||||
@ -1405,8 +1404,8 @@ int verifyCachedClusterSlotsResponse(sds cached_response) {
|
||||
void clusterCommandSlots(client *c) {
|
||||
/* Format: 1) 1) start slot
|
||||
* 2) end slot
|
||||
* 3) 1) master IP
|
||||
* 2) master port
|
||||
* 3) 1) primary IP
|
||||
* 2) primary port
|
||||
* 3) node ID
|
||||
* 4) 1) replica IP
|
||||
* 2) replica port
|
||||
@ -1446,8 +1445,8 @@ void askingCommand(client *c) {
|
||||
}
|
||||
|
||||
/* The READONLY command is used by clients to enter the read-only mode.
|
||||
* In this mode slaves will not redirect clients as long as clients access
|
||||
* with read-only commands to keys that are served by the slave's master. */
|
||||
* In this mode replicas will not redirect clients as long as clients access
|
||||
* with read-only commands to keys that are served by the replica's primary. */
|
||||
void readonlyCommand(client *c) {
|
||||
if (server.cluster_enabled == 0) {
|
||||
addReplyError(c, "This instance has cluster support disabled");
|
||||
|
@ -67,7 +67,7 @@ int clusterCommandSpecial(client *c);
|
||||
const char **clusterCommandExtendedHelp(void);
|
||||
|
||||
int clusterAllowFailoverCmd(client *c);
|
||||
void clusterPromoteSelfToMaster(void);
|
||||
void clusterPromoteSelfToPrimary(void);
|
||||
int clusterManualFailoverTimeLimit(void);
|
||||
|
||||
void clusterCommandSlots(client *c);
|
||||
@ -83,18 +83,18 @@ int getClusterSize(void);
|
||||
int getMyShardSlotCount(void);
|
||||
int handleDebugClusterCommand(client *c);
|
||||
int clusterNodePending(clusterNode *node);
|
||||
int clusterNodeIsMaster(clusterNode *n);
|
||||
int clusterNodeIsPrimary(clusterNode *n);
|
||||
char **getClusterNodesList(size_t *numnodes);
|
||||
char *clusterNodeIp(clusterNode *node);
|
||||
int clusterNodeIsSlave(clusterNode *node);
|
||||
clusterNode *clusterNodeGetMaster(clusterNode *node);
|
||||
int clusterNodeIsReplica(clusterNode *node);
|
||||
clusterNode *clusterNodeGetPrimary(clusterNode *node);
|
||||
char *clusterNodeGetName(clusterNode *node);
|
||||
int clusterNodeTimedOut(clusterNode *node);
|
||||
int clusterNodeIsFailing(clusterNode *node);
|
||||
int clusterNodeIsNoFailover(clusterNode *node);
|
||||
char *clusterNodeGetShardId(clusterNode *node);
|
||||
int clusterNodeNumSlaves(clusterNode *node);
|
||||
clusterNode *clusterNodeGetSlave(clusterNode *node, int slave_idx);
|
||||
int clusterNodeNumReplicas(clusterNode *node);
|
||||
clusterNode *clusterNodeGetReplica(clusterNode *node, int slave_idx);
|
||||
clusterNode *getMigratingSlotDest(int slot);
|
||||
clusterNode *getImportingSlotSource(int slot);
|
||||
clusterNode *getNodeBySlot(int slot);
|
||||
|
1070
src/cluster_legacy.c
1070
src/cluster_legacy.c
File diff suppressed because it is too large
Load Diff
@ -5,13 +5,13 @@
|
||||
|
||||
/* The following defines are amount of time, sometimes expressed as
|
||||
* multiplicators of the node timeout value (when ending with MULT). */
|
||||
#define CLUSTER_FAIL_REPORT_VALIDITY_MULT 2 /* Fail report validity. */
|
||||
#define CLUSTER_FAIL_UNDO_TIME_MULT 2 /* Undo fail if master is back. */
|
||||
#define CLUSTER_MF_TIMEOUT 5000 /* Milliseconds to do a manual failover. */
|
||||
#define CLUSTER_MF_PAUSE_MULT 2 /* Master pause manual failover mult. */
|
||||
#define CLUSTER_SLAVE_MIGRATION_DELAY 5000 /* Delay for slave migration. */
|
||||
#define CLUSTER_FAIL_REPORT_VALIDITY_MULT 2 /* Fail report validity. */
|
||||
#define CLUSTER_FAIL_UNDO_TIME_MULT 2 /* Undo fail if primary is back. */
|
||||
#define CLUSTER_MF_TIMEOUT 5000 /* Milliseconds to do a manual failover. */
|
||||
#define CLUSTER_MF_PAUSE_MULT 2 /* Primary pause manual failover mult. */
|
||||
#define CLUSTER_REPLICA_MIGRATION_DELAY 5000 /* Delay for replica migration. */
|
||||
|
||||
/* Reasons why a slave is not able to failover. */
|
||||
/* Reasons why a replica is not able to failover. */
|
||||
#define CLUSTER_CANT_FAILOVER_NONE 0
|
||||
#define CLUSTER_CANT_FAILOVER_DATA_AGE 1
|
||||
#define CLUSTER_CANT_FAILOVER_WAITING_DELAY 2
|
||||
@ -41,23 +41,23 @@ typedef struct clusterLink {
|
||||
} clusterLink;
|
||||
|
||||
/* Cluster node flags and macros. */
|
||||
#define CLUSTER_NODE_MASTER 1 /* The node is a master */
|
||||
#define CLUSTER_NODE_SLAVE 2 /* The node is a slave */
|
||||
#define CLUSTER_NODE_PRIMARY 1 /* The node is a primary */
|
||||
#define CLUSTER_NODE_REPLICA 2 /* The node is a replica */
|
||||
#define CLUSTER_NODE_PFAIL 4 /* Failure? Need acknowledge */
|
||||
#define CLUSTER_NODE_FAIL 8 /* The node is believed to be malfunctioning */
|
||||
#define CLUSTER_NODE_MYSELF 16 /* This node is myself */
|
||||
#define CLUSTER_NODE_HANDSHAKE 32 /* We have still to exchange the first ping */
|
||||
#define CLUSTER_NODE_NOADDR 64 /* We don't know the address of this node */
|
||||
#define CLUSTER_NODE_MEET 128 /* Send a MEET message to this node */
|
||||
#define CLUSTER_NODE_MIGRATE_TO 256 /* Master eligible for replica migration. */
|
||||
#define CLUSTER_NODE_NOFAILOVER 512 /* Slave will not try to failover. */
|
||||
#define CLUSTER_NODE_MIGRATE_TO 256 /* Primary eligible for replica migration. */
|
||||
#define CLUSTER_NODE_NOFAILOVER 512 /* Replica will not try to failover. */
|
||||
#define CLUSTER_NODE_EXTENSIONS_SUPPORTED 1024 /* This node supports extensions. */
|
||||
#define CLUSTER_NODE_NULL_NAME \
|
||||
"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \
|
||||
"\000\000\000\000\000\000\000\000\000\000\000\000"
|
||||
|
||||
#define nodeIsMaster(n) ((n)->flags & CLUSTER_NODE_MASTER)
|
||||
#define nodeIsSlave(n) ((n)->flags & CLUSTER_NODE_SLAVE)
|
||||
#define nodeIsPrimary(n) ((n)->flags & CLUSTER_NODE_PRIMARY)
|
||||
#define nodeIsReplica(n) ((n)->flags & CLUSTER_NODE_REPLICA)
|
||||
#define nodeInHandshake(n) ((n)->flags & CLUSTER_NODE_HANDSHAKE)
|
||||
#define nodeHasAddr(n) (!((n)->flags & CLUSTER_NODE_NOADDR))
|
||||
#define nodeTimedOut(n) ((n)->flags & CLUSTER_NODE_PFAIL)
|
||||
@ -216,14 +216,14 @@ typedef struct {
|
||||
uint16_t type; /* Message type */
|
||||
uint16_t count; /* Number of gossip sections. */
|
||||
uint64_t currentEpoch; /* The epoch accordingly to the sending node. */
|
||||
uint64_t configEpoch; /* The config epoch if it's a master, or the last
|
||||
epoch advertised by its master if it is a
|
||||
slave. */
|
||||
uint64_t offset; /* Master replication offset if node is a master or
|
||||
processed replication offset if node is a slave. */
|
||||
uint64_t configEpoch; /* The config epoch if it's a primary, or the last
|
||||
epoch advertised by its primary if it is a
|
||||
replica. */
|
||||
uint64_t offset; /* Primary replication offset if node is a primary or
|
||||
processed replication offset if node is a replica. */
|
||||
char sender[CLUSTER_NAMELEN]; /* Name of the sender node */
|
||||
unsigned char myslots[CLUSTER_SLOTS / 8];
|
||||
char slaveof[CLUSTER_NAMELEN];
|
||||
char replicaof[CLUSTER_NAMELEN];
|
||||
char myip[NET_IP_STR_LEN]; /* Sender IP, if not all zeroed. */
|
||||
uint16_t extensions; /* Number of extensions sent along with this packet. */
|
||||
char notused1[30]; /* 30 bytes reserved for future usage. */
|
||||
@ -256,7 +256,7 @@ static_assert(offsetof(clusterMsg, configEpoch) == 24, "unexpected field offset"
|
||||
static_assert(offsetof(clusterMsg, offset) == 32, "unexpected field offset");
|
||||
static_assert(offsetof(clusterMsg, sender) == 40, "unexpected field offset");
|
||||
static_assert(offsetof(clusterMsg, myslots) == 80, "unexpected field offset");
|
||||
static_assert(offsetof(clusterMsg, slaveof) == 2128, "unexpected field offset");
|
||||
static_assert(offsetof(clusterMsg, replicaof) == 2128, "unexpected field offset");
|
||||
static_assert(offsetof(clusterMsg, myip) == 2168, "unexpected field offset");
|
||||
static_assert(offsetof(clusterMsg, extensions) == 2214, "unexpected field offset");
|
||||
static_assert(offsetof(clusterMsg, notused1) == 2216, "unexpected field offset");
|
||||
@ -271,10 +271,10 @@ static_assert(offsetof(clusterMsg, data) == 2256, "unexpected field offset");
|
||||
|
||||
/* Message flags better specify the packet content or are used to
|
||||
* provide some information about the node state. */
|
||||
#define CLUSTERMSG_FLAG0_PAUSED (1 << 0) /* Master paused for manual failover. */
|
||||
#define CLUSTERMSG_FLAG0_PAUSED (1 << 0) /* Primary paused for manual failover. */
|
||||
#define CLUSTERMSG_FLAG0_FORCEACK \
|
||||
(1 << 1) /* Give ACK to AUTH_REQUEST even if \
|
||||
master is up. */
|
||||
primary is up. */
|
||||
#define CLUSTERMSG_FLAG0_EXT_DATA (1 << 2) /* Message contains extension data */
|
||||
|
||||
struct _clusterNode {
|
||||
@ -287,20 +287,20 @@ struct _clusterNode {
|
||||
uint16_t *slot_info_pairs; /* Slots info represented as (start/end) pair (consecutive index). */
|
||||
int slot_info_pairs_count; /* Used number of slots in slot_info_pairs */
|
||||
int numslots; /* Number of slots handled by this node */
|
||||
int numslaves; /* Number of slave nodes, if this is a master */
|
||||
clusterNode **slaves; /* pointers to slave nodes */
|
||||
clusterNode *slaveof; /* pointer to the master node. Note that it
|
||||
may be NULL even if the node is a slave
|
||||
if we don't have the master node in our
|
||||
tables. */
|
||||
int num_replicas; /* Number of replica nodes, if this is a primary */
|
||||
clusterNode **replicas; /* pointers to replica nodes */
|
||||
clusterNode *replicaof; /* pointer to the primary node. Note that it
|
||||
may be NULL even if the node is a replica
|
||||
if we don't have the primary node in our
|
||||
tables. */
|
||||
unsigned long long last_in_ping_gossip; /* The number of the last carried in the ping gossip section */
|
||||
mstime_t ping_sent; /* Unix time we sent latest ping */
|
||||
mstime_t pong_received; /* Unix time we received the pong */
|
||||
mstime_t data_received; /* Unix time we received any data */
|
||||
mstime_t fail_time; /* Unix time when FAIL flag was set */
|
||||
mstime_t voted_time; /* Last time we voted for a slave of this master */
|
||||
mstime_t voted_time; /* Last time we voted for a replica of this primary */
|
||||
mstime_t repl_offset_time; /* Unix time we received offset for this node */
|
||||
mstime_t orphaned_time; /* Starting time of orphaned master condition */
|
||||
mstime_t orphaned_time; /* Starting time of orphaned primary condition */
|
||||
long long repl_offset; /* Last known repl offset for this node. */
|
||||
char ip[NET_IP_STR_LEN]; /* Latest known IP address of this node */
|
||||
sds hostname; /* The known hostname for this node */
|
||||
@ -319,32 +319,32 @@ struct clusterState {
|
||||
clusterNode *myself; /* This node */
|
||||
uint64_t currentEpoch;
|
||||
int state; /* CLUSTER_OK, CLUSTER_FAIL, ... */
|
||||
int size; /* Num of master nodes with at least one slot */
|
||||
int size; /* Num of primary nodes with at least one slot */
|
||||
dict *nodes; /* Hash table of name -> clusterNode structures */
|
||||
dict *shards; /* Hash table of shard_id -> list (of nodes) structures */
|
||||
dict *nodes_black_list; /* Nodes we don't re-add for a few seconds. */
|
||||
clusterNode *migrating_slots_to[CLUSTER_SLOTS];
|
||||
clusterNode *importing_slots_from[CLUSTER_SLOTS];
|
||||
clusterNode *slots[CLUSTER_SLOTS];
|
||||
/* The following fields are used to take the slave state on elections. */
|
||||
/* The following fields are used to take the replica state on elections. */
|
||||
mstime_t failover_auth_time; /* Time of previous or next election. */
|
||||
int failover_auth_count; /* Number of votes received so far. */
|
||||
int failover_auth_sent; /* True if we already asked for votes. */
|
||||
int failover_auth_rank; /* This slave rank for current auth request. */
|
||||
int failover_auth_rank; /* This replica rank for current auth request. */
|
||||
uint64_t failover_auth_epoch; /* Epoch of the current election. */
|
||||
int cant_failover_reason; /* Why a slave is currently not able to
|
||||
int cant_failover_reason; /* Why a replica is currently not able to
|
||||
failover. See the CANT_FAILOVER_* macros. */
|
||||
/* Manual failover state in common. */
|
||||
mstime_t mf_end; /* Manual failover time limit (ms unixtime).
|
||||
It is zero if there is no MF in progress. */
|
||||
/* Manual failover state of master. */
|
||||
clusterNode *mf_slave; /* Slave performing the manual failover. */
|
||||
/* Manual failover state of slave. */
|
||||
long long mf_master_offset; /* Master offset the slave needs to start MF
|
||||
/* Manual failover state of primary. */
|
||||
clusterNode *mf_replica; /* Replica performing the manual failover. */
|
||||
/* Manual failover state of replica. */
|
||||
long long mf_primary_offset; /* Primary offset the replica needs to start MF
|
||||
or -1 if still not received. */
|
||||
int mf_can_start; /* If non-zero signal that the manual failover
|
||||
can start requesting masters vote. */
|
||||
/* The following fields are used by masters to take state on elections. */
|
||||
int mf_can_start; /* If non-zero signal that the manual failover
|
||||
can start requesting primaries' votes. */
|
||||
/* The following fields are used by primaries to take state on elections. */
|
||||
uint64_t lastVoteEpoch; /* Epoch of the last vote granted. */
|
||||
int todo_before_sleep; /* Things to do in clusterBeforeSleep(). */
|
||||
/* Stats */
|
||||
|
62
src/config.c
62
src/config.c
@ -124,7 +124,7 @@ configEnum propagation_error_behavior_enum[] = {{"ignore", PROPAGATION_ERR_BEHAV
|
||||
/* Output buffer limits presets. */
|
||||
clientBufferLimitsConfig clientBufferLimitsDefaults[CLIENT_TYPE_OBUF_COUNT] = {
|
||||
{0, 0, 0}, /* normal */
|
||||
{1024 * 1024 * 256, 1024 * 1024 * 64, 60}, /* slave */
|
||||
{1024 * 1024 * 256, 1024 * 1024 * 64, 60}, /* replica */
|
||||
{1024 * 1024 * 32, 1024 * 1024 * 8, 60} /* pubsub */
|
||||
};
|
||||
|
||||
@ -373,7 +373,7 @@ static int updateClientOutputBufferLimit(sds *args, int arg_len, const char **er
|
||||
* error in a single client class is present. */
|
||||
for (j = 0; j < arg_len; j += 4) {
|
||||
class = getClientTypeByName(args[j]);
|
||||
if (class == -1 || class == CLIENT_TYPE_MASTER) {
|
||||
if (class == -1 || class == CLIENT_TYPE_PRIMARY) {
|
||||
if (err)
|
||||
*err = "Invalid client class specified in "
|
||||
"buffer limit configuration.";
|
||||
@ -574,7 +574,7 @@ void loadServerConfigFromString(char *config) {
|
||||
}
|
||||
|
||||
/* Sanity checks. */
|
||||
if (server.cluster_enabled && server.masterhost) {
|
||||
if (server.cluster_enabled && server.primary_host) {
|
||||
err = "replicaof directive not allowed in cluster mode";
|
||||
goto loaderr;
|
||||
}
|
||||
@ -1422,19 +1422,19 @@ void rewriteConfigDirOption(standardConfig *config, const char *name, struct rew
|
||||
rewriteConfigStringOption(state, name, cwd, NULL);
|
||||
}
|
||||
|
||||
/* Rewrite the slaveof option. */
|
||||
/* Rewrite the replicaof option. */
|
||||
void rewriteConfigReplicaOfOption(standardConfig *config, const char *name, struct rewriteConfigState *state) {
|
||||
UNUSED(config);
|
||||
sds line;
|
||||
|
||||
/* If this is a master, we want all the slaveof config options
|
||||
/* If this is a primary, we want all the replicaof config options
|
||||
* in the file to be removed. Note that if this is a cluster instance
|
||||
* we don't want a slaveof directive inside valkey.conf. */
|
||||
if (server.cluster_enabled || server.masterhost == NULL) {
|
||||
* we don't want a replicaof directive inside valkey.conf. */
|
||||
if (server.cluster_enabled || server.primary_host == NULL) {
|
||||
rewriteConfigMarkAsProcessed(state, name);
|
||||
return;
|
||||
}
|
||||
line = sdscatprintf(sdsempty(), "%s %s %d", name, server.masterhost, server.masterport);
|
||||
line = sdscatprintf(sdsempty(), "%s %s %d", name, server.primary_host, server.primary_port);
|
||||
rewriteConfigRewriteLine(state, name, line, 1);
|
||||
}
|
||||
|
||||
@ -2452,9 +2452,9 @@ static int updateMaxmemory(const char **err) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int updateGoodSlaves(const char **err) {
|
||||
static int updateGoodReplicas(const char **err) {
|
||||
UNUSED(err);
|
||||
refreshGoodSlavesCount();
|
||||
refreshGoodReplicasCount();
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -2788,7 +2788,7 @@ static int setConfigOOMScoreAdjValuesOption(standardConfig *config, sds *argv, i
|
||||
* keep the configuration, which may still be valid for privileged processes.
|
||||
*/
|
||||
|
||||
if (values[CONFIG_OOM_REPLICA] < values[CONFIG_OOM_MASTER] ||
|
||||
if (values[CONFIG_OOM_REPLICA] < values[CONFIG_OOM_PRIMARY] ||
|
||||
values[CONFIG_OOM_BGCHILD] < values[CONFIG_OOM_REPLICA]) {
|
||||
serverLog(LL_WARNING, "The oom-score-adj-values configuration may not work for non-privileged processes! "
|
||||
"Please consult the documentation.");
|
||||
@ -2867,18 +2867,18 @@ static int setConfigReplicaOfOption(standardConfig *config, sds *argv, int argc,
|
||||
return 0;
|
||||
}
|
||||
|
||||
sdsfree(server.masterhost);
|
||||
server.masterhost = NULL;
|
||||
sdsfree(server.primary_host);
|
||||
server.primary_host = NULL;
|
||||
if (!strcasecmp(argv[0], "no") && !strcasecmp(argv[1], "one")) {
|
||||
return 1;
|
||||
}
|
||||
char *ptr;
|
||||
server.masterport = strtol(argv[1], &ptr, 10);
|
||||
if (server.masterport < 0 || server.masterport > 65535 || *ptr != '\0') {
|
||||
server.primary_port = strtol(argv[1], &ptr, 10);
|
||||
if (server.primary_port < 0 || server.primary_port > 65535 || *ptr != '\0') {
|
||||
*err = "Invalid master port";
|
||||
return 0;
|
||||
}
|
||||
server.masterhost = sdsnew(argv[0]);
|
||||
server.primary_host = sdsnew(argv[0]);
|
||||
server.repl_state = REPL_STATE_CONNECT;
|
||||
return 1;
|
||||
}
|
||||
@ -2891,8 +2891,8 @@ static sds getConfigBindOption(standardConfig *config) {
|
||||
static sds getConfigReplicaOfOption(standardConfig *config) {
|
||||
UNUSED(config);
|
||||
char buf[256];
|
||||
if (server.masterhost)
|
||||
snprintf(buf, sizeof(buf), "%s %d", server.masterhost, server.masterport);
|
||||
if (server.primary_host)
|
||||
snprintf(buf, sizeof(buf), "%s %d", server.primary_host, server.primary_port);
|
||||
else
|
||||
buf[0] = '\0';
|
||||
return sdsnew(buf);
|
||||
@ -3030,11 +3030,11 @@ standardConfig static_configs[] = {
|
||||
createBoolConfig("aof-load-truncated", NULL, MODIFIABLE_CONFIG, server.aof_load_truncated, 1, NULL, NULL),
|
||||
createBoolConfig("aof-use-rdb-preamble", NULL, MODIFIABLE_CONFIG, server.aof_use_rdb_preamble, 1, NULL, NULL),
|
||||
createBoolConfig("aof-timestamp-enabled", NULL, MODIFIABLE_CONFIG, server.aof_timestamp_enabled, 0, NULL, NULL),
|
||||
createBoolConfig("cluster-replica-no-failover", "cluster-slave-no-failover", MODIFIABLE_CONFIG, server.cluster_slave_no_failover, 0, NULL, updateClusterFlags), /* Failover by default. */
|
||||
createBoolConfig("replica-lazy-flush", "slave-lazy-flush", MODIFIABLE_CONFIG, server.repl_slave_lazy_flush, 0, NULL, NULL),
|
||||
createBoolConfig("cluster-replica-no-failover", "cluster-slave-no-failover", MODIFIABLE_CONFIG, server.cluster_replica_no_failover, 0, NULL, updateClusterFlags), /* Failover by default. */
|
||||
createBoolConfig("replica-lazy-flush", "slave-lazy-flush", MODIFIABLE_CONFIG, server.repl_replica_lazy_flush, 0, NULL, NULL),
|
||||
createBoolConfig("replica-serve-stale-data", "slave-serve-stale-data", MODIFIABLE_CONFIG, server.repl_serve_stale_data, 1, NULL, NULL),
|
||||
createBoolConfig("replica-read-only", "slave-read-only", DEBUG_CONFIG | MODIFIABLE_CONFIG, server.repl_slave_ro, 1, NULL, NULL),
|
||||
createBoolConfig("replica-ignore-maxmemory", "slave-ignore-maxmemory", MODIFIABLE_CONFIG, server.repl_slave_ignore_maxmemory, 1, NULL, NULL),
|
||||
createBoolConfig("replica-read-only", "slave-read-only", DEBUG_CONFIG | MODIFIABLE_CONFIG, server.repl_replica_ro, 1, NULL, NULL),
|
||||
createBoolConfig("replica-ignore-maxmemory", "slave-ignore-maxmemory", MODIFIABLE_CONFIG, server.repl_replica_ignore_maxmemory, 1, NULL, NULL),
|
||||
createBoolConfig("jemalloc-bg-thread", NULL, MODIFIABLE_CONFIG, server.jemalloc_bg_thread, 1, NULL, updateJemallocBgThread),
|
||||
createBoolConfig("activedefrag", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.active_defrag_enabled, 0, isValidActiveDefrag, NULL),
|
||||
createBoolConfig("syslog-enabled", NULL, IMMUTABLE_CONFIG, server.syslog_enabled, 0, NULL, NULL),
|
||||
@ -3058,8 +3058,8 @@ standardConfig static_configs[] = {
|
||||
createStringConfig("aclfile", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.acl_filename, "", NULL, NULL),
|
||||
createStringConfig("unixsocket", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.unixsocket, NULL, NULL, NULL),
|
||||
createStringConfig("pidfile", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.pidfile, NULL, NULL, NULL),
|
||||
createStringConfig("replica-announce-ip", "slave-announce-ip", MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.slave_announce_ip, NULL, NULL, NULL),
|
||||
createStringConfig("primaryuser", "masteruser", MODIFIABLE_CONFIG | SENSITIVE_CONFIG, EMPTY_STRING_IS_NULL, server.masteruser, NULL, NULL, NULL),
|
||||
createStringConfig("replica-announce-ip", "slave-announce-ip", MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.replica_announce_ip, NULL, NULL, NULL),
|
||||
createStringConfig("primaryuser", "masteruser", MODIFIABLE_CONFIG | SENSITIVE_CONFIG, EMPTY_STRING_IS_NULL, server.primary_user, NULL, NULL, NULL),
|
||||
createStringConfig("cluster-announce-ip", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_ip, NULL, NULL, updateClusterIp),
|
||||
createStringConfig("cluster-config-file", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.cluster_configfile, "nodes.conf", NULL, NULL),
|
||||
createStringConfig("cluster-announce-hostname", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.cluster_announce_hostname, NULL, isValidAnnouncedHostname, updateClusterHostname),
|
||||
@ -3082,7 +3082,7 @@ standardConfig static_configs[] = {
|
||||
createStringConfig("locale-collate", NULL, MODIFIABLE_CONFIG, ALLOW_EMPTY_STRING, server.locale_collate, "", NULL, updateLocaleCollate),
|
||||
|
||||
/* SDS Configs */
|
||||
createSDSConfig("primaryauth", "masterauth", MODIFIABLE_CONFIG | SENSITIVE_CONFIG, EMPTY_STRING_IS_NULL, server.masterauth, NULL, NULL, NULL),
|
||||
createSDSConfig("primaryauth", "masterauth", MODIFIABLE_CONFIG | SENSITIVE_CONFIG, EMPTY_STRING_IS_NULL, server.primary_auth, NULL, NULL, NULL),
|
||||
createSDSConfig("requirepass", NULL, MODIFIABLE_CONFIG | SENSITIVE_CONFIG, EMPTY_STRING_IS_NULL, server.requirepass, NULL, NULL, updateRequirePass),
|
||||
|
||||
/* Enum Configs */
|
||||
@ -3108,7 +3108,7 @@ standardConfig static_configs[] = {
|
||||
createIntConfig("port", NULL, MODIFIABLE_CONFIG, 0, 65535, server.port, 6379, INTEGER_CONFIG, NULL, updatePort), /* TCP port. */
|
||||
createIntConfig("io-threads", NULL, DEBUG_CONFIG | IMMUTABLE_CONFIG, 1, 128, server.io_threads_num, 1, INTEGER_CONFIG, NULL, NULL), /* Single threaded by default */
|
||||
createIntConfig("auto-aof-rewrite-percentage", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.aof_rewrite_perc, 100, INTEGER_CONFIG, NULL, NULL),
|
||||
createIntConfig("cluster-replica-validity-factor", "cluster-slave-validity-factor", MODIFIABLE_CONFIG, 0, INT_MAX, server.cluster_slave_validity_factor, 10, INTEGER_CONFIG, NULL, NULL), /* Slave max data age factor. */
|
||||
createIntConfig("cluster-replica-validity-factor", "cluster-slave-validity-factor", MODIFIABLE_CONFIG, 0, INT_MAX, server.cluster_replica_validity_factor, 10, INTEGER_CONFIG, NULL, NULL), /* replica max data age factor. */
|
||||
createIntConfig("list-max-listpack-size", "list-max-ziplist-size", MODIFIABLE_CONFIG, INT_MIN, INT_MAX, server.list_max_listpack_size, -2, INTEGER_CONFIG, NULL, NULL),
|
||||
createIntConfig("tcp-keepalive", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.tcpkeepalive, 300, INTEGER_CONFIG, NULL, NULL),
|
||||
createIntConfig("cluster-migration-barrier", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.cluster_migration_barrier, 1, INTEGER_CONFIG, NULL, NULL),
|
||||
@ -3118,26 +3118,26 @@ standardConfig static_configs[] = {
|
||||
createIntConfig("active-defrag-threshold-upper", NULL, MODIFIABLE_CONFIG, 0, 1000, server.active_defrag_threshold_upper, 100, INTEGER_CONFIG, NULL, updateDefragConfiguration), /* Default: maximum defrag force at 100% fragmentation */
|
||||
createIntConfig("lfu-log-factor", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.lfu_log_factor, 10, INTEGER_CONFIG, NULL, NULL),
|
||||
createIntConfig("lfu-decay-time", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.lfu_decay_time, 1, INTEGER_CONFIG, NULL, NULL),
|
||||
createIntConfig("replica-priority", "slave-priority", MODIFIABLE_CONFIG, 0, INT_MAX, server.slave_priority, 100, INTEGER_CONFIG, NULL, NULL),
|
||||
createIntConfig("replica-priority", "slave-priority", MODIFIABLE_CONFIG, 0, INT_MAX, server.replica_priority, 100, INTEGER_CONFIG, NULL, NULL),
|
||||
createIntConfig("repl-diskless-sync-delay", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.repl_diskless_sync_delay, 5, INTEGER_CONFIG, NULL, NULL),
|
||||
createIntConfig("maxmemory-samples", NULL, MODIFIABLE_CONFIG, 1, 64, server.maxmemory_samples, 5, INTEGER_CONFIG, NULL, NULL),
|
||||
createIntConfig("maxmemory-eviction-tenacity", NULL, MODIFIABLE_CONFIG, 0, 100, server.maxmemory_eviction_tenacity, 10, INTEGER_CONFIG, NULL, NULL),
|
||||
createIntConfig("timeout", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.maxidletime, 0, INTEGER_CONFIG, NULL, NULL), /* Default client timeout: infinite */
|
||||
createIntConfig("replica-announce-port", "slave-announce-port", MODIFIABLE_CONFIG, 0, 65535, server.slave_announce_port, 0, INTEGER_CONFIG, NULL, NULL),
|
||||
createIntConfig("replica-announce-port", "slave-announce-port", MODIFIABLE_CONFIG, 0, 65535, server.replica_announce_port, 0, INTEGER_CONFIG, NULL, NULL),
|
||||
createIntConfig("tcp-backlog", NULL, IMMUTABLE_CONFIG, 0, INT_MAX, server.tcp_backlog, 511, INTEGER_CONFIG, NULL, NULL), /* TCP listen backlog. */
|
||||
createIntConfig("cluster-port", NULL, IMMUTABLE_CONFIG, 0, 65535, server.cluster_port, 0, INTEGER_CONFIG, NULL, NULL),
|
||||
createIntConfig("cluster-announce-bus-port", NULL, MODIFIABLE_CONFIG, 0, 65535, server.cluster_announce_bus_port, 0, INTEGER_CONFIG, NULL, updateClusterAnnouncedPort), /* Default: Use +10000 offset. */
|
||||
createIntConfig("cluster-announce-port", NULL, MODIFIABLE_CONFIG, 0, 65535, server.cluster_announce_port, 0, INTEGER_CONFIG, NULL, updateClusterAnnouncedPort), /* Use server.port */
|
||||
createIntConfig("cluster-announce-tls-port", NULL, MODIFIABLE_CONFIG, 0, 65535, server.cluster_announce_tls_port, 0, INTEGER_CONFIG, NULL, updateClusterAnnouncedPort), /* Use server.tls_port */
|
||||
createIntConfig("repl-timeout", NULL, MODIFIABLE_CONFIG, 1, INT_MAX, server.repl_timeout, 60, INTEGER_CONFIG, NULL, NULL),
|
||||
createIntConfig("repl-ping-replica-period", "repl-ping-slave-period", MODIFIABLE_CONFIG, 1, INT_MAX, server.repl_ping_slave_period, 10, INTEGER_CONFIG, NULL, NULL),
|
||||
createIntConfig("repl-ping-replica-period", "repl-ping-slave-period", MODIFIABLE_CONFIG, 1, INT_MAX, server.repl_ping_replica_period, 10, INTEGER_CONFIG, NULL, NULL),
|
||||
createIntConfig("list-compress-depth", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, 0, INT_MAX, server.list_compress_depth, 0, INTEGER_CONFIG, NULL, NULL),
|
||||
createIntConfig("rdb-key-save-delay", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, INT_MIN, INT_MAX, server.rdb_key_save_delay, 0, INTEGER_CONFIG, NULL, NULL),
|
||||
createIntConfig("key-load-delay", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, INT_MIN, INT_MAX, server.key_load_delay, 0, INTEGER_CONFIG, NULL, NULL),
|
||||
createIntConfig("active-expire-effort", NULL, MODIFIABLE_CONFIG, 1, 10, server.active_expire_effort, 1, INTEGER_CONFIG, NULL, NULL), /* From 1 to 10. */
|
||||
createIntConfig("hz", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.config_hz, CONFIG_DEFAULT_HZ, INTEGER_CONFIG, NULL, updateHZ),
|
||||
createIntConfig("min-replicas-to-write", "min-slaves-to-write", MODIFIABLE_CONFIG, 0, INT_MAX, server.repl_min_slaves_to_write, 0, INTEGER_CONFIG, NULL, updateGoodSlaves),
|
||||
createIntConfig("min-replicas-max-lag", "min-slaves-max-lag", MODIFIABLE_CONFIG, 0, INT_MAX, server.repl_min_slaves_max_lag, 10, INTEGER_CONFIG, NULL, updateGoodSlaves),
|
||||
createIntConfig("min-replicas-to-write", "min-slaves-to-write", MODIFIABLE_CONFIG, 0, INT_MAX, server.repl_min_replicas_to_write, 0, INTEGER_CONFIG, NULL, updateGoodReplicas),
|
||||
createIntConfig("min-replicas-max-lag", "min-slaves-max-lag", MODIFIABLE_CONFIG, 0, INT_MAX, server.repl_min_replicas_max_lag, 10, INTEGER_CONFIG, NULL, updateGoodReplicas),
|
||||
createIntConfig("watchdog-period", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, 0, INT_MAX, server.watchdog_period, 0, INTEGER_CONFIG, NULL, updateWatchdogPeriod),
|
||||
createIntConfig("shutdown-timeout", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.shutdown_timeout, 10, INTEGER_CONFIG, NULL, NULL),
|
||||
createIntConfig("repl-diskless-sync-max-replicas", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.repl_diskless_sync_max_replicas, 0, INTEGER_CONFIG, NULL, NULL),
|
||||
|
44
src/db.c
44
src/db.c
@ -88,8 +88,8 @@ void updateLFU(robj *val) {
|
||||
*
|
||||
* Note: this function also returns NULL if the key is logically expired but
|
||||
* still existing, in case this is a replica and the LOOKUP_WRITE is not set.
|
||||
* Even if the key expiry is master-driven, we can correctly report a key is
|
||||
* expired on replicas even if the master is lagging expiring our key via DELs
|
||||
* Even if the key expiry is primary-driven, we can correctly report a key is
|
||||
* expired on replicas even if the primary is lagging expiring our key via DELs
|
||||
* in the replication link. */
|
||||
robj *lookupKey(serverDb *db, robj *key, int flags) {
|
||||
dictEntry *de = dbFind(db, key->ptr);
|
||||
@ -97,14 +97,14 @@ robj *lookupKey(serverDb *db, robj *key, int flags) {
|
||||
if (de) {
|
||||
val = dictGetVal(de);
|
||||
/* Forcing deletion of expired keys on a replica makes the replica
|
||||
* inconsistent with the master. We forbid it on readonly replicas, but
|
||||
* inconsistent with the primary. We forbid it on readonly replicas, but
|
||||
* we have to allow it on writable replicas to make write commands
|
||||
* behave consistently.
|
||||
*
|
||||
* It's possible that the WRITE flag is set even during a readonly
|
||||
* command, since the command may trigger events that cause modules to
|
||||
* perform additional writes. */
|
||||
int is_ro_replica = server.masterhost && server.repl_slave_ro;
|
||||
int is_ro_replica = server.primary_host && server.repl_replica_ro;
|
||||
int expire_flags = 0;
|
||||
if (flags & LOOKUP_WRITE && !is_ro_replica) expire_flags |= EXPIRE_FORCE_DELETE_EXPIRED;
|
||||
if (flags & LOOKUP_NOEXPIRE) expire_flags |= EXPIRE_AVOID_DELETE_EXPIRED;
|
||||
@ -361,10 +361,10 @@ robj *dbRandomKey(serverDb *db) {
|
||||
key = dictGetKey(de);
|
||||
keyobj = createStringObject(key, sdslen(key));
|
||||
if (dbFindExpires(db, key)) {
|
||||
if (allvolatile && server.masterhost && --maxtries == 0) {
|
||||
if (allvolatile && server.primary_host && --maxtries == 0) {
|
||||
/* If the DB is composed only of keys with an expire set,
|
||||
* it could happen that all the keys are already logically
|
||||
* expired in the slave, so the function cannot stop because
|
||||
* expired in the replica, so the function cannot stop because
|
||||
* expireIfNeeded() is false, nor it can stop because
|
||||
* dictGetFairRandomKey() returns NULL (there are keys to return).
|
||||
* To prevent the infinite loop we do some tries, but if there
|
||||
@ -540,7 +540,7 @@ long long emptyData(int dbnum, int flags, void(callback)(dict *)) {
|
||||
/* Empty the database structure. */
|
||||
removed = emptyDbStructure(server.db, dbnum, async, callback);
|
||||
|
||||
if (dbnum == -1) flushSlaveKeysWithExpireList();
|
||||
if (dbnum == -1) flushReplicaKeysWithExpireList();
|
||||
|
||||
if (with_functions) {
|
||||
serverAssert(dbnum == -1);
|
||||
@ -673,7 +673,7 @@ void flushAllDataAndResetRDB(int flags) {
|
||||
if (server.saveparamslen > 0) {
|
||||
rdbSaveInfo rsi, *rsiptr;
|
||||
rsiptr = rdbPopulateSaveInfo(&rsi);
|
||||
rdbSave(SLAVE_REQ_NONE, server.rdb_filename, rsiptr, RDBFLAGS_NONE);
|
||||
rdbSave(REPLICA_REQ_NONE, server.rdb_filename, rsiptr, RDBFLAGS_NONE);
|
||||
}
|
||||
|
||||
#if defined(USE_JEMALLOC)
|
||||
@ -1610,7 +1610,7 @@ void swapMainDbWithTempDb(serverDb *tempDb) {
|
||||
}
|
||||
|
||||
trackingInvalidateKeysOnFlush(1);
|
||||
flushSlaveKeysWithExpireList();
|
||||
flushReplicaKeysWithExpireList();
|
||||
}
|
||||
|
||||
/* SWAPDB db1 db2 */
|
||||
@ -1666,8 +1666,8 @@ void setExpire(client *c, serverDb *db, robj *key, long long when) {
|
||||
dictSetSignedIntegerVal(de, when);
|
||||
}
|
||||
|
||||
int writable_slave = server.masterhost && server.repl_slave_ro == 0;
|
||||
if (c && writable_slave && !(c->flags & CLIENT_MASTER)) rememberSlaveKeyWithExpire(db, key);
|
||||
int writable_replica = server.primary_host && server.repl_replica_ro == 0;
|
||||
if (c && writable_replica && !(c->flags & CLIENT_PRIMARY)) rememberReplicaKeyWithExpire(db, key);
|
||||
}
|
||||
|
||||
/* Return the expire time of the specified key, or -1 if no expire
|
||||
@ -1694,7 +1694,7 @@ void deleteExpiredKeyAndPropagate(serverDb *db, robj *keyobj) {
|
||||
}
|
||||
|
||||
/* Propagate an implicit key deletion into replicas and the AOF file.
|
||||
* When a key was deleted in the master by eviction, expiration or a similar
|
||||
* When a key was deleted in the primary by eviction, expiration or a similar
|
||||
* mechanism a DEL/UNLINK operation for this key is sent
|
||||
* to all the replicas and the AOF file if enabled.
|
||||
*
|
||||
@ -1720,7 +1720,7 @@ void propagateDeletion(serverDb *db, robj *key, int lazy) {
|
||||
incrRefCount(argv[0]);
|
||||
incrRefCount(argv[1]);
|
||||
|
||||
/* If the master decided to delete a key we must propagate it to replicas no matter what.
|
||||
/* If the primary decided to delete a key we must propagate it to replicas no matter what.
|
||||
* Even if module executed a command without asking for propagation. */
|
||||
int prev_replication_allowed = server.replication_allowed;
|
||||
server.replication_allowed = 1;
|
||||
@ -1755,13 +1755,13 @@ int keyIsExpired(serverDb *db, robj *key) {
|
||||
*
|
||||
* The behavior of the function depends on the replication role of the
|
||||
* instance, because by default replicas do not delete expired keys. They
|
||||
* wait for DELs from the master for consistency matters. However even
|
||||
* wait for DELs from the primary for consistency matters. However even
|
||||
* replicas will try to have a coherent return value for the function,
|
||||
* so that read commands executed in the replica side will be able to
|
||||
* behave like if the key is expired even if still present (because the
|
||||
* master has yet to propagate the DEL).
|
||||
* primary has yet to propagate the DEL).
|
||||
*
|
||||
* In masters as a side effect of finding a key which is expired, such
|
||||
* In primaries, as a side effect of finding a key which is expired, such
|
||||
* key will be evicted from the database. Also this may trigger the
|
||||
* propagation of a DEL/UNLINK command in AOF / replication stream.
|
||||
*
|
||||
@ -1769,7 +1769,7 @@ int keyIsExpired(serverDb *db, robj *key) {
|
||||
* it still returns KEY_EXPIRED if the key is logically expired. To force deletion
|
||||
* of logically expired keys even on replicas, use the EXPIRE_FORCE_DELETE_EXPIRED
|
||||
* flag. Note though that if the current client is executing
|
||||
* replicated commands from the master, keys are never considered expired.
|
||||
* replicated commands from the primary, keys are never considered expired.
|
||||
*
|
||||
* On the other hand, if you just want expiration check, but need to avoid
|
||||
* the actual key deletion and propagation of the deletion, use the
|
||||
@ -1784,7 +1784,7 @@ keyStatus expireIfNeeded(serverDb *db, robj *key, int flags) {
|
||||
|
||||
/* If we are running in the context of a replica, instead of
|
||||
* evicting the expired key from the database, we return ASAP:
|
||||
* the replica key expiration is controlled by the master that will
|
||||
* the replica key expiration is controlled by the primary that will
|
||||
* send us synthesized DEL operations for expired keys. The
|
||||
* exception is when write operations are performed on writable
|
||||
* replicas.
|
||||
@ -1793,15 +1793,15 @@ keyStatus expireIfNeeded(serverDb *db, robj *key, int flags) {
|
||||
* that is, KEY_VALID if we think the key should still be valid,
|
||||
* KEY_EXPIRED if we think the key is expired but don't want to delete it at this time.
|
||||
*
|
||||
* When replicating commands from the master, keys are never considered
|
||||
* When replicating commands from the primary, keys are never considered
|
||||
* expired. */
|
||||
if (server.masterhost != NULL) {
|
||||
if (server.current_client && (server.current_client->flags & CLIENT_MASTER)) return KEY_VALID;
|
||||
if (server.primary_host != NULL) {
|
||||
if (server.current_client && (server.current_client->flags & CLIENT_PRIMARY)) return KEY_VALID;
|
||||
if (!(flags & EXPIRE_FORCE_DELETE_EXPIRED)) return KEY_EXPIRED;
|
||||
}
|
||||
|
||||
/* In some cases we're explicitly instructed to return an indication of a
|
||||
* missing key without actually deleting it, even on masters. */
|
||||
* missing key without actually deleting it, even on primaries. */
|
||||
if (flags & EXPIRE_AVOID_DELETE_EXPIRED) return KEY_EXPIRED;
|
||||
|
||||
/* If 'expire' action is paused, for whatever reason, then don't expire any key.
|
||||
|
@ -552,7 +552,7 @@ void debugCommand(client *c) {
|
||||
if (save) {
|
||||
rdbSaveInfo rsi, *rsiptr;
|
||||
rsiptr = rdbPopulateSaveInfo(&rsi);
|
||||
if (rdbSave(SLAVE_REQ_NONE, server.rdb_filename, rsiptr, RDBFLAGS_NONE) != C_OK) {
|
||||
if (rdbSave(REPLICA_REQ_NONE, server.rdb_filename, rsiptr, RDBFLAGS_NONE) != C_OK) {
|
||||
addReplyErrorObject(c, shared.err);
|
||||
return;
|
||||
}
|
||||
@ -845,7 +845,7 @@ void debugCommand(client *c) {
|
||||
server.aof_flush_sleep = atoi(c->argv[2]->ptr);
|
||||
addReply(c, shared.ok);
|
||||
} else if (!strcasecmp(c->argv[1]->ptr, "replicate") && c->argc >= 3) {
|
||||
replicationFeedSlaves(-1, c->argv + 2, c->argc - 2);
|
||||
replicationFeedReplicas(-1, c->argv + 2, c->argc - 2);
|
||||
addReply(c, shared.ok);
|
||||
} else if (!strcasecmp(c->argv[1]->ptr, "error") && c->argc == 3) {
|
||||
sds errstr = sdsnewlen("-", 1);
|
||||
|
16
src/evict.c
16
src/evict.c
@ -321,7 +321,7 @@ unsigned long LFUDecrAndReturn(robj *o) {
|
||||
return counter;
|
||||
}
|
||||
|
||||
/* We don't want to count AOF buffers and slaves output buffers as
|
||||
/* We don't want to count AOF buffers and replicas output buffers as
|
||||
* used memory: the eviction should use mostly data size, because
|
||||
* it can cause feedback-loop when we push DELs into them, putting
|
||||
* more and more DELs will make them bigger, if we count them, we
|
||||
@ -377,7 +377,7 @@ size_t freeMemoryGetNotCountedMemory(void) {
|
||||
* 'total' total amount of bytes used.
|
||||
* (Populated both for C_ERR and C_OK)
|
||||
*
|
||||
* 'logical' the amount of memory used minus the slaves/AOF buffers.
|
||||
* 'logical' the amount of memory used minus the replicas/AOF buffers.
|
||||
* (Populated when C_ERR is returned)
|
||||
*
|
||||
* 'tofree' the amount of memory that should be released
|
||||
@ -393,7 +393,7 @@ int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *lev
|
||||
size_t mem_reported, mem_used, mem_tofree;
|
||||
|
||||
/* Check if we are over the memory usage limit. If we are not, no need
|
||||
* to subtract the slaves output buffers. We can just return ASAP. */
|
||||
* to subtract the replicas output buffers. We can just return ASAP. */
|
||||
mem_reported = zmalloc_used_memory();
|
||||
if (total) *total = mem_reported;
|
||||
|
||||
@ -404,7 +404,7 @@ int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *lev
|
||||
}
|
||||
if (mem_reported <= server.maxmemory && !level) return C_OK;
|
||||
|
||||
/* Remove the size of slaves output buffers and AOF buffer from the
|
||||
/* Remove the size of replicas output buffers and AOF buffer from the
|
||||
* count of used memory. */
|
||||
mem_used = mem_reported;
|
||||
size_t overhead = freeMemoryGetNotCountedMemory();
|
||||
@ -477,8 +477,8 @@ static int isSafeToPerformEvictions(void) {
|
||||
if (isInsideYieldingLongCommand() || server.loading) return 0;
|
||||
|
||||
/* By default replicas should ignore maxmemory
|
||||
* and just be masters exact copies. */
|
||||
if (server.masterhost && server.repl_slave_ignore_maxmemory) return 0;
|
||||
* and just be exact copies of their primaries. */
|
||||
if (server.primary_host && server.repl_replica_ignore_maxmemory) return 0;
|
||||
|
||||
/* If 'evict' action is paused, for whatever reason, then return false */
|
||||
if (isPausedActionsWithUpdate(PAUSE_ACTION_EVICT)) return 0;
|
||||
@ -538,7 +538,7 @@ int performEvictions(void) {
|
||||
long long mem_freed = 0; /* Maybe become negative */
|
||||
mstime_t latency, eviction_latency;
|
||||
long long delta;
|
||||
int slaves = listLength(server.slaves);
|
||||
int replicas = listLength(server.replicas);
|
||||
int result = EVICT_FAIL;
|
||||
|
||||
if (getMaxmemoryState(&mem_reported, NULL, &mem_tofree, NULL) == C_OK) {
|
||||
@ -697,7 +697,7 @@ int performEvictions(void) {
|
||||
* start spending so much time here that is impossible to
|
||||
* deliver data to the replicas fast enough, so we force the
|
||||
* transmission here inside the loop. */
|
||||
if (slaves) flushSlavesOutputBuffers();
|
||||
if (replicas) flushReplicasOutputBuffers();
|
||||
|
||||
/* Normally our stop condition is the ability to release
|
||||
* a fixed, pre-computed amount of memory. However when we
|
||||
|
76
src/expire.c
76
src/expire.c
@ -368,21 +368,21 @@ void activeExpireCycle(int type) {
|
||||
}
|
||||
|
||||
/*-----------------------------------------------------------------------------
|
||||
* Expires of keys created in writable slaves
|
||||
* Expires of keys created in writable replicas
|
||||
*
|
||||
* Normally slaves do not process expires: they wait the masters to synthesize
|
||||
* DEL operations in order to retain consistency. However writable slaves are
|
||||
* an exception: if a key is created in the slave and an expire is assigned
|
||||
* to it, we need a way to expire such a key, since the master does not know
|
||||
* Normally replicas do not process expires: they wait for the primaries to synthesize
|
||||
* DEL operations in order to retain consistency. However writable replicas are
|
||||
* an exception: if a key is created in the replica and an expire is assigned
|
||||
* to it, we need a way to expire such a key, since the primary does not know
|
||||
* anything about such a key.
|
||||
*
|
||||
* In order to do so, we track keys created in the slave side with an expire
|
||||
* set, and call the expireSlaveKeys() function from time to time in order to
|
||||
* In order to do so, we track keys created in the replica side with an expire
|
||||
* set, and call the expireReplicaKeys() function from time to time in order to
|
||||
* reclaim the keys if they already expired.
|
||||
*
|
||||
* Note that the use case we are trying to cover here, is a popular one where
|
||||
* slaves are put in writable mode in order to compute slow operations in
|
||||
* the slave side that are mostly useful to actually read data in a more
|
||||
* replicas are put in writable mode in order to compute slow operations in
|
||||
* the replica side that are mostly useful to actually read data in a more
|
||||
* processed way. Think at sets intersections in a tmp key, with an expire so
|
||||
* that it is also used as a cache to avoid intersecting every time.
|
||||
*
|
||||
@ -391,9 +391,9 @@ void activeExpireCycle(int type) {
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
/* The dictionary where we remember key names and database ID of keys we may
|
||||
* want to expire from the slave. Since this function is not often used we
|
||||
* want to expire from the replica. Since this function is not often used we
|
||||
* don't even care to initialize the database at startup. We'll do it once
|
||||
* the feature is used the first time, that is, when rememberSlaveKeyWithExpire()
|
||||
* the feature is used the first time, that is, when rememberReplicaKeyWithExpire()
|
||||
* is called.
|
||||
*
|
||||
* The dictionary has an SDS string representing the key as the hash table
|
||||
@ -402,17 +402,17 @@ void activeExpireCycle(int type) {
|
||||
* with a DB id > 63 are not expired, but a trivial fix is to set the bitmap
|
||||
* to the max 64 bit unsigned value when we know there is a key with a DB
|
||||
* ID greater than 63, and check all the configured DBs in such a case. */
|
||||
dict *slaveKeysWithExpire = NULL;
|
||||
dict *replicaKeysWithExpire = NULL;
|
||||
|
||||
/* Check the set of keys created by the master with an expire set in order to
|
||||
/* Check the set of keys created by the primary with an expire set in order to
|
||||
* check if they should be evicted. */
|
||||
void expireSlaveKeys(void) {
|
||||
if (slaveKeysWithExpire == NULL || dictSize(slaveKeysWithExpire) == 0) return;
|
||||
void expireReplicaKeys(void) {
|
||||
if (replicaKeysWithExpire == NULL || dictSize(replicaKeysWithExpire) == 0) return;
|
||||
|
||||
int cycles = 0, noexpire = 0;
|
||||
mstime_t start = mstime();
|
||||
while (1) {
|
||||
dictEntry *de = dictGetRandomKey(slaveKeysWithExpire);
|
||||
dictEntry *de = dictGetRandomKey(replicaKeysWithExpire);
|
||||
sds keyname = dictGetKey(de);
|
||||
uint64_t dbids = dictGetUnsignedIntegerVal(de);
|
||||
uint64_t new_dbids = 0;
|
||||
@ -447,26 +447,26 @@ void expireSlaveKeys(void) {
|
||||
}
|
||||
|
||||
/* Set the new bitmap as value of the key, in the dictionary
|
||||
* of keys with an expire set directly in the writable slave. Otherwise
|
||||
* of keys with an expire set directly in the writable replica. Otherwise
|
||||
* if the bitmap is zero, we no longer need to keep track of it. */
|
||||
if (new_dbids)
|
||||
dictSetUnsignedIntegerVal(de, new_dbids);
|
||||
else
|
||||
dictDelete(slaveKeysWithExpire, keyname);
|
||||
dictDelete(replicaKeysWithExpire, keyname);
|
||||
|
||||
/* Stop conditions: found 3 keys we can't expire in a row or
|
||||
* time limit was reached. */
|
||||
cycles++;
|
||||
if (noexpire > 3) break;
|
||||
if ((cycles % 64) == 0 && mstime() - start > 1) break;
|
||||
if (dictSize(slaveKeysWithExpire) == 0) break;
|
||||
if (dictSize(replicaKeysWithExpire) == 0) break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Track keys that received an EXPIRE or similar command in the context
|
||||
* of a writable slave. */
|
||||
void rememberSlaveKeyWithExpire(serverDb *db, robj *key) {
|
||||
if (slaveKeysWithExpire == NULL) {
|
||||
* of a writable replica. */
|
||||
void rememberReplicaKeyWithExpire(serverDb *db, robj *key) {
|
||||
if (replicaKeysWithExpire == NULL) {
|
||||
static dictType dt = {
|
||||
dictSdsHash, /* hash function */
|
||||
NULL, /* key dup */
|
||||
@ -475,17 +475,17 @@ void rememberSlaveKeyWithExpire(serverDb *db, robj *key) {
|
||||
NULL, /* val destructor */
|
||||
NULL /* allow to expand */
|
||||
};
|
||||
slaveKeysWithExpire = dictCreate(&dt);
|
||||
replicaKeysWithExpire = dictCreate(&dt);
|
||||
}
|
||||
if (db->id > 63) return;
|
||||
|
||||
dictEntry *de = dictAddOrFind(slaveKeysWithExpire, key->ptr);
|
||||
dictEntry *de = dictAddOrFind(replicaKeysWithExpire, key->ptr);
|
||||
/* If the entry was just created, set it to a copy of the SDS string
|
||||
* representing the key: we don't want to need to take those keys
|
||||
* in sync with the main DB. The keys will be removed by expireSlaveKeys()
|
||||
* in sync with the main DB. The keys will be removed by expireReplicaKeys()
|
||||
* as it scans to find keys to remove. */
|
||||
if (dictGetKey(de) == key->ptr) {
|
||||
dictSetKey(slaveKeysWithExpire, de, sdsdup(key->ptr));
|
||||
dictSetKey(replicaKeysWithExpire, de, sdsdup(key->ptr));
|
||||
dictSetUnsignedIntegerVal(de, 0);
|
||||
}
|
||||
|
||||
@ -495,34 +495,34 @@ void rememberSlaveKeyWithExpire(serverDb *db, robj *key) {
|
||||
}
|
||||
|
||||
/* Return the number of keys we are tracking. */
|
||||
size_t getSlaveKeyWithExpireCount(void) {
|
||||
if (slaveKeysWithExpire == NULL) return 0;
|
||||
return dictSize(slaveKeysWithExpire);
|
||||
size_t getReplicaKeyWithExpireCount(void) {
|
||||
if (replicaKeysWithExpire == NULL) return 0;
|
||||
return dictSize(replicaKeysWithExpire);
|
||||
}
|
||||
|
||||
/* Remove the keys in the hash table. We need to do that when data is
|
||||
* flushed from the server. We may receive new keys from the master with
|
||||
* flushed from the server. We may receive new keys from the primary with
|
||||
* the same name/db and it is no longer a good idea to expire them.
|
||||
*
|
||||
* Note: technically we should handle the case of a single DB being flushed
|
||||
* but it is not worth it since anyway race conditions using the same set
|
||||
* of key names in a writable slave and in its master will lead to
|
||||
* of key names in a writable replica and in its primary will lead to
|
||||
* inconsistencies. This is just a best-effort thing we do. */
|
||||
void flushSlaveKeysWithExpireList(void) {
|
||||
if (slaveKeysWithExpire) {
|
||||
dictRelease(slaveKeysWithExpire);
|
||||
slaveKeysWithExpire = NULL;
|
||||
void flushReplicaKeysWithExpireList(void) {
|
||||
if (replicaKeysWithExpire) {
|
||||
dictRelease(replicaKeysWithExpire);
|
||||
replicaKeysWithExpire = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
int checkAlreadyExpired(long long when) {
|
||||
/* EXPIRE with negative TTL, or EXPIREAT with a timestamp into the past
|
||||
* should never be executed as a DEL when loading the AOF or in the context
|
||||
* of a slave instance.
|
||||
* of a replica instance.
|
||||
*
|
||||
* Instead we add the already expired key to the database with expire time
|
||||
* (possibly in the past) and wait for an explicit DEL from the master. */
|
||||
return (when <= commandTimeSnapshot() && !server.loading && !server.masterhost);
|
||||
* (possibly in the past) and wait for an explicit DEL from the primary. */
|
||||
return (when <= commandTimeSnapshot() && !server.loading && !server.primary_host);
|
||||
}
|
||||
|
||||
#define EXPIRE_NX (1 << 0)
|
||||
|
@ -903,8 +903,8 @@ promote: /* Promote to dense representat
|
||||
* convert from sparse to dense a register requires to be updated.
|
||||
*
|
||||
* Note that this in turn means that PFADD will make sure the command
|
||||
* is propagated to slaves / AOF, so if there is a sparse -> dense
|
||||
* conversion, it will be performed in all the slaves as well. */
|
||||
* is propagated to replicas / AOF, so if there is a sparse -> dense
|
||||
* conversion, it will be performed in all the replicas as well. */
|
||||
int dense_retval = hllDenseSet(hdr->registers, index, count);
|
||||
serverAssert(dense_retval == 1);
|
||||
return dense_retval;
|
||||
|
@ -78,10 +78,10 @@ static int reqresShouldLog(client *c) {
|
||||
if (!server.req_res_logfile) return 0;
|
||||
|
||||
/* Ignore client with streaming non-standard response */
|
||||
if (c->flags & (CLIENT_PUBSUB | CLIENT_MONITOR | CLIENT_SLAVE)) return 0;
|
||||
if (c->flags & (CLIENT_PUBSUB | CLIENT_MONITOR | CLIENT_REPLICA)) return 0;
|
||||
|
||||
/* We only work on masters (didn't implement reqresAppendResponse to work on shared slave buffers) */
|
||||
if (getClientType(c) == CLIENT_TYPE_MASTER) return 0;
|
||||
/* We only work on primaries (didn't implement reqresAppendResponse to work on shared replica buffers) */
|
||||
if (getClientType(c) == CLIENT_TYPE_PRIMARY) return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
112
src/module.c
112
src/module.c
@ -1220,7 +1220,7 @@ ValkeyModuleCommand *moduleCreateCommandProxy(struct ValkeyModule *module,
|
||||
* Starting from Redis OSS 7.0 this flag has been deprecated.
|
||||
* Declaring a command as "random" can be done using
|
||||
* command tips, see https://valkey.io/topics/command-tips.
|
||||
* * **"allow-stale"**: The command is allowed to run on slaves that don't
|
||||
* * **"allow-stale"**: The command is allowed to run on replicas that don't
|
||||
* serve stale data. Don't use if you don't know what
|
||||
* this means.
|
||||
* * **"no-monitor"**: Don't propagate the command on monitor. Use this if
|
||||
@ -3491,7 +3491,7 @@ int VM_ReplyWithLongDouble(ValkeyModuleCtx *ctx, long double ld) {
|
||||
* ## Commands replication API
|
||||
* -------------------------------------------------------------------------- */
|
||||
|
||||
/* Replicate the specified command and arguments to slaves and AOF, as effect
|
||||
/* Replicate the specified command and arguments to replicas and AOF, as effect
|
||||
* of execution of the calling command implementation.
|
||||
*
|
||||
* The replicated commands are always wrapped into the MULTI/EXEC that
|
||||
@ -3565,7 +3565,7 @@ int VM_Replicate(ValkeyModuleCtx *ctx, const char *cmdname, const char *fmt, ...
|
||||
* commands.
|
||||
*
|
||||
* Basically this form of replication is useful when you want to propagate
|
||||
* the command to the slaves and AOF file exactly as it was called, since
|
||||
* the command to the replicas and AOF file exactly as it was called, since
|
||||
* the command can just be re-executed to deterministically re-create the
|
||||
* new state starting from the old one.
|
||||
*
|
||||
@ -3664,12 +3664,12 @@ int modulePopulateReplicationInfoStructure(void *ri, int structver) {
|
||||
ValkeyModuleReplicationInfoV1 *ri1 = ri;
|
||||
memset(ri1, 0, sizeof(*ri1));
|
||||
ri1->version = structver;
|
||||
ri1->master = server.masterhost == NULL;
|
||||
ri1->masterhost = server.masterhost ? server.masterhost : "";
|
||||
ri1->masterport = server.masterport;
|
||||
ri1->primary = server.primary_host == NULL;
|
||||
ri1->primary_host = server.primary_host ? server.primary_host : "";
|
||||
ri1->primary_port = server.primary_port;
|
||||
ri1->replid1 = server.replid;
|
||||
ri1->replid2 = server.replid2;
|
||||
ri1->repl1_offset = server.master_repl_offset;
|
||||
ri1->repl1_offset = server.primary_repl_offset;
|
||||
ri1->repl2_offset = server.second_replid_offset;
|
||||
return VALKEYMODULE_OK;
|
||||
}
|
||||
@ -3794,7 +3794,7 @@ int VM_GetSelectedDb(ValkeyModuleCtx *ctx) {
|
||||
* * VALKEYMODULE_CTX_FLAGS_MULTI: The command is running inside a transaction
|
||||
*
|
||||
* * VALKEYMODULE_CTX_FLAGS_REPLICATED: The command was sent over the replication
|
||||
* link by the MASTER
|
||||
* link by the PRIMARY
|
||||
*
|
||||
* * VALKEYMODULE_CTX_FLAGS_PRIMARY: The instance is a primary
|
||||
*
|
||||
@ -3821,16 +3821,16 @@ int VM_GetSelectedDb(ValkeyModuleCtx *ctx) {
|
||||
*
|
||||
* * VALKEYMODULE_CTX_FLAGS_LOADING: Server is loading RDB/AOF
|
||||
*
|
||||
* * VALKEYMODULE_CTX_FLAGS_REPLICA_IS_STALE: No active link with the master.
|
||||
* * VALKEYMODULE_CTX_FLAGS_REPLICA_IS_STALE: No active link with the primary.
|
||||
*
|
||||
* * VALKEYMODULE_CTX_FLAGS_REPLICA_IS_CONNECTING: The replica is trying to
|
||||
* connect with the master.
|
||||
* connect with the primary.
|
||||
*
|
||||
* * VALKEYMODULE_CTX_FLAGS_REPLICA_IS_TRANSFERRING: Master -> Replica RDB
|
||||
* * VALKEYMODULE_CTX_FLAGS_REPLICA_IS_TRANSFERRING: Primary -> Replica RDB
|
||||
* transfer is in progress.
|
||||
*
|
||||
* * VALKEYMODULE_CTX_FLAGS_REPLICA_IS_ONLINE: The replica has an active link
|
||||
* with its master. This is the
|
||||
* with its primary. This is the
|
||||
* contrary of STALE state.
|
||||
*
|
||||
* * VALKEYMODULE_CTX_FLAGS_ACTIVE_CHILD: There is currently some background
|
||||
@ -3854,8 +3854,8 @@ int VM_GetContextFlags(ValkeyModuleCtx *ctx) {
|
||||
if (ctx) {
|
||||
if (ctx->client) {
|
||||
if (ctx->client->flags & CLIENT_DENY_BLOCKING) flags |= VALKEYMODULE_CTX_FLAGS_DENY_BLOCKING;
|
||||
/* Module command received from MASTER, is replicated. */
|
||||
if (ctx->client->flags & CLIENT_MASTER) flags |= VALKEYMODULE_CTX_FLAGS_REPLICATED;
|
||||
/* Module command received from PRIMARY, is replicated. */
|
||||
if (ctx->client->flags & CLIENT_PRIMARY) flags |= VALKEYMODULE_CTX_FLAGS_REPLICATED;
|
||||
if (ctx->client->resp == 3) {
|
||||
flags |= VALKEYMODULE_CTX_FLAGS_RESP3;
|
||||
}
|
||||
@ -3880,7 +3880,7 @@ int VM_GetContextFlags(ValkeyModuleCtx *ctx) {
|
||||
flags |= VALKEYMODULE_CTX_FLAGS_LOADING;
|
||||
|
||||
/* Maxmemory and eviction policy */
|
||||
if (server.maxmemory > 0 && (!server.masterhost || !server.repl_slave_ignore_maxmemory)) {
|
||||
if (server.maxmemory > 0 && (!server.primary_host || !server.repl_replica_ignore_maxmemory)) {
|
||||
flags |= VALKEYMODULE_CTX_FLAGS_MAXMEMORY;
|
||||
|
||||
if (server.maxmemory_policy != MAXMEMORY_NO_EVICTION) flags |= VALKEYMODULE_CTX_FLAGS_EVICT;
|
||||
@ -3891,11 +3891,11 @@ int VM_GetContextFlags(ValkeyModuleCtx *ctx) {
|
||||
if (server.saveparamslen > 0) flags |= VALKEYMODULE_CTX_FLAGS_RDB;
|
||||
|
||||
/* Replication flags */
|
||||
if (server.masterhost == NULL) {
|
||||
if (server.primary_host == NULL) {
|
||||
flags |= VALKEYMODULE_CTX_FLAGS_PRIMARY;
|
||||
} else {
|
||||
flags |= VALKEYMODULE_CTX_FLAGS_REPLICA;
|
||||
if (server.repl_slave_ro) flags |= VALKEYMODULE_CTX_FLAGS_READONLY;
|
||||
if (server.repl_replica_ro) flags |= VALKEYMODULE_CTX_FLAGS_READONLY;
|
||||
|
||||
/* Replica state flags. */
|
||||
if (server.repl_state == REPL_STATE_CONNECT || server.repl_state == REPL_STATE_CONNECTING) {
|
||||
@ -3927,16 +3927,16 @@ int VM_GetContextFlags(ValkeyModuleCtx *ctx) {
|
||||
|
||||
/* Returns true if a client sent the CLIENT PAUSE command to the server or
|
||||
* if the Cluster does a manual failover, pausing the clients.
|
||||
* This is needed when we have a master with replicas, and want to write,
|
||||
* This is needed when we have a primary with replicas, and want to write,
|
||||
* without adding further data to the replication channel, that the replicas
|
||||
* replication offset, match the one of the master. When this happens, it is
|
||||
* safe to failover the master without data loss.
|
||||
* replication offset, match the one of the primary. When this happens, it is
|
||||
* safe to failover the primary without data loss.
|
||||
*
|
||||
* However modules may generate traffic by calling ValkeyModule_Call() with
|
||||
* the "!" flag, or by calling ValkeyModule_Replicate(), in a context outside
|
||||
* commands execution, for instance in timeout callbacks, threads safe
|
||||
* contexts, and so forth. When modules will generate too much traffic, it
|
||||
* will be hard for the master and replicas offset to match, because there
|
||||
* will be hard for the primary and replicas offset to match, because there
|
||||
* is more data to send in the replication channel.
|
||||
*
|
||||
* So modules may want to try to avoid very heavy background work that has
|
||||
@ -6369,21 +6369,21 @@ ValkeyModuleCallReply *VM_Call(ValkeyModuleCtx *ctx, const char *cmdname, const
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (server.masterhost && server.repl_slave_ro && !obey_client) {
|
||||
if (server.primary_host && server.repl_replica_ro && !obey_client) {
|
||||
errno = ESPIPE;
|
||||
if (error_as_call_replies) {
|
||||
sds msg = sdsdup(shared.roslaveerr->ptr);
|
||||
sds msg = sdsdup(shared.roreplicaerr->ptr);
|
||||
reply = callReplyCreateError(msg, ctx);
|
||||
}
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
if (server.masterhost && server.repl_state != REPL_STATE_CONNECTED && server.repl_serve_stale_data == 0 &&
|
||||
if (server.primary_host && server.repl_state != REPL_STATE_CONNECTED && server.repl_serve_stale_data == 0 &&
|
||||
!(cmd_flags & CMD_STALE)) {
|
||||
errno = ESPIPE;
|
||||
if (error_as_call_replies) {
|
||||
sds msg = sdsdup(shared.masterdownerr->ptr);
|
||||
sds msg = sdsdup(shared.primarydownerr->ptr);
|
||||
reply = callReplyCreateError(msg, ctx);
|
||||
}
|
||||
goto cleanup;
|
||||
@ -6418,7 +6418,7 @@ ValkeyModuleCallReply *VM_Call(ValkeyModuleCtx *ctx, const char *cmdname, const
|
||||
|
||||
/* If this is a Cluster node, we need to make sure the module is not
|
||||
* trying to access non-local keys, with the exception of commands
|
||||
* received from our master. */
|
||||
* received from our primary. */
|
||||
if (server.cluster_enabled && !mustObeyClient(ctx->client)) {
|
||||
int error_code;
|
||||
/* Duplicate relevant flags in the module client. */
|
||||
@ -8293,7 +8293,7 @@ void moduleHandleBlockedClients(void) {
|
||||
|
||||
/* Update the wait offset, we don't know if this blocked client propagated anything,
|
||||
* currently we rather not add any API for that, so we just assume it did. */
|
||||
c->woff = server.master_repl_offset;
|
||||
c->woff = server.primary_repl_offset;
|
||||
|
||||
/* Put the client in the list of clients that need to write
|
||||
* if there are pending replies here. This is needed since
|
||||
@ -8687,7 +8687,7 @@ int VM_AddPostNotificationJob(ValkeyModuleCtx *ctx,
|
||||
ValkeyModulePostNotificationJobFunc callback,
|
||||
void *privdata,
|
||||
void (*free_privdata)(void *)) {
|
||||
if (server.loading || (server.masterhost && server.repl_slave_ro)) {
|
||||
if (server.loading || (server.primary_host && server.repl_replica_ro)) {
|
||||
return VALKEYMODULE_ERR;
|
||||
}
|
||||
ValkeyModulePostExecUnitJob *job = zmalloc(sizeof(*job));
|
||||
@ -8812,7 +8812,7 @@ typedef struct moduleClusterNodeInfo {
|
||||
int flags;
|
||||
char ip[NET_IP_STR_LEN];
|
||||
int port;
|
||||
char master_id[40]; /* Only if flags & VALKEYMODULE_NODE_PRIMARY is true. */
|
||||
char primary_id[40]; /* Only if flags & VALKEYMODULE_NODE_PRIMARY is true. */
|
||||
} mdouleClusterNodeInfo;
|
||||
|
||||
/* We have an array of message types: each bucket is a linked list of
|
||||
@ -8955,11 +8955,11 @@ size_t VM_GetClusterSize(void) {
|
||||
* or the node ID does not exist from the POV of this local node, VALKEYMODULE_ERR
|
||||
* is returned.
|
||||
*
|
||||
* The arguments `ip`, `master_id`, `port` and `flags` can be NULL in case we don't
|
||||
* need to populate back certain info. If an `ip` and `master_id` (only populated
|
||||
* if the instance is a slave) are specified, they point to buffers holding
|
||||
* The arguments `ip`, `primary_id`, `port` and `flags` can be NULL in case we don't
|
||||
* need to populate back certain info. If an `ip` and `primary_id` (only populated
|
||||
* if the instance is a replica) are specified, they point to buffers holding
|
||||
* at least VALKEYMODULE_NODE_ID_LEN bytes. The strings written back as `ip`
|
||||
* and `master_id` are not null terminated.
|
||||
* and `primary_id` are not null terminated.
|
||||
*
|
||||
* The list of flags reported is the following:
|
||||
*
|
||||
@ -8968,9 +8968,9 @@ size_t VM_GetClusterSize(void) {
|
||||
* * VALKEYMODULE_NODE_REPLICA: The node is a replica
|
||||
* * VALKEYMODULE_NODE_PFAIL: We see the node as failing
|
||||
* * VALKEYMODULE_NODE_FAIL: The cluster agrees the node is failing
|
||||
* * VALKEYMODULE_NODE_NOFAILOVER: The slave is configured to never failover
|
||||
* * VALKEYMODULE_NODE_NOFAILOVER: The replica is configured to never failover
|
||||
*/
|
||||
int VM_GetClusterNodeInfo(ValkeyModuleCtx *ctx, const char *id, char *ip, char *master_id, int *port, int *flags) {
|
||||
int VM_GetClusterNodeInfo(ValkeyModuleCtx *ctx, const char *id, char *ip, char *primary_id, int *port, int *flags) {
|
||||
UNUSED(ctx);
|
||||
|
||||
clusterNode *node = clusterLookupNode(id, strlen(id));
|
||||
@ -8980,14 +8980,14 @@ int VM_GetClusterNodeInfo(ValkeyModuleCtx *ctx, const char *id, char *ip, char *
|
||||
|
||||
if (ip) valkey_strlcpy(ip, clusterNodeIp(node), NET_IP_STR_LEN);
|
||||
|
||||
if (master_id) {
|
||||
if (primary_id) {
|
||||
/* If the information is not available, the function will set the
|
||||
* field to zero bytes, so that when the field can't be populated the
|
||||
* function kinda remains predictable. */
|
||||
if (clusterNodeIsSlave(node) && clusterNodeGetMaster(node))
|
||||
memcpy(master_id, clusterNodeGetName(clusterNodeGetMaster(node)), VALKEYMODULE_NODE_ID_LEN);
|
||||
if (clusterNodeIsReplica(node) && clusterNodeGetPrimary(node))
|
||||
memcpy(primary_id, clusterNodeGetName(clusterNodeGetPrimary(node)), VALKEYMODULE_NODE_ID_LEN);
|
||||
else
|
||||
memset(master_id, 0, VALKEYMODULE_NODE_ID_LEN);
|
||||
memset(primary_id, 0, VALKEYMODULE_NODE_ID_LEN);
|
||||
}
|
||||
if (port) *port = getNodeDefaultClientPort(node);
|
||||
|
||||
@ -8996,8 +8996,8 @@ int VM_GetClusterNodeInfo(ValkeyModuleCtx *ctx, const char *id, char *ip, char *
|
||||
if (flags) {
|
||||
*flags = 0;
|
||||
if (clusterNodeIsMyself(node)) *flags |= VALKEYMODULE_NODE_MYSELF;
|
||||
if (clusterNodeIsMaster(node)) *flags |= VALKEYMODULE_NODE_PRIMARY;
|
||||
if (clusterNodeIsSlave(node)) *flags |= VALKEYMODULE_NODE_REPLICA;
|
||||
if (clusterNodeIsPrimary(node)) *flags |= VALKEYMODULE_NODE_PRIMARY;
|
||||
if (clusterNodeIsReplica(node)) *flags |= VALKEYMODULE_NODE_REPLICA;
|
||||
if (clusterNodeTimedOut(node)) *flags |= VALKEYMODULE_NODE_PFAIL;
|
||||
if (clusterNodeIsFailing(node)) *flags |= VALKEYMODULE_NODE_FAIL;
|
||||
if (clusterNodeIsNoFailover(node)) *flags |= VALKEYMODULE_NODE_NOFAILOVER;
|
||||
@ -9016,7 +9016,7 @@ int VM_GetClusterNodeInfo(ValkeyModuleCtx *ctx, const char *id, char *ip, char *
|
||||
*
|
||||
* With the following effects:
|
||||
*
|
||||
* * NO_FAILOVER: prevent Cluster slaves from failing over a dead master.
|
||||
* * NO_FAILOVER: prevent Cluster replicas from failing over a dead primary.
|
||||
* Also disables the replica migration feature.
|
||||
*
|
||||
* * NO_REDIRECTION: Every node will accept any key, without trying to perform
|
||||
@ -10594,7 +10594,7 @@ int moduleUnregisterFilters(ValkeyModule *module) {
|
||||
* 1. Invocation by a client.
|
||||
* 2. Invocation through `ValkeyModule_Call()` by any module.
|
||||
* 3. Invocation through Lua `redis.call()`.
|
||||
* 4. Replication of a command from a master.
|
||||
* 4. Replication of a command from a primary.
|
||||
*
|
||||
* The filter executes in a special filter context, which is different and more
|
||||
* limited than a ValkeyModuleCtx. Because the filter affects any command, it
|
||||
@ -11243,10 +11243,10 @@ static uint64_t moduleEventVersions[] = {
|
||||
*
|
||||
* * ValkeyModuleEvent_ReplicationRoleChanged:
|
||||
*
|
||||
* This event is called when the instance switches from master
|
||||
* This event is called when the instance switches from primary
|
||||
* to replica or the other way around, however the event is
|
||||
* also called when the replica remains a replica but starts to
|
||||
* replicate with a different master.
|
||||
* replicate with a different primary.
|
||||
*
|
||||
* The following sub events are available:
|
||||
*
|
||||
@ -11256,9 +11256,9 @@ static uint64_t moduleEventVersions[] = {
|
||||
* The 'data' field can be casted by the callback to a
|
||||
* `ValkeyModuleReplicationInfo` structure with the following fields:
|
||||
*
|
||||
* int master; // true if master, false if replica
|
||||
* char *masterhost; // master instance hostname for NOW_REPLICA
|
||||
* int masterport; // master instance port for NOW_REPLICA
|
||||
* int primary; // true if primary, false if replica
|
||||
* char *primary_host; // primary instance hostname for NOW_REPLICA
|
||||
* int primary_port; // primary instance port for NOW_REPLICA
|
||||
* char *replid1; // Main replication ID
|
||||
* char *replid2; // Secondary replication ID
|
||||
* uint64_t repl1_offset; // Main replication offset
|
||||
@ -11315,7 +11315,7 @@ static uint64_t moduleEventVersions[] = {
|
||||
*
|
||||
* Called on loading operations: at startup when the server is
|
||||
* started, but also after a first synchronization when the
|
||||
* replica is loading the RDB file from the master.
|
||||
* replica is loading the RDB file from the primary.
|
||||
* The following sub events are available:
|
||||
*
|
||||
* * `VALKEYMODULE_SUBEVENT_LOADING_RDB_START`
|
||||
@ -11344,7 +11344,7 @@ static uint64_t moduleEventVersions[] = {
|
||||
* * ValkeyModuleEvent_ReplicaChange
|
||||
*
|
||||
* This event is called when the instance (that can be both a
|
||||
* master or a replica) get a new online replica, or lose a
|
||||
* primary or a replica) gets a new online replica, or loses a
|
||||
* replica since it gets disconnected.
|
||||
* The following sub events are available:
|
||||
*
|
||||
@ -11372,9 +11372,9 @@ static uint64_t moduleEventVersions[] = {
|
||||
* * ValkeyModuleEvent_PrimaryLinkChange
|
||||
*
|
||||
* This is called for replicas in order to notify when the
|
||||
* replication link becomes functional (up) with our master,
|
||||
* replication link becomes functional (up) with our primary,
|
||||
* or when it goes down. Note that the link is not considered
|
||||
* up when we just connected to the master, but only if the
|
||||
* up when we just connected to the primary, but only if the
|
||||
* replication is happening correctly.
|
||||
* The following sub events are available:
|
||||
*
|
||||
@ -11442,7 +11442,7 @@ static uint64_t moduleEventVersions[] = {
|
||||
*
|
||||
* * ValkeyModuleEvent_ReplAsyncLoad
|
||||
*
|
||||
* Called when repl-diskless-load config is set to swapdb and a replication with a master of same
|
||||
* Called when repl-diskless-load config is set to swapdb and a replication with a primary of same
|
||||
* data set history (matching replication ID) occurs.
|
||||
* In which case the server serves current data set while loading new database in memory from socket.
|
||||
* Modules must have declared they support this mechanism in order to activate it, through
|
||||
@ -11924,7 +11924,7 @@ void moduleRemoveCateogires(ValkeyModule *module) {
|
||||
* The function aborts the server on errors, since to start with missing
|
||||
* modules is not considered sane: clients may rely on the existence of
|
||||
* given commands, loading AOF also may need some modules to exist, and
|
||||
* if this instance is a slave, it must understand commands from master. */
|
||||
* if this instance is a replica, it must understand commands from primary. */
|
||||
void moduleLoadFromQueue(void) {
|
||||
listIter li;
|
||||
listNode *ln;
|
||||
@ -12909,13 +12909,13 @@ int VM_RdbLoad(ValkeyModuleCtx *ctx, ValkeyModuleRdbStream *stream, int flags) {
|
||||
}
|
||||
|
||||
/* Not allowed on replicas. */
|
||||
if (server.masterhost != NULL) {
|
||||
if (server.primary_host != NULL) {
|
||||
errno = ENOTSUP;
|
||||
return VALKEYMODULE_ERR;
|
||||
}
|
||||
|
||||
/* Drop replicas if exist. */
|
||||
disconnectSlaves();
|
||||
disconnectReplicas();
|
||||
freeReplicationBacklog();
|
||||
|
||||
if (server.aof_state != AOF_OFF) stopAppendOnly();
|
||||
|
@ -251,7 +251,7 @@ int HelloRepl1_ValkeyCommand(ValkeyModuleCtx *ctx, ValkeyModuleString **argv, in
|
||||
|
||||
/* Another command to show replication. In this case, we call
|
||||
* ValkeyModule_ReplicateVerbatim() to mean we want just the command to be
|
||||
* propagated to slaves / AOF exactly as it was called by the user.
|
||||
* propagated to replicas / AOF exactly as it was called by the user.
|
||||
*
|
||||
* This command also shows how to work with string objects.
|
||||
* It takes a list, and increments all the elements (that must have
|
||||
|
287
src/networking.c
287
src/networking.c
@ -164,10 +164,10 @@ client *createClient(connection *conn) {
|
||||
c->sentlen = 0;
|
||||
c->flags = 0;
|
||||
c->slot = -1;
|
||||
c->ctime = c->lastinteraction = server.unixtime;
|
||||
c->ctime = c->last_interaction = server.unixtime;
|
||||
c->duration = 0;
|
||||
clientSetDefaultAuth(c);
|
||||
c->replstate = REPL_STATE_NONE;
|
||||
c->repl_state = REPL_STATE_NONE;
|
||||
c->repl_start_cmd_stream_on_ack = 0;
|
||||
c->reploff = 0;
|
||||
c->read_reploff = 0;
|
||||
@ -176,11 +176,11 @@ client *createClient(connection *conn) {
|
||||
c->repl_ack_time = 0;
|
||||
c->repl_aof_off = 0;
|
||||
c->repl_last_partial_write = 0;
|
||||
c->slave_listening_port = 0;
|
||||
c->slave_addr = NULL;
|
||||
c->replica_listening_port = 0;
|
||||
c->replica_addr = NULL;
|
||||
c->replica_version = 0;
|
||||
c->slave_capa = SLAVE_CAPA_NONE;
|
||||
c->slave_req = SLAVE_REQ_NONE;
|
||||
c->replica_capa = REPLICA_CAPA_NONE;
|
||||
c->replica_req = REPLICA_REQ_NONE;
|
||||
c->reply = listCreate();
|
||||
c->deferred_reply_errors = NULL;
|
||||
c->reply_bytes = 0;
|
||||
@ -242,10 +242,11 @@ void installClientWriteHandler(client *c) {
|
||||
* buffers can hold, then we'll really install the handler. */
|
||||
void putClientInPendingWriteQueue(client *c) {
|
||||
/* Schedule the client to write the output buffers to the socket only
|
||||
* if not already done and, for slaves, if the slave can actually receive
|
||||
* if not already done and, for replicas, if the replica can actually receive
|
||||
* writes at this stage. */
|
||||
if (!(c->flags & CLIENT_PENDING_WRITE) &&
|
||||
(c->replstate == REPL_STATE_NONE || (c->replstate == SLAVE_STATE_ONLINE && !c->repl_start_cmd_stream_on_ack))) {
|
||||
(c->repl_state == REPL_STATE_NONE ||
|
||||
(c->repl_state == REPLICA_STATE_ONLINE && !c->repl_start_cmd_stream_on_ack))) {
|
||||
/* Here instead of installing the write handler, we just flag the
|
||||
* client and put it into a list of clients that have something
|
||||
* to write to the socket. This way before re-entering the event
|
||||
@ -265,7 +266,7 @@ void putClientInPendingWriteQueue(client *c) {
|
||||
* loop so that when the socket is writable new data gets written.
|
||||
*
|
||||
* If the client should not receive new data, because it is a fake client
|
||||
* (used to load AOF in memory), a master or because the setup of the write
|
||||
* (used to load AOF in memory), a primary or because the setup of the write
|
||||
* handler failed, the function returns C_ERR.
|
||||
*
|
||||
* The function may return C_OK without actually installing the write
|
||||
@ -273,7 +274,7 @@ void putClientInPendingWriteQueue(client *c) {
|
||||
*
|
||||
* 1) The event handler should already be installed since the output buffer
|
||||
* already contains something.
|
||||
* 2) The client is a slave but not yet online, so we want to just accumulate
|
||||
* 2) The client is a replica but not yet online, so we want to just accumulate
|
||||
* writes in the buffer but not actually sending them yet.
|
||||
*
|
||||
* Typically gets called every time a reply is built, before adding more
|
||||
@ -291,9 +292,9 @@ int prepareClientToWrite(client *c) {
|
||||
* CLIENT_PUSHING handling: disables the reply silencing flags. */
|
||||
if ((c->flags & (CLIENT_REPLY_OFF | CLIENT_REPLY_SKIP)) && !(c->flags & CLIENT_PUSHING)) return C_ERR;
|
||||
|
||||
/* Masters don't receive replies, unless CLIENT_MASTER_FORCE_REPLY flag
|
||||
/* Primaries don't receive replies, unless CLIENT_PRIMARY_FORCE_REPLY flag
|
||||
* is set. */
|
||||
if ((c->flags & CLIENT_MASTER) && !(c->flags & CLIENT_MASTER_FORCE_REPLY)) return C_ERR;
|
||||
if ((c->flags & CLIENT_PRIMARY) && !(c->flags & CLIENT_PRIMARY_FORCE_REPLY)) return C_ERR;
|
||||
|
||||
if (!c->conn) return C_ERR; /* Fake client for AOF loading. */
|
||||
|
||||
@ -428,7 +429,7 @@ void _addReplyToBufferOrList(client *c, const char *s, size_t len) {
|
||||
* replication link that caused a reply to be generated we'll simply disconnect it.
|
||||
* Note this is the simplest way to check a command added a response. Replication links are used to write data but
|
||||
* not for responses, so we should normally never get here on a replica client. */
|
||||
if (getClientType(c) == CLIENT_TYPE_SLAVE) {
|
||||
if (getClientType(c) == CLIENT_TYPE_REPLICA) {
|
||||
sds cmdname = c->lastcmd ? c->lastcmd->fullname : NULL;
|
||||
logInvalidUseAndFreeClientAsync(c, "Replica generated a reply to command '%s'",
|
||||
cmdname ? cmdname : "<unknown>");
|
||||
@ -563,24 +564,24 @@ void afterErrorReply(client *c, const char *s, size_t len, int flags) {
|
||||
c->realcmd->failed_calls++;
|
||||
}
|
||||
|
||||
/* Sometimes it could be normal that a slave replies to a master with
|
||||
/* Sometimes it could be normal that a replica replies to a primary with
|
||||
* an error and this function gets called. Actually the error will never
|
||||
* be sent because addReply*() against master clients has no effect...
|
||||
* be sent because addReply*() against primary clients has no effect...
|
||||
* A notable example is:
|
||||
*
|
||||
* EVAL 'redis.call("incr",KEYS[1]); redis.call("nonexisting")' 1 x
|
||||
*
|
||||
* Where the master must propagate the first change even if the second
|
||||
* Where the primary must propagate the first change even if the second
|
||||
* will produce an error. However it is useful to log such events since
|
||||
* they are rare and may hint at errors in a script or a bug in the server. */
|
||||
int ctype = getClientType(c);
|
||||
if (ctype == CLIENT_TYPE_MASTER || ctype == CLIENT_TYPE_SLAVE || c->id == CLIENT_ID_AOF) {
|
||||
if (ctype == CLIENT_TYPE_PRIMARY || ctype == CLIENT_TYPE_REPLICA || c->id == CLIENT_ID_AOF) {
|
||||
char *to, *from;
|
||||
|
||||
if (c->id == CLIENT_ID_AOF) {
|
||||
to = "AOF-loading-client";
|
||||
from = "server";
|
||||
} else if (ctype == CLIENT_TYPE_MASTER) {
|
||||
} else if (ctype == CLIENT_TYPE_PRIMARY) {
|
||||
to = "master";
|
||||
from = "replica";
|
||||
} else {
|
||||
@ -595,16 +596,16 @@ void afterErrorReply(client *c, const char *s, size_t len, int flags) {
|
||||
"to its %s: '%.*s' after processing the command "
|
||||
"'%s'",
|
||||
from, to, (int)len, s, cmdname ? cmdname : "<unknown>");
|
||||
if (ctype == CLIENT_TYPE_MASTER && server.repl_backlog && server.repl_backlog->histlen > 0) {
|
||||
if (ctype == CLIENT_TYPE_PRIMARY && server.repl_backlog && server.repl_backlog->histlen > 0) {
|
||||
showLatestBacklog();
|
||||
}
|
||||
server.stat_unexpected_error_replies++;
|
||||
|
||||
/* Based off the propagation error behavior, check if we need to panic here. There
|
||||
* are currently two checked cases:
|
||||
* * If this command was from our master and we are not a writable replica.
|
||||
* * If this command was from our primary and we are not a writable replica.
|
||||
* * We are reading from an AOF file. */
|
||||
int panic_in_replicas = (ctype == CLIENT_TYPE_MASTER && server.repl_slave_ro) &&
|
||||
int panic_in_replicas = (ctype == CLIENT_TYPE_PRIMARY && server.repl_replica_ro) &&
|
||||
(server.propagation_error_behavior == PROPAGATION_ERR_BEHAVIOR_PANIC ||
|
||||
server.propagation_error_behavior == PROPAGATION_ERR_BEHAVIOR_PANIC_ON_REPLICAS);
|
||||
int panic_in_aof =
|
||||
@ -766,7 +767,7 @@ void *addReplyDeferredLen(client *c) {
|
||||
* replication link that caused a reply to be generated we'll simply disconnect it.
|
||||
* Note this is the simplest way to check a command added a response. Replication links are used to write data but
|
||||
* not for responses, so we should normally never get here on a replica client. */
|
||||
if (getClientType(c) == CLIENT_TYPE_SLAVE) {
|
||||
if (getClientType(c) == CLIENT_TYPE_REPLICA) {
|
||||
sds cmdname = c->lastcmd ? c->lastcmd->fullname : NULL;
|
||||
logInvalidUseAndFreeClientAsync(c, "Replica generated a reply to command '%s'",
|
||||
cmdname ? cmdname : "<unknown>");
|
||||
@ -1257,7 +1258,7 @@ void copyReplicaOutputBuffer(client *dst, client *src) {
|
||||
/* Return true if the specified client has pending reply buffers to write to
|
||||
* the socket. */
|
||||
int clientHasPendingReplies(client *c) {
|
||||
if (getClientType(c) == CLIENT_TYPE_SLAVE) {
|
||||
if (getClientType(c) == CLIENT_TYPE_REPLICA) {
|
||||
/* Replicas use global shared replication buffer instead of
|
||||
* private output buffer. */
|
||||
serverAssert(c->bufpos == 0 && listLength(c->reply) == 0);
|
||||
@ -1415,29 +1416,29 @@ void freeClientArgv(client *c) {
|
||||
c->argv = NULL;
|
||||
}
|
||||
|
||||
/* Close all the slaves connections. This is useful in chained replication
|
||||
* when we resync with our own master and want to force all our slaves to
|
||||
/* Close all the replicas connections. This is useful in chained replication
|
||||
* when we resync with our own primary and want to force all our replicas to
|
||||
* resync with us as well. */
|
||||
void disconnectSlaves(void) {
|
||||
void disconnectReplicas(void) {
|
||||
listIter li;
|
||||
listNode *ln;
|
||||
listRewind(server.slaves, &li);
|
||||
listRewind(server.replicas, &li);
|
||||
while ((ln = listNext(&li))) {
|
||||
freeClient((client *)ln->value);
|
||||
}
|
||||
}
|
||||
|
||||
/* Check if there is any other slave waiting dumping RDB finished expect me.
|
||||
/* Check if there is any other replica waiting dumping RDB finished except me.
|
||||
* This function is useful to judge current dumping RDB can be used for full
|
||||
* synchronization or not. */
|
||||
int anyOtherSlaveWaitRdb(client *except_me) {
|
||||
int anyOtherReplicaWaitRdb(client *except_me) {
|
||||
listIter li;
|
||||
listNode *ln;
|
||||
|
||||
listRewind(server.slaves, &li);
|
||||
listRewind(server.replicas, &li);
|
||||
while ((ln = listNext(&li))) {
|
||||
client *slave = ln->value;
|
||||
if (slave != except_me && slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END) {
|
||||
client *replica = ln->value;
|
||||
if (replica != except_me && replica->repl_state == REPLICA_STATE_WAIT_BGSAVE_END) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -1446,7 +1447,7 @@ int anyOtherSlaveWaitRdb(client *except_me) {
|
||||
|
||||
/* Remove the specified client from global lists where the client could
|
||||
* be referenced, not including the Pub/Sub channels.
|
||||
* This is used by freeClient() and replicationCacheMaster(). */
|
||||
* This is used by freeClient() and replicationCachePrimary(). */
|
||||
void unlinkClient(client *c) {
|
||||
listNode *ln;
|
||||
|
||||
@ -1467,7 +1468,7 @@ void unlinkClient(client *c) {
|
||||
|
||||
/* Check if this is a replica waiting for diskless replication (rdb pipe),
|
||||
* in which case it needs to be cleaned from that list */
|
||||
if (c->flags & CLIENT_SLAVE && c->replstate == SLAVE_STATE_WAIT_BGSAVE_END && server.rdb_pipe_conns) {
|
||||
if (c->flags & CLIENT_REPLICA && c->repl_state == REPLICA_STATE_WAIT_BGSAVE_END && server.rdb_pipe_conns) {
|
||||
int i;
|
||||
for (i = 0; i < server.rdb_pipe_numconns; i++) {
|
||||
if (server.rdb_pipe_conns[i] == c->conn) {
|
||||
@ -1515,7 +1516,7 @@ void unlinkClient(client *c) {
|
||||
void clearClientConnectionState(client *c) {
|
||||
listNode *ln;
|
||||
|
||||
/* MONITOR clients are also marked with CLIENT_SLAVE, we need to
|
||||
/* MONITOR clients are also marked with CLIENT_REPLICA, we need to
|
||||
* distinguish between the two.
|
||||
*/
|
||||
if (c->flags & CLIENT_MONITOR) {
|
||||
@ -1523,10 +1524,10 @@ void clearClientConnectionState(client *c) {
|
||||
serverAssert(ln != NULL);
|
||||
listDelNode(server.monitors, ln);
|
||||
|
||||
c->flags &= ~(CLIENT_MONITOR | CLIENT_SLAVE);
|
||||
c->flags &= ~(CLIENT_MONITOR | CLIENT_REPLICA);
|
||||
}
|
||||
|
||||
serverAssert(!(c->flags & (CLIENT_SLAVE | CLIENT_MASTER)));
|
||||
serverAssert(!(c->flags & (CLIENT_REPLICA | CLIENT_PRIMARY)));
|
||||
|
||||
if (c->flags & CLIENT_TRACKING) disableTracking(c);
|
||||
selectDb(c, 0);
|
||||
@ -1581,7 +1582,7 @@ void freeClient(client *c) {
|
||||
|
||||
/* If this client was scheduled for async freeing we need to remove it
|
||||
* from the queue. Note that we need to do this here, because later
|
||||
* we may call replicationCacheMaster() and the client should already
|
||||
* we may call replicationCachePrimary() and the client should already
|
||||
* be removed from the list of clients to free. */
|
||||
if (c->flags & CLIENT_CLOSE_ASAP) {
|
||||
ln = listSearchKey(server.clients_to_close, c);
|
||||
@ -1589,23 +1590,23 @@ void freeClient(client *c) {
|
||||
listDelNode(server.clients_to_close, ln);
|
||||
}
|
||||
|
||||
/* If it is our master that's being disconnected we should make sure
|
||||
/* If it is our primary that's being disconnected we should make sure
|
||||
* to cache the state to try a partial resynchronization later.
|
||||
*
|
||||
* Note that before doing this we make sure that the client is not in
|
||||
* some unexpected state, by checking its flags. */
|
||||
if (server.master && c->flags & CLIENT_MASTER) {
|
||||
serverLog(LL_NOTICE, "Connection with master lost.");
|
||||
if (server.primary && c->flags & CLIENT_PRIMARY) {
|
||||
serverLog(LL_NOTICE, "Connection with primary lost.");
|
||||
if (!(c->flags & (CLIENT_PROTOCOL_ERROR | CLIENT_BLOCKED))) {
|
||||
c->flags &= ~(CLIENT_CLOSE_ASAP | CLIENT_CLOSE_AFTER_REPLY);
|
||||
replicationCacheMaster(c);
|
||||
replicationCachePrimary(c);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/* Log link disconnection with slave */
|
||||
if (getClientType(c) == CLIENT_TYPE_SLAVE) {
|
||||
serverLog(LL_NOTICE, "Connection with replica %s lost.", replicationGetSlaveName(c));
|
||||
/* Log link disconnection with replica */
|
||||
if (getClientType(c) == CLIENT_TYPE_REPLICA) {
|
||||
serverLog(LL_NOTICE, "Connection with replica %s lost.", replicationGetReplicaName(c));
|
||||
}
|
||||
|
||||
/* Free the query buffer */
|
||||
@ -1655,44 +1656,44 @@ void freeClient(client *c) {
|
||||
* places where active clients may be referenced. */
|
||||
unlinkClient(c);
|
||||
|
||||
/* Master/slave cleanup Case 1:
|
||||
* we lost the connection with a slave. */
|
||||
if (c->flags & CLIENT_SLAVE) {
|
||||
/* If there is no any other slave waiting dumping RDB finished, the
|
||||
/* Primary/replica cleanup Case 1:
|
||||
* we lost the connection with a replica. */
|
||||
if (c->flags & CLIENT_REPLICA) {
|
||||
/* If there is no any other replica waiting dumping RDB finished, the
|
||||
* current child process need not continue to dump RDB, then we kill it.
|
||||
* So child process won't use more memory, and we also can fork a new
|
||||
* child process asap to dump rdb for next full synchronization or bgsave.
|
||||
* But we also need to check if users enable 'save' RDB, if enable, we
|
||||
* should not remove directly since that means RDB is important for users
|
||||
* to keep data safe and we may delay configured 'save' for full sync. */
|
||||
if (server.saveparamslen == 0 && c->replstate == SLAVE_STATE_WAIT_BGSAVE_END &&
|
||||
if (server.saveparamslen == 0 && c->repl_state == REPLICA_STATE_WAIT_BGSAVE_END &&
|
||||
server.child_type == CHILD_TYPE_RDB && server.rdb_child_type == RDB_CHILD_TYPE_DISK &&
|
||||
anyOtherSlaveWaitRdb(c) == 0) {
|
||||
anyOtherReplicaWaitRdb(c) == 0) {
|
||||
killRDBChild();
|
||||
}
|
||||
if (c->replstate == SLAVE_STATE_SEND_BULK) {
|
||||
if (c->repl_state == REPLICA_STATE_SEND_BULK) {
|
||||
if (c->repldbfd != -1) close(c->repldbfd);
|
||||
if (c->replpreamble) sdsfree(c->replpreamble);
|
||||
}
|
||||
list *l = (c->flags & CLIENT_MONITOR) ? server.monitors : server.slaves;
|
||||
list *l = (c->flags & CLIENT_MONITOR) ? server.monitors : server.replicas;
|
||||
ln = listSearchKey(l, c);
|
||||
serverAssert(ln != NULL);
|
||||
listDelNode(l, ln);
|
||||
/* We need to remember the time when we started to have zero
|
||||
* attached slaves, as after some time we'll free the replication
|
||||
* attached replicas, as after some time we'll free the replication
|
||||
* backlog. */
|
||||
if (getClientType(c) == CLIENT_TYPE_SLAVE && listLength(server.slaves) == 0)
|
||||
server.repl_no_slaves_since = server.unixtime;
|
||||
refreshGoodSlavesCount();
|
||||
if (getClientType(c) == CLIENT_TYPE_REPLICA && listLength(server.replicas) == 0)
|
||||
server.repl_no_replicas_since = server.unixtime;
|
||||
refreshGoodReplicasCount();
|
||||
/* Fire the replica change modules event. */
|
||||
if (c->replstate == SLAVE_STATE_ONLINE)
|
||||
if (c->repl_state == REPLICA_STATE_ONLINE)
|
||||
moduleFireServerEvent(VALKEYMODULE_EVENT_REPLICA_CHANGE, VALKEYMODULE_SUBEVENT_REPLICA_CHANGE_OFFLINE,
|
||||
NULL);
|
||||
}
|
||||
|
||||
/* Master/slave cleanup Case 2:
|
||||
* we lost the connection with the master. */
|
||||
if (c->flags & CLIENT_MASTER) replicationHandleMasterDisconnection();
|
||||
/* Primary/replica cleanup Case 2:
|
||||
* we lost the connection with the primary. */
|
||||
if (c->flags & CLIENT_PRIMARY) replicationHandlePrimaryDisconnection();
|
||||
|
||||
/* Remove client from memory usage buckets */
|
||||
if (c->mem_usage_bucket) {
|
||||
@ -1708,7 +1709,7 @@ void freeClient(client *c) {
|
||||
freeClientMultiState(c);
|
||||
sdsfree(c->peerid);
|
||||
sdsfree(c->sockname);
|
||||
sdsfree(c->slave_addr);
|
||||
sdsfree(c->replica_addr);
|
||||
zfree(c);
|
||||
}
|
||||
|
||||
@ -1889,7 +1890,7 @@ static int _writevToClient(client *c, ssize_t *nwritten) {
|
||||
* to client. */
|
||||
int _writeToClient(client *c, ssize_t *nwritten) {
|
||||
*nwritten = 0;
|
||||
if (getClientType(c) == CLIENT_TYPE_SLAVE) {
|
||||
if (getClientType(c) == CLIENT_TYPE_REPLICA) {
|
||||
serverAssert(c->bufpos == 0 && listLength(c->reply) == 0);
|
||||
|
||||
replBufBlock *o = listNodeValue(c->ref_repl_buf_node);
|
||||
@ -1966,14 +1967,14 @@ int writeToClient(client *c, int handler_installed) {
|
||||
* just deliver as much data as it is possible to deliver.
|
||||
*
|
||||
* Moreover, we also send as much as possible if the client is
|
||||
* a slave or a monitor (otherwise, on high-speed traffic, the
|
||||
* a replica or a monitor (otherwise, on high-speed traffic, the
|
||||
* replication/output buffer will grow indefinitely) */
|
||||
if (totwritten > NET_MAX_WRITES_PER_EVENT &&
|
||||
(server.maxmemory == 0 || zmalloc_used_memory() < server.maxmemory) && !(c->flags & CLIENT_SLAVE))
|
||||
(server.maxmemory == 0 || zmalloc_used_memory() < server.maxmemory) && !(c->flags & CLIENT_REPLICA))
|
||||
break;
|
||||
}
|
||||
|
||||
if (getClientType(c) == CLIENT_TYPE_SLAVE) {
|
||||
if (getClientType(c) == CLIENT_TYPE_REPLICA) {
|
||||
atomic_fetch_add_explicit(&server.stat_net_repl_output_bytes, totwritten, memory_order_relaxed);
|
||||
} else {
|
||||
atomic_fetch_add_explicit(&server.stat_net_output_bytes, totwritten, memory_order_relaxed);
|
||||
@ -1988,11 +1989,11 @@ int writeToClient(client *c, int handler_installed) {
|
||||
}
|
||||
}
|
||||
if (totwritten > 0) {
|
||||
/* For clients representing masters we don't count sending data
|
||||
/* For clients representing primaries we don't count sending data
|
||||
* as an interaction, since we always send REPLCONF ACK commands
|
||||
* that take some time to just fill the socket output buffer.
|
||||
* We just rely on data / pings received for timeout detection. */
|
||||
if (!(c->flags & CLIENT_MASTER)) c->lastinteraction = server.unixtime;
|
||||
if (!(c->flags & CLIENT_PRIMARY)) c->last_interaction = server.unixtime;
|
||||
}
|
||||
if (!clientHasPendingReplies(c)) {
|
||||
c->sentlen = 0;
|
||||
@ -2211,22 +2212,22 @@ int processInlineBuffer(client *c) {
|
||||
return C_ERR;
|
||||
}
|
||||
|
||||
/* Newline from slaves can be used to refresh the last ACK time.
|
||||
* This is useful for a slave to ping back while loading a big
|
||||
/* Newline from replicas can be used to refresh the last ACK time.
|
||||
* This is useful for a replica to ping back while loading a big
|
||||
* RDB file. */
|
||||
if (querylen == 0 && getClientType(c) == CLIENT_TYPE_SLAVE) c->repl_ack_time = server.unixtime;
|
||||
if (querylen == 0 && getClientType(c) == CLIENT_TYPE_REPLICA) c->repl_ack_time = server.unixtime;
|
||||
|
||||
/* Masters should never send us inline protocol to run actual
|
||||
/* Primaries should never send us inline protocol to run actual
|
||||
* commands. If this happens, it is likely due to a bug in the server where
|
||||
* we got some desynchronization in the protocol, for example
|
||||
* because of a PSYNC gone bad.
|
||||
*
|
||||
* However there is an exception: masters may send us just a newline
|
||||
* However there is an exception: primaries may send us just a newline
|
||||
* to keep the connection active. */
|
||||
if (querylen != 0 && c->flags & CLIENT_MASTER) {
|
||||
if (querylen != 0 && c->flags & CLIENT_PRIMARY) {
|
||||
sdsfreesplitres(argv, argc);
|
||||
serverLog(LL_WARNING, "WARNING: Receiving inline protocol from master, master stream corruption? Closing the "
|
||||
"master connection and discarding the cached master.");
|
||||
serverLog(LL_WARNING, "WARNING: Receiving inline protocol from primary, primary stream corruption? Closing the "
|
||||
"primary connection and discarding the cached primary.");
|
||||
setProtocolError("Master using the inline protocol. Desync?", c);
|
||||
return C_ERR;
|
||||
}
|
||||
@ -2257,7 +2258,7 @@ int processInlineBuffer(client *c) {
|
||||
* CLIENT_PROTOCOL_ERROR. */
|
||||
#define PROTO_DUMP_LEN 128
|
||||
static void setProtocolError(const char *errstr, client *c) {
|
||||
if (server.verbosity <= LL_VERBOSE || c->flags & CLIENT_MASTER) {
|
||||
if (server.verbosity <= LL_VERBOSE || c->flags & CLIENT_PRIMARY) {
|
||||
sds client = catClientInfoString(sdsempty(), c);
|
||||
|
||||
/* Sample some protocol to given an idea about what was inside. */
|
||||
@ -2278,7 +2279,7 @@ static void setProtocolError(const char *errstr, client *c) {
|
||||
}
|
||||
|
||||
/* Log all the client and protocol info. */
|
||||
int loglevel = (c->flags & CLIENT_MASTER) ? LL_WARNING : LL_VERBOSE;
|
||||
int loglevel = (c->flags & CLIENT_PRIMARY) ? LL_WARNING : LL_VERBOSE;
|
||||
serverLog(loglevel, "Protocol error (%s) from client: %s. %s", errstr, client, buf);
|
||||
sdsfree(client);
|
||||
}
|
||||
@ -2369,7 +2370,7 @@ int processMultibulkBuffer(client *c) {
|
||||
}
|
||||
|
||||
ok = string2ll(c->querybuf + c->qb_pos + 1, newline - (c->querybuf + c->qb_pos + 1), &ll);
|
||||
if (!ok || ll < 0 || (!(c->flags & CLIENT_MASTER) && ll > server.proto_max_bulk_len)) {
|
||||
if (!ok || ll < 0 || (!(c->flags & CLIENT_PRIMARY) && ll > server.proto_max_bulk_len)) {
|
||||
addReplyError(c, "Protocol error: invalid bulk length");
|
||||
setProtocolError("invalid bulk length", c);
|
||||
return C_ERR;
|
||||
@ -2380,8 +2381,8 @@ int processMultibulkBuffer(client *c) {
|
||||
}
|
||||
|
||||
c->qb_pos = newline - c->querybuf + 2;
|
||||
if (!(c->flags & CLIENT_MASTER) && ll >= PROTO_MBULK_BIG_ARG) {
|
||||
/* When the client is not a master client (because master
|
||||
if (!(c->flags & CLIENT_PRIMARY) && ll >= PROTO_MBULK_BIG_ARG) {
|
||||
/* When the client is not a primary client (because primary
|
||||
* client's querybuf can only be trimmed after data applied
|
||||
* and sent to replicas).
|
||||
*
|
||||
@ -2423,10 +2424,10 @@ int processMultibulkBuffer(client *c) {
|
||||
c->argv = zrealloc(c->argv, sizeof(robj *) * c->argv_len);
|
||||
}
|
||||
|
||||
/* Optimization: if a non-master client's buffer contains JUST our bulk element
|
||||
/* Optimization: if a non-primary client's buffer contains JUST our bulk element
|
||||
* instead of creating a new object by *copying* the sds we
|
||||
* just use the current sds string. */
|
||||
if (!(c->flags & CLIENT_MASTER) && c->qb_pos == 0 && c->bulklen >= PROTO_MBULK_BIG_ARG &&
|
||||
if (!(c->flags & CLIENT_PRIMARY) && c->qb_pos == 0 && c->bulklen >= PROTO_MBULK_BIG_ARG &&
|
||||
sdslen(c->querybuf) == (size_t)(c->bulklen + 2)) {
|
||||
c->argv[c->argc++] = createObject(OBJ_STRING, c->querybuf);
|
||||
c->argv_len_sum += c->bulklen;
|
||||
@ -2455,8 +2456,8 @@ int processMultibulkBuffer(client *c) {
|
||||
/* Perform necessary tasks after a command was executed:
|
||||
*
|
||||
* 1. The client is reset unless there are reasons to avoid doing it.
|
||||
* 2. In the case of master clients, the replication offset is updated.
|
||||
* 3. Propagate commands we got from our master to replicas down the line. */
|
||||
* 2. In the case of primary clients, the replication offset is updated.
|
||||
* 3. Propagate commands we got from our primary to replicas down the line. */
|
||||
void commandProcessed(client *c) {
|
||||
/* If client is blocked(including paused), just return avoid reset and replicate.
|
||||
*
|
||||
@ -2471,21 +2472,21 @@ void commandProcessed(client *c) {
|
||||
resetClient(c);
|
||||
|
||||
long long prev_offset = c->reploff;
|
||||
if (c->flags & CLIENT_MASTER && !(c->flags & CLIENT_MULTI)) {
|
||||
/* Update the applied replication offset of our master. */
|
||||
if (c->flags & CLIENT_PRIMARY && !(c->flags & CLIENT_MULTI)) {
|
||||
/* Update the applied replication offset of our primary. */
|
||||
c->reploff = c->read_reploff - sdslen(c->querybuf) + c->qb_pos;
|
||||
}
|
||||
|
||||
/* If the client is a master we need to compute the difference
|
||||
/* If the client is a primary we need to compute the difference
|
||||
* between the applied offset before and after processing the buffer,
|
||||
* to understand how much of the replication stream was actually
|
||||
* applied to the master state: this quantity, and its corresponding
|
||||
* applied to the primary state: this quantity, and its corresponding
|
||||
* part of the replication stream, will be propagated to the
|
||||
* sub-replicas and to the replication backlog. */
|
||||
if (c->flags & CLIENT_MASTER) {
|
||||
if (c->flags & CLIENT_PRIMARY) {
|
||||
long long applied = c->reploff - prev_offset;
|
||||
if (applied) {
|
||||
replicationFeedStreamFromMasterStream(c->querybuf + c->repl_applied, applied);
|
||||
replicationFeedStreamFromPrimaryStream(c->querybuf + c->repl_applied, applied);
|
||||
c->repl_applied += applied;
|
||||
}
|
||||
}
|
||||
@ -2519,8 +2520,8 @@ int processCommandAndResetClient(client *c) {
|
||||
* is dead and will stop reading from its buffer.
|
||||
*/
|
||||
server.current_client = old_client;
|
||||
/* performEvictions may flush slave output buffers. This may
|
||||
* result in a slave, that may be the active client, to be
|
||||
/* performEvictions may flush replica output buffers. This may
|
||||
* result in a replica, that may be the active client, to be
|
||||
* freed. */
|
||||
return deadclient ? C_ERR : C_OK;
|
||||
}
|
||||
@ -2543,7 +2544,7 @@ int processPendingCommandAndInputBuffer(client *c) {
|
||||
|
||||
/* Now process client if it has more data in it's buffer.
|
||||
*
|
||||
* Note: when a master client steps into this function,
|
||||
* Note: when a primary client steps into this function,
|
||||
* it can always satisfy this condition, because its querybuf
|
||||
* contains data not applied. */
|
||||
if (c->querybuf && sdslen(c->querybuf) > 0) {
|
||||
@ -2567,11 +2568,11 @@ int processInputBuffer(client *c) {
|
||||
* commands to execute in c->argv. */
|
||||
if (c->flags & CLIENT_PENDING_COMMAND) break;
|
||||
|
||||
/* Don't process input from the master while there is a busy script
|
||||
* condition on the slave. We want just to accumulate the replication
|
||||
/* Don't process input from the primary while there is a busy script
|
||||
* condition on the replica. We want just to accumulate the replication
|
||||
* stream (instead of replying -BUSY like we do with other clients) and
|
||||
* later resume the processing. */
|
||||
if (isInsideYieldingLongCommand() && c->flags & CLIENT_MASTER) break;
|
||||
if (isInsideYieldingLongCommand() && c->flags & CLIENT_PRIMARY) break;
|
||||
|
||||
/* CLIENT_CLOSE_AFTER_REPLY closes the connection once the reply is
|
||||
* written to the client. Make sure to not let the reply grow after
|
||||
@ -2627,15 +2628,15 @@ int processInputBuffer(client *c) {
|
||||
}
|
||||
}
|
||||
|
||||
if (c->flags & CLIENT_MASTER) {
|
||||
/* If the client is a master, trim the querybuf to repl_applied,
|
||||
* since master client is very special, its querybuf not only
|
||||
if (c->flags & CLIENT_PRIMARY) {
|
||||
/* If the client is a primary, trim the querybuf to repl_applied,
|
||||
* since primary client is very special, its querybuf not only
|
||||
* used to parse command, but also proxy to sub-replicas.
|
||||
*
|
||||
* Here are some scenarios we cannot trim to qb_pos:
|
||||
* 1. we don't receive complete command from master
|
||||
* 2. master client blocked cause of client pause
|
||||
* 3. io threads operate read, master client flagged with CLIENT_PENDING_COMMAND
|
||||
* 1. we don't receive complete command from primary
|
||||
* 2. primary client blocked cause of client pause
|
||||
* 3. io threads operate read, primary client flagged with CLIENT_PENDING_COMMAND
|
||||
*
|
||||
* In these scenarios, qb_pos points to the part of the current command
|
||||
* or the beginning of next command, and the current command is not applied yet,
|
||||
@ -2686,9 +2687,9 @@ void readQueryFromClient(connection *conn) {
|
||||
* for example once we resume a blocked client after CLIENT PAUSE. */
|
||||
if (remaining > 0) readlen = remaining;
|
||||
|
||||
/* Master client needs expand the readlen when meet BIG_ARG(see #9100),
|
||||
/* Primary client needs expand the readlen when meet BIG_ARG(see #9100),
|
||||
* but doesn't need align to the next arg, we can read more data. */
|
||||
if (c->flags & CLIENT_MASTER && readlen < PROTO_IOBUF_LEN) readlen = PROTO_IOBUF_LEN;
|
||||
if (c->flags & CLIENT_PRIMARY && readlen < PROTO_IOBUF_LEN) readlen = PROTO_IOBUF_LEN;
|
||||
}
|
||||
|
||||
if (c->querybuf == NULL) {
|
||||
@ -2697,7 +2698,7 @@ void readQueryFromClient(connection *conn) {
|
||||
qblen = sdslen(c->querybuf);
|
||||
}
|
||||
|
||||
if (!(c->flags & CLIENT_MASTER) && // master client's querybuf can grow greedy.
|
||||
if (!(c->flags & CLIENT_PRIMARY) && // primary client's querybuf can grow greedy.
|
||||
(big_arg || sdsalloc(c->querybuf) < PROTO_IOBUF_LEN)) {
|
||||
/* When reading a BIG_ARG we won't be reading more than that one arg
|
||||
* into the query buffer, so we don't need to pre-allocate more than we
|
||||
@ -2737,8 +2738,8 @@ void readQueryFromClient(connection *conn) {
|
||||
qblen = sdslen(c->querybuf);
|
||||
if (c->querybuf_peak < qblen) c->querybuf_peak = qblen;
|
||||
|
||||
c->lastinteraction = server.unixtime;
|
||||
if (c->flags & CLIENT_MASTER) {
|
||||
c->last_interaction = server.unixtime;
|
||||
if (c->flags & CLIENT_PRIMARY) {
|
||||
c->read_reploff += nread;
|
||||
atomic_fetch_add_explicit(&server.stat_net_repl_input_bytes, nread, memory_order_relaxed);
|
||||
} else {
|
||||
@ -2746,7 +2747,7 @@ void readQueryFromClient(connection *conn) {
|
||||
}
|
||||
c->net_input_bytes += nread;
|
||||
|
||||
if (!(c->flags & CLIENT_MASTER) &&
|
||||
if (!(c->flags & CLIENT_PRIMARY) &&
|
||||
/* The commands cached in the MULTI/EXEC queue have not been executed yet,
|
||||
* so they are also considered a part of the query buffer in a broader sense.
|
||||
*
|
||||
@ -2832,14 +2833,14 @@ sds catClientInfoString(sds s, client *client) {
|
||||
char flags[17], events[3], conninfo[CONN_INFO_LEN], *p;
|
||||
|
||||
p = flags;
|
||||
if (client->flags & CLIENT_SLAVE) {
|
||||
if (client->flags & CLIENT_REPLICA) {
|
||||
if (client->flags & CLIENT_MONITOR)
|
||||
*p++ = 'O';
|
||||
else
|
||||
*p++ = 'S';
|
||||
}
|
||||
/* clang-format off */
|
||||
if (client->flags & CLIENT_MASTER) *p++ = 'M';
|
||||
if (client->flags & CLIENT_PRIMARY) *p++ = 'M';
|
||||
if (client->flags & CLIENT_PUBSUB) *p++ = 'P';
|
||||
if (client->flags & CLIENT_MULTI) *p++ = 'x';
|
||||
if (client->flags & CLIENT_BLOCKED) *p++ = 'b';
|
||||
@ -2883,7 +2884,7 @@ sds catClientInfoString(sds s, client *client) {
|
||||
" %s", connGetInfo(client->conn, conninfo, sizeof(conninfo)),
|
||||
" name=%s", client->name ? (char*)client->name->ptr : "",
|
||||
" age=%I", (long long)(commandTimeSnapshot() / 1000 - client->ctime),
|
||||
" idle=%I", (long long)(server.unixtime - client->lastinteraction),
|
||||
" idle=%I", (long long)(server.unixtime - client->last_interaction),
|
||||
" flags=%s", flags,
|
||||
" db=%i", client->db->id,
|
||||
" sub=%i", (int) dictSize(client->pubsub_channels),
|
||||
@ -3026,13 +3027,13 @@ void clientSetinfoCommand(client *c) {
|
||||
/* Reset the client state to resemble a newly connected client.
|
||||
*/
|
||||
void resetCommand(client *c) {
|
||||
/* MONITOR clients are also marked with CLIENT_SLAVE, we need to
|
||||
/* MONITOR clients are also marked with CLIENT_REPLICA, we need to
|
||||
* distinguish between the two.
|
||||
*/
|
||||
uint64_t flags = c->flags;
|
||||
if (flags & CLIENT_MONITOR) flags &= ~(CLIENT_MONITOR | CLIENT_SLAVE);
|
||||
if (flags & CLIENT_MONITOR) flags &= ~(CLIENT_MONITOR | CLIENT_REPLICA);
|
||||
|
||||
if (flags & (CLIENT_SLAVE | CLIENT_MASTER | CLIENT_MODULE)) {
|
||||
if (flags & (CLIENT_REPLICA | CLIENT_PRIMARY | CLIENT_MODULE)) {
|
||||
addReplyError(c, "can only reset normal client connections");
|
||||
return;
|
||||
}
|
||||
@ -3678,7 +3679,7 @@ void helloCommand(client *c) {
|
||||
|
||||
if (!server.sentinel_mode) {
|
||||
addReplyBulkCString(c, "role");
|
||||
addReplyBulkCString(c, server.masterhost ? "replica" : "master");
|
||||
addReplyBulkCString(c, server.primary_host ? "replica" : "master");
|
||||
}
|
||||
|
||||
addReplyBulkCString(c, "modules");
|
||||
@ -3825,7 +3826,7 @@ void rewriteClientCommandArgument(client *c, int i, robj *newval) {
|
||||
* the caller wishes. The main usage of this function currently is
|
||||
* enforcing the client output length limits. */
|
||||
size_t getClientOutputBufferMemoryUsage(client *c) {
|
||||
if (getClientType(c) == CLIENT_TYPE_SLAVE) {
|
||||
if (getClientType(c) == CLIENT_TYPE_REPLICA) {
|
||||
size_t repl_buf_size = 0;
|
||||
size_t repl_node_num = 0;
|
||||
size_t repl_node_size = sizeof(listNode) + sizeof(replBufBlock);
|
||||
@ -3875,15 +3876,15 @@ size_t getClientMemoryUsage(client *c, size_t *output_buffer_mem_usage) {
|
||||
*
|
||||
* The function will return one of the following:
|
||||
* CLIENT_TYPE_NORMAL -> Normal client, including MONITOR
|
||||
* CLIENT_TYPE_SLAVE -> Slave
|
||||
* CLIENT_TYPE_REPLICA -> replica
|
||||
* CLIENT_TYPE_PUBSUB -> Client subscribed to Pub/Sub channels
|
||||
* CLIENT_TYPE_MASTER -> The client representing our replication master.
|
||||
* CLIENT_TYPE_PRIMARY -> The client representing our replication primary.
|
||||
*/
|
||||
int getClientType(client *c) {
|
||||
if (c->flags & CLIENT_MASTER) return CLIENT_TYPE_MASTER;
|
||||
if (c->flags & CLIENT_PRIMARY) return CLIENT_TYPE_PRIMARY;
|
||||
/* Even though MONITOR clients are marked as replicas, we
|
||||
* want the expose them as normal clients. */
|
||||
if ((c->flags & CLIENT_SLAVE) && !(c->flags & CLIENT_MONITOR)) return CLIENT_TYPE_SLAVE;
|
||||
if ((c->flags & CLIENT_REPLICA) && !(c->flags & CLIENT_MONITOR)) return CLIENT_TYPE_REPLICA;
|
||||
if (c->flags & CLIENT_PUBSUB) return CLIENT_TYPE_PUBSUB;
|
||||
return CLIENT_TYPE_NORMAL;
|
||||
}
|
||||
@ -3892,13 +3893,13 @@ int getClientTypeByName(char *name) {
|
||||
if (!strcasecmp(name, "normal"))
|
||||
return CLIENT_TYPE_NORMAL;
|
||||
else if (!strcasecmp(name, "slave"))
|
||||
return CLIENT_TYPE_SLAVE;
|
||||
return CLIENT_TYPE_REPLICA;
|
||||
else if (!strcasecmp(name, "replica"))
|
||||
return CLIENT_TYPE_SLAVE;
|
||||
return CLIENT_TYPE_REPLICA;
|
||||
else if (!strcasecmp(name, "pubsub"))
|
||||
return CLIENT_TYPE_PUBSUB;
|
||||
else if (!strcasecmp(name, "master"))
|
||||
return CLIENT_TYPE_MASTER;
|
||||
return CLIENT_TYPE_PRIMARY;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
@ -3906,9 +3907,9 @@ int getClientTypeByName(char *name) {
|
||||
char *getClientTypeName(int class) {
|
||||
switch (class) {
|
||||
case CLIENT_TYPE_NORMAL: return "normal";
|
||||
case CLIENT_TYPE_SLAVE: return "slave";
|
||||
case CLIENT_TYPE_REPLICA: return "slave";
|
||||
case CLIENT_TYPE_PUBSUB: return "pubsub";
|
||||
case CLIENT_TYPE_MASTER: return "master";
|
||||
case CLIENT_TYPE_PRIMARY: return "master";
|
||||
default: return NULL;
|
||||
}
|
||||
}
|
||||
@ -3924,9 +3925,9 @@ int checkClientOutputBufferLimits(client *c) {
|
||||
unsigned long used_mem = getClientOutputBufferMemoryUsage(c);
|
||||
|
||||
class = getClientType(c);
|
||||
/* For the purpose of output buffer limiting, masters are handled
|
||||
/* For the purpose of output buffer limiting, primaries are handled
|
||||
* like normal clients. */
|
||||
if (class == CLIENT_TYPE_MASTER) class = CLIENT_TYPE_NORMAL;
|
||||
if (class == CLIENT_TYPE_PRIMARY) class = CLIENT_TYPE_NORMAL;
|
||||
|
||||
/* Note that it doesn't make sense to set the replica clients output buffer
|
||||
* limit lower than the repl-backlog-size config (partial sync will succeed
|
||||
@ -3935,7 +3936,7 @@ int checkClientOutputBufferLimits(client *c) {
|
||||
* This doesn't have memory consumption implications since the replica client
|
||||
* will share the backlog buffers memory. */
|
||||
size_t hard_limit_bytes = server.client_obuf_limits[class].hard_limit_bytes;
|
||||
if (class == CLIENT_TYPE_SLAVE && hard_limit_bytes && (long long)hard_limit_bytes < server.repl_backlog_size)
|
||||
if (class == CLIENT_TYPE_REPLICA && hard_limit_bytes && (long long)hard_limit_bytes < server.repl_backlog_size)
|
||||
hard_limit_bytes = server.repl_backlog_size;
|
||||
if (server.client_obuf_limits[class].hard_limit_bytes && used_mem >= hard_limit_bytes) hard = 1;
|
||||
if (server.client_obuf_limits[class].soft_limit_bytes &&
|
||||
@ -3979,7 +3980,7 @@ int closeClientOnOutputBufferLimitReached(client *c, int async) {
|
||||
serverAssert(c->reply_bytes < SIZE_MAX - (1024 * 64));
|
||||
/* Note that c->reply_bytes is irrelevant for replica clients
|
||||
* (they use the global repl buffers). */
|
||||
if ((c->reply_bytes == 0 && getClientType(c) != CLIENT_TYPE_SLAVE) || c->flags & CLIENT_CLOSE_ASAP) return 0;
|
||||
if ((c->reply_bytes == 0 && getClientType(c) != CLIENT_TYPE_REPLICA) || c->flags & CLIENT_CLOSE_ASAP) return 0;
|
||||
if (checkClientOutputBufferLimits(c)) {
|
||||
sds client = catClientInfoString(sdsempty(), c);
|
||||
|
||||
@ -3998,18 +3999,18 @@ int closeClientOnOutputBufferLimitReached(client *c, int async) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Helper function used by performEvictions() in order to flush slaves
|
||||
/* Helper function used by performEvictions() in order to flush replicas
|
||||
* output buffers without returning control to the event loop.
|
||||
* This is also called by SHUTDOWN for a best-effort attempt to send
|
||||
* slaves the latest writes. */
|
||||
void flushSlavesOutputBuffers(void) {
|
||||
* replicas the latest writes. */
|
||||
void flushReplicasOutputBuffers(void) {
|
||||
listIter li;
|
||||
listNode *ln;
|
||||
|
||||
listRewind(server.slaves, &li);
|
||||
listRewind(server.replicas, &li);
|
||||
while ((ln = listNext(&li))) {
|
||||
client *slave = listNodeValue(ln);
|
||||
int can_receive_writes = connHasWriteHandler(slave->conn) || (slave->flags & CLIENT_PENDING_WRITE);
|
||||
client *replica = listNodeValue(ln);
|
||||
int can_receive_writes = connHasWriteHandler(replica->conn) || (replica->flags & CLIENT_PENDING_WRITE);
|
||||
|
||||
/* We don't want to send the pending data to the replica in a few
|
||||
* cases:
|
||||
@ -4023,11 +4024,11 @@ void flushSlavesOutputBuffers(void) {
|
||||
* to send data to the replica in this case, please grep for the
|
||||
* flag for this flag.
|
||||
*
|
||||
* 3. Obviously if the slave is not ONLINE.
|
||||
* 3. Obviously if the replica is not ONLINE.
|
||||
*/
|
||||
if (slave->replstate == SLAVE_STATE_ONLINE && !(slave->flags & CLIENT_CLOSE_ASAP) && can_receive_writes &&
|
||||
!slave->repl_start_cmd_stream_on_ack && clientHasPendingReplies(slave)) {
|
||||
writeToClient(slave, 0);
|
||||
if (replica->repl_state == REPLICA_STATE_ONLINE && !(replica->flags & CLIENT_CLOSE_ASAP) &&
|
||||
can_receive_writes && !replica->repl_start_cmd_stream_on_ack && clientHasPendingReplies(replica)) {
|
||||
writeToClient(replica, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -4139,7 +4140,7 @@ uint32_t isPausedActionsWithUpdate(uint32_t actions_bitmask) {
|
||||
/* This function is called by the server in order to process a few events from
|
||||
* time to time while blocked into some not interruptible operation.
|
||||
* This allows to reply to clients with the -LOADING error while loading the
|
||||
* data set at startup or after a full resynchronization with the master
|
||||
* data set at startup or after a full resynchronization with the primary
|
||||
* and so forth.
|
||||
*
|
||||
* It calls the event loop in order to process a few events. Specifically we
|
||||
@ -4403,7 +4404,7 @@ int handleClientsWithPendingWritesUsingThreads(void) {
|
||||
* buffer, to guarantee data accessing thread safe, we must put all
|
||||
* replicas client into io_threads_list[0] i.e. main thread handles
|
||||
* sending the output buffer of all replicas. */
|
||||
if (getClientType(c) == CLIENT_TYPE_SLAVE) {
|
||||
if (getClientType(c) == CLIENT_TYPE_REPLICA) {
|
||||
listAddNodeTail(io_threads_list[0], c);
|
||||
continue;
|
||||
}
|
||||
@ -4469,7 +4470,7 @@ int handleClientsWithPendingWritesUsingThreads(void) {
|
||||
* pending read clients and flagged as such. */
|
||||
int postponeClientRead(client *c) {
|
||||
if (server.io_threads_active && server.io_threads_do_reads && !ProcessingEventsWhileBlocked &&
|
||||
!(c->flags & (CLIENT_MASTER | CLIENT_SLAVE | CLIENT_BLOCKED)) && io_threads_op == IO_THREADS_OP_IDLE) {
|
||||
!(c->flags & (CLIENT_PRIMARY | CLIENT_REPLICA | CLIENT_BLOCKED)) && io_threads_op == IO_THREADS_OP_IDLE) {
|
||||
listAddNodeHead(server.clients_pending_read, c);
|
||||
c->pending_read_list_node = listFirst(server.clients_pending_read);
|
||||
return 1;
|
||||
|
26
src/object.c
26
src/object.c
@ -1173,11 +1173,11 @@ struct serverMemOverhead *getMemoryOverheadData(void) {
|
||||
* only if replication buffer memory is more than the repl backlog setting,
|
||||
* we consider the excess as replicas' memory. Otherwise, replication buffer
|
||||
* memory is the consumption of repl backlog. */
|
||||
if (listLength(server.slaves) && (long long)server.repl_buffer_mem > server.repl_backlog_size) {
|
||||
mh->clients_slaves = server.repl_buffer_mem - server.repl_backlog_size;
|
||||
if (listLength(server.replicas) && (long long)server.repl_buffer_mem > server.repl_backlog_size) {
|
||||
mh->clients_replicas = server.repl_buffer_mem - server.repl_backlog_size;
|
||||
mh->repl_backlog = server.repl_backlog_size;
|
||||
} else {
|
||||
mh->clients_slaves = 0;
|
||||
mh->clients_replicas = 0;
|
||||
mh->repl_backlog = server.repl_buffer_mem;
|
||||
}
|
||||
if (server.repl_backlog) {
|
||||
@ -1186,12 +1186,12 @@ struct serverMemOverhead *getMemoryOverheadData(void) {
|
||||
raxSize(server.repl_backlog->blocks_index) * sizeof(void *);
|
||||
}
|
||||
mem_total += mh->repl_backlog;
|
||||
mem_total += mh->clients_slaves;
|
||||
mem_total += mh->clients_replicas;
|
||||
|
||||
/* Computing the memory used by the clients would be O(N) if done
|
||||
* here online. We use our values computed incrementally by
|
||||
* updateClientMemoryUsage(). */
|
||||
mh->clients_normal = server.stat_clients_type_memory[CLIENT_TYPE_MASTER] +
|
||||
mh->clients_normal = server.stat_clients_type_memory[CLIENT_TYPE_PRIMARY] +
|
||||
server.stat_clients_type_memory[CLIENT_TYPE_PUBSUB] +
|
||||
server.stat_clients_type_memory[CLIENT_TYPE_NORMAL];
|
||||
mem_total += mh->clients_normal;
|
||||
@ -1271,7 +1271,7 @@ sds getMemoryDoctorReport(void) {
|
||||
int high_alloc_frag = 0; /* High allocator fragmentation. */
|
||||
int high_proc_rss = 0; /* High process rss overhead. */
|
||||
int high_alloc_rss = 0; /* High rss overhead. */
|
||||
int big_slave_buf = 0; /* Slave buffers are too big. */
|
||||
int big_replica_buf = 0; /* Replica buffers are too big. */
|
||||
int big_client_buf = 0; /* Client buffers are too big. */
|
||||
int many_scripts = 0; /* Script cache has too many scripts. */
|
||||
int num_reports = 0;
|
||||
@ -1312,16 +1312,16 @@ sds getMemoryDoctorReport(void) {
|
||||
}
|
||||
|
||||
/* Clients using more than 200k each average? */
|
||||
long numslaves = listLength(server.slaves);
|
||||
long numclients = listLength(server.clients) - numslaves;
|
||||
long num_replicas = listLength(server.replicas);
|
||||
long numclients = listLength(server.clients) - num_replicas;
|
||||
if (mh->clients_normal / numclients > (1024 * 200)) {
|
||||
big_client_buf = 1;
|
||||
num_reports++;
|
||||
}
|
||||
|
||||
/* Slaves using more than 10 MB each? */
|
||||
if (numslaves > 0 && mh->clients_slaves > (1024 * 1024 * 10)) {
|
||||
big_slave_buf = 1;
|
||||
/* Replicas using more than 10 MB each? */
|
||||
if (num_replicas > 0 && mh->clients_replicas > (1024 * 1024 * 10)) {
|
||||
big_replica_buf = 1;
|
||||
num_reports++;
|
||||
}
|
||||
|
||||
@ -1386,7 +1386,7 @@ sds getMemoryDoctorReport(void) {
|
||||
"1.1 (this means that the Resident Set Size of the Valkey process is much larger than the RSS the "
|
||||
"allocator holds). This problem may be due to Lua scripts or Modules.\n\n");
|
||||
}
|
||||
if (big_slave_buf) {
|
||||
if (big_replica_buf) {
|
||||
s = sdscat(s,
|
||||
" * Big replica buffers: The replica output buffers in this instance are greater than 10MB for "
|
||||
"each replica (on average). This likely means that there is some replica instance that is "
|
||||
@ -1579,7 +1579,7 @@ NULL
|
||||
addReplyLongLong(c, mh->repl_backlog);
|
||||
|
||||
addReplyBulkCString(c, "clients.slaves");
|
||||
addReplyLongLong(c, mh->clients_slaves);
|
||||
addReplyLongLong(c, mh->clients_replicas);
|
||||
|
||||
addReplyBulkCString(c, "clients.normal");
|
||||
addReplyLongLong(c, mh->clients_normal);
|
||||
|
110
src/rdb.c
110
src/rdb.c
@ -1183,7 +1183,7 @@ int rdbSaveInfoAuxFields(rio *rdb, int rdbflags, rdbSaveInfo *rsi) {
|
||||
if (rsi) {
|
||||
if (rdbSaveAuxFieldStrInt(rdb, "repl-stream-db", rsi->repl_stream_db) == -1) return -1;
|
||||
if (rdbSaveAuxFieldStrStr(rdb, "repl-id", server.replid) == -1) return -1;
|
||||
if (rdbSaveAuxFieldStrInt(rdb, "repl-offset", server.master_repl_offset) == -1) return -1;
|
||||
if (rdbSaveAuxFieldStrInt(rdb, "repl-offset", server.primary_repl_offset) == -1) return -1;
|
||||
}
|
||||
if (rdbSaveAuxFieldStrInt(rdb, "aof-base", aof_base) == -1) return -1;
|
||||
return 1;
|
||||
@ -1369,19 +1369,19 @@ int rdbSaveRio(int req, rio *rdb, int *error, int rdbflags, rdbSaveInfo *rsi) {
|
||||
snprintf(magic, sizeof(magic), "REDIS%04d", RDB_VERSION);
|
||||
if (rdbWriteRaw(rdb, magic, 9) == -1) goto werr;
|
||||
if (rdbSaveInfoAuxFields(rdb, rdbflags, rsi) == -1) goto werr;
|
||||
if (!(req & SLAVE_REQ_RDB_EXCLUDE_DATA) && rdbSaveModulesAux(rdb, VALKEYMODULE_AUX_BEFORE_RDB) == -1) goto werr;
|
||||
if (!(req & REPLICA_REQ_RDB_EXCLUDE_DATA) && rdbSaveModulesAux(rdb, VALKEYMODULE_AUX_BEFORE_RDB) == -1) goto werr;
|
||||
|
||||
/* save functions */
|
||||
if (!(req & SLAVE_REQ_RDB_EXCLUDE_FUNCTIONS) && rdbSaveFunctions(rdb) == -1) goto werr;
|
||||
if (!(req & REPLICA_REQ_RDB_EXCLUDE_FUNCTIONS) && rdbSaveFunctions(rdb) == -1) goto werr;
|
||||
|
||||
/* save all databases, skip this if we're in functions-only mode */
|
||||
if (!(req & SLAVE_REQ_RDB_EXCLUDE_DATA)) {
|
||||
if (!(req & REPLICA_REQ_RDB_EXCLUDE_DATA)) {
|
||||
for (j = 0; j < server.dbnum; j++) {
|
||||
if (rdbSaveDb(rdb, j, rdbflags, &key_counter) == -1) goto werr;
|
||||
}
|
||||
}
|
||||
|
||||
if (!(req & SLAVE_REQ_RDB_EXCLUDE_DATA) && rdbSaveModulesAux(rdb, VALKEYMODULE_AUX_AFTER_RDB) == -1) goto werr;
|
||||
if (!(req & REPLICA_REQ_RDB_EXCLUDE_DATA) && rdbSaveModulesAux(rdb, VALKEYMODULE_AUX_AFTER_RDB) == -1) goto werr;
|
||||
|
||||
/* EOF opcode */
|
||||
if (rdbSaveType(rdb, RDB_OPCODE_EOF) == -1) goto werr;
|
||||
@ -1495,7 +1495,7 @@ werr:
|
||||
int rdbSaveToFile(const char *filename) {
|
||||
startSaving(RDBFLAGS_NONE);
|
||||
|
||||
if (rdbSaveInternal(SLAVE_REQ_NONE, filename, NULL, RDBFLAGS_NONE) != C_OK) {
|
||||
if (rdbSaveInternal(REPLICA_REQ_NONE, filename, NULL, RDBFLAGS_NONE) != C_OK) {
|
||||
int saved_errno = errno;
|
||||
stopSaving(0);
|
||||
errno = saved_errno;
|
||||
@ -1816,8 +1816,8 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
|
||||
int deep_integrity_validation = server.sanitize_dump_payload == SANITIZE_DUMP_YES;
|
||||
if (server.sanitize_dump_payload == SANITIZE_DUMP_CLIENTS) {
|
||||
/* Skip sanitization when loading (an RDB), or getting a RESTORE command
|
||||
* from either the master or a client using an ACL user with the skip-sanitize-payload flag. */
|
||||
int skip = server.loading || (server.current_client && (server.current_client->flags & CLIENT_MASTER));
|
||||
* from either the primary or a client using an ACL user with the skip-sanitize-payload flag. */
|
||||
int skip = server.loading || (server.current_client && (server.current_client->flags & CLIENT_PRIMARY));
|
||||
if (!skip && server.current_client && server.current_client->user)
|
||||
skip = !!(server.current_client->user->flags & USER_FLAG_SANITIZE_PAYLOAD_SKIP);
|
||||
deep_integrity_validation = !skip;
|
||||
@ -2434,12 +2434,12 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error) {
|
||||
}
|
||||
|
||||
while (listpacks--) {
|
||||
/* Get the master ID, the one we'll use as key of the radix tree
|
||||
/* Get the primary ID, the one we'll use as key of the radix tree
|
||||
* node: the entries inside the listpack itself are delta-encoded
|
||||
* relatively to this ID. */
|
||||
sds nodekey = rdbGenericLoadStringObject(rdb, RDB_LOAD_SDS, NULL);
|
||||
if (nodekey == NULL) {
|
||||
rdbReportReadError("Stream master ID loading failed: invalid encoding or I/O error.");
|
||||
rdbReportReadError("Stream primary ID loading failed: invalid encoding or I/O error.");
|
||||
decrRefCount(o);
|
||||
return NULL;
|
||||
}
|
||||
@ -2883,7 +2883,7 @@ void rdbLoadProgressCallback(rio *r, const void *buf, size_t len) {
|
||||
if (server.loading_process_events_interval_bytes &&
|
||||
(r->processed_bytes + len) / server.loading_process_events_interval_bytes >
|
||||
r->processed_bytes / server.loading_process_events_interval_bytes) {
|
||||
if (server.masterhost && server.repl_state == REPL_STATE_TRANSFER) replicationSendNewlineToMaster();
|
||||
if (server.primary_host && server.repl_state == REPL_STATE_TRANSFER) replicationSendNewlineToPrimary();
|
||||
loadingAbsProgress(r->processed_bytes);
|
||||
processEventsWhileBlocked();
|
||||
processModuleLoadingProgressEvent(0);
|
||||
@ -3197,9 +3197,9 @@ int rdbLoadRioWithLoadingCtx(rio *rdb, int rdbflags, rdbSaveInfo *rsi, rdbLoadin
|
||||
|
||||
/* Check if the key already expired. This function is used when loading
|
||||
* an RDB file from disk, either at startup, or when an RDB was
|
||||
* received from the master. In the latter case, the master is
|
||||
* received from the primary. In the latter case, the primary is
|
||||
* responsible for key expiry. If we would expire keys here, the
|
||||
* snapshot taken by the master may not be reflected on the slave.
|
||||
* snapshot taken by the primary may not be reflected on the replica.
|
||||
* Similarly, if the base AOF is RDB format, we want to load all
|
||||
* the keys as they are, since the log of operations in the incr AOF
|
||||
* is assumed to work in the exact keyspace state. */
|
||||
@ -3215,18 +3215,18 @@ int rdbLoadRioWithLoadingCtx(rio *rdb, int rdbflags, rdbSaveInfo *rsi, rdbLoadin
|
||||
sdsfree(key);
|
||||
goto eoferr;
|
||||
}
|
||||
} else if (iAmMaster() && !(rdbflags & RDBFLAGS_AOF_PREAMBLE) && expiretime != -1 && expiretime < now) {
|
||||
} else if (iAmPrimary() && !(rdbflags & RDBFLAGS_AOF_PREAMBLE) && expiretime != -1 && expiretime < now) {
|
||||
if (rdbflags & RDBFLAGS_FEED_REPL) {
|
||||
/* Caller should have created replication backlog,
|
||||
* and now this path only works when rebooting,
|
||||
* so we don't have replicas yet. */
|
||||
serverAssert(server.repl_backlog != NULL && listLength(server.slaves) == 0);
|
||||
serverAssert(server.repl_backlog != NULL && listLength(server.replicas) == 0);
|
||||
robj keyobj;
|
||||
initStaticStringObject(keyobj, key);
|
||||
robj *argv[2];
|
||||
argv[0] = server.lazyfree_lazy_expire ? shared.unlink : shared.del;
|
||||
argv[1] = &keyobj;
|
||||
replicationFeedSlaves(dbid, argv, 2);
|
||||
replicationFeedReplicas(dbid, argv, 2);
|
||||
}
|
||||
sdsfree(key);
|
||||
decrRefCount(val);
|
||||
@ -3378,7 +3378,7 @@ static void backgroundSaveDoneHandlerDisk(int exitcode, int bysignal, time_t sav
|
||||
}
|
||||
|
||||
/* A background saving child (BGSAVE) terminated its work. Handle this.
|
||||
* This function covers the case of RDB -> Slaves socket transfers for
|
||||
* This function covers the case of RDB -> Replicas socket transfers for
|
||||
* diskless replication. */
|
||||
static void backgroundSaveDoneHandlerSocket(int exitcode, int bysignal) {
|
||||
if (!bysignal && exitcode == 0) {
|
||||
@ -3416,9 +3416,9 @@ void backgroundSaveDoneHandler(int exitcode, int bysignal) {
|
||||
server.rdb_child_type = RDB_CHILD_TYPE_NONE;
|
||||
server.rdb_save_time_last = save_end - server.rdb_save_time_start;
|
||||
server.rdb_save_time_start = -1;
|
||||
/* Possibly there are slaves waiting for a BGSAVE in order to be served
|
||||
/* Possibly there are replicas waiting for a BGSAVE in order to be served
|
||||
* (the first stage of SYNC is a bulk transfer of dump.rdb) */
|
||||
updateSlavesWaitingBgsave((!bysignal && exitcode == 0) ? C_OK : C_ERR, type);
|
||||
updateReplicasWaitingBgsave((!bysignal && exitcode == 0) ? C_OK : C_ERR, type);
|
||||
}
|
||||
|
||||
/* Kill the RDB saving child using SIGUSR1 (so that the parent will know
|
||||
@ -3434,9 +3434,9 @@ void killRDBChild(void) {
|
||||
* - rdbRemoveTempFile */
|
||||
}
|
||||
|
||||
/* Spawn an RDB child that writes the RDB to the sockets of the slaves
|
||||
* that are currently in SLAVE_STATE_WAIT_BGSAVE_START state. */
|
||||
int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi) {
|
||||
/* Spawn an RDB child that writes the RDB to the sockets of the replicas
|
||||
* that are currently in REPLICA_STATE_WAIT_BGSAVE_START state. */
|
||||
int rdbSaveToReplicasSockets(int req, rdbSaveInfo *rsi) {
|
||||
listNode *ln;
|
||||
listIter li;
|
||||
pid_t childpid;
|
||||
@ -3468,17 +3468,17 @@ int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi) {
|
||||
|
||||
/* Collect the connections of the replicas we want to transfer
|
||||
* the RDB to, which are in WAIT_BGSAVE_START state. */
|
||||
server.rdb_pipe_conns = zmalloc(sizeof(connection *) * listLength(server.slaves));
|
||||
server.rdb_pipe_conns = zmalloc(sizeof(connection *) * listLength(server.replicas));
|
||||
server.rdb_pipe_numconns = 0;
|
||||
server.rdb_pipe_numconns_writing = 0;
|
||||
listRewind(server.slaves, &li);
|
||||
listRewind(server.replicas, &li);
|
||||
while ((ln = listNext(&li))) {
|
||||
client *slave = ln->value;
|
||||
if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) {
|
||||
/* Check slave has the exact requirements */
|
||||
if (slave->slave_req != req) continue;
|
||||
server.rdb_pipe_conns[server.rdb_pipe_numconns++] = slave->conn;
|
||||
replicationSetupSlaveForFullResync(slave, getPsyncInitialOffset());
|
||||
client *replica = ln->value;
|
||||
if (replica->repl_state == REPLICA_STATE_WAIT_BGSAVE_START) {
|
||||
/* Check replica has the exact requirements */
|
||||
if (replica->replica_req != req) continue;
|
||||
server.rdb_pipe_conns[server.rdb_pipe_numconns++] = replica->conn;
|
||||
replicationSetupReplicaForFullResync(replica, getPsyncInitialOffset());
|
||||
}
|
||||
}
|
||||
|
||||
@ -3522,13 +3522,13 @@ int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi) {
|
||||
serverLog(LL_WARNING, "Can't save in background: fork: %s", strerror(errno));
|
||||
|
||||
/* Undo the state change. The caller will perform cleanup on
|
||||
* all the slaves in BGSAVE_START state, but an early call to
|
||||
* replicationSetupSlaveForFullResync() turned it into BGSAVE_END */
|
||||
listRewind(server.slaves, &li);
|
||||
* all the replicas in BGSAVE_START state, but an early call to
|
||||
* replicationSetupReplicaForFullResync() turned it into BGSAVE_END */
|
||||
listRewind(server.replicas, &li);
|
||||
while ((ln = listNext(&li))) {
|
||||
client *slave = ln->value;
|
||||
if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END) {
|
||||
slave->replstate = SLAVE_STATE_WAIT_BGSAVE_START;
|
||||
client *replica = ln->value;
|
||||
if (replica->repl_state == REPLICA_STATE_WAIT_BGSAVE_END) {
|
||||
replica->repl_state = REPLICA_STATE_WAIT_BGSAVE_START;
|
||||
}
|
||||
}
|
||||
close(rdb_pipe_write);
|
||||
@ -3563,7 +3563,7 @@ void saveCommand(client *c) {
|
||||
|
||||
rdbSaveInfo rsi, *rsiptr;
|
||||
rsiptr = rdbPopulateSaveInfo(&rsi);
|
||||
if (rdbSave(SLAVE_REQ_NONE, server.rdb_filename, rsiptr, RDBFLAGS_NONE) == C_OK) {
|
||||
if (rdbSave(REPLICA_REQ_NONE, server.rdb_filename, rsiptr, RDBFLAGS_NONE) == C_OK) {
|
||||
addReply(c, shared.ok);
|
||||
} else {
|
||||
addReplyErrorObject(c, shared.err);
|
||||
@ -3599,7 +3599,7 @@ void bgsaveCommand(client *c) {
|
||||
"Use BGSAVE SCHEDULE in order to schedule a BGSAVE whenever "
|
||||
"possible.");
|
||||
}
|
||||
} else if (rdbSaveBackground(SLAVE_REQ_NONE, server.rdb_filename, rsiptr, RDBFLAGS_NONE) == C_OK) {
|
||||
} else if (rdbSaveBackground(REPLICA_REQ_NONE, server.rdb_filename, rsiptr, RDBFLAGS_NONE) == C_OK) {
|
||||
addReplyStatus(c, "Background saving started");
|
||||
} else {
|
||||
addReplyErrorObject(c, shared.err);
|
||||
@ -3608,48 +3608,48 @@ void bgsaveCommand(client *c) {
|
||||
|
||||
/* Populate the rdbSaveInfo structure used to persist the replication
|
||||
* information inside the RDB file. Currently the structure explicitly
|
||||
* contains just the currently selected DB from the master stream, however
|
||||
* contains just the currently selected DB from the primary stream, however
|
||||
* if the rdbSave*() family functions receive a NULL rsi structure also
|
||||
* the Replication ID/offset is not saved. The function populates 'rsi'
|
||||
* that is normally stack-allocated in the caller, returns the populated
|
||||
* pointer if the instance has a valid master client, otherwise NULL
|
||||
* pointer if the instance has a valid primary client, otherwise NULL
|
||||
* is returned, and the RDB saving will not persist any replication related
|
||||
* information. */
|
||||
rdbSaveInfo *rdbPopulateSaveInfo(rdbSaveInfo *rsi) {
|
||||
rdbSaveInfo rsi_init = RDB_SAVE_INFO_INIT;
|
||||
*rsi = rsi_init;
|
||||
|
||||
/* If the instance is a master, we can populate the replication info
|
||||
/* If the instance is a primary, we can populate the replication info
|
||||
* only when repl_backlog is not NULL. If the repl_backlog is NULL,
|
||||
* it means that the instance isn't in any replication chains. In this
|
||||
* scenario the replication info is useless, because when a slave
|
||||
* scenario the replication info is useless, because when a replica
|
||||
* connects to us, the NULL repl_backlog will trigger a full
|
||||
* synchronization, at the same time we will use a new replid and clear
|
||||
* replid2. */
|
||||
if (!server.masterhost && server.repl_backlog) {
|
||||
/* Note that when server.slaveseldb is -1, it means that this master
|
||||
if (!server.primary_host && server.repl_backlog) {
|
||||
/* Note that when server.replicas_eldb is -1, it means that this primary
|
||||
* didn't apply any write commands after a full synchronization.
|
||||
* So we can let repl_stream_db be 0, this allows a restarted slave
|
||||
* So we can let repl_stream_db be 0, this allows a restarted replica
|
||||
* to reload replication ID/offset, it's safe because the next write
|
||||
* command must generate a SELECT statement. */
|
||||
rsi->repl_stream_db = server.slaveseldb == -1 ? 0 : server.slaveseldb;
|
||||
rsi->repl_stream_db = server.replicas_eldb == -1 ? 0 : server.replicas_eldb;
|
||||
return rsi;
|
||||
}
|
||||
|
||||
/* If the instance is a slave we need a connected master
|
||||
/* If the instance is a replica we need a connected primary
|
||||
* in order to fetch the currently selected DB. */
|
||||
if (server.master) {
|
||||
rsi->repl_stream_db = server.master->db->id;
|
||||
if (server.primary) {
|
||||
rsi->repl_stream_db = server.primary->db->id;
|
||||
return rsi;
|
||||
}
|
||||
|
||||
/* If we have a cached master we can use it in order to populate the
|
||||
* replication selected DB info inside the RDB file: the slave can
|
||||
* increment the master_repl_offset only from data arriving from the
|
||||
* master, so if we are disconnected the offset in the cached master
|
||||
/* If we have a cached primary we can use it in order to populate the
|
||||
* replication selected DB info inside the RDB file: the replica can
|
||||
* increment the primary_repl_offset only from data arriving from the
|
||||
* primary, so if we are disconnected the offset in the cached primary
|
||||
* is valid. */
|
||||
if (server.cached_master) {
|
||||
rsi->repl_stream_db = server.cached_master->db->id;
|
||||
if (server.cached_primary) {
|
||||
rsi->repl_stream_db = server.cached_primary->db->id;
|
||||
return rsi;
|
||||
}
|
||||
return NULL;
|
||||
|
@ -154,7 +154,7 @@ int rdbSaveObjectType(rio *rdb, robj *o);
|
||||
int rdbLoadObjectType(rio *rdb);
|
||||
int rdbLoad(char *filename, rdbSaveInfo *rsi, int rdbflags);
|
||||
int rdbSaveBackground(int req, char *filename, rdbSaveInfo *rsi, int rdbflags);
|
||||
int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi);
|
||||
int rdbSaveToReplicasSockets(int req, rdbSaveInfo *rsi);
|
||||
void rdbRemoveTempFile(pid_t childpid, int from_signal);
|
||||
int rdbSaveToFile(const char *filename);
|
||||
int rdbSave(int req, char *filename, rdbSaveInfo *rsi, int rdbflags);
|
||||
|
1480
src/replication.c
1480
src/replication.c
File diff suppressed because it is too large
Load Diff
28
src/script.c
28
src/script.c
@ -48,8 +48,8 @@ static void exitScriptTimedoutMode(scriptRunCtx *run_ctx) {
|
||||
serverAssert(scriptIsTimedout());
|
||||
run_ctx->flags &= ~SCRIPT_TIMEDOUT;
|
||||
blockingOperationEnds();
|
||||
/* if we are a replica and we have an active master, set it for continue processing */
|
||||
if (server.masterhost && server.master) queueClientForReprocessing(server.master);
|
||||
/* if we are a replica and we have an active primary, set it for continue processing */
|
||||
if (server.primary_host && server.primary) queueClientForReprocessing(server.primary);
|
||||
}
|
||||
|
||||
static void enterScriptTimedoutMode(scriptRunCtx *run_ctx) {
|
||||
@ -135,7 +135,7 @@ int scriptPrepareForRun(scriptRunCtx *run_ctx,
|
||||
int client_allow_oom = !!(caller->flags & CLIENT_ALLOW_OOM);
|
||||
|
||||
int running_stale =
|
||||
server.masterhost && server.repl_state != REPL_STATE_CONNECTED && server.repl_serve_stale_data == 0;
|
||||
server.primary_host && server.repl_state != REPL_STATE_CONNECTED && server.repl_serve_stale_data == 0;
|
||||
int obey_client = mustObeyClient(caller);
|
||||
|
||||
if (!(script_flags & SCRIPT_FLAG_EVAL_COMPAT_MODE)) {
|
||||
@ -156,7 +156,7 @@ int scriptPrepareForRun(scriptRunCtx *run_ctx,
|
||||
* 1. we are not a readonly replica
|
||||
* 2. no disk error detected
|
||||
* 3. command is not `fcall_ro`/`eval[sha]_ro` */
|
||||
if (server.masterhost && server.repl_slave_ro && !obey_client) {
|
||||
if (server.primary_host && server.repl_replica_ro && !obey_client) {
|
||||
addReplyError(caller, "-READONLY Can not run script with write flag on readonly replica");
|
||||
return C_ERR;
|
||||
}
|
||||
@ -186,8 +186,8 @@ int scriptPrepareForRun(scriptRunCtx *run_ctx,
|
||||
return C_ERR;
|
||||
}
|
||||
|
||||
/* Don't accept write commands if there are not enough good slaves and
|
||||
* user configured the min-slaves-to-write option. */
|
||||
/* Don't accept write commands if there are not enough good replicas and
|
||||
* user configured the min-replicas-to-write option. */
|
||||
if (!checkGoodReplicasStatus()) {
|
||||
addReplyErrorObject(caller, shared.noreplicaserr);
|
||||
return C_ERR;
|
||||
@ -206,7 +206,7 @@ int scriptPrepareForRun(scriptRunCtx *run_ctx,
|
||||
} else {
|
||||
/* Special handling for backwards compatibility (no shebang eval[sha]) mode */
|
||||
if (running_stale) {
|
||||
addReplyErrorObject(caller, shared.masterdownerr);
|
||||
addReplyErrorObject(caller, shared.primarydownerr);
|
||||
return C_ERR;
|
||||
}
|
||||
}
|
||||
@ -367,13 +367,13 @@ static int scriptVerifyWriteCommandAllow(scriptRunCtx *run_ctx, char **err) {
|
||||
* fail it on unpredictable error state. */
|
||||
if ((run_ctx->flags & SCRIPT_WRITE_DIRTY)) return C_OK;
|
||||
|
||||
/* Write commands are forbidden against read-only slaves, or if a
|
||||
/* Write commands are forbidden against read-only replicas, or if a
|
||||
* command marked as non-deterministic was already called in the context
|
||||
* of this script. */
|
||||
int deny_write_type = writeCommandsDeniedByDiskError();
|
||||
|
||||
if (server.masterhost && server.repl_slave_ro && !mustObeyClient(run_ctx->original_client)) {
|
||||
*err = sdsdup(shared.roslaveerr->ptr);
|
||||
if (server.primary_host && server.repl_replica_ro && !mustObeyClient(run_ctx->original_client)) {
|
||||
*err = sdsdup(shared.roreplicaerr->ptr);
|
||||
return C_ERR;
|
||||
}
|
||||
|
||||
@ -382,8 +382,8 @@ static int scriptVerifyWriteCommandAllow(scriptRunCtx *run_ctx, char **err) {
|
||||
return C_ERR;
|
||||
}
|
||||
|
||||
/* Don't accept write commands if there are not enough good slaves and
|
||||
* user configured the min-slaves-to-write option. Note this only reachable
|
||||
/* Don't accept write commands if there are not enough good replicas and
|
||||
* user configured the min-replicas-to-write option. Note this only reachable
|
||||
* for Eval scripts that didn't declare flags, see the other check in
|
||||
* scriptPrepareForRun */
|
||||
if (!checkGoodReplicasStatus()) {
|
||||
@ -423,7 +423,7 @@ static int scriptVerifyClusterState(scriptRunCtx *run_ctx, client *c, client *or
|
||||
}
|
||||
/* If this is a Cluster node, we need to make sure the script is not
|
||||
* trying to access non-local keys, with the exception of commands
|
||||
* received from our master or when loading the AOF back in memory. */
|
||||
* received from our primary or when loading the AOF back in memory. */
|
||||
int error_code;
|
||||
/* Duplicate relevant flags in the script client. */
|
||||
c->flags &= ~(CLIENT_READONLY | CLIENT_ASKING);
|
||||
@ -498,7 +498,7 @@ int scriptSetRepl(scriptRunCtx *run_ctx, int repl) {
|
||||
}
|
||||
|
||||
static int scriptVerifyAllowStale(client *c, sds *err) {
|
||||
if (!server.masterhost) {
|
||||
if (!server.primary_host) {
|
||||
/* Not a replica, stale is irrelevant */
|
||||
return C_OK;
|
||||
}
|
||||
|
@ -1074,7 +1074,7 @@ static int luaRedisStatusReplyCommand(lua_State *lua) {
|
||||
/* server.set_repl()
|
||||
*
|
||||
* Set the propagation of write commands executed in the context of the
|
||||
* script to on/off for AOF and slaves. */
|
||||
* script to on/off for AOF and replicas. */
|
||||
static int luaRedisSetReplCommand(lua_State *lua) {
|
||||
int flags, argc = lua_gettop(lua);
|
||||
|
||||
|
1587
src/sentinel.c
1587
src/sentinel.c
File diff suppressed because it is too large
Load Diff
276
src/server.c
276
src/server.c
@ -140,7 +140,7 @@ void serverLogRaw(int level, const char *msg) {
|
||||
} else if (pid != server.pid) {
|
||||
role_char = 'C'; /* RDB / AOF writing child. */
|
||||
} else {
|
||||
role_char = (server.masterhost ? 'S' : 'M'); /* Slave or Master. */
|
||||
role_char = (server.primary_host ? 'S' : 'M'); /* Replica or primary. */
|
||||
}
|
||||
fprintf(fp, "%d:%c %s %c %s\n", (int)getpid(), role_char, buf, c[level], msg);
|
||||
}
|
||||
@ -240,7 +240,7 @@ mstime_t commandTimeSnapshot(void) {
|
||||
* This is specifically important in the context of scripts, where we
|
||||
* pretend that time freezes. This way a key can expire only the first time
|
||||
* it is accessed and not in the middle of the script execution, making
|
||||
* propagation to slaves / AOF consistent. See issue #1525 for more info.
|
||||
* propagation to replicas / AOF consistent. See issue #1525 for more info.
|
||||
* Note that we cannot use the cached server.mstime because it can change
|
||||
* in processEventsWhileBlocked etc. */
|
||||
return server.cmd_time_snapshot;
|
||||
@ -700,7 +700,7 @@ int clientsCronResizeQueryBuffer(client *c) {
|
||||
/* If the client query buffer is NULL, it is using the shared query buffer and there is nothing to do. */
|
||||
if (c->querybuf == NULL) return 0;
|
||||
size_t querybuf_size = sdsalloc(c->querybuf);
|
||||
time_t idletime = server.unixtime - c->lastinteraction;
|
||||
time_t idletime = server.unixtime - c->last_interaction;
|
||||
|
||||
/* Only resize the query buffer if the buffer is actually wasting at least a
|
||||
* few kbytes */
|
||||
@ -709,8 +709,8 @@ int clientsCronResizeQueryBuffer(client *c) {
|
||||
if (idletime > 2) {
|
||||
/* 1) Query is idle for a long time. */
|
||||
size_t remaining = sdslen(c->querybuf) - c->qb_pos;
|
||||
if (!(c->flags & CLIENT_MASTER) && !remaining) {
|
||||
/* If the client is not a master and no data is pending,
|
||||
if (!(c->flags & CLIENT_PRIMARY) && !remaining) {
|
||||
/* If the client is not a primary and no data is pending,
|
||||
* The client can safely use the shared query buffer in the next read - free the client's querybuf. */
|
||||
sdsfree(c->querybuf);
|
||||
/* By setting the querybuf to NULL, the client will use the shared query buffer in the next read.
|
||||
@ -893,7 +893,7 @@ void removeClientFromMemUsageBucket(client *c, int allow_eviction) {
|
||||
* together clients consuming about the same amount of memory and can quickly
|
||||
* free them in case we reach maxmemory-clients (client eviction).
|
||||
*
|
||||
* Note: This function filters clients of type no-evict, master or replica regardless
|
||||
* Note: This function filters clients of type no-evict, primary or replica regardless
|
||||
* of whether the eviction is enabled or not, so the memory usage we get from these
|
||||
* types of clients via the INFO command may be out of date.
|
||||
*
|
||||
@ -1018,13 +1018,13 @@ void clientsCron(void) {
|
||||
* incrementally in the databases, such as active key expiring, resizing,
|
||||
* rehashing. */
|
||||
void databasesCron(void) {
|
||||
/* Expire keys by random sampling. Not required for slaves
|
||||
* as master will synthesize DELs for us. */
|
||||
/* Expire keys by random sampling. Not required for replicas
|
||||
* as primary will synthesize DELs for us. */
|
||||
if (server.active_expire_enabled) {
|
||||
if (iAmMaster()) {
|
||||
if (iAmPrimary()) {
|
||||
activeExpireCycle(ACTIVE_EXPIRE_CYCLE_SLOW);
|
||||
} else {
|
||||
expireSlaveKeys();
|
||||
expireReplicaKeys();
|
||||
}
|
||||
}
|
||||
|
||||
@ -1328,7 +1328,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
|
||||
if (!server.sentinel_mode) {
|
||||
run_with_period(5000) {
|
||||
serverLog(LL_DEBUG, "%lu clients connected (%lu replicas), %zu bytes in use",
|
||||
listLength(server.clients) - listLength(server.slaves), listLength(server.slaves),
|
||||
listLength(server.clients) - listLength(server.replicas), listLength(server.replicas),
|
||||
zmalloc_used_memory());
|
||||
}
|
||||
}
|
||||
@ -1365,7 +1365,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
|
||||
serverLog(LL_NOTICE, "%d changes in %d seconds. Saving...", sp->changes, (int)sp->seconds);
|
||||
rdbSaveInfo rsi, *rsiptr;
|
||||
rsiptr = rdbPopulateSaveInfo(&rsi);
|
||||
rdbSaveBackground(SLAVE_REQ_NONE, server.rdb_filename, rsiptr, RDBFLAGS_NONE);
|
||||
rdbSaveBackground(REPLICA_REQ_NONE, server.rdb_filename, rsiptr, RDBFLAGS_NONE);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -1405,7 +1405,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
|
||||
/* Clear the paused actions state if needed. */
|
||||
updatePausedActions();
|
||||
|
||||
/* Replication cron function -- used to reconnect to master,
|
||||
/* Replication cron function -- used to reconnect to primary,
|
||||
* detect transfer failures, start background RDB transfers and so forth.
|
||||
*
|
||||
* If the server is trying to failover then run the replication cron faster so
|
||||
@ -1449,7 +1449,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
|
||||
(server.unixtime - server.lastbgsave_try > CONFIG_BGSAVE_RETRY_DELAY || server.lastbgsave_status == C_OK)) {
|
||||
rdbSaveInfo rsi, *rsiptr;
|
||||
rsiptr = rdbPopulateSaveInfo(&rsi);
|
||||
if (rdbSaveBackground(SLAVE_REQ_NONE, server.rdb_filename, rsiptr, RDBFLAGS_NONE) == C_OK)
|
||||
if (rdbSaveBackground(REPLICA_REQ_NONE, server.rdb_filename, rsiptr, RDBFLAGS_NONE) == C_OK)
|
||||
server.rdb_bgsave_scheduled = 0;
|
||||
}
|
||||
|
||||
@ -1544,7 +1544,7 @@ static void sendGetackToReplicas(void) {
|
||||
argv[0] = shared.replconf;
|
||||
argv[1] = shared.getack;
|
||||
argv[2] = shared.special_asterick; /* Not used argument. */
|
||||
replicationFeedSlaves(-1, argv, 3);
|
||||
replicationFeedReplicas(-1, argv, 3);
|
||||
}
|
||||
|
||||
extern int ProcessingEventsWhileBlocked;
|
||||
@ -1612,13 +1612,13 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
|
||||
|
||||
/* Run a fast expire cycle (the called function will return
|
||||
* ASAP if a fast cycle is not needed). */
|
||||
if (server.active_expire_enabled && iAmMaster()) activeExpireCycle(ACTIVE_EXPIRE_CYCLE_FAST);
|
||||
if (server.active_expire_enabled && iAmPrimary()) activeExpireCycle(ACTIVE_EXPIRE_CYCLE_FAST);
|
||||
|
||||
if (moduleCount()) {
|
||||
moduleFireServerEvent(VALKEYMODULE_EVENT_EVENTLOOP, VALKEYMODULE_SUBEVENT_EVENTLOOP_BEFORE_SLEEP, NULL);
|
||||
}
|
||||
|
||||
/* Send all the slaves an ACK request if at least one client blocked
|
||||
/* Send all the replicas an ACK request if at least one client blocked
|
||||
* during the previous event loop iteration. Note that we do this after
|
||||
* processUnblockedClients(), so if there are multiple pipelined WAITs
|
||||
* and the just unblocked WAIT gets blocked again, we don't have to wait
|
||||
@ -1626,10 +1626,10 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
|
||||
*
|
||||
* We also don't send the ACKs while clients are paused, since it can
|
||||
* increment the replication backlog, they'll be sent after the pause
|
||||
* if we are still the master. */
|
||||
if (server.get_ack_from_slaves && !isPausedActionsWithUpdate(PAUSE_ACTION_REPLICA)) {
|
||||
* if we are still the primary. */
|
||||
if (server.get_ack_from_replicas && !isPausedActionsWithUpdate(PAUSE_ACTION_REPLICA)) {
|
||||
sendGetackToReplicas();
|
||||
server.get_ack_from_slaves = 0;
|
||||
server.get_ack_from_replicas = 0;
|
||||
}
|
||||
|
||||
/* We may have received updates from clients about their current offset. NOTE:
|
||||
@ -1818,9 +1818,10 @@ void createSharedObjects(void) {
|
||||
shared.outofrangeerr = createObject(OBJ_STRING, sdsnew("-ERR index out of range\r\n"));
|
||||
shared.noscripterr = createObject(OBJ_STRING, sdsnew("-NOSCRIPT No matching script. Please use EVAL.\r\n"));
|
||||
createSharedObjectsWithCompat();
|
||||
shared.masterdownerr = createObject(
|
||||
shared.primarydownerr = createObject(
|
||||
OBJ_STRING, sdsnew("-MASTERDOWN Link with MASTER is down and replica-serve-stale-data is set to 'no'.\r\n"));
|
||||
shared.roslaveerr = createObject(OBJ_STRING, sdsnew("-READONLY You can't write against a read only replica.\r\n"));
|
||||
shared.roreplicaerr =
|
||||
createObject(OBJ_STRING, sdsnew("-READONLY You can't write against a read only replica.\r\n"));
|
||||
shared.noautherr = createObject(OBJ_STRING, sdsnew("-NOAUTH Authentication required.\r\n"));
|
||||
shared.oomerr = createObject(OBJ_STRING, sdsnew("-OOM command not allowed when used memory > 'maxmemory'.\r\n"));
|
||||
shared.execaborterr =
|
||||
@ -2027,23 +2028,23 @@ void initServerConfig(void) {
|
||||
appendServerSaveParams(60, 10000); /* save after 1 minute and 10000 changes */
|
||||
|
||||
/* Replication related */
|
||||
server.masterhost = NULL;
|
||||
server.masterport = 6379;
|
||||
server.master = NULL;
|
||||
server.cached_master = NULL;
|
||||
server.master_initial_offset = -1;
|
||||
server.primary_host = NULL;
|
||||
server.primary_port = 6379;
|
||||
server.primary = NULL;
|
||||
server.cached_primary = NULL;
|
||||
server.primary_initial_offset = -1;
|
||||
server.repl_state = REPL_STATE_NONE;
|
||||
server.repl_transfer_tmpfile = NULL;
|
||||
server.repl_transfer_fd = -1;
|
||||
server.repl_transfer_s = NULL;
|
||||
server.repl_syncio_timeout = CONFIG_REPL_SYNCIO_TIMEOUT;
|
||||
server.repl_down_since = 0; /* Never connected, repl is down since EVER. */
|
||||
server.master_repl_offset = 0;
|
||||
server.primary_repl_offset = 0;
|
||||
server.fsynced_reploff_pending = 0;
|
||||
|
||||
/* Replication partial resync backlog */
|
||||
server.repl_backlog = NULL;
|
||||
server.repl_no_slaves_since = time(NULL);
|
||||
server.repl_no_replicas_since = time(NULL);
|
||||
|
||||
/* Failover related */
|
||||
server.failover_end_time = 0;
|
||||
@ -2144,11 +2145,11 @@ int restartServer(int flags, mstime_t delay) {
|
||||
* to user specified configuration. This is currently implemented on Linux
|
||||
* only.
|
||||
*
|
||||
* A process_class value of -1 implies OOM_CONFIG_MASTER or OOM_CONFIG_REPLICA,
|
||||
* A process_class value of -1 implies OOM_CONFIG_PRIMARY or OOM_CONFIG_REPLICA,
|
||||
* depending on current role.
|
||||
*/
|
||||
int setOOMScoreAdj(int process_class) {
|
||||
if (process_class == -1) process_class = (server.masterhost ? CONFIG_OOM_REPLICA : CONFIG_OOM_MASTER);
|
||||
if (process_class == -1) process_class = (server.primary_host ? CONFIG_OOM_REPLICA : CONFIG_OOM_PRIMARY);
|
||||
|
||||
serverAssert(process_class >= 0 && process_class < CONFIG_OOM_COUNT);
|
||||
|
||||
@ -2541,19 +2542,19 @@ void initServer(void) {
|
||||
server.clients = listCreate();
|
||||
server.clients_index = raxNew();
|
||||
server.clients_to_close = listCreate();
|
||||
server.slaves = listCreate();
|
||||
server.replicas = listCreate();
|
||||
server.monitors = listCreate();
|
||||
server.clients_pending_write = listCreate();
|
||||
server.clients_pending_read = listCreate();
|
||||
server.clients_timeout_table = raxNew();
|
||||
server.replication_allowed = 1;
|
||||
server.slaveseldb = -1; /* Force to emit the first SELECT command. */
|
||||
server.replicas_eldb = -1; /* Force to emit the first SELECT command. */
|
||||
server.unblocked_clients = listCreate();
|
||||
server.ready_keys = listCreate();
|
||||
server.tracking_pending_keys = listCreate();
|
||||
server.pending_push_messages = listCreate();
|
||||
server.clients_waiting_acks = listCreate();
|
||||
server.get_ack_from_slaves = 0;
|
||||
server.get_ack_from_replicas = 0;
|
||||
server.paused_actions = 0;
|
||||
memset(server.client_pause_per_purpose, 0, sizeof(server.client_pause_per_purpose));
|
||||
server.postponed_clients = listCreate();
|
||||
@ -2663,7 +2664,7 @@ void initServer(void) {
|
||||
server.lastbgsave_status = C_OK;
|
||||
server.aof_last_write_status = C_OK;
|
||||
server.aof_last_write_errno = 0;
|
||||
server.repl_good_slaves_count = 0;
|
||||
server.repl_good_replicas_count = 0;
|
||||
server.last_sig_received = 0;
|
||||
|
||||
/* Initiate acl info struct */
|
||||
@ -3147,9 +3148,9 @@ struct serverCommand *lookupCommandOrOriginal(robj **argv, int argc) {
|
||||
return cmd;
|
||||
}
|
||||
|
||||
/* Commands arriving from the master client or AOF client, should never be rejected. */
|
||||
/* Commands arriving from the primary client or AOF client, should never be rejected. */
|
||||
int mustObeyClient(client *c) {
|
||||
return c->id == CLIENT_ID_AOF || c->flags & CLIENT_MASTER;
|
||||
return c->id == CLIENT_ID_AOF || c->flags & CLIENT_PRIMARY;
|
||||
}
|
||||
|
||||
static int shouldPropagate(int target) {
|
||||
@ -3159,14 +3160,14 @@ static int shouldPropagate(int target) {
|
||||
if (server.aof_state != AOF_OFF) return 1;
|
||||
}
|
||||
if (target & PROPAGATE_REPL) {
|
||||
if (server.masterhost == NULL && (server.repl_backlog || listLength(server.slaves) != 0)) return 1;
|
||||
if (server.primary_host == NULL && (server.repl_backlog || listLength(server.replicas) != 0)) return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Propagate the specified command (in the context of the specified database id)
|
||||
* to AOF and Slaves.
|
||||
* to AOF and replicas.
|
||||
*
|
||||
* flags are an xor between:
|
||||
* + PROPAGATE_NONE (no propagation of command at all)
|
||||
@ -3188,7 +3189,7 @@ static void propagateNow(int dbid, robj **argv, int argc, int target) {
|
||||
serverAssert(!(isPausedActions(PAUSE_ACTION_REPLICA) && (!server.client_pause_in_transaction)));
|
||||
|
||||
if (server.aof_state != AOF_OFF && target & PROPAGATE_AOF) feedAppendOnlyFile(dbid, argv, argc);
|
||||
if (target & PROPAGATE_REPL) replicationFeedSlaves(dbid, argv, argc);
|
||||
if (target & PROPAGATE_REPL) replicationFeedReplicas(dbid, argv, argc);
|
||||
}
|
||||
|
||||
/* Used inside commands to schedule the propagation of additional commands
|
||||
@ -3369,7 +3370,7 @@ int incrCommandStatsOnError(struct serverCommand *cmd, int flags) {
|
||||
* CMD_CALL_NONE No flags.
|
||||
* CMD_CALL_PROPAGATE_AOF Append command to AOF if it modified the dataset
|
||||
* or if the client flags are forcing propagation.
|
||||
* CMD_CALL_PROPAGATE_REPL Send command to slaves if it modified the dataset
|
||||
* CMD_CALL_PROPAGATE_REPL Send command to replicas if it modified the dataset
|
||||
* or if the client flags are forcing propagation.
|
||||
* CMD_CALL_PROPAGATE Alias for PROPAGATE_AOF|PROPAGATE_REPL.
|
||||
* CMD_CALL_FULL Alias for SLOWLOG|STATS|PROPAGATE.
|
||||
@ -3382,12 +3383,12 @@ int incrCommandStatsOnError(struct serverCommand *cmd, int flags) {
|
||||
* in the call flags, then the command is propagated even if the
|
||||
* dataset was not affected by the command.
|
||||
* 2. If the client flags CLIENT_PREVENT_REPL_PROP or CLIENT_PREVENT_AOF_PROP
|
||||
* are set, the propagation into AOF or to slaves is not performed even
|
||||
* are set, the propagation into AOF or to replicas is not performed even
|
||||
* if the command modified the dataset.
|
||||
*
|
||||
* Note that regardless of the client flags, if CMD_CALL_PROPAGATE_AOF
|
||||
* or CMD_CALL_PROPAGATE_REPL are not set, then respectively AOF or
|
||||
* slaves propagation will never occur.
|
||||
* replicas propagation will never occur.
|
||||
*
|
||||
* Client flags are modified by the implementation of a given command
|
||||
* using the following API:
|
||||
@ -3428,7 +3429,7 @@ void call(client *c, int flags) {
|
||||
|
||||
/* Call the command. */
|
||||
dirty = server.dirty;
|
||||
long long old_master_repl_offset = server.master_repl_offset;
|
||||
long long old_primary_repl_offset = server.primary_repl_offset;
|
||||
incrCommandStatsOnError(NULL, 0);
|
||||
|
||||
const long long call_timer = ustime();
|
||||
@ -3604,7 +3605,7 @@ void call(client *c, int flags) {
|
||||
|
||||
/* Remember the replication offset of the client, right after its last
|
||||
* command that resulted in propagation. */
|
||||
if (old_master_repl_offset != server.master_repl_offset) c->woff = server.master_repl_offset;
|
||||
if (old_primary_repl_offset != server.primary_repl_offset) c->woff = server.primary_repl_offset;
|
||||
|
||||
/* Client pause takes effect after a transaction has finished. This needs
|
||||
* to be located after everything is propagated. */
|
||||
@ -3853,7 +3854,7 @@ int processCommand(client *c) {
|
||||
|
||||
/* If cluster is enabled perform the cluster redirection here.
|
||||
* However we don't perform the redirection if:
|
||||
* 1) The sender of this command is our master.
|
||||
* 1) The sender of this command is our primary.
|
||||
* 2) The command has no key arguments. */
|
||||
if (server.cluster_enabled && !mustObeyClient(c) &&
|
||||
!(!(c->cmd->flags & CMD_MOVABLE_KEYS) && c->cmd->key_specs_num == 0 && c->cmd->proc != execCommand)) {
|
||||
@ -3896,8 +3897,8 @@ int processCommand(client *c) {
|
||||
* message belongs to the old value of the key before it gets evicted.*/
|
||||
trackingHandlePendingKeyInvalidations();
|
||||
|
||||
/* performEvictions may flush slave output buffers. This may result
|
||||
* in a slave, that may be the active client, to be freed. */
|
||||
/* performEvictions may flush replica output buffers. This may result
|
||||
* in a replica, that may be the active client, to be freed. */
|
||||
if (server.current_client == NULL) return C_ERR;
|
||||
|
||||
if (out_of_memory && is_denyoom_command) {
|
||||
@ -3918,7 +3919,7 @@ int processCommand(client *c) {
|
||||
if (server.tracking_clients) trackingLimitUsedSlots();
|
||||
|
||||
/* Don't accept write commands if there are problems persisting on disk
|
||||
* unless coming from our master, in which case check the replica ignore
|
||||
* unless coming from our primary, in which case check the replica ignore
|
||||
* disk write error config to either log or crash. */
|
||||
int deny_write_type = writeCommandsDeniedByDiskError();
|
||||
if (deny_write_type != DISK_ERROR_TYPE_NONE && (is_write_command || c->cmd->proc == pingCommand)) {
|
||||
@ -3943,17 +3944,17 @@ int processCommand(client *c) {
|
||||
}
|
||||
}
|
||||
|
||||
/* Don't accept write commands if there are not enough good slaves and
|
||||
* user configured the min-slaves-to-write option. */
|
||||
/* Don't accept write commands if there are not enough good replicas and
|
||||
* user configured the min-replicas-to-write option. */
|
||||
if (is_write_command && !checkGoodReplicasStatus()) {
|
||||
rejectCommand(c, shared.noreplicaserr);
|
||||
return C_OK;
|
||||
}
|
||||
|
||||
/* Don't accept write commands if this is a read only slave. But
|
||||
* accept write commands if this is our master. */
|
||||
if (server.masterhost && server.repl_slave_ro && !obey_client && is_write_command) {
|
||||
rejectCommand(c, shared.roslaveerr);
|
||||
/* Don't accept write commands if this is a read only replica. But
|
||||
* accept write commands if this is our primary. */
|
||||
if (server.primary_host && server.repl_replica_ro && !obey_client && is_write_command) {
|
||||
rejectCommand(c, shared.roreplicaerr);
|
||||
return C_OK;
|
||||
}
|
||||
|
||||
@ -3972,10 +3973,10 @@ int processCommand(client *c) {
|
||||
|
||||
/* Only allow commands with flag "t", such as INFO, REPLICAOF and so on,
|
||||
* when replica-serve-stale-data is no and we are a replica with a broken
|
||||
* link with master. */
|
||||
if (server.masterhost && server.repl_state != REPL_STATE_CONNECTED && server.repl_serve_stale_data == 0 &&
|
||||
* link with primary. */
|
||||
if (server.primary_host && server.repl_state != REPL_STATE_CONNECTED && server.repl_serve_stale_data == 0 &&
|
||||
is_denystale_command) {
|
||||
rejectCommand(c, shared.masterdownerr);
|
||||
rejectCommand(c, shared.primarydownerr);
|
||||
return C_OK;
|
||||
}
|
||||
|
||||
@ -4015,15 +4016,15 @@ int processCommand(client *c) {
|
||||
/* Prevent a replica from sending commands that access the keyspace.
|
||||
* The main objective here is to prevent abuse of client pause check
|
||||
* from which replicas are exempt. */
|
||||
if ((c->flags & CLIENT_SLAVE) && (is_may_replicate_command || is_write_command || is_read_command)) {
|
||||
if ((c->flags & CLIENT_REPLICA) && (is_may_replicate_command || is_write_command || is_read_command)) {
|
||||
rejectCommandFormat(c, "Replica can't interact with the keyspace");
|
||||
return C_OK;
|
||||
}
|
||||
|
||||
/* If the server is paused, block the client until
|
||||
* the pause has ended. Replicas are never paused. */
|
||||
if (!(c->flags & CLIENT_SLAVE) && ((isPausedActions(PAUSE_ACTION_CLIENT_ALL)) ||
|
||||
((isPausedActions(PAUSE_ACTION_CLIENT_WRITE)) && is_may_replicate_command))) {
|
||||
if (!(c->flags & CLIENT_REPLICA) && ((isPausedActions(PAUSE_ACTION_CLIENT_ALL)) ||
|
||||
((isPausedActions(PAUSE_ACTION_CLIENT_WRITE)) && is_may_replicate_command))) {
|
||||
blockPostponeClient(c);
|
||||
return C_OK;
|
||||
}
|
||||
@ -4179,14 +4180,14 @@ static inline int isShutdownInitiated(void) {
|
||||
* need to wait for before shutting down. Returns 1 if we're ready to shut
|
||||
* down now. */
|
||||
int isReadyToShutdown(void) {
|
||||
if (listLength(server.slaves) == 0) return 1; /* No replicas. */
|
||||
if (listLength(server.replicas) == 0) return 1; /* No replicas. */
|
||||
|
||||
listIter li;
|
||||
listNode *ln;
|
||||
listRewind(server.slaves, &li);
|
||||
listRewind(server.replicas, &li);
|
||||
while ((ln = listNext(&li)) != NULL) {
|
||||
client *replica = listNodeValue(ln);
|
||||
if (replica->repl_ack_off != server.master_repl_offset) return 0;
|
||||
if (replica->repl_ack_off != server.primary_repl_offset) return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
@ -4228,16 +4229,16 @@ int finishShutdown(void) {
|
||||
listIter replicas_iter;
|
||||
listNode *replicas_list_node;
|
||||
int num_replicas = 0, num_lagging_replicas = 0;
|
||||
listRewind(server.slaves, &replicas_iter);
|
||||
listRewind(server.replicas, &replicas_iter);
|
||||
while ((replicas_list_node = listNext(&replicas_iter)) != NULL) {
|
||||
client *replica = listNodeValue(replicas_list_node);
|
||||
num_replicas++;
|
||||
if (replica->repl_ack_off != server.master_repl_offset) {
|
||||
if (replica->repl_ack_off != server.primary_repl_offset) {
|
||||
num_lagging_replicas++;
|
||||
long lag = replica->replstate == SLAVE_STATE_ONLINE ? time(NULL) - replica->repl_ack_time : 0;
|
||||
long lag = replica->repl_state == REPLICA_STATE_ONLINE ? time(NULL) - replica->repl_ack_time : 0;
|
||||
serverLog(LL_NOTICE, "Lagging replica %s reported offset %lld behind master, lag=%ld, state=%s.",
|
||||
replicationGetSlaveName(replica), server.master_repl_offset - replica->repl_ack_off, lag,
|
||||
replstateToString(replica->replstate));
|
||||
replicationGetReplicaName(replica), server.primary_repl_offset - replica->repl_ack_off, lag,
|
||||
replstateToString(replica->repl_state));
|
||||
}
|
||||
}
|
||||
if (num_replicas > 0) {
|
||||
@ -4305,11 +4306,11 @@ int finishShutdown(void) {
|
||||
rdbSaveInfo rsi, *rsiptr;
|
||||
rsiptr = rdbPopulateSaveInfo(&rsi);
|
||||
/* Keep the page cache since it's likely to restart soon */
|
||||
if (rdbSave(SLAVE_REQ_NONE, server.rdb_filename, rsiptr, RDBFLAGS_KEEP_CACHE) != C_OK) {
|
||||
if (rdbSave(REPLICA_REQ_NONE, server.rdb_filename, rsiptr, RDBFLAGS_KEEP_CACHE) != C_OK) {
|
||||
/* Ooops.. error saving! The best we can do is to continue
|
||||
* operating. Note that if there was a background saving process,
|
||||
* in the next cron() the server will be notified that the background
|
||||
* saving aborted, handling special stuff like slaves pending for
|
||||
* saving aborted, handling special stuff like replicas pending for
|
||||
* synchronization... */
|
||||
if (force) {
|
||||
serverLog(LL_WARNING, "Error trying to save the DB. Exit anyway.");
|
||||
@ -4334,9 +4335,9 @@ int finishShutdown(void) {
|
||||
unlink(server.pidfile);
|
||||
}
|
||||
|
||||
/* Best effort flush of slave output buffers, so that we hopefully
|
||||
/* Best effort flush of replica output buffers, so that we hopefully
|
||||
* send them pending writes. */
|
||||
flushSlavesOutputBuffers();
|
||||
flushReplicasOutputBuffers();
|
||||
|
||||
/* Close the listening sockets. Apparently this allows faster restarts. */
|
||||
closeListeningSockets(1);
|
||||
@ -5156,10 +5157,10 @@ sds fillPercentileDistributionLatencies(sds info, const char *histogram_name, st
|
||||
|
||||
const char *replstateToString(int replstate) {
|
||||
switch (replstate) {
|
||||
case SLAVE_STATE_WAIT_BGSAVE_START:
|
||||
case SLAVE_STATE_WAIT_BGSAVE_END: return "wait_bgsave";
|
||||
case SLAVE_STATE_SEND_BULK: return "send_bulk";
|
||||
case SLAVE_STATE_ONLINE: return "online";
|
||||
case REPLICA_STATE_WAIT_BGSAVE_START:
|
||||
case REPLICA_STATE_WAIT_BGSAVE_END: return "wait_bgsave";
|
||||
case REPLICA_STATE_SEND_BULK: return "send_bulk";
|
||||
case REPLICA_STATE_ONLINE: return "online";
|
||||
default: return "";
|
||||
}
|
||||
}
|
||||
@ -5407,7 +5408,7 @@ sds genValkeyInfoString(dict *section_dict, int all_sections, int everything) {
|
||||
if (sections++) info = sdscat(info, "\r\n");
|
||||
/* clang-format off */
|
||||
info = sdscatprintf(info, "# Clients\r\n" FMTARGS(
|
||||
"connected_clients:%lu\r\n", listLength(server.clients) - listLength(server.slaves),
|
||||
"connected_clients:%lu\r\n", listLength(server.clients) - listLength(server.replicas),
|
||||
"cluster_connections:%lu\r\n", getClusterConnectionsCount(),
|
||||
"maxclients:%u\r\n", server.maxclients,
|
||||
"client_recent_max_input_buffer:%zu\r\n", maxin,
|
||||
@ -5506,7 +5507,7 @@ sds genValkeyInfoString(dict *section_dict, int all_sections, int everything) {
|
||||
"mem_not_counted_for_evict:%zu\r\n", freeMemoryGetNotCountedMemory(),
|
||||
"mem_replication_backlog:%zu\r\n", mh->repl_backlog,
|
||||
"mem_total_replication_buffers:%zu\r\n", server.repl_buffer_mem,
|
||||
"mem_clients_slaves:%zu\r\n", mh->clients_slaves,
|
||||
"mem_clients_slaves:%zu\r\n", mh->clients_replicas,
|
||||
"mem_clients_normal:%zu\r\n", mh->clients_normal,
|
||||
"mem_cluster_links:%zu\r\n", mh->cluster_links,
|
||||
"mem_aof_buffer:%zu\r\n", mh->aof_buffer,
|
||||
@ -5672,7 +5673,7 @@ sds genValkeyInfoString(dict *section_dict, int all_sections, int everything) {
|
||||
"latest_fork_usec:%lld\r\n", server.stat_fork_time,
|
||||
"total_forks:%lld\r\n", server.stat_total_forks,
|
||||
"migrate_cached_sockets:%ld\r\n", dictSize(server.migrate_cached_sockets),
|
||||
"slave_expires_tracked_keys:%zu\r\n", getSlaveKeyWithExpireCount(),
|
||||
"slave_expires_tracked_keys:%zu\r\n", getReplicaKeyWithExpireCount(),
|
||||
"active_defrag_hits:%lld\r\n", server.stat_active_defrag_hits,
|
||||
"active_defrag_misses:%lld\r\n", server.stat_active_defrag_misses,
|
||||
"active_defrag_key_hits:%lld\r\n", server.stat_active_defrag_key_hits,
|
||||
@ -5708,28 +5709,28 @@ sds genValkeyInfoString(dict *section_dict, int all_sections, int everything) {
|
||||
info = sdscatprintf(info,
|
||||
"# Replication\r\n"
|
||||
"role:%s\r\n",
|
||||
server.masterhost == NULL ? "master" : "slave");
|
||||
if (server.masterhost) {
|
||||
long long slave_repl_offset = 1;
|
||||
long long slave_read_repl_offset = 1;
|
||||
server.primary_host == NULL ? "master" : "slave");
|
||||
if (server.primary_host) {
|
||||
long long replica_repl_offset = 1;
|
||||
long long replica_read_repl_offset = 1;
|
||||
|
||||
if (server.master) {
|
||||
slave_repl_offset = server.master->reploff;
|
||||
slave_read_repl_offset = server.master->read_reploff;
|
||||
} else if (server.cached_master) {
|
||||
slave_repl_offset = server.cached_master->reploff;
|
||||
slave_read_repl_offset = server.cached_master->read_reploff;
|
||||
if (server.primary) {
|
||||
replica_repl_offset = server.primary->reploff;
|
||||
replica_read_repl_offset = server.primary->read_reploff;
|
||||
} else if (server.cached_primary) {
|
||||
replica_repl_offset = server.cached_primary->reploff;
|
||||
replica_read_repl_offset = server.cached_primary->read_reploff;
|
||||
}
|
||||
|
||||
/* clang-format off */
|
||||
info = sdscatprintf(info, FMTARGS(
|
||||
"master_host:%s\r\n", server.masterhost,
|
||||
"master_port:%d\r\n", server.masterport,
|
||||
"master_host:%s\r\n", server.primary_host,
|
||||
"master_port:%d\r\n", server.primary_port,
|
||||
"master_link_status:%s\r\n", (server.repl_state == REPL_STATE_CONNECTED) ? "up" : "down",
|
||||
"master_last_io_seconds_ago:%d\r\n", server.master ? ((int)(server.unixtime-server.master->lastinteraction)) : -1,
|
||||
"master_last_io_seconds_ago:%d\r\n", server.primary ? ((int)(server.unixtime-server.primary->last_interaction)) : -1,
|
||||
"master_sync_in_progress:%d\r\n", server.repl_state == REPL_STATE_TRANSFER,
|
||||
"slave_read_repl_offset:%lld\r\n", slave_read_repl_offset,
|
||||
"slave_repl_offset:%lld\r\n", slave_repl_offset));
|
||||
"slave_read_repl_offset:%lld\r\n", replica_read_repl_offset,
|
||||
"slave_repl_offset:%lld\r\n", replica_repl_offset));
|
||||
/* clang-format on */
|
||||
|
||||
if (server.repl_state == REPL_STATE_TRANSFER) {
|
||||
@ -5753,45 +5754,46 @@ sds genValkeyInfoString(dict *section_dict, int all_sections, int everything) {
|
||||
}
|
||||
/* clang-format off */
|
||||
info = sdscatprintf(info, FMTARGS(
|
||||
"slave_priority:%d\r\n", server.slave_priority,
|
||||
"slave_read_only:%d\r\n", server.repl_slave_ro,
|
||||
"slave_priority:%d\r\n", server.replica_priority,
|
||||
"slave_read_only:%d\r\n", server.repl_replica_ro,
|
||||
"replica_announced:%d\r\n", server.replica_announced));
|
||||
/* clang-format on */
|
||||
}
|
||||
|
||||
info = sdscatprintf(info, "connected_slaves:%lu\r\n", listLength(server.slaves));
|
||||
info = sdscatprintf(info, "connected_slaves:%lu\r\n", listLength(server.replicas));
|
||||
|
||||
/* If min-slaves-to-write is active, write the number of slaves
|
||||
/* If min-replicas-to-write is active, write the number of replicas
|
||||
* currently considered 'good'. */
|
||||
if (server.repl_min_slaves_to_write && server.repl_min_slaves_max_lag) {
|
||||
info = sdscatprintf(info, "min_slaves_good_slaves:%d\r\n", server.repl_good_slaves_count);
|
||||
if (server.repl_min_replicas_to_write && server.repl_min_replicas_max_lag) {
|
||||
info = sdscatprintf(info, "min_slaves_good_slaves:%d\r\n", server.repl_good_replicas_count);
|
||||
}
|
||||
|
||||
if (listLength(server.slaves)) {
|
||||
int slaveid = 0;
|
||||
if (listLength(server.replicas)) {
|
||||
int replica_id = 0;
|
||||
listNode *ln;
|
||||
listIter li;
|
||||
|
||||
listRewind(server.slaves, &li);
|
||||
listRewind(server.replicas, &li);
|
||||
while ((ln = listNext(&li))) {
|
||||
client *slave = listNodeValue(ln);
|
||||
char ip[NET_IP_STR_LEN], *slaveip = slave->slave_addr;
|
||||
client *replica = listNodeValue(ln);
|
||||
char ip[NET_IP_STR_LEN], *replica_ip = replica->replica_addr;
|
||||
int port;
|
||||
long lag = 0;
|
||||
|
||||
if (!slaveip) {
|
||||
if (connAddrPeerName(slave->conn, ip, sizeof(ip), &port) == -1) continue;
|
||||
slaveip = ip;
|
||||
if (!replica_ip) {
|
||||
if (connAddrPeerName(replica->conn, ip, sizeof(ip), &port) == -1) continue;
|
||||
replica_ip = ip;
|
||||
}
|
||||
const char *state = replstateToString(slave->replstate);
|
||||
const char *state = replstateToString(replica->repl_state);
|
||||
if (state[0] == '\0') continue;
|
||||
if (slave->replstate == SLAVE_STATE_ONLINE) lag = time(NULL) - slave->repl_ack_time;
|
||||
if (replica->repl_state == REPLICA_STATE_ONLINE) lag = time(NULL) - replica->repl_ack_time;
|
||||
|
||||
info = sdscatprintf(info,
|
||||
"slave%d:ip=%s,port=%d,state=%s,"
|
||||
"offset=%lld,lag=%ld\r\n",
|
||||
slaveid, slaveip, slave->slave_listening_port, state, slave->repl_ack_off, lag);
|
||||
slaveid++;
|
||||
replica_id, replica_ip, replica->replica_listening_port, state,
|
||||
replica->repl_ack_off, lag);
|
||||
replica_id++;
|
||||
}
|
||||
}
|
||||
/* clang-format off */
|
||||
@ -5799,7 +5801,7 @@ sds genValkeyInfoString(dict *section_dict, int all_sections, int everything) {
|
||||
"master_failover_state:%s\r\n", getFailoverStateString(),
|
||||
"master_replid:%s\r\n", server.replid,
|
||||
"master_replid2:%s\r\n", server.replid2,
|
||||
"master_repl_offset:%lld\r\n", server.master_repl_offset,
|
||||
"master_repl_offset:%lld\r\n", server.primary_repl_offset,
|
||||
"second_repl_offset:%lld\r\n", server.second_replid_offset,
|
||||
"repl_backlog_active:%d\r\n", server.repl_backlog != NULL,
|
||||
"repl_backlog_size:%lld\r\n", server.repl_backlog_size,
|
||||
@ -5953,10 +5955,10 @@ void monitorCommand(client *c) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* ignore MONITOR if already slave or in monitor mode */
|
||||
if (c->flags & CLIENT_SLAVE) return;
|
||||
/* ignore MONITOR if already replica or in monitor mode */
|
||||
if (c->flags & CLIENT_REPLICA) return;
|
||||
|
||||
c->flags |= (CLIENT_SLAVE | CLIENT_MONITOR);
|
||||
c->flags |= (CLIENT_REPLICA | CLIENT_MONITOR);
|
||||
listAddNodeTail(server.monitors, c);
|
||||
addReply(c, shared.ok);
|
||||
}
|
||||
@ -6391,8 +6393,8 @@ void loadDataFromDisk(void) {
|
||||
int rsi_is_valid = 0;
|
||||
errno = 0; /* Prevent a stale value from affecting error checking */
|
||||
int rdb_flags = RDBFLAGS_NONE;
|
||||
if (iAmMaster()) {
|
||||
/* Master may delete expired keys when loading, we should
|
||||
if (iAmPrimary()) {
|
||||
/* Primary may delete expired keys when loading, we should
|
||||
* propagate expire to replication backlog. */
|
||||
createReplicationBacklog();
|
||||
rdb_flags |= RDBFLAGS_FEED_REPL;
|
||||
@ -6408,26 +6410,26 @@ void loadDataFromDisk(void) {
|
||||
* information in function rdbPopulateSaveInfo. */
|
||||
rsi.repl_stream_db != -1) {
|
||||
rsi_is_valid = 1;
|
||||
if (!iAmMaster()) {
|
||||
if (!iAmPrimary()) {
|
||||
memcpy(server.replid, rsi.repl_id, sizeof(server.replid));
|
||||
server.master_repl_offset = rsi.repl_offset;
|
||||
/* If this is a replica, create a cached master from this
|
||||
server.primary_repl_offset = rsi.repl_offset;
|
||||
/* If this is a replica, create a cached primary from this
|
||||
* information, in order to allow partial resynchronizations
|
||||
* with masters. */
|
||||
replicationCacheMasterUsingMyself();
|
||||
selectDb(server.cached_master, rsi.repl_stream_db);
|
||||
* with primaries. */
|
||||
replicationCachePrimaryUsingMyself();
|
||||
selectDb(server.cached_primary, rsi.repl_stream_db);
|
||||
} else {
|
||||
/* If this is a master, we can save the replication info
|
||||
/* If this is a primary, we can save the replication info
|
||||
* as secondary ID and offset, in order to allow replicas
|
||||
* to partial resynchronizations with masters. */
|
||||
* to partial resynchronizations with primaries. */
|
||||
memcpy(server.replid2, rsi.repl_id, sizeof(server.replid));
|
||||
server.second_replid_offset = rsi.repl_offset + 1;
|
||||
/* Rebase master_repl_offset from rsi.repl_offset. */
|
||||
server.master_repl_offset += rsi.repl_offset;
|
||||
/* Rebase primary_repl_offset from rsi.repl_offset. */
|
||||
server.primary_repl_offset += rsi.repl_offset;
|
||||
serverAssert(server.repl_backlog);
|
||||
server.repl_backlog->offset = server.master_repl_offset - server.repl_backlog->histlen + 1;
|
||||
server.repl_backlog->offset = server.primary_repl_offset - server.repl_backlog->histlen + 1;
|
||||
rebaseReplicationBuffer(rsi.repl_offset);
|
||||
server.repl_no_slaves_since = time(NULL);
|
||||
server.repl_no_replicas_since = time(NULL);
|
||||
}
|
||||
}
|
||||
} else if (rdb_load_ret != RDB_NOT_EXIST) {
|
||||
@ -6435,7 +6437,7 @@ void loadDataFromDisk(void) {
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* We always create replication backlog if server is a master, we need
|
||||
/* We always create replication backlog if server is a primary, we need
|
||||
* it because we put DELs in it when loading expired keys in RDB, but
|
||||
* if RDB doesn't have replication info or there is no rdb, it is not
|
||||
* possible to support partial resynchronization, to avoid extra memory
|
||||
@ -6590,9 +6592,9 @@ int serverIsSupervised(int mode) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
int iAmMaster(void) {
|
||||
return ((!server.cluster_enabled && server.masterhost == NULL) ||
|
||||
(server.cluster_enabled && clusterNodeIsMaster(getMyClusterNode())));
|
||||
int iAmPrimary(void) {
|
||||
return ((!server.cluster_enabled && server.primary_host == NULL) ||
|
||||
(server.cluster_enabled && clusterNodeIsPrimary(getMyClusterNode())));
|
||||
}
|
||||
|
||||
#ifdef SERVER_TEST
|
||||
@ -6719,7 +6721,7 @@ int main(int argc, char **argv) {
|
||||
|
||||
/* We need to init sentinel right now as parsing the configuration file
|
||||
* in sentinel mode will have the effect of populating the sentinel
|
||||
* data structures with master nodes to monitor. */
|
||||
* data structures with primary nodes to monitor. */
|
||||
if (server.sentinel_mode) {
|
||||
initSentinelConfig();
|
||||
initSentinel();
|
||||
@ -6939,7 +6941,7 @@ int main(int argc, char **argv) {
|
||||
}
|
||||
|
||||
if (server.supervised_mode == SUPERVISED_SYSTEMD) {
|
||||
if (!server.masterhost) {
|
||||
if (!server.primary_host) {
|
||||
serverCommunicateSystemd("STATUS=Ready to accept connections\n");
|
||||
} else {
|
||||
serverCommunicateSystemd(
|
||||
|
316
src/server.h
316
src/server.h
@ -192,7 +192,7 @@ struct hdr_histogram;
|
||||
#define CONFIG_FDSET_INCR (CONFIG_MIN_RESERVED_FDS + 96)
|
||||
|
||||
/* OOM Score Adjustment classes. */
|
||||
#define CONFIG_OOM_MASTER 0
|
||||
#define CONFIG_OOM_PRIMARY 0
|
||||
#define CONFIG_OOM_REPLICA 1
|
||||
#define CONFIG_OOM_BGCHILD 2
|
||||
#define CONFIG_OOM_COUNT 3
|
||||
@ -344,29 +344,29 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT];
|
||||
#define CMD_DOC_SYSCMD (1 << 1) /* System (internal) command */
|
||||
|
||||
/* Client flags */
|
||||
#define CLIENT_SLAVE (1 << 0) /* This client is a replica */
|
||||
#define CLIENT_MASTER (1 << 1) /* This client is a master */
|
||||
#define CLIENT_MONITOR (1 << 2) /* This client is a slave monitor, see MONITOR */
|
||||
#define CLIENT_REPLICA (1 << 0) /* This client is a replica */
|
||||
#define CLIENT_PRIMARY (1 << 1) /* This client is a primary */
|
||||
#define CLIENT_MONITOR (1 << 2) /* This client is a replica monitor, see MONITOR */
|
||||
#define CLIENT_MULTI (1 << 3) /* This client is in a MULTI context */
|
||||
#define CLIENT_BLOCKED (1 << 4) /* The client is waiting in a blocking operation */
|
||||
#define CLIENT_DIRTY_CAS (1 << 5) /* Watched keys modified. EXEC will fail. */
|
||||
#define CLIENT_CLOSE_AFTER_REPLY (1 << 6) /* Close after writing entire reply. */
|
||||
#define CLIENT_UNBLOCKED \
|
||||
(1 << 7) /* This client was unblocked and is stored in \
|
||||
server.unblocked_clients */
|
||||
#define CLIENT_SCRIPT (1 << 8) /* This is a non connected client used by Lua */
|
||||
#define CLIENT_ASKING (1 << 9) /* Client issued the ASKING command */
|
||||
#define CLIENT_CLOSE_ASAP (1 << 10) /* Close this client ASAP */
|
||||
#define CLIENT_UNIX_SOCKET (1 << 11) /* Client connected via Unix domain socket */
|
||||
#define CLIENT_DIRTY_EXEC (1 << 12) /* EXEC will fail for errors while queueing */
|
||||
#define CLIENT_MASTER_FORCE_REPLY (1 << 13) /* Queue replies even if is master */
|
||||
#define CLIENT_FORCE_AOF (1 << 14) /* Force AOF propagation of current cmd. */
|
||||
#define CLIENT_FORCE_REPL (1 << 15) /* Force replication of current cmd. */
|
||||
#define CLIENT_PRE_PSYNC (1 << 16) /* Instance don't understand PSYNC. */
|
||||
#define CLIENT_READONLY (1 << 17) /* Cluster client is in read-only state. */
|
||||
#define CLIENT_PUBSUB (1 << 18) /* Client is in Pub/Sub mode. */
|
||||
#define CLIENT_PREVENT_AOF_PROP (1 << 19) /* Don't propagate to AOF. */
|
||||
#define CLIENT_PREVENT_REPL_PROP (1 << 20) /* Don't propagate to slaves. */
|
||||
(1 << 7) /* This client was unblocked and is stored in \
|
||||
server.unblocked_clients */
|
||||
#define CLIENT_SCRIPT (1 << 8) /* This is a non connected client used by Lua */
|
||||
#define CLIENT_ASKING (1 << 9) /* Client issued the ASKING command */
|
||||
#define CLIENT_CLOSE_ASAP (1 << 10) /* Close this client ASAP */
|
||||
#define CLIENT_UNIX_SOCKET (1 << 11) /* Client connected via Unix domain socket */
|
||||
#define CLIENT_DIRTY_EXEC (1 << 12) /* EXEC will fail for errors while queueing */
|
||||
#define CLIENT_PRIMARY_FORCE_REPLY (1 << 13) /* Queue replies even if is primary */
|
||||
#define CLIENT_FORCE_AOF (1 << 14) /* Force AOF propagation of current cmd. */
|
||||
#define CLIENT_FORCE_REPL (1 << 15) /* Force replication of current cmd. */
|
||||
#define CLIENT_PRE_PSYNC (1 << 16) /* Instance don't understand PSYNC. */
|
||||
#define CLIENT_READONLY (1 << 17) /* Cluster client is in read-only state. */
|
||||
#define CLIENT_PUBSUB (1 << 18) /* Client is in Pub/Sub mode. */
|
||||
#define CLIENT_PREVENT_AOF_PROP (1 << 19) /* Don't propagate to AOF. */
|
||||
#define CLIENT_PREVENT_REPL_PROP (1 << 20) /* Don't propagate to replicas. */
|
||||
#define CLIENT_PREVENT_PROP (CLIENT_PREVENT_AOF_PROP | CLIENT_PREVENT_REPL_PROP)
|
||||
#define CLIENT_PENDING_WRITE \
|
||||
(1 << 21) /* Client has output to send but a write \
|
||||
@ -449,25 +449,25 @@ typedef enum blocking_type {
|
||||
|
||||
/* Client classes for client limits, currently used only for
|
||||
* the max-client-output-buffer limit implementation. */
|
||||
#define CLIENT_TYPE_NORMAL 0 /* Normal req-reply clients + MONITORs */
|
||||
#define CLIENT_TYPE_SLAVE 1 /* Slaves. */
|
||||
#define CLIENT_TYPE_PUBSUB 2 /* Clients subscribed to PubSub channels. */
|
||||
#define CLIENT_TYPE_MASTER 3 /* Master. */
|
||||
#define CLIENT_TYPE_COUNT 4 /* Total number of client types. */
|
||||
#define CLIENT_TYPE_NORMAL 0 /* Normal req-reply clients + MONITORs */
|
||||
#define CLIENT_TYPE_REPLICA 1 /* Replicas. */
|
||||
#define CLIENT_TYPE_PUBSUB 2 /* Clients subscribed to PubSub channels. */
|
||||
#define CLIENT_TYPE_PRIMARY 3 /* Primary. */
|
||||
#define CLIENT_TYPE_COUNT 4 /* Total number of client types. */
|
||||
#define CLIENT_TYPE_OBUF_COUNT \
|
||||
3 /* Number of clients to expose to output \
|
||||
buffer configuration. Just the first \
|
||||
three: normal, slave, pubsub. */
|
||||
three: normal, replica, pubsub. */
|
||||
|
||||
/* Slave replication state. Used in server.repl_state for slaves to remember
|
||||
/* Replica replication state. Used in server.repl_state for replicas to remember
|
||||
* what to do next. */
|
||||
typedef enum {
|
||||
REPL_STATE_NONE = 0, /* No active replication */
|
||||
REPL_STATE_CONNECT, /* Must connect to master */
|
||||
REPL_STATE_CONNECTING, /* Connecting to master */
|
||||
REPL_STATE_CONNECT, /* Must connect to primary */
|
||||
REPL_STATE_CONNECTING, /* Connecting to primary */
|
||||
/* --- Handshake states, must be ordered --- */
|
||||
REPL_STATE_RECEIVE_PING_REPLY, /* Wait for PING reply */
|
||||
REPL_STATE_SEND_HANDSHAKE, /* Send handshake sequence to master */
|
||||
REPL_STATE_SEND_HANDSHAKE, /* Send handshake sequence to primary */
|
||||
REPL_STATE_RECEIVE_AUTH_REPLY, /* Wait for AUTH reply */
|
||||
REPL_STATE_RECEIVE_PORT_REPLY, /* Wait for REPLCONF reply */
|
||||
REPL_STATE_RECEIVE_IP_REPLY, /* Wait for REPLCONF reply */
|
||||
@ -476,8 +476,8 @@ typedef enum {
|
||||
REPL_STATE_SEND_PSYNC, /* Send PSYNC */
|
||||
REPL_STATE_RECEIVE_PSYNC_REPLY, /* Wait for PSYNC reply */
|
||||
/* --- End of handshake states --- */
|
||||
REPL_STATE_TRANSFER, /* Receiving .rdb from master */
|
||||
REPL_STATE_CONNECTED, /* Connected to master */
|
||||
REPL_STATE_TRANSFER, /* Receiving .rdb from primary */
|
||||
REPL_STATE_CONNECTED, /* Connected to primary */
|
||||
} repl_state;
|
||||
|
||||
/* The state of an in progress coordinated failover */
|
||||
@ -488,31 +488,31 @@ typedef enum {
|
||||
* PSYNC FAILOVER request. */
|
||||
} failover_state;
|
||||
|
||||
/* State of slaves from the POV of the master. Used in client->replstate.
|
||||
* In SEND_BULK and ONLINE state the slave receives new updates
|
||||
/* State of replicas from the POV of the primary. Used in client->replstate.
|
||||
* In SEND_BULK and ONLINE state the replica receives new updates
|
||||
* in its output queue. In the WAIT_BGSAVE states instead the server is waiting
|
||||
* to start the next background saving in order to send updates to it. */
|
||||
#define SLAVE_STATE_WAIT_BGSAVE_START 6 /* We need to produce a new RDB file. */
|
||||
#define SLAVE_STATE_WAIT_BGSAVE_END 7 /* Waiting RDB file creation to finish. */
|
||||
#define SLAVE_STATE_SEND_BULK 8 /* Sending RDB file to slave. */
|
||||
#define SLAVE_STATE_ONLINE 9 /* RDB file transmitted, sending just updates. */
|
||||
#define SLAVE_STATE_RDB_TRANSMITTED \
|
||||
#define REPLICA_STATE_WAIT_BGSAVE_START 6 /* We need to produce a new RDB file. */
|
||||
#define REPLICA_STATE_WAIT_BGSAVE_END 7 /* Waiting RDB file creation to finish. */
|
||||
#define REPLICA_STATE_SEND_BULK 8 /* Sending RDB file to replica. */
|
||||
#define REPLICA_STATE_ONLINE 9 /* RDB file transmitted, sending just updates. */
|
||||
#define REPLICA_STATE_RDB_TRANSMITTED \
|
||||
10 /* RDB file transmitted - This state is used only for \
|
||||
* a replica that only wants RDB without replication buffer */
|
||||
|
||||
/* Slave capabilities. */
|
||||
#define SLAVE_CAPA_NONE 0
|
||||
#define SLAVE_CAPA_EOF (1 << 0) /* Can parse the RDB EOF streaming format. */
|
||||
#define SLAVE_CAPA_PSYNC2 (1 << 1) /* Supports PSYNC2 protocol. */
|
||||
/* Replica capabilities. */
|
||||
#define REPLICA_CAPA_NONE 0
|
||||
#define REPLICA_CAPA_EOF (1 << 0) /* Can parse the RDB EOF streaming format. */
|
||||
#define REPLICA_CAPA_PSYNC2 (1 << 1) /* Supports PSYNC2 protocol. */
|
||||
|
||||
/* Slave requirements */
|
||||
#define SLAVE_REQ_NONE 0
|
||||
#define SLAVE_REQ_RDB_EXCLUDE_DATA (1 << 0) /* Exclude data from RDB */
|
||||
#define SLAVE_REQ_RDB_EXCLUDE_FUNCTIONS (1 << 1) /* Exclude functions from RDB */
|
||||
/* Mask of all bits in the slave requirements bitfield that represent non-standard (filtered) RDB requirements */
|
||||
#define SLAVE_REQ_RDB_MASK (SLAVE_REQ_RDB_EXCLUDE_DATA | SLAVE_REQ_RDB_EXCLUDE_FUNCTIONS)
|
||||
/* Replica requirements */
|
||||
#define REPLICA_REQ_NONE 0
|
||||
#define REPLICA_REQ_RDB_EXCLUDE_DATA (1 << 0) /* Exclude data from RDB */
|
||||
#define REPLICA_REQ_RDB_EXCLUDE_FUNCTIONS (1 << 1) /* Exclude functions from RDB */
|
||||
/* Mask of all bits in the replica requirements bitfield that represent non-standard (filtered) RDB requirements */
|
||||
#define REPLICA_REQ_RDB_MASK (REPLICA_REQ_RDB_EXCLUDE_DATA | REPLICA_REQ_RDB_EXCLUDE_FUNCTIONS)
|
||||
|
||||
/* Synchronous read timeout - slave side */
|
||||
/* Synchronous read timeout - replica side */
|
||||
#define CONFIG_REPL_SYNCIO_TIMEOUT 5
|
||||
|
||||
/* The default number of replication backlog blocks to trim per call. */
|
||||
@ -667,7 +667,7 @@ typedef enum {
|
||||
/* RDB active child save type. */
|
||||
#define RDB_CHILD_TYPE_NONE 0
|
||||
#define RDB_CHILD_TYPE_DISK 1 /* RDB is written to disk. */
|
||||
#define RDB_CHILD_TYPE_SOCKET 2 /* RDB is written to slave socket. */
|
||||
#define RDB_CHILD_TYPE_SOCKET 2 /* RDB is written to replica socket. */
|
||||
|
||||
/* Keyspace changes notification classes. Every class is associated with a
|
||||
* character for configuration purposes. */
|
||||
@ -1162,7 +1162,7 @@ typedef struct replBacklog {
|
||||
* buffer for quickly searching replication
|
||||
* offset on partial resynchronization. */
|
||||
long long histlen; /* Backlog actual data length */
|
||||
long long offset; /* Replication "master offset" of first
|
||||
long long offset; /* Replication "primary offset" of first
|
||||
* byte in the replication backlog buffer.*/
|
||||
} replBacklog;
|
||||
|
||||
@ -1230,35 +1230,35 @@ typedef struct client {
|
||||
size_t sentlen; /* Amount of bytes already sent in the current
|
||||
buffer or object being sent. */
|
||||
time_t ctime; /* Client creation time. */
|
||||
long duration; /* Current command duration. Used for measuring latency of blocking/non-blocking cmds */
|
||||
int slot; /* The slot the client is executing against. Set to -1 if no slot is being used */
|
||||
dictEntry *cur_script; /* Cached pointer to the dictEntry of the script being executed. */
|
||||
time_t lastinteraction; /* Time of the last interaction, used for timeout */
|
||||
long duration; /* Current command duration. Used for measuring latency of blocking/non-blocking cmds */
|
||||
int slot; /* The slot the client is executing against. Set to -1 if no slot is being used */
|
||||
dictEntry *cur_script; /* Cached pointer to the dictEntry of the script being executed. */
|
||||
time_t last_interaction; /* Time of the last interaction, used for timeout */
|
||||
time_t obuf_soft_limit_reached_time;
|
||||
int authenticated; /* Needed when the default user requires auth. */
|
||||
int replstate; /* Replication state if this is a slave. */
|
||||
int repl_start_cmd_stream_on_ack; /* Install slave write handler on first ACK. */
|
||||
int repl_state; /* Replication state if this is a replica. */
|
||||
int repl_start_cmd_stream_on_ack; /* Install replica write handler on first ACK. */
|
||||
int repldbfd; /* Replication DB file descriptor. */
|
||||
off_t repldboff; /* Replication DB file offset. */
|
||||
off_t repldbsize; /* Replication DB file size. */
|
||||
sds replpreamble; /* Replication DB preamble. */
|
||||
long long read_reploff; /* Read replication offset if this is a master. */
|
||||
long long reploff; /* Applied replication offset if this is a master. */
|
||||
long long read_reploff; /* Read replication offset if this is a primary. */
|
||||
long long reploff; /* Applied replication offset if this is a primary. */
|
||||
long long repl_applied; /* Applied replication data count in querybuf, if this is a replica. */
|
||||
long long repl_ack_off; /* Replication ack offset, if this is a slave. */
|
||||
long long repl_aof_off; /* Replication AOF fsync ack offset, if this is a slave. */
|
||||
long long repl_ack_time; /* Replication ack time, if this is a slave. */
|
||||
long long repl_ack_off; /* Replication ack offset, if this is a replica. */
|
||||
long long repl_aof_off; /* Replication AOF fsync ack offset, if this is a replica. */
|
||||
long long repl_ack_time; /* Replication ack time, if this is a replica. */
|
||||
long long repl_last_partial_write; /* The last time the server did a partial write from the RDB child pipe to this
|
||||
replica */
|
||||
long long psync_initial_offset; /* FULLRESYNC reply offset other slaves
|
||||
copying this slave output buffer
|
||||
long long psync_initial_offset; /* FULLRESYNC reply offset other replicas
|
||||
copying this replica output buffer
|
||||
should use. */
|
||||
char replid[CONFIG_RUN_ID_SIZE + 1]; /* Master replication ID (if master). */
|
||||
int slave_listening_port; /* As configured with: REPLCONF listening-port */
|
||||
char *slave_addr; /* Optionally given by REPLCONF ip-address */
|
||||
char replid[CONFIG_RUN_ID_SIZE + 1]; /* Primary replication ID (if primary). */
|
||||
int replica_listening_port; /* As configured with: REPLCONF listening-port */
|
||||
char *replica_addr; /* Optionally given by REPLCONF ip-address */
|
||||
int replica_version; /* Version on the form 0xMMmmpp. */
|
||||
short slave_capa; /* Slave capabilities: SLAVE_CAPA_* bitwise OR. */
|
||||
short slave_req; /* Slave requirements: SLAVE_REQ_* */
|
||||
short replica_capa; /* Replica capabilities: REPLICA_CAPA_* bitwise OR. */
|
||||
short replica_req; /* Replica requirements: REPLICA_REQ_* */
|
||||
multiState mstate; /* MULTI/EXEC state */
|
||||
blockingState bstate; /* blocking state */
|
||||
long long woff; /* Last write global replication offset. */
|
||||
@ -1361,7 +1361,7 @@ struct sentinelConfig {
|
||||
struct sharedObjectsStruct {
|
||||
robj *ok, *err, *emptybulk, *czero, *cone, *pong, *space, *queued, *null[4], *nullarray[4], *emptymap[4],
|
||||
*emptyset[4], *emptyarray, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr, *outofrangeerr, *noscripterr,
|
||||
*loadingerr, *slowevalerr, *slowscripterr, *slowmoduleerr, *bgsaveerr, *masterdownerr, *roslaveerr,
|
||||
*loadingerr, *slowevalerr, *slowscripterr, *slowmoduleerr, *bgsaveerr, *primarydownerr, *roreplicaerr,
|
||||
*execaborterr, *noautherr, *noreplicaserr, *busykeyerr, *oomerr, *plus, *messagebulk, *pmessagebulk,
|
||||
*subscribebulk, *unsubscribebulk, *psubscribebulk, *punsubscribebulk, *del, *unlink, *rpop, *lpop, *lpush,
|
||||
*rpoplpush, *lmove, *blmove, *zpopmin, *zpopmax, *emptyscan, *multi, *exec, *left, *right, *hset, *srem,
|
||||
@ -1436,7 +1436,7 @@ struct serverMemOverhead {
|
||||
size_t total_allocated;
|
||||
size_t startup_allocated;
|
||||
size_t repl_backlog;
|
||||
size_t clients_slaves;
|
||||
size_t clients_replicas;
|
||||
size_t clients_normal;
|
||||
size_t cluster_links;
|
||||
size_t aof_buffer;
|
||||
@ -1481,12 +1481,12 @@ typedef enum {
|
||||
* metadata to the RDB file.
|
||||
*
|
||||
* For example, to use select a DB at load time, useful in
|
||||
* replication in order to make sure that chained slaves (slaves of slaves)
|
||||
* replication in order to make sure that chained replicas (replicas of replicas)
|
||||
* select the correct DB and are able to accept the stream coming from the
|
||||
* top-level master. */
|
||||
* top-level primary. */
|
||||
typedef struct rdbSaveInfo {
|
||||
/* Used saving and loading. */
|
||||
int repl_stream_db; /* DB to select in server.master client. */
|
||||
int repl_stream_db; /* DB to select in server.primary client. */
|
||||
|
||||
/* Used only loading. */
|
||||
int repl_id_is_set; /* True if repl_id field is set. */
|
||||
@ -1647,7 +1647,7 @@ struct valkeyServer {
|
||||
list *clients_to_close; /* Clients to close asynchronously */
|
||||
list *clients_pending_write; /* There is to write or install handler. */
|
||||
list *clients_pending_read; /* Client has pending read socket buffers. */
|
||||
list *slaves, *monitors; /* List of slaves and MONITORs */
|
||||
list *replicas, *monitors; /* List of replicas and MONITORs */
|
||||
client *current_client; /* The client that triggered the command execution (External or AOF). */
|
||||
client *executing_client; /* The client executing the current command (possibly script or module). */
|
||||
|
||||
@ -1719,7 +1719,7 @@ struct valkeyServer {
|
||||
double stat_fork_rate; /* Fork rate in GB/sec. */
|
||||
long long stat_total_forks; /* Total count of fork. */
|
||||
long long stat_rejected_conn; /* Clients rejected because of maxclients */
|
||||
long long stat_sync_full; /* Number of full resyncs with slaves. */
|
||||
long long stat_sync_full; /* Number of full resyncs with replicas. */
|
||||
long long stat_sync_partial_ok; /* Number of accepted PSYNC requests. */
|
||||
long long stat_sync_partial_err; /* Number of unaccepted PSYNC requests. */
|
||||
list *slowlog; /* SLOWLOG list of commands */
|
||||
@ -1745,7 +1745,7 @@ struct valkeyServer {
|
||||
size_t stat_clients_type_memory[CLIENT_TYPE_COUNT]; /* Mem usage by type */
|
||||
size_t stat_cluster_links_memory; /* Mem usage by cluster links */
|
||||
long long
|
||||
stat_unexpected_error_replies; /* Number of unexpected (aof-loading, replica to master, etc.) error replies */
|
||||
stat_unexpected_error_replies; /* Number of unexpected (aof-loading, replica to primary, etc.) error replies */
|
||||
long long stat_total_error_replies; /* Total number of issued error replies ( command + rejected errors ) */
|
||||
long long stat_dump_payload_sanitizations; /* Number deep dump payloads integrity validations. */
|
||||
long long stat_io_reads_processed; /* Number of read events processed by IO / Main threads */
|
||||
@ -1906,29 +1906,29 @@ struct valkeyServer {
|
||||
int shutdown_on_sigint; /* Shutdown flags configured for SIGINT. */
|
||||
int shutdown_on_sigterm; /* Shutdown flags configured for SIGTERM. */
|
||||
|
||||
/* Replication (master) */
|
||||
/* Replication (primary) */
|
||||
char replid[CONFIG_RUN_ID_SIZE + 1]; /* My current replication ID. */
|
||||
char replid2[CONFIG_RUN_ID_SIZE + 1]; /* replid inherited from master*/
|
||||
long long master_repl_offset; /* My current replication offset */
|
||||
char replid2[CONFIG_RUN_ID_SIZE + 1]; /* replid inherited from primary*/
|
||||
long long primary_repl_offset; /* My current replication offset */
|
||||
long long second_replid_offset; /* Accept offsets up to this for replid2. */
|
||||
_Atomic long long fsynced_reploff_pending; /* Largest replication offset to
|
||||
* potentially have been fsynced, applied to
|
||||
fsynced_reploff only when AOF state is AOF_ON
|
||||
(not during the initial rewrite) */
|
||||
long long fsynced_reploff; /* Largest replication offset that has been confirmed to be fsynced */
|
||||
int slaveseldb; /* Last SELECTed DB in replication output */
|
||||
int repl_ping_slave_period; /* Master pings the slave every N seconds */
|
||||
int replicas_eldb; /* Last SELECTed DB in replication output */
|
||||
int repl_ping_replica_period; /* Primary pings the replica every N seconds */
|
||||
replBacklog *repl_backlog; /* Replication backlog for partial syncs */
|
||||
long long repl_backlog_size; /* Backlog circular buffer size */
|
||||
time_t repl_backlog_time_limit; /* Time without slaves after the backlog
|
||||
time_t repl_backlog_time_limit; /* Time without replicas after the backlog
|
||||
gets released. */
|
||||
time_t repl_no_slaves_since; /* We have no slaves since that time.
|
||||
Only valid if server.slaves len is 0. */
|
||||
int repl_min_slaves_to_write; /* Min number of slaves to write. */
|
||||
int repl_min_slaves_max_lag; /* Max lag of <count> slaves to write. */
|
||||
int repl_good_slaves_count; /* Number of slaves with lag <= max_lag. */
|
||||
int repl_diskless_sync; /* Master send RDB to slaves sockets directly. */
|
||||
int repl_diskless_load; /* Slave parse RDB directly from the socket.
|
||||
time_t repl_no_replicas_since; /* We have no replicas since that time.
|
||||
Only valid if server.replicas len is 0. */
|
||||
int repl_min_replicas_to_write; /* Min number of replicas to write. */
|
||||
int repl_min_replicas_max_lag; /* Max lag of <count> replicas to write. */
|
||||
int repl_good_replicas_count; /* Number of replicas with lag <= max_lag. */
|
||||
int repl_diskless_sync; /* Primary sends RDB to replica sockets directly. */
|
||||
int repl_diskless_load; /* Replica parses RDB directly from the socket.
|
||||
* see REPL_DISKLESS_LOAD_* enum */
|
||||
int repl_diskless_sync_delay; /* Delay to start a diskless repl BGSAVE. */
|
||||
int repl_diskless_sync_max_replicas; /* Max replicas for diskless repl BGSAVE
|
||||
@ -1936,45 +1936,45 @@ struct valkeyServer {
|
||||
size_t repl_buffer_mem; /* The memory of replication buffer. */
|
||||
list *repl_buffer_blocks; /* Replication buffers blocks list
|
||||
* (serving replica clients and repl backlog) */
|
||||
/* Replication (slave) */
|
||||
char *masteruser; /* AUTH with this user and masterauth with master */
|
||||
sds masterauth; /* AUTH with this password with master */
|
||||
char *masterhost; /* Hostname of master */
|
||||
int masterport; /* Port of master */
|
||||
int repl_timeout; /* Timeout after N seconds of master idle */
|
||||
client *master; /* Client that is master for this slave */
|
||||
client *cached_master; /* Cached master to be reused for PSYNC. */
|
||||
/* Replication (replica) */
|
||||
char *primary_user; /* AUTH with this user and primary_auth with primary */
|
||||
sds primary_auth; /* AUTH with this password with primary */
|
||||
char *primary_host; /* Hostname of primary */
|
||||
int primary_port; /* Port of primary */
|
||||
int repl_timeout; /* Timeout after N seconds of primary idle */
|
||||
client *primary; /* Client that is primary for this replica */
|
||||
client *cached_primary; /* Cached primary to be reused for PSYNC. */
|
||||
int repl_syncio_timeout; /* Timeout for synchronous I/O calls */
|
||||
int repl_state; /* Replication status if the instance is a slave */
|
||||
off_t repl_transfer_size; /* Size of RDB to read from master during sync. */
|
||||
off_t repl_transfer_read; /* Amount of RDB read from master during sync. */
|
||||
int repl_state; /* Replication status if the instance is a replica */
|
||||
off_t repl_transfer_size; /* Size of RDB to read from primary during sync. */
|
||||
off_t repl_transfer_read; /* Amount of RDB read from primary during sync. */
|
||||
off_t repl_transfer_last_fsync_off; /* Offset when we fsync-ed last time. */
|
||||
connection *repl_transfer_s; /* Slave -> Master SYNC connection */
|
||||
int repl_transfer_fd; /* Slave -> Master SYNC temp file descriptor */
|
||||
char *repl_transfer_tmpfile; /* Slave-> master SYNC temp file name */
|
||||
connection *repl_transfer_s; /* Replica -> Primary SYNC connection */
|
||||
int repl_transfer_fd; /* Replica -> Primary SYNC temp file descriptor */
|
||||
char *repl_transfer_tmpfile; /* Replica -> Primary SYNC temp file name */
|
||||
time_t repl_transfer_lastio; /* Unix time of the latest read, for timeout */
|
||||
int repl_serve_stale_data; /* Serve stale data when link is down? */
|
||||
int repl_slave_ro; /* Slave is read only? */
|
||||
int repl_slave_ignore_maxmemory; /* If true slaves do not evict. */
|
||||
time_t repl_down_since; /* Unix time at which link with master went down */
|
||||
int repl_replica_ro; /* Replica is read only? */
|
||||
int repl_replica_ignore_maxmemory; /* If true replicas do not evict. */
|
||||
time_t repl_down_since; /* Unix time at which link with primary went down */
|
||||
int repl_disable_tcp_nodelay; /* Disable TCP_NODELAY after SYNC? */
|
||||
int slave_priority; /* Reported in INFO and used by Sentinel. */
|
||||
int replica_priority; /* Reported in INFO and used by Sentinel. */
|
||||
int replica_announced; /* If true, replica is announced by Sentinel */
|
||||
int slave_announce_port; /* Give the master this listening port. */
|
||||
char *slave_announce_ip; /* Give the master this ip address. */
|
||||
int replica_announce_port; /* Give the primary this listening port. */
|
||||
char *replica_announce_ip; /* Give the primary this ip address. */
|
||||
int propagation_error_behavior; /* Configures the behavior of the replica
|
||||
* when it receives an error on the replication stream */
|
||||
int repl_ignore_disk_write_error; /* Configures whether replicas panic when unable to
|
||||
* persist writes to AOF. */
|
||||
/* The following two fields is where we store master PSYNC replid/offset
|
||||
/* The following two fields are where we store primary PSYNC replid/offset
|
||||
* while the PSYNC is in progress. At the end we'll copy the fields into
|
||||
* the server->master client structure. */
|
||||
char master_replid[CONFIG_RUN_ID_SIZE + 1]; /* Master PSYNC runid. */
|
||||
long long master_initial_offset; /* Master PSYNC offset. */
|
||||
int repl_slave_lazy_flush; /* Lazy FLUSHALL before loading DB? */
|
||||
* the server->primary client structure. */
|
||||
char primary_replid[CONFIG_RUN_ID_SIZE + 1]; /* Primary PSYNC runid. */
|
||||
long long primary_initial_offset; /* Primary PSYNC offset. */
|
||||
int repl_replica_lazy_flush; /* Lazy FLUSHALL before loading DB? */
|
||||
/* Synchronous replication. */
|
||||
list *clients_waiting_acks; /* Clients waiting in WAIT or WAITAOF. */
|
||||
int get_ack_from_slaves; /* If true we send REPLCONF GETACK. */
|
||||
int get_ack_from_replicas; /* If true we send REPLCONF GETACK. */
|
||||
/* Limits */
|
||||
unsigned int maxclients; /* Max number of simultaneous clients */
|
||||
unsigned long long maxmemory; /* Max number of memory bytes to use */
|
||||
@ -2043,26 +2043,26 @@ struct valkeyServer {
|
||||
char *cluster_configfile; /* Cluster auto-generated config file name. */
|
||||
struct clusterState *cluster; /* State of the cluster */
|
||||
int cluster_migration_barrier; /* Cluster replicas migration barrier. */
|
||||
int cluster_allow_replica_migration; /* Automatic replica migrations to orphaned masters and from empty masters */
|
||||
int cluster_slave_validity_factor; /* Slave max data age for failover. */
|
||||
int cluster_require_full_coverage; /* If true, put the cluster down if
|
||||
there is at least an uncovered slot.*/
|
||||
int cluster_slave_no_failover; /* Prevent slave from starting a failover
|
||||
if the master is in failure state. */
|
||||
char *cluster_announce_ip; /* IP address to announce on cluster bus. */
|
||||
char *cluster_announce_hostname; /* hostname to announce on cluster bus. */
|
||||
char *cluster_announce_human_nodename; /* Human readable node name assigned to a node. */
|
||||
int cluster_preferred_endpoint_type; /* Use the announced hostname when available. */
|
||||
int cluster_announce_port; /* base port to announce on cluster bus. */
|
||||
int cluster_announce_tls_port; /* TLS port to announce on cluster bus. */
|
||||
int cluster_announce_bus_port; /* bus port to announce on cluster bus. */
|
||||
int cluster_module_flags; /* Set of flags that modules are able
|
||||
to set in order to suppress certain
|
||||
native Redis Cluster features. Check the
|
||||
VALKEYMODULE_CLUSTER_FLAG_*. */
|
||||
int cluster_allow_reads_when_down; /* Are reads allowed when the cluster
|
||||
is down? */
|
||||
int cluster_config_file_lock_fd; /* cluster config fd, will be flocked. */
|
||||
int cluster_allow_replica_migration; /* Automatic replica migrations to orphaned primaries and from empty primaries */
|
||||
int cluster_replica_validity_factor; /* Replica max data age for failover. */
|
||||
int cluster_require_full_coverage; /* If true, put the cluster down if
|
||||
there is at least an uncovered slot.*/
|
||||
int cluster_replica_no_failover; /* Prevent replica from starting a failover
|
||||
if the primary is in failure state. */
|
||||
char *cluster_announce_ip; /* IP address to announce on cluster bus. */
|
||||
char *cluster_announce_hostname; /* hostname to announce on cluster bus. */
|
||||
char *cluster_announce_human_nodename; /* Human readable node name assigned to a node. */
|
||||
int cluster_preferred_endpoint_type; /* Use the announced hostname when available. */
|
||||
int cluster_announce_port; /* base port to announce on cluster bus. */
|
||||
int cluster_announce_tls_port; /* TLS port to announce on cluster bus. */
|
||||
int cluster_announce_bus_port; /* bus port to announce on cluster bus. */
|
||||
int cluster_module_flags; /* Set of flags that modules are able
|
||||
to set in order to suppress certain
|
||||
native Redis Cluster features. Check the
|
||||
VALKEYMODULE_CLUSTER_FLAG_*. */
|
||||
int cluster_allow_reads_when_down; /* Are reads allowed when the cluster
|
||||
is down? */
|
||||
int cluster_config_file_lock_fd; /* cluster config fd, will be flocked. */
|
||||
unsigned long long cluster_link_msg_queue_limit_bytes; /* Memory usage limit on individual link msg queue */
|
||||
int cluster_drop_packet_filter; /* Debug config that allows tactically
|
||||
* dropping packets of a specific type */
|
||||
@ -2317,7 +2317,7 @@ typedef int serverGetKeysProc(struct serverCommand *cmd, robj **argv, int argc,
|
||||
* CMD_NO_ASYNC_LOADING: Deny during async loading (when a replica uses diskless
|
||||
* sync swapdb, and allows access to the old dataset)
|
||||
*
|
||||
* CMD_STALE: Allow the command while a slave has stale data but is not
|
||||
* CMD_STALE: Allow the command while a replica has stale data but is not
|
||||
* allowed to serve this data. Normally no command is accepted
|
||||
* in this condition but just a few.
|
||||
*
|
||||
@ -2692,8 +2692,8 @@ int closeClientOnOutputBufferLimitReached(client *c, int async);
|
||||
int getClientType(client *c);
|
||||
int getClientTypeByName(char *name);
|
||||
char *getClientTypeName(int class);
|
||||
void flushSlavesOutputBuffers(void);
|
||||
void disconnectSlaves(void);
|
||||
void flushReplicasOutputBuffers(void);
|
||||
void disconnectReplicas(void);
|
||||
void evictClients(void);
|
||||
int listenToPort(connListener *fds);
|
||||
void pauseActions(pause_purpose purpose, mstime_t end, uint32_t actions_bitmask);
|
||||
@ -2867,36 +2867,36 @@ ssize_t syncRead(int fd, char *ptr, ssize_t size, long long timeout);
|
||||
ssize_t syncReadLine(int fd, char *ptr, ssize_t size, long long timeout);
|
||||
|
||||
/* Replication */
|
||||
void replicationFeedSlaves(int dictid, robj **argv, int argc);
|
||||
void replicationFeedStreamFromMasterStream(char *buf, size_t buflen);
|
||||
void replicationFeedReplicas(int dictid, robj **argv, int argc);
|
||||
void replicationFeedStreamFromPrimaryStream(char *buf, size_t buflen);
|
||||
void resetReplicationBuffer(void);
|
||||
void feedReplicationBuffer(char *buf, size_t len);
|
||||
void freeReplicaReferencedReplBuffer(client *replica);
|
||||
void replicationFeedMonitors(client *c, list *monitors, int dictid, robj **argv, int argc);
|
||||
void updateSlavesWaitingBgsave(int bgsaveerr, int type);
|
||||
void updateReplicasWaitingBgsave(int bgsaveerr, int type);
|
||||
void replicationCron(void);
|
||||
void replicationStartPendingFork(void);
|
||||
void replicationHandleMasterDisconnection(void);
|
||||
void replicationCacheMaster(client *c);
|
||||
void replicationHandlePrimaryDisconnection(void);
|
||||
void replicationCachePrimary(client *c);
|
||||
void resizeReplicationBacklog(void);
|
||||
void replicationSetMaster(char *ip, int port);
|
||||
void replicationUnsetMaster(void);
|
||||
void refreshGoodSlavesCount(void);
|
||||
void replicationSetPrimary(char *ip, int port);
|
||||
void replicationUnsetPrimary(void);
|
||||
void refreshGoodReplicasCount(void);
|
||||
int checkGoodReplicasStatus(void);
|
||||
void processClientsWaitingReplicas(void);
|
||||
void unblockClientWaitingReplicas(client *c);
|
||||
int replicationCountAcksByOffset(long long offset);
|
||||
int replicationCountAOFAcksByOffset(long long offset);
|
||||
void replicationSendNewlineToMaster(void);
|
||||
long long replicationGetSlaveOffset(void);
|
||||
char *replicationGetSlaveName(client *c);
|
||||
void replicationSendNewlineToPrimary(void);
|
||||
long long replicationGetReplicaOffset(void);
|
||||
char *replicationGetReplicaName(client *c);
|
||||
long long getPsyncInitialOffset(void);
|
||||
int replicationSetupSlaveForFullResync(client *slave, long long offset);
|
||||
int replicationSetupReplicaForFullResync(client *replica, long long offset);
|
||||
void changeReplicationId(void);
|
||||
void clearReplicationId2(void);
|
||||
void createReplicationBacklog(void);
|
||||
void freeReplicationBacklog(void);
|
||||
void replicationCacheMasterUsingMyself(void);
|
||||
void replicationCachePrimaryUsingMyself(void);
|
||||
void feedReplicationBacklog(void *ptr, size_t len);
|
||||
void incrementalTrimReplicationBacklog(size_t blocks);
|
||||
int canFeedReplicaReplBuffer(client *replica);
|
||||
@ -3500,7 +3500,7 @@ void blockForKeys(client *c, int btype, robj **keys, int numkeys, mstime_t timeo
|
||||
void blockClientShutdown(client *c);
|
||||
void blockPostponeClient(client *c);
|
||||
void blockClientForReplicaAck(client *c, mstime_t timeout, long long offset, long numreplicas, int numlocal);
|
||||
void replicationRequestAckFromSlaves(void);
|
||||
void replicationRequestAckFromReplicas(void);
|
||||
void signalDeletedKeyAsReady(serverDb *db, robj *key, int type);
|
||||
void updateStatsOnUnblock(client *c, long blocked_us, long reply_us, int had_errors);
|
||||
void scanDatabaseForDeletedKeys(serverDb *emptied, serverDb *replaced_with);
|
||||
@ -3517,10 +3517,10 @@ int clientsCronHandleTimeout(client *c, mstime_t now_ms);
|
||||
|
||||
/* expire.c -- Handling of expired keys */
|
||||
void activeExpireCycle(int type);
|
||||
void expireSlaveKeys(void);
|
||||
void rememberSlaveKeyWithExpire(serverDb *db, robj *key);
|
||||
void flushSlaveKeysWithExpireList(void);
|
||||
size_t getSlaveKeyWithExpireCount(void);
|
||||
void expireReplicaKeys(void);
|
||||
void rememberReplicaKeyWithExpire(serverDb *db, robj *key);
|
||||
void flushReplicaKeysWithExpireList(void);
|
||||
size_t getReplicaKeyWithExpireCount(void);
|
||||
|
||||
/* evict.c -- maxmemory handling and LRU eviction. */
|
||||
void evictionPoolAlloc(void);
|
||||
@ -3858,7 +3858,7 @@ sds getVersion(void);
|
||||
#define serverDebug(fmt, ...) printf("DEBUG %s:%d > " fmt "\n", __FILE__, __LINE__, __VA_ARGS__)
|
||||
#define serverDebugMark() printf("-- MARK %s:%d --\n", __FILE__, __LINE__)
|
||||
|
||||
int iAmMaster(void);
|
||||
int iAmPrimary(void);
|
||||
|
||||
#define STRINGIFY_(x) #x
|
||||
#define STRINGIFY(x) STRINGIFY_(x)
|
||||
|
30
src/stream.h
30
src/stream.h
@ -30,20 +30,20 @@ typedef struct stream {
|
||||
* rewriting code that also needs to iterate the stream to emit the XADD
|
||||
* commands. */
|
||||
typedef struct streamIterator {
|
||||
stream *stream; /* The stream we are iterating. */
|
||||
streamID master_id; /* ID of the master entry at listpack head. */
|
||||
uint64_t master_fields_count; /* Master entries # of fields. */
|
||||
unsigned char *master_fields_start; /* Master entries start in listpack. */
|
||||
unsigned char *master_fields_ptr; /* Master field to emit next. */
|
||||
int entry_flags; /* Flags of entry we are emitting. */
|
||||
int rev; /* True if iterating end to start (reverse). */
|
||||
int skip_tombstones; /* True if not emitting tombstone entries. */
|
||||
uint64_t start_key[2]; /* Start key as 128 bit big endian. */
|
||||
uint64_t end_key[2]; /* End key as 128 bit big endian. */
|
||||
raxIterator ri; /* Rax iterator. */
|
||||
unsigned char *lp; /* Current listpack. */
|
||||
unsigned char *lp_ele; /* Current listpack cursor. */
|
||||
unsigned char *lp_flags; /* Current entry flags pointer. */
|
||||
stream *stream; /* The stream we are iterating. */
|
||||
streamID primary_id; /* ID of the primary entry at listpack head. */
|
||||
uint64_t primary_fields_count; /* Primary entries # of fields. */
|
||||
unsigned char *primary_fields_start; /* Primary entries start in listpack. */
|
||||
unsigned char *primary_fields_ptr; /* Primary field to emit next. */
|
||||
int entry_flags; /* Flags of entry we are emitting. */
|
||||
int rev; /* True if iterating end to start (reverse). */
|
||||
int skip_tombstones; /* True if not emitting tombstone entries. */
|
||||
uint64_t start_key[2]; /* Start key as 128 bit big endian. */
|
||||
uint64_t end_key[2]; /* End key as 128 bit big endian. */
|
||||
raxIterator ri; /* Rax iterator. */
|
||||
unsigned char *lp; /* Current listpack. */
|
||||
unsigned char *lp_ele; /* Current listpack cursor. */
|
||||
unsigned char *lp_flags; /* Current entry flags pointer. */
|
||||
/* Buffers used to hold the string of lpGet() when the element is
|
||||
* integer encoded, so that there is no string representation of the
|
||||
* element inside the listpack itself. */
|
||||
@ -97,7 +97,7 @@ typedef struct streamNACK {
|
||||
} streamNACK;
|
||||
|
||||
/* Stream propagation information, passed to functions in order to propagate
|
||||
* XCLAIM commands to AOF and slaves. */
|
||||
* XCLAIM commands to AOF and replicas. */
|
||||
typedef struct streamPropInfo {
|
||||
robj *keyname;
|
||||
robj *groupname;
|
||||
|
@ -33,7 +33,7 @@
|
||||
/* ----------------- Blocking sockets I/O with timeouts --------------------- */
|
||||
|
||||
/* The server performs most of the I/O in a nonblocking way, with the exception
|
||||
* of the SYNC command where the slave does it in a blocking way, and
|
||||
* of the SYNC command where the replica does it in a blocking way, and
|
||||
* the MIGRATE command that must be blocking in order to be atomic from the
|
||||
* point of view of the two instances (one migrating the key and one receiving
|
||||
* the key). This is why we need the following blocking I/O functions.
|
||||
|
148
src/t_stream.c
148
src/t_stream.c
@ -32,11 +32,11 @@
|
||||
#include "stream.h"
|
||||
|
||||
/* Every stream item inside the listpack, has a flags field that is used to
|
||||
* mark the entry as deleted, or having the same field as the "master"
|
||||
* mark the entry as deleted, or having the same field as the "primary"
|
||||
* entry at the start of the listpack. */
|
||||
#define STREAM_ITEM_FLAG_NONE 0 /* No special flags. */
|
||||
#define STREAM_ITEM_FLAG_DELETED (1 << 0) /* Entry is deleted. Skip it. */
|
||||
#define STREAM_ITEM_FLAG_SAMEFIELDS (1 << 1) /* Same fields as master entry. */
|
||||
#define STREAM_ITEM_FLAG_SAMEFIELDS (1 << 1) /* Same fields as primary entry. */
|
||||
|
||||
/* For stream commands that require multiple IDs
|
||||
* when the number of IDs is less than 'STREAMID_STATIC_VECTOR_LEN',
|
||||
@ -286,8 +286,8 @@ static inline int64_t lpGetIntegerIfValid(unsigned char *ele, int *valid) {
|
||||
#define lpGetInteger(ele) lpGetIntegerIfValid(ele, NULL)
|
||||
|
||||
/* Get an edge streamID of a given listpack.
|
||||
* 'master_id' is an input param, used to build the 'edge_id' output param */
|
||||
int lpGetEdgeStreamID(unsigned char *lp, int first, streamID *master_id, streamID *edge_id) {
|
||||
* 'primary_id' is an input param, used to build the 'edge_id' output param */
|
||||
int lpGetEdgeStreamID(unsigned char *lp, int first, streamID *primary_id, streamID *edge_id) {
|
||||
if (lp == NULL) return 0;
|
||||
|
||||
unsigned char *lp_ele;
|
||||
@ -295,19 +295,19 @@ int lpGetEdgeStreamID(unsigned char *lp, int first, streamID *master_id, streamI
|
||||
/* We need to seek either the first or the last entry depending
|
||||
* on the direction of the iteration. */
|
||||
if (first) {
|
||||
/* Get the master fields count. */
|
||||
/* Get the primary fields count. */
|
||||
lp_ele = lpFirst(lp); /* Seek items count */
|
||||
lp_ele = lpNext(lp, lp_ele); /* Seek deleted count. */
|
||||
lp_ele = lpNext(lp, lp_ele); /* Seek num fields. */
|
||||
int64_t master_fields_count = lpGetInteger(lp_ele);
|
||||
int64_t primary_fields_count = lpGetInteger(lp_ele);
|
||||
lp_ele = lpNext(lp, lp_ele); /* Seek first field. */
|
||||
|
||||
/* If we are iterating in normal order, skip the master fields
|
||||
/* If we are iterating in normal order, skip the primary fields
|
||||
* to seek the first actual entry. */
|
||||
for (int64_t i = 0; i < master_fields_count; i++) lp_ele = lpNext(lp, lp_ele);
|
||||
for (int64_t i = 0; i < primary_fields_count; i++) lp_ele = lpNext(lp, lp_ele);
|
||||
|
||||
/* If we are going forward, skip the previous entry's
|
||||
* lp-count field (or in case of the master entry, the zero
|
||||
* lp-count field (or in case of the primary entry, the zero
|
||||
* term field) */
|
||||
lp_ele = lpNext(lp, lp_ele);
|
||||
if (lp_ele == NULL) return 0;
|
||||
@ -321,7 +321,7 @@ int lpGetEdgeStreamID(unsigned char *lp, int first, streamID *master_id, streamI
|
||||
* entry is composed of, and jump backward N times to seek
|
||||
* its start. */
|
||||
int64_t lp_count = lpGetInteger(lp_ele);
|
||||
if (lp_count == 0) /* We reached the master entry. */
|
||||
if (lp_count == 0) /* We reached the primary entry. */
|
||||
return 0;
|
||||
|
||||
while (lp_count--) lp_ele = lpPrev(lp, lp_ele);
|
||||
@ -329,9 +329,9 @@ int lpGetEdgeStreamID(unsigned char *lp, int first, streamID *master_id, streamI
|
||||
|
||||
lp_ele = lpNext(lp, lp_ele); /* Seek ID (lp_ele currently points to 'flags'). */
|
||||
|
||||
/* Get the ID: it is encoded as difference between the master
|
||||
/* Get the ID: it is encoded as difference between the primary
|
||||
* ID and this entry ID. */
|
||||
streamID id = *master_id;
|
||||
streamID id = *primary_id;
|
||||
id.ms += lpGetInteger(lp_ele);
|
||||
lp_ele = lpNext(lp, lp_ele);
|
||||
id.seq += lpGetInteger(lp_ele);
|
||||
@ -488,19 +488,19 @@ int streamAppendItem(stream *s, robj **argv, int64_t numfields, streamID *added_
|
||||
* to do so we consider the ID as a single 128 bit number written in
|
||||
* big endian, so that the most significant bytes are the first ones. */
|
||||
uint64_t rax_key[2]; /* Key in the radix tree containing the listpack.*/
|
||||
streamID master_id; /* ID of the master entry in the listpack. */
|
||||
streamID primary_id; /* ID of the primary entry in the listpack. */
|
||||
|
||||
/* Create a new listpack and radix tree node if needed. Note that when
|
||||
* a new listpack is created, we populate it with a "master entry". This
|
||||
* a new listpack is created, we populate it with a "primary entry". This
|
||||
* is just a set of fields that is taken as references in order to compress
|
||||
* the stream entries that we'll add inside the listpack.
|
||||
*
|
||||
* Note that while we use the first added entry fields to create
|
||||
* the master entry, the first added entry is NOT represented in the master
|
||||
* the primary entry, the first added entry is NOT represented in the primary
|
||||
* entry, which is a stand alone object. But of course, the first entry
|
||||
* will compress well because it's used as reference.
|
||||
*
|
||||
* The master entry is composed like in the following example:
|
||||
* The primary entry is composed like in the following example:
|
||||
*
|
||||
* +-------+---------+------------+---------+--/--+---------+---------+-+
|
||||
* | count | deleted | num-fields | field_1 | field_2 | ... | field_N |0|
|
||||
@ -514,7 +514,7 @@ int streamAppendItem(stream *s, robj **argv, int64_t numfields, streamID *added_
|
||||
* The real entries will be encoded with an ID that is just the
|
||||
* millisecond and sequence difference compared to the key stored at
|
||||
* the radix tree node containing the listpack (delta encoding), and
|
||||
* if the fields of the entry are the same as the master entry fields, the
|
||||
* if the fields of the entry are the same as the primary entry fields, the
|
||||
* entry flags will specify this fact and the entry fields and number
|
||||
* of fields will be omitted (see later in the code of this function).
|
||||
*
|
||||
@ -548,9 +548,9 @@ int streamAppendItem(stream *s, robj **argv, int64_t numfields, streamID *added_
|
||||
|
||||
int flags = STREAM_ITEM_FLAG_NONE;
|
||||
if (lp == NULL) {
|
||||
master_id = id;
|
||||
primary_id = id;
|
||||
streamEncodeID(rax_key, &id);
|
||||
/* Create the listpack having the master entry ID and fields.
|
||||
/* Create the listpack having the primary entry ID and fields.
|
||||
* Pre-allocate some bytes when creating listpack to avoid realloc on
|
||||
* every XADD. Since listpack.c uses malloc_size, it'll grow in steps,
|
||||
* and won't realloc on every XADD.
|
||||
@ -568,32 +568,32 @@ int streamAppendItem(stream *s, robj **argv, int64_t numfields, streamID *added_
|
||||
sds field = argv[i * 2]->ptr;
|
||||
lp = lpAppend(lp, (unsigned char *)field, sdslen(field));
|
||||
}
|
||||
lp = lpAppendInteger(lp, 0); /* Master entry zero terminator. */
|
||||
lp = lpAppendInteger(lp, 0); /* primary entry zero terminator. */
|
||||
raxInsert(s->rax, (unsigned char *)&rax_key, sizeof(rax_key), lp, NULL);
|
||||
/* The first entry we insert, has obviously the same fields of the
|
||||
* master entry. */
|
||||
* primary entry. */
|
||||
flags |= STREAM_ITEM_FLAG_SAMEFIELDS;
|
||||
} else {
|
||||
serverAssert(ri.key_len == sizeof(rax_key));
|
||||
memcpy(rax_key, ri.key, sizeof(rax_key));
|
||||
|
||||
/* Read the master ID from the radix tree key. */
|
||||
streamDecodeID(rax_key, &master_id);
|
||||
/* Read the primary ID from the radix tree key. */
|
||||
streamDecodeID(rax_key, &primary_id);
|
||||
unsigned char *lp_ele = lpFirst(lp);
|
||||
|
||||
/* Update count and skip the deleted fields. */
|
||||
int64_t count = lpGetInteger(lp_ele);
|
||||
lp = lpReplaceInteger(lp, &lp_ele, count + 1);
|
||||
lp_ele = lpNext(lp, lp_ele); /* seek deleted. */
|
||||
lp_ele = lpNext(lp, lp_ele); /* seek master entry num fields. */
|
||||
lp_ele = lpNext(lp, lp_ele); /* seek primary entry num fields. */
|
||||
|
||||
/* Check if the entry we are adding has the same fields
|
||||
* as the master entry. */
|
||||
int64_t master_fields_count = lpGetInteger(lp_ele);
|
||||
* as the primary entry. */
|
||||
int64_t primary_fields_count = lpGetInteger(lp_ele);
|
||||
lp_ele = lpNext(lp, lp_ele);
|
||||
if (numfields == master_fields_count) {
|
||||
if (numfields == primary_fields_count) {
|
||||
int64_t i;
|
||||
for (i = 0; i < master_fields_count; i++) {
|
||||
for (i = 0; i < primary_fields_count; i++) {
|
||||
sds field = argv[i * 2]->ptr;
|
||||
int64_t e_len;
|
||||
unsigned char buf[LP_INTBUF_SIZE];
|
||||
@ -604,7 +604,7 @@ int streamAppendItem(stream *s, robj **argv, int64_t numfields, streamID *added_
|
||||
}
|
||||
/* All fields are the same! We can compress the field names
|
||||
* setting a single bit in the flags. */
|
||||
if (i == master_fields_count) flags |= STREAM_ITEM_FLAG_SAMEFIELDS;
|
||||
if (i == primary_fields_count) flags |= STREAM_ITEM_FLAG_SAMEFIELDS;
|
||||
}
|
||||
}
|
||||
|
||||
@ -623,7 +623,7 @@ int streamAppendItem(stream *s, robj **argv, int64_t numfields, streamID *added_
|
||||
* +-----+--------+-------+-/-+-------+--------+
|
||||
*
|
||||
* The entry-id field is actually two separated fields: the ms
|
||||
* and seq difference compared to the master entry.
|
||||
* and seq difference compared to the primary entry.
|
||||
*
|
||||
* The lp-count field is a number that states the number of listpack pieces
|
||||
* that compose the entry, so that it's possible to travel the entry
|
||||
@ -631,8 +631,8 @@ int streamAppendItem(stream *s, robj **argv, int64_t numfields, streamID *added_
|
||||
* the entry, and jump back N times to seek the "flags" field to read
|
||||
* the stream full entry. */
|
||||
lp = lpAppendInteger(lp, flags);
|
||||
lp = lpAppendInteger(lp, id.ms - master_id.ms);
|
||||
lp = lpAppendInteger(lp, id.seq - master_id.seq);
|
||||
lp = lpAppendInteger(lp, id.ms - primary_id.ms);
|
||||
lp = lpAppendInteger(lp, id.seq - primary_id.seq);
|
||||
if (!(flags & STREAM_ITEM_FLAG_SAMEFIELDS)) lp = lpAppendInteger(lp, numfields);
|
||||
for (int64_t i = 0; i < numfields; i++) {
|
||||
sds field = argv[i * 2]->ptr, value = argv[i * 2 + 1]->ptr;
|
||||
@ -731,16 +731,16 @@ int64_t streamTrim(stream *s, streamAddTrimArgs *args) {
|
||||
|
||||
/* Check if we can remove the whole node. */
|
||||
int remove_node;
|
||||
streamID master_id = {0}; /* For MINID */
|
||||
streamID primary_id = {0}; /* For MINID */
|
||||
if (trim_strategy == TRIM_STRATEGY_MAXLEN) {
|
||||
remove_node = s->length - entries >= maxlen;
|
||||
} else {
|
||||
/* Read the master ID from the radix tree key. */
|
||||
streamDecodeID(ri.key, &master_id);
|
||||
/* Read the primary ID from the radix tree key. */
|
||||
streamDecodeID(ri.key, &primary_id);
|
||||
|
||||
/* Read last ID. */
|
||||
streamID last_id = {0, 0};
|
||||
lpGetEdgeStreamID(lp, 0, &master_id, &last_id);
|
||||
lpGetEdgeStreamID(lp, 0, &primary_id, &last_id);
|
||||
|
||||
/* We can remove the entire node if its last ID < 'id' */
|
||||
remove_node = streamCompareID(&last_id, id) < 0;
|
||||
@ -763,13 +763,13 @@ int64_t streamTrim(stream *s, streamAddTrimArgs *args) {
|
||||
int64_t deleted_from_lp = 0;
|
||||
|
||||
p = lpNext(lp, p); /* Skip deleted field. */
|
||||
p = lpNext(lp, p); /* Skip num-of-fields in the master entry. */
|
||||
p = lpNext(lp, p); /* Skip num-of-fields in the primary entry. */
|
||||
|
||||
/* Skip all the master fields. */
|
||||
int64_t master_fields_count = lpGetInteger(p);
|
||||
p = lpNext(lp, p); /* Skip the first field. */
|
||||
for (int64_t j = 0; j < master_fields_count; j++) p = lpNext(lp, p); /* Skip all master fields. */
|
||||
p = lpNext(lp, p); /* Skip the zero master entry terminator. */
|
||||
/* Skip all the primary fields. */
|
||||
int64_t primary_fields_count = lpGetInteger(p);
|
||||
p = lpNext(lp, p); /* Skip the first field. */
|
||||
for (int64_t j = 0; j < primary_fields_count; j++) p = lpNext(lp, p); /* Skip all primary fields. */
|
||||
p = lpNext(lp, p); /* Skip the zero primary entry terminator. */
|
||||
|
||||
/* 'p' is now pointing to the first entry inside the listpack.
|
||||
* We have to run entry after entry, marking entries as deleted
|
||||
@ -790,8 +790,8 @@ int64_t streamTrim(stream *s, streamAddTrimArgs *args) {
|
||||
|
||||
streamID currid = {0}; /* For MINID */
|
||||
if (trim_strategy == TRIM_STRATEGY_MINID) {
|
||||
currid.ms = master_id.ms + ms_delta;
|
||||
currid.seq = master_id.seq + seq_delta;
|
||||
currid.ms = primary_id.ms + ms_delta;
|
||||
currid.seq = primary_id.seq + seq_delta;
|
||||
}
|
||||
|
||||
int stop;
|
||||
@ -805,7 +805,7 @@ int64_t streamTrim(stream *s, streamAddTrimArgs *args) {
|
||||
if (stop) break;
|
||||
|
||||
if (flags & STREAM_ITEM_FLAG_SAMEFIELDS) {
|
||||
to_skip = master_fields_count;
|
||||
to_skip = primary_fields_count;
|
||||
} else {
|
||||
to_skip = lpGetInteger(p); /* Get num-fields. */
|
||||
p = lpNext(lp, p); /* Skip num-fields. */
|
||||
@ -833,7 +833,7 @@ int64_t streamTrim(stream *s, streamAddTrimArgs *args) {
|
||||
p = lpNext(lp, p); /* Skip deleted field. */
|
||||
int64_t marked_deleted = lpGetInteger(p);
|
||||
lp = lpReplaceInteger(lp, &p, marked_deleted + deleted_from_lp);
|
||||
p = lpNext(lp, p); /* Skip num-of-fields in the master entry. */
|
||||
p = lpNext(lp, p); /* Skip num-of-fields in the primary entry. */
|
||||
|
||||
/* Here we should perform garbage collection in case at this point
|
||||
* there are too many entries deleted inside the listpack. */
|
||||
@ -983,7 +983,7 @@ static int streamParseAddOrTrimArgsOrReply(client *c, streamAddTrimArgs *args, i
|
||||
}
|
||||
|
||||
if (mustObeyClient(c)) {
|
||||
/* If command came from master or from AOF we must not enforce maxnodes
|
||||
/* If command came from primary or from AOF we must not enforce maxnodes
|
||||
* (The maxlen/minid argument was re-written to make sure there's no
|
||||
* inconsistency). */
|
||||
args->limit = 0;
|
||||
@ -1092,23 +1092,23 @@ int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields) {
|
||||
else if (si->rev && !raxPrev(&si->ri))
|
||||
return 0;
|
||||
serverAssert(si->ri.key_len == sizeof(streamID));
|
||||
/* Get the master ID. */
|
||||
streamDecodeID(si->ri.key, &si->master_id);
|
||||
/* Get the master fields count. */
|
||||
/* Get the primary ID. */
|
||||
streamDecodeID(si->ri.key, &si->primary_id);
|
||||
/* Get the primary fields count. */
|
||||
si->lp = si->ri.data;
|
||||
si->lp_ele = lpFirst(si->lp); /* Seek items count */
|
||||
si->lp_ele = lpNext(si->lp, si->lp_ele); /* Seek deleted count. */
|
||||
si->lp_ele = lpNext(si->lp, si->lp_ele); /* Seek num fields. */
|
||||
si->master_fields_count = lpGetInteger(si->lp_ele);
|
||||
si->primary_fields_count = lpGetInteger(si->lp_ele);
|
||||
si->lp_ele = lpNext(si->lp, si->lp_ele); /* Seek first field. */
|
||||
si->master_fields_start = si->lp_ele;
|
||||
/* We are now pointing to the first field of the master entry.
|
||||
si->primary_fields_start = si->lp_ele;
|
||||
/* We are now pointing to the first field of the primary entry.
|
||||
* We need to seek either the first or the last entry depending
|
||||
* on the direction of the iteration. */
|
||||
if (!si->rev) {
|
||||
/* If we are iterating in normal order, skip the master fields
|
||||
/* If we are iterating in normal order, skip the primary fields
|
||||
* to seek the first actual entry. */
|
||||
for (uint64_t i = 0; i < si->master_fields_count; i++) si->lp_ele = lpNext(si->lp, si->lp_ele);
|
||||
for (uint64_t i = 0; i < si->primary_fields_count; i++) si->lp_ele = lpNext(si->lp, si->lp_ele);
|
||||
} else {
|
||||
/* If we are iterating in reverse direction, just seek the
|
||||
* last part of the last entry in the listpack (that is, the
|
||||
@ -1131,7 +1131,7 @@ int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields) {
|
||||
while (1) {
|
||||
if (!si->rev) {
|
||||
/* If we are going forward, skip the previous entry
|
||||
* lp-count field (or in case of the master entry, the zero
|
||||
* lp-count field (or in case of the primary entry, the zero
|
||||
* term field) */
|
||||
si->lp_ele = lpNext(si->lp, si->lp_ele);
|
||||
if (si->lp_ele == NULL) break;
|
||||
@ -1140,7 +1140,7 @@ int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields) {
|
||||
* entry is composed of, and jump backward N times to seek
|
||||
* its start. */
|
||||
int64_t lp_count = lpGetInteger(si->lp_ele);
|
||||
if (lp_count == 0) { /* We reached the master entry. */
|
||||
if (lp_count == 0) { /* We reached the primary entry. */
|
||||
si->lp = NULL;
|
||||
si->lp_ele = NULL;
|
||||
break;
|
||||
@ -1153,9 +1153,9 @@ int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields) {
|
||||
int64_t flags = lpGetInteger(si->lp_ele);
|
||||
si->lp_ele = lpNext(si->lp, si->lp_ele); /* Seek ID. */
|
||||
|
||||
/* Get the ID: it is encoded as difference between the master
|
||||
/* Get the ID: it is encoded as difference between the primary
|
||||
* ID and this entry ID. */
|
||||
*id = si->master_id;
|
||||
*id = si->primary_id;
|
||||
id->ms += lpGetInteger(si->lp_ele);
|
||||
si->lp_ele = lpNext(si->lp, si->lp_ele);
|
||||
id->seq += lpGetInteger(si->lp_ele);
|
||||
@ -1166,7 +1166,7 @@ int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields) {
|
||||
/* The number of entries is here or not depending on the
|
||||
* flags. */
|
||||
if (flags & STREAM_ITEM_FLAG_SAMEFIELDS) {
|
||||
*numfields = si->master_fields_count;
|
||||
*numfields = si->primary_fields_count;
|
||||
} else {
|
||||
*numfields = lpGetInteger(si->lp_ele);
|
||||
si->lp_ele = lpNext(si->lp, si->lp_ele);
|
||||
@ -1180,7 +1180,7 @@ int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields) {
|
||||
(!si->skip_tombstones || !(flags & STREAM_ITEM_FLAG_DELETED))) {
|
||||
if (memcmp(buf, si->end_key, sizeof(streamID)) > 0) return 0; /* We are already out of range. */
|
||||
si->entry_flags = flags;
|
||||
if (flags & STREAM_ITEM_FLAG_SAMEFIELDS) si->master_fields_ptr = si->master_fields_start;
|
||||
if (flags & STREAM_ITEM_FLAG_SAMEFIELDS) si->primary_fields_ptr = si->primary_fields_start;
|
||||
return 1; /* Valid item returned. */
|
||||
}
|
||||
} else {
|
||||
@ -1188,7 +1188,7 @@ int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields) {
|
||||
(!si->skip_tombstones || !(flags & STREAM_ITEM_FLAG_DELETED))) {
|
||||
if (memcmp(buf, si->start_key, sizeof(streamID)) < 0) return 0; /* We are already out of range. */
|
||||
si->entry_flags = flags;
|
||||
if (flags & STREAM_ITEM_FLAG_SAMEFIELDS) si->master_fields_ptr = si->master_fields_start;
|
||||
if (flags & STREAM_ITEM_FLAG_SAMEFIELDS) si->primary_fields_ptr = si->primary_fields_start;
|
||||
return 1; /* Valid item returned. */
|
||||
}
|
||||
}
|
||||
@ -1226,8 +1226,8 @@ void streamIteratorGetField(streamIterator *si,
|
||||
int64_t *fieldlen,
|
||||
int64_t *valuelen) {
|
||||
if (si->entry_flags & STREAM_ITEM_FLAG_SAMEFIELDS) {
|
||||
*fieldptr = lpGet(si->master_fields_ptr, fieldlen, si->field_buf);
|
||||
si->master_fields_ptr = lpNext(si->lp, si->master_fields_ptr);
|
||||
*fieldptr = lpGet(si->primary_fields_ptr, fieldlen, si->field_buf);
|
||||
si->primary_fields_ptr = lpNext(si->lp, si->primary_fields_ptr);
|
||||
} else {
|
||||
*fieldptr = lpGet(si->lp_ele, fieldlen, si->field_buf);
|
||||
si->lp_ele = lpNext(si->lp, si->lp_ele);
|
||||
@ -1259,7 +1259,7 @@ void streamIteratorRemoveEntry(streamIterator *si, streamID *current) {
|
||||
flags |= STREAM_ITEM_FLAG_DELETED;
|
||||
lp = lpReplaceInteger(lp, &si->lp_flags, flags);
|
||||
|
||||
/* Change the valid/deleted entries count in the master entry. */
|
||||
/* Change the valid/deleted entries count in the primary entry. */
|
||||
unsigned char *p = lpFirst(lp);
|
||||
aux = lpGetInteger(p);
|
||||
|
||||
@ -1526,7 +1526,7 @@ void streamPropagateXCLAIM(client *c, robj *key, streamCG *group, robj *groupnam
|
||||
* RETRYCOUNT <count> FORCE JUSTID LASTID <id>.
|
||||
*
|
||||
* Note that JUSTID is useful in order to avoid that XCLAIM will do
|
||||
* useless work in the slave side, trying to fetch the stream item. */
|
||||
* useless work in the replica side, trying to fetch the stream item. */
|
||||
robj *argv[14];
|
||||
argv[0] = shared.xclaim;
|
||||
argv[1] = key;
|
||||
@ -1625,8 +1625,8 @@ void streamPropagateConsumerCreation(client *c, robj *key, robj *groupname, sds
|
||||
*
|
||||
* The final argument 'spi' (stream propagation info pointer) is a structure
|
||||
* filled with information needed to propagate the command execution to AOF
|
||||
* and slaves, in the case a consumer group was passed: we need to generate
|
||||
* XCLAIM commands to create the pending list into AOF/slaves in that case.
|
||||
* and replicas, in the case a consumer group was passed: we need to generate
|
||||
* XCLAIM commands to create the pending list into AOF/replicas in that case.
|
||||
*
|
||||
* If 'spi' is set to NULL no propagation will happen even if the group was
|
||||
* given, but currently such a feature is never used by the code base that
|
||||
@ -1689,7 +1689,7 @@ size_t streamReplyWithRange(client *c,
|
||||
group->last_id = id;
|
||||
/* In the past, we would only set it when NOACK was specified. And in
|
||||
* #9127, XCLAIM did not propagate entries_read in ACK, which would
|
||||
* cause entries_read to be inconsistent between master and replicas,
|
||||
* cause entries_read to be inconsistent between primary and replicas,
|
||||
* so here we call streamPropagateGroupID unconditionally. */
|
||||
propagate_last_id = 1;
|
||||
}
|
||||
@ -2144,7 +2144,7 @@ void xlenCommand(client *c) {
|
||||
* This function also implements the XREADGROUP command, which is like XREAD
|
||||
* but accepting the [GROUP group-name consumer-name] additional option.
|
||||
* This is useful because while XREAD is a read command and can be called
|
||||
* on slaves, XREADGROUP is not. */
|
||||
* on replicas, XREADGROUP is not. */
|
||||
#define XREAD_BLOCKED_DEFAULT_COUNT 1000
|
||||
void xreadCommand(client *c) {
|
||||
long long timeout = -1; /* -1 means, no BLOCK argument given. */
|
||||
@ -3928,18 +3928,18 @@ int streamValidateListpackIntegrity(unsigned char *lp, size_t size, int deep) {
|
||||
if (!lpValidateNext(lp, &next, size)) return 0;
|
||||
|
||||
/* num-of-fields */
|
||||
int64_t master_fields = lpGetIntegerIfValid(p, &valid_record);
|
||||
int64_t primary_fields = lpGetIntegerIfValid(p, &valid_record);
|
||||
if (!valid_record) return 0;
|
||||
p = next;
|
||||
if (!lpValidateNext(lp, &next, size)) return 0;
|
||||
|
||||
/* the field names */
|
||||
for (int64_t j = 0; j < master_fields; j++) {
|
||||
for (int64_t j = 0; j < primary_fields; j++) {
|
||||
p = next;
|
||||
if (!lpValidateNext(lp, &next, size)) return 0;
|
||||
}
|
||||
|
||||
/* the zero master entry terminator. */
|
||||
/* the zero primary entry terminator. */
|
||||
int64_t zero = lpGetIntegerIfValid(p, &valid_record);
|
||||
if (!valid_record || zero != 0) return 0;
|
||||
p = next;
|
||||
@ -3948,7 +3948,7 @@ int streamValidateListpackIntegrity(unsigned char *lp, size_t size, int deep) {
|
||||
entry_count += deleted_count;
|
||||
while (entry_count--) {
|
||||
if (!p) return 0;
|
||||
int64_t fields = master_fields, extra_fields = 3;
|
||||
int64_t fields = primary_fields, extra_fields = 3;
|
||||
int64_t flags = lpGetIntegerIfValid(p, &valid_record);
|
||||
if (!valid_record) return 0;
|
||||
p = next;
|
||||
|
@ -55,11 +55,11 @@ int clientsCronHandleTimeout(client *c, mstime_t now_ms) {
|
||||
|
||||
if (server.maxidletime &&
|
||||
/* This handles the idle clients connection timeout if set. */
|
||||
!(c->flags & CLIENT_SLAVE) && /* No timeout for slaves and monitors */
|
||||
!mustObeyClient(c) && /* No timeout for masters and AOF */
|
||||
!(c->flags & CLIENT_REPLICA) && /* No timeout for replicas and monitors */
|
||||
!mustObeyClient(c) && /* No timeout for primaries and AOF */
|
||||
!(c->flags & CLIENT_BLOCKED) && /* No timeout for BLPOP */
|
||||
!(c->flags & CLIENT_PUBSUB) && /* No timeout for Pub/Sub clients */
|
||||
(now - c->lastinteraction > server.maxidletime)) {
|
||||
(now - c->last_interaction > server.maxidletime)) {
|
||||
serverLog(LL_VERBOSE, "Closing idle client");
|
||||
freeClient(c);
|
||||
return 1;
|
||||
|
@ -162,7 +162,7 @@ typedef struct clusterNode {
|
||||
int port;
|
||||
sds name;
|
||||
int flags;
|
||||
sds replicate; /* Master ID if node is a slave */
|
||||
sds replicate; /* Primary ID if node is a replica */
|
||||
int *slots;
|
||||
int slots_count;
|
||||
int *updated_slots; /* Used by updateClusterSlotsConfiguration */
|
||||
@ -1092,7 +1092,7 @@ static int fetchClusterConfiguration(void) {
|
||||
*p = '\0';
|
||||
line = lines;
|
||||
lines = p + 1;
|
||||
char *name = NULL, *addr = NULL, *flags = NULL, *master_id = NULL;
|
||||
char *name = NULL, *addr = NULL, *flags = NULL, *primary_id = NULL;
|
||||
int i = 0;
|
||||
while ((p = strchr(line, ' ')) != NULL) {
|
||||
*p = '\0';
|
||||
@ -1103,7 +1103,7 @@ static int fetchClusterConfiguration(void) {
|
||||
case 0: name = token; break;
|
||||
case 1: addr = token; break;
|
||||
case 2: flags = token; break;
|
||||
case 3: master_id = token; break;
|
||||
case 3: primary_id = token; break;
|
||||
}
|
||||
/* clang-format on */
|
||||
if (i == 8) break; // Slots
|
||||
@ -1114,7 +1114,7 @@ static int fetchClusterConfiguration(void) {
|
||||
goto cleanup;
|
||||
}
|
||||
int myself = (strstr(flags, "myself") != NULL);
|
||||
int is_replica = (strstr(flags, "slave") != NULL || (master_id != NULL && master_id[0] != '-'));
|
||||
int is_replica = (strstr(flags, "slave") != NULL || (primary_id != NULL && primary_id[0] != '-'));
|
||||
if (is_replica) continue;
|
||||
if (addr == NULL) {
|
||||
fprintf(stderr, "Invalid CLUSTER NODES reply: missing addr.\n");
|
||||
@ -1249,7 +1249,7 @@ static int fetchClusterSlotsConfiguration(client c) {
|
||||
NULL /* allow to expand */
|
||||
};
|
||||
/* printf("[%d] fetchClusterSlotsConfiguration\n", c->thread_id); */
|
||||
dict *masters = dictCreate(&dtype);
|
||||
dict *primaries = dictCreate(&dtype);
|
||||
redisContext *ctx = NULL;
|
||||
for (i = 0; i < (size_t)config.cluster_node_count; i++) {
|
||||
clusterNode *node = config.cluster_nodes[i];
|
||||
@ -1267,7 +1267,7 @@ static int fetchClusterSlotsConfiguration(client c) {
|
||||
if (node->updated_slots != NULL) zfree(node->updated_slots);
|
||||
node->updated_slots = NULL;
|
||||
node->updated_slots_count = 0;
|
||||
dictReplace(masters, node->name, node);
|
||||
dictReplace(primaries, node->name, node);
|
||||
}
|
||||
reply = redisCommand(ctx, "CLUSTER SLOTS");
|
||||
if (reply == NULL || reply->type == REDIS_REPLY_ERROR) {
|
||||
@ -1287,7 +1287,7 @@ static int fetchClusterSlotsConfiguration(client c) {
|
||||
assert(nr->type == REDIS_REPLY_ARRAY && nr->elements >= 3);
|
||||
assert(nr->element[2]->str != NULL);
|
||||
sds name = sdsnew(nr->element[2]->str);
|
||||
dictEntry *entry = dictFind(masters, name);
|
||||
dictEntry *entry = dictFind(primaries, name);
|
||||
if (entry == NULL) {
|
||||
success = 0;
|
||||
fprintf(stderr,
|
||||
@ -1306,7 +1306,7 @@ static int fetchClusterSlotsConfiguration(client c) {
|
||||
cleanup:
|
||||
freeReplyObject(reply);
|
||||
redisFree(ctx);
|
||||
dictRelease(masters);
|
||||
dictRelease(primaries);
|
||||
atomic_store_explicit(&config.is_fetching_slots, 0, memory_order_relaxed);
|
||||
return success;
|
||||
}
|
||||
|
330
src/valkey-cli.c
330
src/valkey-cli.c
@ -92,7 +92,7 @@
|
||||
"address (ie. 120.0.0.1:7000) or space separated IP " \
|
||||
"and port (ie. 120.0.0.1 7000)\n"
|
||||
#define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL)
|
||||
#define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) ((nodes) / ((replicas) + 1))
|
||||
#define CLUSTER_MANAGER_PRIMARIES_COUNT(nodes, replicas) ((nodes) / ((replicas) + 1))
|
||||
#define CLUSTER_MANAGER_COMMAND(n, ...) (redisCommand((n)->context, __VA_ARGS__))
|
||||
|
||||
#define CLUSTER_MANAGER_NODE_ARRAY_FREE(array) zfree((array)->alloc)
|
||||
@ -109,25 +109,25 @@
|
||||
#define clusterManagerLogOk(...) clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_SUCCESS, __VA_ARGS__)
|
||||
|
||||
#define CLUSTER_MANAGER_FLAG_MYSELF 1 << 0
|
||||
#define CLUSTER_MANAGER_FLAG_SLAVE 1 << 1
|
||||
#define CLUSTER_MANAGER_FLAG_REPLICA 1 << 1
|
||||
#define CLUSTER_MANAGER_FLAG_FRIEND 1 << 2
|
||||
#define CLUSTER_MANAGER_FLAG_NOADDR 1 << 3
|
||||
#define CLUSTER_MANAGER_FLAG_DISCONNECT 1 << 4
|
||||
#define CLUSTER_MANAGER_FLAG_FAIL 1 << 5
|
||||
|
||||
#define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0
|
||||
#define CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1
|
||||
#define CLUSTER_MANAGER_CMD_FLAG_REPLICA 1 << 1
|
||||
#define CLUSTER_MANAGER_CMD_FLAG_YES 1 << 2
|
||||
#define CLUSTER_MANAGER_CMD_FLAG_AUTOWEIGHTS 1 << 3
|
||||
#define CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER 1 << 4
|
||||
#define CLUSTER_MANAGER_CMD_FLAG_EMPTY_PRIMARY 1 << 4
|
||||
#define CLUSTER_MANAGER_CMD_FLAG_SIMULATE 1 << 5
|
||||
#define CLUSTER_MANAGER_CMD_FLAG_REPLACE 1 << 6
|
||||
#define CLUSTER_MANAGER_CMD_FLAG_COPY 1 << 7
|
||||
#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 8
|
||||
#define CLUSTER_MANAGER_CMD_FLAG_CHECK_OWNERS 1 << 9
|
||||
#define CLUSTER_MANAGER_CMD_FLAG_FIX_WITH_UNREACHABLE_MASTERS 1 << 10
|
||||
#define CLUSTER_MANAGER_CMD_FLAG_MASTERS_ONLY 1 << 11
|
||||
#define CLUSTER_MANAGER_CMD_FLAG_SLAVES_ONLY 1 << 12
|
||||
#define CLUSTER_MANAGER_CMD_FLAG_FIX_WITH_UNREACHABLE_PRIMARIES 1 << 10
|
||||
#define CLUSTER_MANAGER_CMD_FLAG_PRIMARIES_ONLY 1 << 11
|
||||
#define CLUSTER_MANAGER_CMD_FLAG_REPLICAS_ONLY 1 << 12
|
||||
|
||||
#define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0
|
||||
#define CLUSTER_MANAGER_OPT_COLD 1 << 1
|
||||
@ -188,7 +188,7 @@ typedef struct clusterManagerCommand {
|
||||
char *to;
|
||||
char **weight;
|
||||
int weight_argc;
|
||||
char *master_id;
|
||||
char *primary_id;
|
||||
int slots;
|
||||
int timeout;
|
||||
int pipeline;
|
||||
@ -225,7 +225,7 @@ static struct config {
|
||||
int cluster_mode;
|
||||
int cluster_reissue_command;
|
||||
int cluster_send_asking;
|
||||
int slave_mode;
|
||||
int replica_mode;
|
||||
int pipe_mode;
|
||||
int pipe_timeout;
|
||||
int getrdb_mode;
|
||||
@ -281,7 +281,7 @@ static struct pref {
|
||||
|
||||
static volatile sig_atomic_t force_cancel_loop = 0;
|
||||
static void usage(int err);
|
||||
static void slaveMode(int send_sync);
|
||||
static void replicaMode(int send_sync);
|
||||
static int cliConnect(int flags);
|
||||
|
||||
static char *getInfoField(char *info, char *field);
|
||||
@ -2325,7 +2325,7 @@ static int cliSendCommand(int argc, char **argv, long repeat) {
|
||||
(!strcasecmp(command, "subscribe") || !strcasecmp(command, "psubscribe") || !strcasecmp(command, "ssubscribe"));
|
||||
int is_unsubscribe = (!strcasecmp(command, "unsubscribe") || !strcasecmp(command, "punsubscribe") ||
|
||||
!strcasecmp(command, "sunsubscribe"));
|
||||
if (!strcasecmp(command, "sync") || !strcasecmp(command, "psync")) config.slave_mode = 1;
|
||||
if (!strcasecmp(command, "sync") || !strcasecmp(command, "psync")) config.replica_mode = 1;
|
||||
|
||||
/* When the user manually calls SCRIPT DEBUG, setup the activation of
|
||||
* debugging mode on the next eval if needed. */
|
||||
@ -2378,12 +2378,12 @@ static int cliSendCommand(int argc, char **argv, long repeat) {
|
||||
redisSetPushCallback(context, NULL);
|
||||
}
|
||||
|
||||
if (config.slave_mode) {
|
||||
if (config.replica_mode) {
|
||||
printf("Entering replica output mode... (press Ctrl-C to quit)\n");
|
||||
slaveMode(0);
|
||||
config.slave_mode = 0;
|
||||
replicaMode(0);
|
||||
config.replica_mode = 0;
|
||||
zfree(argvlen);
|
||||
return REDIS_ERR; /* Error = slaveMode lost connection to master */
|
||||
return REDIS_ERR; /* Error = replicaMode lost connection to primary */
|
||||
}
|
||||
|
||||
/* Read response, possibly skipping pubsub/push messages. */
|
||||
@ -2604,9 +2604,9 @@ static int parseOptions(int argc, char **argv) {
|
||||
config.lru_test_mode = 1;
|
||||
config.lru_test_sample_size = strtoll(argv[++i], NULL, 10);
|
||||
} else if (!strcmp(argv[i], "--slave")) {
|
||||
config.slave_mode = 1;
|
||||
config.replica_mode = 1;
|
||||
} else if (!strcmp(argv[i], "--replica")) {
|
||||
config.slave_mode = 1;
|
||||
config.replica_mode = 1;
|
||||
} else if (!strcmp(argv[i], "--stat")) {
|
||||
config.stat_mode = 1;
|
||||
} else if (!strcmp(argv[i], "--scan")) {
|
||||
@ -2682,14 +2682,14 @@ static int parseOptions(int argc, char **argv) {
|
||||
i = j;
|
||||
} else if (!strcmp(argv[i], "--cluster") && lastarg) {
|
||||
usage(1);
|
||||
} else if ((!strcmp(argv[i], "--cluster-only-masters"))) {
|
||||
config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_MASTERS_ONLY;
|
||||
} else if (!strcmp(argv[i], "--cluster-only-masters") || !strcmp(argv[i], "--cluster-only-primaries")) {
|
||||
config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_PRIMARIES_ONLY;
|
||||
} else if ((!strcmp(argv[i], "--cluster-only-replicas"))) {
|
||||
config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_SLAVES_ONLY;
|
||||
config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_REPLICAS_ONLY;
|
||||
} else if (!strcmp(argv[i], "--cluster-replicas") && !lastarg) {
|
||||
config.cluster_manager_command.replicas = atoi(argv[++i]);
|
||||
} else if (!strcmp(argv[i], "--cluster-master-id") && !lastarg) {
|
||||
config.cluster_manager_command.master_id = argv[++i];
|
||||
} else if ((!strcmp(argv[i], "--cluster-master-id") || !strcmp(argv[i], "--cluster-primary-id")) && !lastarg) {
|
||||
config.cluster_manager_command.primary_id = argv[++i];
|
||||
} else if (!strcmp(argv[i], "--cluster-from") && !lastarg) {
|
||||
config.cluster_manager_command.from = argv[++i];
|
||||
} else if (!strcmp(argv[i], "--cluster-to") && !lastarg) {
|
||||
@ -2738,14 +2738,16 @@ static int parseOptions(int argc, char **argv) {
|
||||
config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_REPLACE;
|
||||
} else if (!strcmp(argv[i], "--cluster-copy")) {
|
||||
config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_COPY;
|
||||
} else if (!strcmp(argv[i], "--cluster-slave")) {
|
||||
config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_SLAVE;
|
||||
} else if (!strcmp(argv[i], "--cluster-use-empty-masters")) {
|
||||
config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER;
|
||||
} else if (!strcmp(argv[i], "--cluster-slave") || !strcmp(argv[i], "--cluster-replica")) {
|
||||
config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_REPLICA;
|
||||
} else if (!strcmp(argv[i], "--cluster-use-empty-masters") ||
|
||||
!strcmp(argv[i], "--cluster-use-empty-primaries")) {
|
||||
config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_EMPTY_PRIMARY;
|
||||
} else if (!strcmp(argv[i], "--cluster-search-multiple-owners")) {
|
||||
config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_CHECK_OWNERS;
|
||||
} else if (!strcmp(argv[i], "--cluster-fix-with-unreachable-masters")) {
|
||||
config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_FIX_WITH_UNREACHABLE_MASTERS;
|
||||
} else if (!strcmp(argv[i], "--cluster-fix-with-unreachable-masters") ||
|
||||
!strcmp(argv[i], "--cluster-fix-with-unreachable-primaries")) {
|
||||
config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_FIX_WITH_UNREACHABLE_PRIMARIES;
|
||||
} else if (!strcmp(argv[i], "--test_hint") && !lastarg) {
|
||||
config.test_hint = argv[++i];
|
||||
} else if (!strcmp(argv[i], "--test_hint_file") && !lastarg) {
|
||||
@ -2958,7 +2960,7 @@ version,tls_usage);
|
||||
" --latency-dist Shows latency as a spectrum, requires xterm 256 colors.\n"
|
||||
" Default time interval is 1 sec. Change it using -i.\n"
|
||||
" --lru-test <keys> Simulate a cache workload with an 80-20 distribution.\n"
|
||||
" --replica Simulate a replica showing commands received from the master.\n"
|
||||
" --replica Simulate a replica showing commands received from the primaries.\n"
|
||||
" --rdb <filename> Transfer an RDB dump from remote server to local file.\n"
|
||||
" Use filename of \"-\" to write to stdout.\n"
|
||||
" --functions-rdb <filename> Like --rdb but only get the functions (not the keys)\n"
|
||||
@ -3486,7 +3488,7 @@ static int evalMode(int argc, char **argv) {
|
||||
static struct clusterManager {
|
||||
list *nodes; /* List of nodes in the configuration. */
|
||||
list *errors;
|
||||
int unreachable_masters; /* Masters we are not able to reach. */
|
||||
int unreachable_primaries; /* Primaries we are not able to reach. */
|
||||
} cluster_manager;
|
||||
|
||||
/* Used by clusterManagerFixSlotsCoverage */
|
||||
@ -3503,7 +3505,7 @@ typedef struct clusterManagerNode {
|
||||
time_t ping_recv;
|
||||
int flags;
|
||||
list *flags_str; /* Flags string representations */
|
||||
sds replicate; /* Master ID if node is a slave */
|
||||
sds replicate; /* Primary ID if node is a replica */
|
||||
int dirty; /* Node has changes that can be flushed */
|
||||
uint8_t slots[CLUSTER_MANAGER_SLOTS];
|
||||
int slots_count;
|
||||
@ -3624,18 +3626,18 @@ clusterManagerCommandDef clusterManagerCommands[] = {
|
||||
"search-multiple-owners"},
|
||||
{"info", clusterManagerCommandInfo, -1, "<host:port> or <host> <port> - separated by either colon or space", NULL},
|
||||
{"fix", clusterManagerCommandFix, -1, "<host:port> or <host> <port> - separated by either colon or space",
|
||||
"search-multiple-owners,fix-with-unreachable-masters"},
|
||||
"search-multiple-owners,fix-with-unreachable-primaries"},
|
||||
{"reshard", clusterManagerCommandReshard, -1, "<host:port> or <host> <port> - separated by either colon or space",
|
||||
"from <arg>,to <arg>,slots <arg>,yes,timeout <arg>,pipeline <arg>,"
|
||||
"replace"},
|
||||
{"rebalance", clusterManagerCommandRebalance, -1,
|
||||
"<host:port> or <host> <port> - separated by either colon or space",
|
||||
"weight <node1=w1...nodeN=wN>,use-empty-masters,"
|
||||
"weight <node1=w1...nodeN=wN>,use-empty-primaries,"
|
||||
"timeout <arg>,simulate,pipeline <arg>,threshold <arg>,replace"},
|
||||
{"add-node", clusterManagerCommandAddNode, 2, "new_host:new_port existing_host:existing_port",
|
||||
"slave,master-id <arg>"},
|
||||
"replica,primaries-id <arg>"},
|
||||
{"del-node", clusterManagerCommandDeleteNode, 2, "host:port node_id", NULL},
|
||||
{"call", clusterManagerCommandCall, -2, "host:port command arg arg .. arg", "only-masters,only-replicas"},
|
||||
{"call", clusterManagerCommandCall, -2, "host:port command arg arg .. arg", "only-primaries,only-replicas"},
|
||||
{"set-timeout", clusterManagerCommandSetTimeout, 2, "host:port milliseconds", NULL},
|
||||
{"import", clusterManagerCommandImport, 1, "host:port",
|
||||
"from <arg>,from-user <arg>,from-pass <arg>,from-askpass,copy,replace"},
|
||||
@ -4055,23 +4057,23 @@ result:
|
||||
|
||||
/* Return the anti-affinity score, which is a measure of the amount of
|
||||
* violations of anti-affinity in the current cluster layout, that is, how
|
||||
* badly the masters and slaves are distributed in the different IP
|
||||
* addresses so that slaves of the same master are not in the master
|
||||
* badly the primaries and replicas are distributed in the different IP
|
||||
* addresses so that replicas of the same primary are not in the primary
|
||||
* host and are also in different hosts.
|
||||
*
|
||||
* The score is calculated as follows:
|
||||
*
|
||||
* SAME_AS_MASTER = 10000 * each slave in the same IP of its master.
|
||||
* SAME_AS_SLAVE = 1 * each slave having the same IP as another slave
|
||||
of the same master.
|
||||
* FINAL_SCORE = SAME_AS_MASTER + SAME_AS_SLAVE
|
||||
* SAME_AS_PRIMARY = 10000 * each replica in the same IP of its primary.
|
||||
* SAME_AS_REPLICA = 1 * each replica having the same IP as another replica
|
||||
of the same primary.
|
||||
* FINAL_SCORE = SAME_AS_PRIMARY + SAME_AS_REPLICA
|
||||
*
|
||||
* So a greater score means a worse anti-affinity level, while zero
|
||||
* means perfect anti-affinity.
|
||||
*
|
||||
* The anti affinity optimization will try to get a score as low as
|
||||
* possible. Since we do not want to sacrifice the fact that slaves should
|
||||
* not be in the same host as the master, we assign 10000 times the score
|
||||
* possible. Since we do not want to sacrifice the fact that replicas should
|
||||
* not be in the same host as the primary, we assign 10000 times the score
|
||||
* to this violation, so that we'll optimize for the second factor only
|
||||
* if it does not impact the first one.
|
||||
*
|
||||
@ -4079,9 +4081,9 @@ result:
|
||||
* each IP, while ip_count is the total number of IPs in the configuration.
|
||||
*
|
||||
* The function returns the above score, and the list of
|
||||
* offending slaves can be stored into the 'offending' argument,
|
||||
* offending replicas can be stored into the 'offending' argument,
|
||||
* so that the optimizer can try changing the configuration of the
|
||||
* slaves violating the anti-affinity goals. */
|
||||
* replicas violating the anti-affinity goals. */
|
||||
static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes,
|
||||
int ip_count,
|
||||
clusterManagerNode ***offending,
|
||||
@ -4094,7 +4096,7 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes,
|
||||
offending_p = *offending;
|
||||
}
|
||||
/* For each set of nodes in the same host, split by
|
||||
* related nodes (masters and slaves which are involved in
|
||||
* related nodes (primaries and replicas which are involved in
|
||||
* replication of each other) */
|
||||
for (i = 0; i < ip_count; i++) {
|
||||
clusterManagerNodeArray *node_array = &(ipnodes[i]);
|
||||
@ -4105,7 +4107,7 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes,
|
||||
if (node == NULL) continue;
|
||||
if (!ip) ip = node->ip;
|
||||
sds types;
|
||||
/* We always use the Master ID as key. */
|
||||
/* We always use the Primary ID as key. */
|
||||
sds key = (!node->replicate ? node->name : node->replicate);
|
||||
assert(key != NULL);
|
||||
dictEntry *entry = dictFind(related, key);
|
||||
@ -4113,7 +4115,7 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes,
|
||||
types = sdsdup((sds)dictGetVal(entry));
|
||||
else
|
||||
types = sdsempty();
|
||||
/* Master type 'm' is always set as the first character of the
|
||||
/* Primary type 'm' is always set as the first character of the
|
||||
* types string. */
|
||||
if (node->replicate)
|
||||
types = sdscat(types, "s");
|
||||
@ -4176,8 +4178,8 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes,
|
||||
}
|
||||
score = clusterManagerGetAntiAffinityScore(ipnodes, ip_count, &offenders, &offending_len);
|
||||
if (score == 0 || offending_len == 0) break; // Optimal anti affinity reached
|
||||
/* We'll try to randomly swap a slave's assigned master causing
|
||||
* an affinity problem with another random slave, to see if we
|
||||
/* We'll try to randomly swap a replica's assigned primary causing
|
||||
* an affinity problem with another random replica, to see if we
|
||||
* can improve the affinity. */
|
||||
int rand_idx = rand() % offending_len;
|
||||
clusterManagerNode *first = offenders[rand_idx], *second = NULL;
|
||||
@ -4196,16 +4198,16 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes,
|
||||
}
|
||||
rand_idx = rand() % other_replicas_count;
|
||||
second = other_replicas[rand_idx];
|
||||
char *first_master = first->replicate, *second_master = second->replicate;
|
||||
first->replicate = second_master, first->dirty = 1;
|
||||
second->replicate = first_master, second->dirty = 1;
|
||||
char *first_primary = first->replicate, *second_primary = second->replicate;
|
||||
first->replicate = second_primary, first->dirty = 1;
|
||||
second->replicate = first_primary, second->dirty = 1;
|
||||
int new_score = clusterManagerGetAntiAffinityScore(ipnodes, ip_count, NULL, NULL);
|
||||
/* If the change actually makes things worse, revert. Otherwise
|
||||
* leave as it is because the best solution may need a few
|
||||
* combined swaps. */
|
||||
if (new_score > score) {
|
||||
first->replicate = first_master;
|
||||
second->replicate = second_master;
|
||||
first->replicate = first_primary;
|
||||
second->replicate = second_primary;
|
||||
}
|
||||
zfree(other_replicas);
|
||||
maxiter--;
|
||||
@ -4376,8 +4378,8 @@ static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent) {
|
||||
int i;
|
||||
for (i = 0; i < indent; i++) spaces = sdscat(spaces, " ");
|
||||
if (indent) info = sdscat(info, spaces);
|
||||
int is_master = !(node->flags & CLUSTER_MANAGER_FLAG_SLAVE);
|
||||
char *role = (is_master ? "M" : "S");
|
||||
int is_primary = !(node->flags & CLUSTER_MANAGER_FLAG_REPLICA);
|
||||
char *role = (is_primary ? "M" : "S");
|
||||
sds slots = NULL;
|
||||
if (node->dirty && node->replicate != NULL)
|
||||
info = sdscatfmt(info, "S: %S %s:%u", node->name, node->ip, node->port);
|
||||
@ -4413,14 +4415,14 @@ static void clusterManagerShowNodes(void) {
|
||||
}
|
||||
|
||||
static void clusterManagerShowClusterInfo(void) {
|
||||
int masters = 0;
|
||||
int primaries = 0;
|
||||
long long keys = 0;
|
||||
listIter li;
|
||||
listNode *ln;
|
||||
listRewind(cluster_manager.nodes, &li);
|
||||
while ((ln = listNext(&li)) != NULL) {
|
||||
clusterManagerNode *node = ln->value;
|
||||
if (!(node->flags & CLUSTER_MANAGER_FLAG_SLAVE)) {
|
||||
if (!(node->flags & CLUSTER_MANAGER_FLAG_REPLICA)) {
|
||||
if (!node->name) continue;
|
||||
int replicas = 0;
|
||||
long long dbsize = -1;
|
||||
@ -4432,7 +4434,7 @@ static void clusterManagerShowClusterInfo(void) {
|
||||
listRewind(cluster_manager.nodes, &ri);
|
||||
while ((rn = listNext(&ri)) != NULL) {
|
||||
clusterManagerNode *n = rn->value;
|
||||
if (n == node || !(n->flags & CLUSTER_MANAGER_FLAG_SLAVE)) continue;
|
||||
if (n == node || !(n->flags & CLUSTER_MANAGER_FLAG_REPLICA)) continue;
|
||||
if (n->replicate && !strcmp(n->replicate, node->name)) replicas++;
|
||||
}
|
||||
redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "DBSIZE");
|
||||
@ -4447,11 +4449,11 @@ static void clusterManagerShowClusterInfo(void) {
|
||||
if (reply != NULL) freeReplyObject(reply);
|
||||
printf("%s:%d (%s...) -> %lld keys | %d slots | %d slaves.\n", node->ip, node->port, name, dbsize,
|
||||
node->slots_count, replicas);
|
||||
masters++;
|
||||
primaries++;
|
||||
keys += dbsize;
|
||||
}
|
||||
}
|
||||
clusterManagerLogOk("[OK] %lld keys in %d masters.\n", keys, masters);
|
||||
clusterManagerLogOk("[OK] %lld keys in %d masters.\n", keys, primaries);
|
||||
float keys_per_slot = keys / (float)CLUSTER_MANAGER_SLOTS;
|
||||
printf("%.2f keys per slot on average.\n", keys_per_slot);
|
||||
}
|
||||
@ -5013,7 +5015,7 @@ clusterManagerMoveSlot(clusterManagerNode *source, clusterManagerNode *target, i
|
||||
while ((ln = listNext(&li)) != NULL) {
|
||||
clusterManagerNode *n = ln->value;
|
||||
if (n == target || n == source) continue; /* already done */
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_REPLICA) continue;
|
||||
success = clusterManagerSetSlot(n, target, slot, "node", err);
|
||||
if (!success) return 0;
|
||||
}
|
||||
@ -5026,8 +5028,8 @@ clusterManagerMoveSlot(clusterManagerNode *source, clusterManagerNode *target, i
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Flush the dirty node configuration by calling replicate for slaves or
|
||||
* adding the slots defined in the masters. */
|
||||
/* Flush the dirty node configuration by calling replicate for replicas or
|
||||
* adding the slots defined in the primaries. */
|
||||
static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) {
|
||||
if (!node->dirty) return 0;
|
||||
redisReply *reply = NULL;
|
||||
@ -5042,7 +5044,7 @@ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) {
|
||||
}
|
||||
success = 0;
|
||||
/* If the cluster has not already joined, it is possible that
|
||||
* the slave does not know the master node yet. So on errors
|
||||
* the replica does not know the primary node yet. So on errors
|
||||
* we return ASAP leaving the dirty flag set, to flush the
|
||||
* config later. */
|
||||
goto cleanup;
|
||||
@ -5131,7 +5133,7 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char *
|
||||
*p = '\0';
|
||||
line = lines;
|
||||
lines = p + 1;
|
||||
char *name = NULL, *addr = NULL, *flags = NULL, *master_id = NULL, *ping_sent = NULL, *ping_recv = NULL,
|
||||
char *name = NULL, *addr = NULL, *flags = NULL, *primary_id = NULL, *ping_sent = NULL, *ping_recv = NULL,
|
||||
*config_epoch = NULL, *link_status = NULL;
|
||||
UNUSED(link_status);
|
||||
int i = 0;
|
||||
@ -5144,7 +5146,7 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char *
|
||||
case 0: name = token; break;
|
||||
case 1: addr = token; break;
|
||||
case 2: flags = token; break;
|
||||
case 3: master_id = token; break;
|
||||
case 3: primary_id = token; break;
|
||||
case 4: ping_sent = token; break;
|
||||
case 5: ping_recv = token; break;
|
||||
case 6: config_epoch = token; break;
|
||||
@ -5265,10 +5267,10 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char *
|
||||
else if (strcmp(flag, "fail") == 0)
|
||||
currentNode->flags |= CLUSTER_MANAGER_FLAG_FAIL;
|
||||
else if (strcmp(flag, "slave") == 0) {
|
||||
currentNode->flags |= CLUSTER_MANAGER_FLAG_SLAVE;
|
||||
if (master_id != NULL) {
|
||||
currentNode->flags |= CLUSTER_MANAGER_FLAG_REPLICA;
|
||||
if (primary_id != NULL) {
|
||||
if (currentNode->replicate) sdsfree(currentNode->replicate);
|
||||
currentNode->replicate = sdsnew(master_id);
|
||||
currentNode->replicate = sdsnew(primary_id);
|
||||
}
|
||||
}
|
||||
listAddNodeTail(currentNode->flags_str, flag);
|
||||
@ -5338,7 +5340,7 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node) {
|
||||
}
|
||||
continue;
|
||||
invalid_friend:
|
||||
if (!(friend->flags & CLUSTER_MANAGER_FLAG_SLAVE)) cluster_manager.unreachable_masters++;
|
||||
if (!(friend->flags & CLUSTER_MANAGER_FLAG_REPLICA)) cluster_manager.unreachable_primaries++;
|
||||
freeClusterManagerNode(friend);
|
||||
}
|
||||
listRelease(node->friends);
|
||||
@ -5349,13 +5351,13 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node) {
|
||||
while ((ln = listNext(&li)) != NULL) {
|
||||
clusterManagerNode *n = ln->value;
|
||||
if (n->replicate != NULL) {
|
||||
clusterManagerNode *master = clusterManagerNodeByName(n->replicate);
|
||||
if (master == NULL) {
|
||||
clusterManagerNode *primary = clusterManagerNodeByName(n->replicate);
|
||||
if (primary == NULL) {
|
||||
clusterManagerLogWarn("*** WARNING: %s:%d claims to be "
|
||||
"slave of unknown node ID %s.\n",
|
||||
n->ip, n->port, n->replicate);
|
||||
} else
|
||||
master->replicas_count++;
|
||||
primary->replicas_count++;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
@ -5637,7 +5639,7 @@ static clusterManagerNode *clusterManagerGetNodeWithMostKeysInSlot(list *nodes,
|
||||
if (err) *err = NULL;
|
||||
while ((ln = listNext(&li)) != NULL) {
|
||||
clusterManagerNode *n = ln->value;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) continue;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_REPLICA || n->replicate) continue;
|
||||
redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER COUNTKEYSINSLOT %d", slot);
|
||||
int success = clusterManagerCheckRedisReply(n, r, err);
|
||||
if (success) {
|
||||
@ -5657,8 +5659,8 @@ static clusterManagerNode *clusterManagerGetNodeWithMostKeysInSlot(list *nodes,
|
||||
return node;
|
||||
}
|
||||
|
||||
/* This function returns the master that has the least number of replicas
|
||||
* in the cluster. If there are multiple masters with the same smaller
|
||||
/* This function returns the primary that has the least number of replicas
|
||||
* in the cluster. If there are multiple primaries with the same smallest
|
||||
* number of replicas, one at random is returned. */
|
||||
|
||||
static clusterManagerNode *clusterManagerNodeWithLeastReplicas(void) {
|
||||
@ -5669,7 +5671,7 @@ static clusterManagerNode *clusterManagerNodeWithLeastReplicas(void) {
|
||||
listRewind(cluster_manager.nodes, &li);
|
||||
while ((ln = listNext(&li)) != NULL) {
|
||||
clusterManagerNode *n = ln->value;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_REPLICA) continue;
|
||||
if (node == NULL || n->replicas_count < lowest_count) {
|
||||
node = n;
|
||||
lowest_count = n->replicas_count;
|
||||
@ -5678,27 +5680,27 @@ static clusterManagerNode *clusterManagerNodeWithLeastReplicas(void) {
|
||||
return node;
|
||||
}
|
||||
|
||||
/* This function returns a random master node, return NULL if none */
|
||||
/* This function returns a random primary node, return NULL if none */
|
||||
|
||||
static clusterManagerNode *clusterManagerNodeMasterRandom(void) {
|
||||
int master_count = 0;
|
||||
static clusterManagerNode *clusterManagerNodePrimaryRandom(void) {
|
||||
int primary_count = 0;
|
||||
int idx;
|
||||
listIter li;
|
||||
listNode *ln;
|
||||
listRewind(cluster_manager.nodes, &li);
|
||||
while ((ln = listNext(&li)) != NULL) {
|
||||
clusterManagerNode *n = ln->value;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue;
|
||||
master_count++;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_REPLICA) continue;
|
||||
primary_count++;
|
||||
}
|
||||
|
||||
assert(master_count > 0);
|
||||
assert(primary_count > 0);
|
||||
srand(time(NULL));
|
||||
idx = rand() % master_count;
|
||||
idx = rand() % primary_count;
|
||||
listRewind(cluster_manager.nodes, &li);
|
||||
while ((ln = listNext(&li)) != NULL) {
|
||||
clusterManagerNode *n = ln->value;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_REPLICA) continue;
|
||||
if (!idx--) {
|
||||
return n;
|
||||
}
|
||||
@ -5708,15 +5710,15 @@ static clusterManagerNode *clusterManagerNodeMasterRandom(void) {
|
||||
}
|
||||
|
||||
static int clusterManagerFixSlotsCoverage(char *all_slots) {
|
||||
int force_fix = config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_FIX_WITH_UNREACHABLE_MASTERS;
|
||||
int force_fix = config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_FIX_WITH_UNREACHABLE_PRIMARIES;
|
||||
|
||||
if (cluster_manager.unreachable_masters > 0 && !force_fix) {
|
||||
if (cluster_manager.unreachable_primaries > 0 && !force_fix) {
|
||||
clusterManagerLogWarn(
|
||||
"*** Fixing slots coverage with %d unreachable masters is dangerous: valkey-cli will assume that slots "
|
||||
"about masters that are not reachable are not covered, and will try to reassign them to the reachable "
|
||||
"nodes. This can cause data loss and is rarely what you want to do. If you really want to proceed use the "
|
||||
"--cluster-fix-with-unreachable-masters option.\n",
|
||||
cluster_manager.unreachable_masters);
|
||||
cluster_manager.unreachable_primaries);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
@ -5734,7 +5736,7 @@ static int clusterManagerFixSlotsCoverage(char *all_slots) {
|
||||
listRewind(cluster_manager.nodes, &li);
|
||||
while ((ln = listNext(&li)) != NULL) {
|
||||
clusterManagerNode *n = ln->value;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) continue;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_REPLICA || n->replicate) continue;
|
||||
redisReply *reply = CLUSTER_MANAGER_COMMAND(n, "CLUSTER GETKEYSINSLOT %d %d", i, 1);
|
||||
if (!clusterManagerCheckRedisReply(n, reply, NULL)) {
|
||||
fixed = -1;
|
||||
@ -5789,7 +5791,7 @@ static int clusterManagerFixSlotsCoverage(char *all_slots) {
|
||||
while ((ln = listNext(&li)) != NULL) {
|
||||
sds slot = ln->value;
|
||||
int s = atoi(slot);
|
||||
clusterManagerNode *n = clusterManagerNodeMasterRandom();
|
||||
clusterManagerNode *n = clusterManagerNodePrimaryRandom();
|
||||
clusterManagerLogInfo(">>> Covering slot %s with %s:%d\n", slot, n->ip, n->port);
|
||||
if (!clusterManagerSetSlotOwner(n, s, 0)) {
|
||||
fixed = -1;
|
||||
@ -5902,15 +5904,15 @@ cleanup:
|
||||
* more nodes. This function fixes this condition by migrating keys where
|
||||
* it seems more sensible. */
|
||||
static int clusterManagerFixOpenSlot(int slot) {
|
||||
int force_fix = config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_FIX_WITH_UNREACHABLE_MASTERS;
|
||||
int force_fix = config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_FIX_WITH_UNREACHABLE_PRIMARIES;
|
||||
|
||||
if (cluster_manager.unreachable_masters > 0 && !force_fix) {
|
||||
if (cluster_manager.unreachable_primaries > 0 && !force_fix) {
|
||||
clusterManagerLogWarn(
|
||||
"*** Fixing open slots with %d unreachable masters is dangerous: valkey-cli will assume that slots about "
|
||||
"masters that are not reachable are not covered, and will try to reassign them to the reachable nodes. "
|
||||
"This can cause data loss and is rarely what you want to do. If you really want to proceed use the "
|
||||
"--cluster-fix-with-unreachable-masters option.\n",
|
||||
cluster_manager.unreachable_masters);
|
||||
cluster_manager.unreachable_primaries);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
@ -5934,7 +5936,7 @@ static int clusterManagerFixOpenSlot(int slot) {
|
||||
listRewind(cluster_manager.nodes, &li);
|
||||
while ((ln = listNext(&li)) != NULL) {
|
||||
clusterManagerNode *n = ln->value;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_REPLICA) continue;
|
||||
if (n->slots[slot]) {
|
||||
listAddNodeTail(owners, n);
|
||||
} else {
|
||||
@ -5961,7 +5963,7 @@ static int clusterManagerFixOpenSlot(int slot) {
|
||||
listRewind(cluster_manager.nodes, &li);
|
||||
while ((ln = listNext(&li)) != NULL) {
|
||||
clusterManagerNode *n = ln->value;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_REPLICA) continue;
|
||||
int is_migrating = 0, is_importing = 0;
|
||||
if (n->migrating) {
|
||||
for (int i = 0; i < n->migrating_count; i += 2) {
|
||||
@ -6110,7 +6112,7 @@ static int clusterManagerFixOpenSlot(int slot) {
|
||||
while ((ln = listNext(&li)) != NULL) {
|
||||
clusterManagerNode *n = ln->value;
|
||||
if (n == owner) continue;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_REPLICA) continue;
|
||||
success = clusterManagerSetSlot(n, owner, slot, "NODE", NULL);
|
||||
if (!success) goto cleanup;
|
||||
}
|
||||
@ -6238,13 +6240,13 @@ static int clusterManagerFixMultipleSlotOwners(int slot, list *owners) {
|
||||
listIter li;
|
||||
listNode *ln;
|
||||
listRewind(cluster_manager.nodes, &li);
|
||||
/* Update configuration in all the other master nodes by assigning the slot
|
||||
/* Update configuration in all the other primary nodes by assigning the slot
|
||||
* itself to the new owner, and by eventually migrating keys if the node
|
||||
* has keys for the slot. */
|
||||
while ((ln = listNext(&li)) != NULL) {
|
||||
clusterManagerNode *n = ln->value;
|
||||
if (n == owner) continue;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_REPLICA) continue;
|
||||
int count = clusterManagerCountKeysInSlot(n, slot);
|
||||
success = (count >= 0);
|
||||
if (!success) break;
|
||||
@ -6379,7 +6381,7 @@ static int clusterManagerCheckCluster(int quiet) {
|
||||
list *owners = listCreate();
|
||||
while ((ln = listNext(&li)) != NULL) {
|
||||
clusterManagerNode *n = ln->value;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_REPLICA) continue;
|
||||
if (n->slots[slot])
|
||||
listAddNodeTail(owners, n);
|
||||
else {
|
||||
@ -6423,7 +6425,7 @@ static clusterManagerNode *clusterNodeForResharding(char *id, clusterManagerNode
|
||||
"or not a master, please retry.\n";
|
||||
node = clusterManagerNodeByName(id);
|
||||
*raise_err = 0;
|
||||
if (!node || node->flags & CLUSTER_MANAGER_FLAG_SLAVE) {
|
||||
if (!node || node->flags & CLUSTER_MANAGER_FLAG_REPLICA) {
|
||||
clusterManagerLogErr(invalid_node_msg, id);
|
||||
*raise_err = 1;
|
||||
return NULL;
|
||||
@ -6639,8 +6641,8 @@ static int clusterManagerCommandCreate(int argc, char **argv) {
|
||||
}
|
||||
int node_len = cluster_manager.nodes->len;
|
||||
int replicas = config.cluster_manager_command.replicas;
|
||||
int masters_count = CLUSTER_MANAGER_MASTERS_COUNT(node_len, replicas);
|
||||
if (masters_count < 3) {
|
||||
int primaries_count = CLUSTER_MANAGER_PRIMARIES_COUNT(node_len, replicas);
|
||||
if (primaries_count < 3) {
|
||||
clusterManagerLogErr("*** ERROR: Invalid configuration for cluster creation.\n"
|
||||
"*** Valkey Cluster requires at least 3 master nodes.\n"
|
||||
"*** This is not possible with %d nodes and %d replicas per node.",
|
||||
@ -6685,24 +6687,24 @@ static int clusterManagerCommandCreate(int argc, char **argv) {
|
||||
}
|
||||
}
|
||||
}
|
||||
clusterManagerNode **masters = interleaved;
|
||||
interleaved += masters_count;
|
||||
interleaved_len -= masters_count;
|
||||
float slots_per_node = CLUSTER_MANAGER_SLOTS / (float)masters_count;
|
||||
clusterManagerNode **primaries = interleaved;
|
||||
interleaved += primaries_count;
|
||||
interleaved_len -= primaries_count;
|
||||
float slots_per_node = CLUSTER_MANAGER_SLOTS / (float)primaries_count;
|
||||
long first = 0;
|
||||
float cursor = 0.0f;
|
||||
for (i = 0; i < masters_count; i++) {
|
||||
clusterManagerNode *master = masters[i];
|
||||
for (i = 0; i < primaries_count; i++) {
|
||||
clusterManagerNode *primary = primaries[i];
|
||||
long last = lround(cursor + slots_per_node - 1);
|
||||
if (last > CLUSTER_MANAGER_SLOTS || i == (masters_count - 1)) last = CLUSTER_MANAGER_SLOTS - 1;
|
||||
if (last > CLUSTER_MANAGER_SLOTS || i == (primaries_count - 1)) last = CLUSTER_MANAGER_SLOTS - 1;
|
||||
if (last < first) last = first;
|
||||
printf("Master[%d] -> Slots %ld - %ld\n", i, first, last);
|
||||
master->slots_count = 0;
|
||||
primary->slots_count = 0;
|
||||
for (j = first; j <= last; j++) {
|
||||
master->slots[j] = 1;
|
||||
master->slots_count++;
|
||||
primary->slots[j] = 1;
|
||||
primary->slots_count++;
|
||||
}
|
||||
master->dirty = 1;
|
||||
primary->dirty = 1;
|
||||
first = last + 1;
|
||||
cursor += slots_per_node;
|
||||
}
|
||||
@ -6714,17 +6716,17 @@ static int clusterManagerCommandCreate(int argc, char **argv) {
|
||||
interleaved[interleaved_len - 1] = first_node;
|
||||
int assign_unused = 0, available_count = interleaved_len;
|
||||
assign_replicas:
|
||||
for (i = 0; i < masters_count; i++) {
|
||||
clusterManagerNode *master = masters[i];
|
||||
for (i = 0; i < primaries_count; i++) {
|
||||
clusterManagerNode *primary = primaries[i];
|
||||
int assigned_replicas = 0;
|
||||
while (assigned_replicas < replicas) {
|
||||
if (available_count == 0) break;
|
||||
clusterManagerNode *found = NULL, *slave = NULL;
|
||||
clusterManagerNode *found = NULL, *replica = NULL;
|
||||
int firstNodeIdx = -1;
|
||||
for (j = 0; j < interleaved_len; j++) {
|
||||
clusterManagerNode *n = interleaved[j];
|
||||
if (n == NULL) continue;
|
||||
if (strcmp(n->ip, master->ip)) {
|
||||
if (strcmp(n->ip, primary->ip)) {
|
||||
found = n;
|
||||
interleaved[j] = NULL;
|
||||
break;
|
||||
@ -6732,21 +6734,21 @@ assign_replicas:
|
||||
if (firstNodeIdx < 0) firstNodeIdx = j;
|
||||
}
|
||||
if (found)
|
||||
slave = found;
|
||||
replica = found;
|
||||
else if (firstNodeIdx >= 0) {
|
||||
slave = interleaved[firstNodeIdx];
|
||||
replica = interleaved[firstNodeIdx];
|
||||
interleaved_len -= (firstNodeIdx + 1);
|
||||
interleaved += (firstNodeIdx + 1);
|
||||
}
|
||||
if (slave != NULL) {
|
||||
if (replica != NULL) {
|
||||
assigned_replicas++;
|
||||
available_count--;
|
||||
if (slave->replicate) sdsfree(slave->replicate);
|
||||
slave->replicate = sdsnew(master->name);
|
||||
slave->dirty = 1;
|
||||
if (replica->replicate) sdsfree(replica->replicate);
|
||||
replica->replicate = sdsnew(primary->name);
|
||||
replica->dirty = 1;
|
||||
} else
|
||||
break;
|
||||
printf("Adding replica %s:%d to %s:%d\n", slave->ip, slave->port, master->ip, master->port);
|
||||
printf("Adding replica %s:%d to %s:%d\n", replica->ip, replica->port, primary->ip, primary->port);
|
||||
if (assign_unused) break;
|
||||
}
|
||||
}
|
||||
@ -6874,7 +6876,7 @@ assign_replicas:
|
||||
}
|
||||
cleanup:
|
||||
/* Free everything */
|
||||
zfree(masters);
|
||||
zfree(primaries);
|
||||
zfree(ips);
|
||||
for (i = 0; i < node_len; i++) {
|
||||
clusterManagerNodeArray *node_array = ip_nodes + i;
|
||||
@ -6899,21 +6901,21 @@ static int clusterManagerCommandAddNode(int argc, char **argv) {
|
||||
if (!clusterManagerLoadInfoFromNode(refnode)) return 0;
|
||||
if (!clusterManagerCheckCluster(0)) return 0;
|
||||
|
||||
/* If --cluster-master-id was specified, try to resolve it now so that we
|
||||
/* If --cluster-primary-id was specified, try to resolve it now so that we
|
||||
* abort before starting with the node configuration. */
|
||||
clusterManagerNode *master_node = NULL;
|
||||
if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_SLAVE) {
|
||||
char *master_id = config.cluster_manager_command.master_id;
|
||||
if (master_id != NULL) {
|
||||
master_node = clusterManagerNodeByName(master_id);
|
||||
if (master_node == NULL) {
|
||||
clusterManagerLogErr("[ERR] No such master ID %s\n", master_id);
|
||||
clusterManagerNode *primary_node = NULL;
|
||||
if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_REPLICA) {
|
||||
char *primary_id = config.cluster_manager_command.primary_id;
|
||||
if (primary_id != NULL) {
|
||||
primary_node = clusterManagerNodeByName(primary_id);
|
||||
if (primary_node == NULL) {
|
||||
clusterManagerLogErr("[ERR] No such master ID %s\n", primary_id);
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
master_node = clusterManagerNodeWithLeastReplicas();
|
||||
assert(master_node != NULL);
|
||||
printf("Automatically selected master %s:%d\n", master_node->ip, master_node->port);
|
||||
primary_node = clusterManagerNodeWithLeastReplicas();
|
||||
assert(primary_node != NULL);
|
||||
printf("Automatically selected master %s:%d\n", primary_node->ip, primary_node->port);
|
||||
}
|
||||
}
|
||||
|
||||
@ -6948,7 +6950,7 @@ static int clusterManagerCommandAddNode(int argc, char **argv) {
|
||||
listAddNodeTail(cluster_manager.nodes, new_node);
|
||||
added = 1;
|
||||
|
||||
if (!master_node) {
|
||||
if (!primary_node) {
|
||||
/* Send functions to the new node, if new node is a replica it will get the functions from its primary. */
|
||||
clusterManagerLogInfo(">>> Getting functions from cluster\n");
|
||||
reply = CLUSTER_MANAGER_COMMAND(refnode, "FUNCTION DUMP");
|
||||
@ -7014,13 +7016,13 @@ static int clusterManagerCommandAddNode(int argc, char **argv) {
|
||||
|
||||
if (!(success = clusterManagerCheckRedisReply(new_node, reply, NULL))) goto cleanup;
|
||||
|
||||
/* Additional configuration is needed if the node is added as a slave. */
|
||||
if (master_node) {
|
||||
/* Additional configuration is needed if the node is added as a replica. */
|
||||
if (primary_node) {
|
||||
sleep(1);
|
||||
clusterManagerWaitForClusterJoin();
|
||||
clusterManagerLogInfo(">>> Configure node as replica of %s:%d.\n", master_node->ip, master_node->port);
|
||||
clusterManagerLogInfo(">>> Configure node as replica of %s:%d.\n", primary_node->ip, primary_node->port);
|
||||
freeReplyObject(reply);
|
||||
reply = CLUSTER_MANAGER_COMMAND(new_node, "CLUSTER REPLICATE %s", master_node->name);
|
||||
reply = CLUSTER_MANAGER_COMMAND(new_node, "CLUSTER REPLICATE %s", primary_node->name);
|
||||
if (!(success = clusterManagerCheckRedisReply(new_node, reply, NULL))) goto cleanup;
|
||||
}
|
||||
clusterManagerLogOk("[OK] New node added correctly.\n");
|
||||
@ -7072,11 +7074,11 @@ static int clusterManagerCommandDeleteNode(int argc, char **argv) {
|
||||
clusterManagerNode *n = ln->value;
|
||||
if (n == node) continue;
|
||||
if (n->replicate && !strcasecmp(n->replicate, node_id)) {
|
||||
// Reconfigure the slave to replicate with some other node
|
||||
clusterManagerNode *master = clusterManagerNodeWithLeastReplicas();
|
||||
assert(master != NULL);
|
||||
clusterManagerLogInfo(">>> %s:%d as replica of %s:%d\n", n->ip, n->port, master->ip, master->port);
|
||||
redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER REPLICATE %s", master->name);
|
||||
// Reconfigure the replica to replicate with some other node
|
||||
clusterManagerNode *primary = clusterManagerNodeWithLeastReplicas();
|
||||
assert(primary != NULL);
|
||||
clusterManagerLogInfo(">>> %s:%d as replica of %s:%d\n", n->ip, n->port, primary->ip, primary->port);
|
||||
redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER REPLICATE %s", primary->name);
|
||||
success = clusterManagerCheckRedisReply(n, r, NULL);
|
||||
if (r) freeReplyObject(r);
|
||||
if (!success) return 0;
|
||||
@ -7253,7 +7255,7 @@ static int clusterManagerCommandReshard(int argc, char **argv) {
|
||||
listRewind(cluster_manager.nodes, &li);
|
||||
while ((ln = listNext(&li)) != NULL) {
|
||||
clusterManagerNode *n = ln->value;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) continue;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_REPLICA || n->replicate) continue;
|
||||
if (!sdscmp(n->name, target->name)) continue;
|
||||
listAddNodeTail(sources, n);
|
||||
}
|
||||
@ -7345,7 +7347,7 @@ static int clusterManagerCommandRebalance(int argc, char **argv) {
|
||||
}
|
||||
float total_weight = 0;
|
||||
int nodes_involved = 0;
|
||||
int use_empty = config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER;
|
||||
int use_empty = config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_EMPTY_PRIMARY;
|
||||
involved = listCreate();
|
||||
listIter li;
|
||||
listNode *ln;
|
||||
@ -7353,7 +7355,7 @@ static int clusterManagerCommandRebalance(int argc, char **argv) {
|
||||
/* Compute the total cluster weight. */
|
||||
while ((ln = listNext(&li)) != NULL) {
|
||||
clusterManagerNode *n = ln->value;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) continue;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_REPLICA || n->replicate) continue;
|
||||
if (!use_empty && n->slots_count == 0) {
|
||||
n->weight = 0;
|
||||
continue;
|
||||
@ -7623,7 +7625,7 @@ static int clusterManagerCommandImport(int argc, char **argv) {
|
||||
listRewind(cluster_manager.nodes, &li);
|
||||
while ((ln = listNext(&li)) != NULL) {
|
||||
clusterManagerNode *n = ln->value;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue;
|
||||
if (n->flags & CLUSTER_MANAGER_FLAG_REPLICA) continue;
|
||||
if (n->slots_count == 0) continue;
|
||||
if (n->slots[i]) {
|
||||
slots_map[i] = n;
|
||||
@ -7715,10 +7717,10 @@ static int clusterManagerCommandCall(int argc, char **argv) {
|
||||
listRewind(cluster_manager.nodes, &li);
|
||||
while ((ln = listNext(&li)) != NULL) {
|
||||
clusterManagerNode *n = ln->value;
|
||||
if ((config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_MASTERS_ONLY) && (n->replicate != NULL))
|
||||
continue; // continue if node is slave
|
||||
if ((config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_SLAVES_ONLY) && (n->replicate == NULL))
|
||||
continue; // continue if node is master
|
||||
if ((config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_PRIMARIES_ONLY) && (n->replicate != NULL))
|
||||
continue; // continue if node is replica
|
||||
if ((config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_REPLICAS_ONLY) && (n->replicate == NULL))
|
||||
continue; // continue if node is primary
|
||||
if (!n->context && !clusterManagerNodeConnect(n)) continue;
|
||||
redisReply *reply = NULL;
|
||||
redisAppendCommandArgv(n->context, argc, (const char **)argv, argvlen);
|
||||
@ -8069,7 +8071,7 @@ static void latencyDistMode(void) {
|
||||
}
|
||||
|
||||
/*------------------------------------------------------------------------------
|
||||
* Slave mode
|
||||
* Replica mode
|
||||
*--------------------------------------------------------------------------- */
|
||||
|
||||
#define RDB_EOF_MARK_SIZE 40
|
||||
@ -8108,7 +8110,7 @@ static ssize_t readConn(redisContext *c, char *buf, size_t len) {
|
||||
}
|
||||
|
||||
/* Sends SYNC and reads the number of bytes in the payload. Used both by
|
||||
* slaveMode() and getRDB().
|
||||
* replicaMode() and getRDB().
|
||||
*
|
||||
* send_sync if 1 means we will explicitly send SYNC command. If 0 means
|
||||
* we will not send SYNC command, and will instead send the command that is in c->obuf.
|
||||
@ -8191,7 +8193,7 @@ unsigned long long sendSync(redisContext *c, int send_sync, char *out_eof, int *
|
||||
return strtoull(buf + 1, NULL, 10);
|
||||
}
|
||||
|
||||
static void slaveMode(int send_sync) {
|
||||
static void replicaMode(int send_sync) {
|
||||
static char eofmark[RDB_EOF_MARK_SIZE];
|
||||
static char lastbytes[RDB_EOF_MARK_SIZE];
|
||||
static int usemark = 0;
|
||||
@ -8248,7 +8250,7 @@ static void slaveMode(int send_sync) {
|
||||
if (usemark) {
|
||||
unsigned long long offset = ULLONG_MAX - payload;
|
||||
fprintf(stderr, "%s done after %llu bytes. Logging commands from master.\n", info, offset);
|
||||
/* put the slave online */
|
||||
/* put the replica online */
|
||||
sleep(1);
|
||||
sendReplconf("ACK", "0");
|
||||
} else
|
||||
@ -9456,7 +9458,7 @@ int main(int argc, char **argv) {
|
||||
config.lru_test_sample_size = 0;
|
||||
config.cluster_mode = 0;
|
||||
config.cluster_send_asking = 0;
|
||||
config.slave_mode = 0;
|
||||
config.replica_mode = 0;
|
||||
config.getrdb_mode = 0;
|
||||
config.get_functions_rdb_mode = 0;
|
||||
config.stat_mode = 0;
|
||||
@ -9567,12 +9569,12 @@ int main(int argc, char **argv) {
|
||||
latencyDistMode();
|
||||
}
|
||||
|
||||
/* Slave mode */
|
||||
if (config.slave_mode) {
|
||||
/* Replica mode */
|
||||
if (config.replica_mode) {
|
||||
if (cliConnect(0) == REDIS_ERR) exit(1);
|
||||
sendCapa();
|
||||
sendReplconf("rdb-filter-only", "");
|
||||
slaveMode(1);
|
||||
replicaMode(1);
|
||||
}
|
||||
|
||||
/* Get RDB/functions mode. */
|
||||
|
@ -691,9 +691,9 @@ typedef struct ValkeyModuleReplicationInfo {
|
||||
uint64_t version; /* Not used since this structure is never passed
|
||||
from the module to the core right now. Here
|
||||
for future compatibility. */
|
||||
int master; /* true if primary, false if replica */
|
||||
char *masterhost; /* primary instance hostname for NOW_REPLICA */
|
||||
int masterport; /* primary instance port for NOW_REPLICA */
|
||||
int primary; /* true if primary, false if replica */
|
||||
char *primary_host; /* primary instance hostname for NOW_REPLICA */
|
||||
int primary_port; /* primary instance port for NOW_REPLICA */
|
||||
char *replid1; /* Main replication ID */
|
||||
char *replid2; /* Secondary replication ID */
|
||||
uint64_t repl1_offset; /* Main replication offset */
|
||||
|
@ -165,7 +165,7 @@ void roleChangeCallback(ValkeyModuleCtx *ctx, ValkeyModuleEvent e, uint64_t sub,
|
||||
ValkeyModuleReplicationInfo *ri = data;
|
||||
char *keyname = (sub == VALKEYMODULE_EVENT_REPLROLECHANGED_NOW_PRIMARY) ?
|
||||
"role-master" : "role-replica";
|
||||
LogStringEvent(ctx, keyname, ri->masterhost);
|
||||
LogStringEvent(ctx, keyname, ri->primary_host);
|
||||
}
|
||||
|
||||
void replicationChangeCallback(ValkeyModuleCtx *ctx, ValkeyModuleEvent e, uint64_t sub, void *data)
|
||||
|
142
valkey.conf
142
valkey.conf
@ -87,7 +87,7 @@
|
||||
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
bind 127.0.0.1 -::1
|
||||
|
||||
# By default, outgoing connections (from replica to master, from Sentinel to
|
||||
# By default, outgoing connections (from replica to primary, from Sentinel to
|
||||
# instances, cluster bus, etc.) are not bound to a specific local address. In
|
||||
# most cases, this means the operating system will handle that based on routing
|
||||
# and the interface through which the connection goes out.
|
||||
@ -193,7 +193,7 @@ tcp-keepalive 300
|
||||
# tls-port 6379
|
||||
|
||||
# Configure a X.509 certificate and private key to use for authenticating the
|
||||
# server to connected clients, masters or cluster peers. These files should be
|
||||
# server to connected clients, primaries or cluster peers. These files should be
|
||||
# PEM formatted.
|
||||
#
|
||||
# tls-cert-file valkey.crt
|
||||
@ -205,7 +205,7 @@ tcp-keepalive 300
|
||||
# tls-key-file-pass secret
|
||||
|
||||
# Normally the server uses the same certificate for both server functions (accepting
|
||||
# connections) and client functions (replicating from a master, establishing
|
||||
# connections) and client functions (replicating from a primary, establishing
|
||||
# cluster bus connections, etc.).
|
||||
#
|
||||
# Sometimes certificates are issued with attributes that designate them as
|
||||
@ -245,7 +245,7 @@ tcp-keepalive 300
|
||||
# tls-auth-clients optional
|
||||
|
||||
# By default, a replica does not attempt to establish a TLS connection
|
||||
# with its master.
|
||||
# with its primary.
|
||||
#
|
||||
# Use the following directive to enable TLS on replication links.
|
||||
#
|
||||
@ -482,7 +482,7 @@ rdbchecksum yes
|
||||
# no - Never perform full sanitization
|
||||
# yes - Always perform full sanitization
|
||||
# clients - Perform full sanitization only for user connections.
|
||||
# Excludes: RDB files, RESTORE commands received from the master
|
||||
# Excludes: RDB files, RESTORE commands received from the primary
|
||||
# connection, and client connections which have the
|
||||
# skip-sanitize-payload ACL flag.
|
||||
# The default should be 'clients' but since it currently affects cluster
|
||||
@ -496,13 +496,13 @@ dbfilename dump.rdb
|
||||
# Remove RDB files used by replication in instances without persistence
|
||||
# enabled. By default this option is disabled, however there are environments
|
||||
# where for regulations or other security concerns, RDB files persisted on
|
||||
# disk by masters in order to feed replicas, or stored on disk by replicas
|
||||
# disk by primaries in order to feed replicas, or stored on disk by replicas
|
||||
# in order to load them for the initial synchronization, should be deleted
|
||||
# ASAP. Note that this option ONLY WORKS in instances that have both AOF
|
||||
# and RDB persistence disabled, otherwise it is completely ignored.
|
||||
#
|
||||
# An alternative (and sometimes better) way to obtain the same effect is
|
||||
# to use diskless replication on both master and replicas instances. However
|
||||
# to use diskless replication on both primary and replica instances. However
|
||||
# in the case of replicas, diskless is not always an option.
|
||||
rdb-del-sync-files no
|
||||
|
||||
@ -526,22 +526,22 @@ dir ./
|
||||
# | (receive writes) | | (exact copy) |
|
||||
# +------------------+ +---------------+
|
||||
#
|
||||
# 1) Replication is asynchronous, but you can configure a master to
|
||||
# 1) Replication is asynchronous, but you can configure a primary to
|
||||
# stop accepting writes if it appears to be not connected with at least
|
||||
# a given number of replicas.
|
||||
# 2) Replicas are able to perform a partial resynchronization with the
|
||||
# master if the replication link is lost for a relatively small amount of
|
||||
# primary if the replication link is lost for a relatively small amount of
|
||||
# time. You may want to configure the replication backlog size (see the next
|
||||
# sections of this file) with a sensible value depending on your needs.
|
||||
# 3) Replication is automatic and does not need user intervention. After a
|
||||
# network partition replicas automatically try to reconnect to masters
|
||||
# network partition replicas automatically try to reconnect to primaries
|
||||
# and resynchronize with them.
|
||||
#
|
||||
# replicaof <masterip> <masterport>
|
||||
# replicaof <primary_ip> <primary_port>
|
||||
|
||||
# If the master is password protected (using the "requirepass" configuration
|
||||
# If the primary is password protected (using the "requirepass" configuration
|
||||
# directive below) it is possible to tell the replica to authenticate before
|
||||
# starting the replication synchronization process, otherwise the master will
|
||||
# starting the replication synchronization process, otherwise the primary will
|
||||
# refuse the replica request.
|
||||
#
|
||||
# primaryauth <primary-password>
|
||||
@ -555,9 +555,9 @@ dir ./
|
||||
# primaryuser <username>
|
||||
#
|
||||
# When primaryuser is specified, the replica will authenticate against its
|
||||
# master using the new AUTH form: AUTH <username> <password>.
|
||||
# primary using the new AUTH form: AUTH <username> <password>.
|
||||
|
||||
# When a replica loses its connection with the master, or when the replication
|
||||
# When a replica loses its connection with the primary, or when the replication
|
||||
# is still in progress, the replica can act in two different ways:
|
||||
#
|
||||
# 1) if replica-serve-stale-data is set to 'yes' (the default) the replica will
|
||||
@ -575,7 +575,7 @@ replica-serve-stale-data yes
|
||||
|
||||
# You can configure a replica instance to accept writes or not. Writing against
|
||||
# a replica instance may be useful to store some ephemeral data (because data
|
||||
# written on a replica will be easily deleted after resync with the master) but
|
||||
# written on a replica will be easily deleted after resync with the primary) but
|
||||
# may also cause problems if clients are writing to it because of a
|
||||
# misconfiguration.
|
||||
#
|
||||
@ -593,15 +593,15 @@ replica-read-only yes
|
||||
#
|
||||
# New replicas and reconnecting replicas that are not able to continue the
|
||||
# replication process just receiving differences, need to do what is called a
|
||||
# "full synchronization". An RDB file is transmitted from the master to the
|
||||
# "full synchronization". An RDB file is transmitted from the primary to the
|
||||
# replicas.
|
||||
#
|
||||
# The transmission can happen in two different ways:
|
||||
#
|
||||
# 1) Disk-backed: The master creates a new process that writes the RDB
|
||||
# 1) Disk-backed: The primary creates a new process that writes the RDB
|
||||
# file on disk. Later the file is transferred by the parent
|
||||
# process to the replicas incrementally.
|
||||
# 2) Diskless: The master creates a new process that directly writes the
|
||||
# 2) Diskless: The primary creates a new process that directly writes the
|
||||
# RDB file to replica sockets, without touching the disk at all.
|
||||
#
|
||||
# With disk-backed replication, while the RDB file is generated, more replicas
|
||||
@ -610,7 +610,7 @@ replica-read-only yes
|
||||
# once the transfer starts, new replicas arriving will be queued and a new
|
||||
# transfer will start when the current one terminates.
|
||||
#
|
||||
# When diskless replication is used, the master waits a configurable amount of
|
||||
# When diskless replication is used, the primary waits a configurable amount of
|
||||
# time (in seconds) before starting the transfer in the hope that multiple
|
||||
# replicas will arrive and the transfer can be parallelized.
|
||||
#
|
||||
@ -640,15 +640,15 @@ repl-diskless-sync-max-replicas 0
|
||||
# WARNING: Since in this setup the replica does not immediately store an RDB on
|
||||
# disk, it may cause data loss during failovers. RDB diskless load + server
|
||||
# modules not handling I/O reads may cause the server to abort in case of I/O errors
|
||||
# during the initial synchronization stage with the master.
|
||||
# during the initial synchronization stage with the primary.
|
||||
# -----------------------------------------------------------------------------
|
||||
#
|
||||
# Replica can load the RDB it reads from the replication link directly from the
|
||||
# socket, or store the RDB to a file and read that file after it was completely
|
||||
# received from the master.
|
||||
# received from the primary.
|
||||
#
|
||||
# In many cases the disk is slower than the network, and storing and loading
|
||||
# the RDB file may increase replication time (and even increase the master's
|
||||
# the RDB file may increase replication time (and even increase the primary's
|
||||
# Copy on Write memory and replica buffers).
|
||||
# However, when parsing the RDB file directly from the socket, in order to avoid
|
||||
# data loss it's only safe to flush the current dataset when the new dataset is
|
||||
@ -659,7 +659,7 @@ repl-diskless-sync-max-replicas 0
|
||||
# "swapdb" - Keep current db contents in RAM while parsing the data directly
|
||||
# from the socket. Replicas in this mode can keep serving current
|
||||
# dataset while replication is in progress, except for cases where
|
||||
# they can't recognize master as having a data set from same
|
||||
# they can't recognize the primary as having a data set from the same
|
||||
# replication history.
|
||||
# Note that this requires sufficient memory, if you don't have it,
|
||||
# you risk an OOM kill.
|
||||
@ -678,11 +678,11 @@ repl-diskless-load disabled
|
||||
#
|
||||
# 1) Bulk transfer I/O during SYNC, from the point of view of replica.
|
||||
# 2) Primary timeout from the point of view of replicas (data, pings).
|
||||
# 3) Replica timeout from the point of view of masters (REPLCONF ACK pings).
|
||||
# 3) Replica timeout from the point of view of primaries (REPLCONF ACK pings).
|
||||
#
|
||||
# It is important to make sure that this value is greater than the value
|
||||
# specified for repl-ping-replica-period otherwise a timeout will be detected
|
||||
# every time there is low traffic between the master and the replica. The default
|
||||
# every time there is low traffic between the primary and the replica. The default
|
||||
# value is 60 seconds.
|
||||
#
|
||||
# repl-timeout 60
|
||||
@ -698,7 +698,7 @@ repl-diskless-load disabled
|
||||
# be reduced but more bandwidth will be used for replication.
|
||||
#
|
||||
# By default we optimize for low latency, but in very high traffic conditions
|
||||
# or when the master and replicas are many hops away, turning this to "yes" may
|
||||
# or when the primary and replicas are many hops away, turning this to "yes" may
|
||||
# be a good idea.
|
||||
repl-disable-tcp-nodelay no
|
||||
|
||||
@ -715,13 +715,13 @@ repl-disable-tcp-nodelay no
|
||||
#
|
||||
# repl-backlog-size 1mb
|
||||
|
||||
# After a master has no connected replicas for some time, the backlog will be
|
||||
# After a primary has no connected replicas for some time, the backlog will be
|
||||
# freed. The following option configures the amount of seconds that need to
|
||||
# elapse, starting from the time the last replica disconnected, for the backlog
|
||||
# buffer to be freed.
|
||||
#
|
||||
# Note that replicas never free the backlog for timeout, since they may be
|
||||
# promoted to masters later, and should be able to correctly "partially
|
||||
# promoted to primaries later, and should be able to correctly "partially
|
||||
# resynchronize" with other replicas: hence they should always accumulate backlog.
|
||||
#
|
||||
# A value of 0 means to never release the backlog.
|
||||
@ -730,21 +730,21 @@ repl-disable-tcp-nodelay no
|
||||
|
||||
# The replica priority is an integer number published by the server in the INFO
|
||||
# output. It is used by Sentinel in order to select a replica to promote
|
||||
# into a master if the master is no longer working correctly.
|
||||
# into a primary if the primary is no longer working correctly.
|
||||
#
|
||||
# A replica with a low priority number is considered better for promotion, so
|
||||
# for instance if there are three replicas with priority 10, 100, 25 Sentinel
|
||||
# will pick the one with priority 10, that is the lowest.
|
||||
#
|
||||
# However a special priority of 0 marks the replica as not able to perform the
|
||||
# role of master, so a replica with priority of 0 will never be selected by
|
||||
# role of primary, so a replica with priority of 0 will never be selected by
|
||||
# Sentinel for promotion.
|
||||
#
|
||||
# By default the priority is 100.
|
||||
replica-priority 100
|
||||
|
||||
# The propagation error behavior controls how the server will behave when it is
|
||||
# unable to handle a command being processed in the replication stream from a master
|
||||
# unable to handle a command being processed in the replication stream from a primary
|
||||
# or processed while reading from an AOF file. Errors that occur during propagation
|
||||
# are unexpected, and can cause data inconsistency.
|
||||
#
|
||||
@ -757,7 +757,7 @@ replica-priority 100
|
||||
# propagation-error-behavior ignore
|
||||
|
||||
# Replica ignore disk write errors controls the behavior of a replica when it is
|
||||
# unable to persist a write command received from its master to disk. By default,
|
||||
# unable to persist a write command received from its primary to disk. By default,
|
||||
# this configuration is set to 'no' and will crash the replica in this condition.
|
||||
# It is not recommended to change this default.
|
||||
#
|
||||
@ -766,16 +766,16 @@ replica-priority 100
|
||||
# -----------------------------------------------------------------------------
|
||||
# By default, Sentinel includes all replicas in its reports. A replica
|
||||
# can be excluded from Sentinel's announcements. An unannounced replica
|
||||
# will be ignored by the 'sentinel replicas <master>' command and won't be
|
||||
# will be ignored by the 'sentinel replicas <primary>' command and won't be
|
||||
# exposed to Sentinel's clients.
|
||||
#
|
||||
# This option does not change the behavior of replica-priority. Even with
|
||||
# replica-announced set to 'no', the replica can be promoted to master. To
|
||||
# replica-announced set to 'no', the replica can be promoted to primary. To
|
||||
# prevent this behavior, set replica-priority to 0.
|
||||
#
|
||||
# replica-announced yes
|
||||
|
||||
# It is possible for a master to stop accepting writes if there are less than
|
||||
# It is possible for a primary to stop accepting writes if there are less than
|
||||
# N replicas connected, having a lag less than or equal to M seconds.
|
||||
#
|
||||
# The N replicas need to be in "online" state.
|
||||
@ -797,18 +797,18 @@ replica-priority 100
|
||||
# By default min-replicas-to-write is set to 0 (feature disabled) and
|
||||
# min-replicas-max-lag is set to 10.
|
||||
|
||||
# A master is able to list the address and port of the attached
|
||||
# A primary is able to list the address and port of the attached
|
||||
# replicas in different ways. For example the "INFO replication" section
|
||||
# offers this information, which is used, among other tools, by
|
||||
# Sentinel in order to discover replica instances.
|
||||
# Another place where this info is available is in the output of the
|
||||
# "ROLE" command of a master.
|
||||
# "ROLE" command of a primary.
|
||||
#
|
||||
# The listed IP address and port normally reported by a replica is
|
||||
# obtained in the following way:
|
||||
#
|
||||
# IP: The address is auto detected by checking the peer address
|
||||
# of the socket used by the replica to connect with the master.
|
||||
# of the socket used by the replica to connect with the primary.
|
||||
#
|
||||
# Port: The port is communicated by the replica during the replication
|
||||
# handshake, and is normally the port that the replica is using to
|
||||
@ -817,7 +817,7 @@ replica-priority 100
|
||||
# However when port forwarding or Network Address Translation (NAT) is
|
||||
# used, the replica may actually be reachable via different IP and port
|
||||
# pairs. The following two options can be used by a replica in order to
|
||||
# report to its master a specific set of IP and port, so that both INFO
|
||||
# report to its primary a specific set of IP and port, so that both INFO
|
||||
# and ROLE will report those values.
|
||||
#
|
||||
# There is no need to use both the options if you need to override just
|
||||
@ -1175,11 +1175,11 @@ acllog-max-len 128
|
||||
# maxmemory-eviction-tenacity 10
|
||||
|
||||
# By default a replica will ignore its maxmemory setting
|
||||
# (unless it is promoted to master after a failover or manually). It means
|
||||
# that the eviction of keys will be just handled by the master, sending the
|
||||
# DEL commands to the replica as keys evict in the master side.
|
||||
# (unless it is promoted to primary after a failover or manually). It means
|
||||
# that the eviction of keys will be just handled by the primary, sending the
|
||||
# DEL commands to the replica as keys evict in the primary side.
|
||||
#
|
||||
# This behavior ensures that masters and replicas stay consistent, and is usually
|
||||
# This behavior ensures that primaries and replicas stay consistent, and is usually
|
||||
# what you want, however if your replica is writable, or you want the replica
|
||||
# to have a different memory setting, and you are sure all the writes performed
|
||||
# to the replica are idempotent, then you may change this default (but be sure
|
||||
@ -1190,7 +1190,7 @@ acllog-max-len 128
|
||||
# be larger on the replica, or data structures may sometimes take more memory
|
||||
# and so forth). So make sure you monitor your replicas and make sure they
|
||||
# have enough memory to never hit a real out-of-memory condition before the
|
||||
# master hits the configured maxmemory setting.
|
||||
# primary hits the configured maxmemory setting.
|
||||
#
|
||||
# replica-ignore-maxmemory yes
|
||||
|
||||
@ -1247,7 +1247,7 @@ acllog-max-len 128
|
||||
# itself removes any old content of the specified key in order to replace
|
||||
# it with the specified string.
|
||||
# 4) During replication, when a replica performs a full resynchronization with
|
||||
# its master, the content of the whole database is removed in order to
|
||||
# its primary, the content of the whole database is removed in order to
|
||||
# load the RDB file just transferred.
|
||||
#
|
||||
# In all the above cases the default is to delete objects in a blocking way,
|
||||
@ -1328,7 +1328,7 @@ lazyfree-lazy-user-flush no
|
||||
# Enabling this feature makes the server actively control the oom_score_adj value
|
||||
# for all its processes, depending on their role. The default scores will
|
||||
# attempt to have background child processes killed before all others, and
|
||||
# replicas killed before masters.
|
||||
# replicas killed before primaries.
|
||||
#
|
||||
# The server supports these options:
|
||||
#
|
||||
@ -1342,7 +1342,7 @@ lazyfree-lazy-user-flush no
|
||||
oom-score-adj no
|
||||
|
||||
# When oom-score-adj is used, this directive controls the specific values used
|
||||
# for master, replica and background child processes. Values range -2000 to
|
||||
# for primary, replica and background child processes. Values range -2000 to
|
||||
# 2000 (higher means more likely to be killed).
|
||||
#
|
||||
# Unprivileged processes (not root, and without CAP_SYS_RESOURCE capabilities)
|
||||
@ -1530,7 +1530,7 @@ aof-timestamp-enabled no
|
||||
# Maximum time to wait for replicas when shutting down, in seconds.
|
||||
#
|
||||
# During shut down, a grace period allows any lagging replicas to catch up with
|
||||
# the latest replication offset before the master exists. This period can
|
||||
# the latest replication offset before the primary exits. This period can
|
||||
# prevent data loss, especially for deployments without configured disk backups.
|
||||
#
|
||||
# The 'shutdown-timeout' value is the grace period's duration in seconds. It is
|
||||
@ -1606,7 +1606,7 @@ aof-timestamp-enabled no
|
||||
# you to specify the cluster bus port when executing cluster meet.
|
||||
# cluster-port 0
|
||||
|
||||
# A replica of a failing master will avoid to start a failover if its data
|
||||
# A replica of a failing primary will avoid starting a failover if its data
|
||||
# looks too old.
|
||||
#
|
||||
# There is no simple way for a replica to actually have an exact measure of
|
||||
@ -1614,35 +1614,35 @@ aof-timestamp-enabled no
|
||||
#
|
||||
# 1) If there are multiple replicas able to failover, they exchange messages
|
||||
# in order to try to give an advantage to the replica with the best
|
||||
# replication offset (more data from the master processed).
|
||||
# replication offset (more data from the primary processed).
|
||||
# Replicas will try to get their rank by offset, and apply to the start
|
||||
# of the failover a delay proportional to their rank.
|
||||
#
|
||||
# 2) Every single replica computes the time of the last interaction with
|
||||
# its master. This can be the last ping or command received (if the master
|
||||
# its primary. This can be the last ping or command received (if the primary
|
||||
# is still in the "connected" state), or the time that elapsed since the
|
||||
# disconnection with the master (if the replication link is currently down).
|
||||
# disconnection with the primary (if the replication link is currently down).
|
||||
# If the last interaction is too old, the replica will not try to failover
|
||||
# at all.
|
||||
#
|
||||
# The point "2" can be tuned by user. Specifically a replica will not perform
|
||||
# the failover if, since the last interaction with the master, the time
|
||||
# the failover if, since the last interaction with the primary, the time
|
||||
# elapsed is greater than:
|
||||
#
|
||||
# (node-timeout * cluster-replica-validity-factor) + repl-ping-replica-period
|
||||
#
|
||||
# So for example if node-timeout is 30 seconds, and the cluster-replica-validity-factor
|
||||
# is 10, and assuming a default repl-ping-replica-period of 10 seconds, the
|
||||
# replica will not try to failover if it was not able to talk with the master
|
||||
# replica will not try to failover if it was not able to talk with the primary
|
||||
# for longer than 310 seconds.
|
||||
#
|
||||
# A large cluster-replica-validity-factor may allow replicas with too old data to failover
|
||||
# a master, while a too small value may prevent the cluster from being able to
|
||||
# a primary, while a too small value may prevent the cluster from being able to
|
||||
# elect a replica at all.
|
||||
#
|
||||
# For maximum availability, it is possible to set the cluster-replica-validity-factor
|
||||
# to a value of 0, which means, that replicas will always try to failover the
|
||||
# master regardless of the last time they interacted with the master.
|
||||
# primary regardless of the last time they interacted with the primary.
|
||||
# (However they'll always try to apply a delay proportional to their
|
||||
# offset rank).
|
||||
#
|
||||
@ -1651,19 +1651,19 @@ aof-timestamp-enabled no
|
||||
#
|
||||
# cluster-replica-validity-factor 10
|
||||
|
||||
# Cluster replicas are able to migrate to orphaned masters, that are masters
|
||||
# Cluster replicas are able to migrate to orphaned primaries, that are primaries
|
||||
# that are left without working replicas. This improves the cluster ability
|
||||
# to resist to failures as otherwise an orphaned master can't be failed over
|
||||
# to resist failures, as otherwise an orphaned primary can't be failed over
|
||||
# in case of failure if it has no working replicas.
|
||||
#
|
||||
# Replicas migrate to orphaned masters only if there are still at least a
|
||||
# given number of other working replicas for their old master. This number
|
||||
# Replicas migrate to orphaned primaries only if there are still at least a
|
||||
# given number of other working replicas for their old primary. This number
|
||||
# is the "migration barrier". A migration barrier of 1 means that a replica
|
||||
# will migrate only if there is at least 1 other working replica for its master
|
||||
# will migrate only if there is at least 1 other working replica for its primary
|
||||
# and so forth. It usually reflects the number of replicas you want for every
|
||||
# master in your cluster.
|
||||
# primary in your cluster.
|
||||
#
|
||||
# Default is 1 (replicas migrate only if their masters remain with at least
|
||||
# Default is 1 (replicas migrate only if their primaries remain with at least
|
||||
# one replica). To disable migration just set it to a very large value or
|
||||
# set cluster-allow-replica-migration to 'no'.
|
||||
# A value of 0 can be set but is useful only for debugging and dangerous
|
||||
@ -1672,10 +1672,10 @@ aof-timestamp-enabled no
|
||||
# cluster-migration-barrier 1
|
||||
|
||||
# Turning off this option allows for less automatic cluster configuration.
|
||||
# It disables migration of replicas to orphaned masters. Masters that become
|
||||
# empty due to losing their last slots to another master will not automatically
|
||||
# replicate from the master that took over their last slots. Instead, they will
|
||||
# remain as empty masters without any slots.
|
||||
# It disables migration of replicas to orphaned primaries. Primaries that become
|
||||
# empty due to losing their last slots to another primary will not automatically
|
||||
# replicate from the primary that took over their last slots. Instead, they will
|
||||
# remain as empty primaries without any slots.
|
||||
#
|
||||
# Default is 'yes' (allow automatic migrations).
|
||||
#
|
||||
@ -1695,7 +1695,7 @@ aof-timestamp-enabled no
|
||||
# cluster-require-full-coverage yes
|
||||
|
||||
# This option, when set to yes, prevents replicas from trying to failover their
|
||||
# master during master failures. However the replica can still perform a
|
||||
# primary during primary failures. However the replica can still perform a
|
||||
# manual failover, if forced to do so.
|
||||
#
|
||||
# This is useful in different scenarios, especially in the case of multiple
|
||||
@ -1714,9 +1714,9 @@ aof-timestamp-enabled no
|
||||
#
|
||||
# The second use case is for configurations that don't meet the recommended
|
||||
# three shards but want to enable cluster mode and scale later. A
|
||||
# master outage in a 1 or 2 shard configuration causes a read/write outage to the
|
||||
# primary outage in a 1 or 2 shard configuration causes a read/write outage to the
|
||||
# entire cluster without this option set, with it set there is only a write outage.
|
||||
# Without a quorum of masters, slot ownership will not change automatically.
|
||||
# Without a quorum of primaries, slot ownership will not change automatically.
|
||||
#
|
||||
# cluster-allow-reads-when-down no
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user