Merge 6.2 RC3

This commit is contained in:
Oran Agra 2021-02-01 20:11:42 +02:00 committed by GitHub
commit a666cc1bb7
91 changed files with 6062 additions and 932 deletions

View File

@ -58,3 +58,14 @@ jobs:
run: |
yum -y install gcc make
make REDIS_CFLAGS='-Werror'
# Build-only job for FreeBSD: it is scheduled on a macOS runner and hands the
# work to the vmactions/freebsd-vm action, with gmake installed through pkg
# and `gmake` as the build command.
build-freebsd:
runs-on: macos-latest
steps:
- uses: actions/checkout@v2
- name: make
uses: vmactions/freebsd-vm@v0.1.0
with:
usesh: true
prepare: pkg install -y gmake
run: gmake

View File

@ -99,6 +99,23 @@ jobs:
./runtest-cluster --tls
./runtest-cluster
# Runs the test suite with threaded I/O enabled (io-threads 4 and
# io-threads-do-reads yes), limited to tests tagged "network", followed by the
# cluster tests with the same configuration. Only runs in redis/redis.
test-ubuntu-io-threads:
runs-on: ubuntu-latest
if: github.repository == 'redis/redis'
# NOTE(review): 14400 minutes is 10 days; confirm this is intended and is not
# silently capped by the runner's own job time limit.
timeout-minutes: 14400
steps:
- uses: actions/checkout@v2
- name: make
run: |
make
- name: test
run: |
sudo apt-get install tcl8.5 tcl-tls
./runtest --config io-threads 4 --config io-threads-do-reads yes --accurate --verbose --tags network
- name: cluster tests
run: |
./runtest-cluster --config io-threads 4 --config io-threads-do-reads yes
test-valgrind:
runs-on: ubuntu-latest
if: github.repository == 'redis/redis'
@ -186,3 +203,20 @@ jobs:
- name: cluster tests
run: ./runtest-cluster
# Full test run on FreeBSD through the vmactions/freebsd-vm action: builds with
# gmake, then runs the main suite, the module API tests (with MAKE=gmake), the
# sentinel tests and the cluster tests. Only runs in redis/redis.
test-freebsd:
runs-on: macos-latest
if: github.repository == 'redis/redis'
# NOTE(review): 14400 minutes is 10 days; confirm this is intended and is not
# silently capped by the runner's own job time limit.
timeout-minutes: 14400
steps:
- uses: actions/checkout@v2
- name: test
uses: vmactions/freebsd-vm@v0.1.0
with:
usesh: true
prepare: pkg install -y gmake lang/tcl85
run: |
gmake
./runtest --accurate --verbose --no-latency
MAKE=gmake ./runtest-moduleapi --verbose
./runtest-sentinel
./runtest-cluster

View File

@ -1,3 +1,53 @@
Redis 6.2 RC3 Released Mon Feb 1 14:00:00 IST 2021
================================================================================
Upgrade urgency LOW: This is the third Release Candidate of Redis 6.2.
Here is a comprehensive list of changes in this release compared to 6.2 RC2,
each one includes the PR number that added it, so you can get more details
at https://github.com/redis/redis/pull/<number>
New commands / args:
* Add HRANDFIELD and ZRANDMEMBER commands (#8297)
* Add FAILOVER command (#8315)
* Add GETEX, GETDEL commands (#8327)
* Add PXAT/EXAT arguments to SET command (#8327)
* Add SYNC arg to FLUSHALL and FLUSHDB, and ASYNC/SYNC arg to SCRIPT FLUSH (#8258)
Sentinel:
* Add hostname support to Sentinel (#8282)
* Prevent file descriptors from leaking into Sentinel scripts (#8242)
* Fix config file line order dependency and config rewrite sequence (#8271)
New configuration options:
* Add set-proc-title config option to disable changes to the process title (#3623)
* Add proc-title-template option to control what's shown in the process title (#8397)
* Add lazyfree-lazy-user-flush config option to control FLUSHALL, FLUSHDB and SCRIPT FLUSH (#8258)
Bug fixes:
* AOF: recover from last write error by turning on/off appendonly config (#8030)
* Exit on fsync error when the AOF fsync policy is 'always' (#8347)
* Avoid assertions (on older kernels) when testing arm64 CoW bug (#8405)
* CONFIG REWRITE should honor umask settings (#8371)
* Fix firstkey,lastkey,step in COMMAND command for some commands (#8367)
Special considerations:
* Fix misleading description of the save configuration directive (#8337)
Improvements:
* A way to get RDB file via replication without excessive replication buffers (#8303)
* Optimize performance of clusterGenNodesDescription for large clusters (#8182)
Info fields and introspection changes:
* SLOWLOG and LATENCY monitor include unblocking time of blocked commands (#7491)
Modules:
* Add modules API for streams (#8288)
* Add event for fork child birth and termination (#8289)
* Add RM_BlockedClientMeasureTime* etc, to track background processing in commandstats (#7491)
* Fix bug in v6.2, wrong value passed to the new unlink callback (#8381)
* Fix bug in v6.2, modules blocked on keys unblock on commands like LPUSH (#8356)
================================================================================
Redis 6.2 RC2 Released Tue Jan 12 16:17:20 IST 2021
================================================================================
@ -255,35 +305,39 @@ and we don't get reports of serious issues for a while.
A special thank you for the amount of work put into this release by:
- Oran Agra
- Yossi Gottlieb
- Itamar Haber
- Guy Benoish
- Filipe Oliveira
- Viktor Söderqvist
- Guy Benoish
- Itamar Haber
- Yang Bodong
- Madelyn Olson
- Wang Yuan
- Felipe Machado
- Yang Bodong
- Wen Hui
- Tatsuya Arisawa
- Jonah H. Harris
- Raghav Muddur
- Jim Brunner
- Yaacov Hazan
- Wen Hui
- Allen Farris
- Chen Yang
- Nitai Caro
- Meir Shpilraien
- maohuazhu
- Valentino Geron
- Qu Chen
- sundb
- George Prekas
- Zhao Zhao
- sundb
- Qu Chen
- George Prekas
- Tyson Andre
- Michael Grunder
- alexronke-channeladvisor
- Andy Pan
- Wu Yunlong
- Wei Kukey
- Yoav Steinberg
- Uri Shachar
- Greg Femec
- Uri Shachar
- Nykolas Laurentino de Lima
- xhe
- zhenwei pi

View File

@ -325,31 +325,52 @@ databases 16
# ASCII art logo in startup logs by setting the following option to yes.
always-show-logo no
################################ SNAPSHOTTING ################################
#
# Save the DB on disk:
#
# save <seconds> <changes>
#
# Will save the DB if both the given number of seconds and the given
# number of write operations against the DB occurred.
#
# In the example below the behavior will be to save:
# after 900 sec (15 min) if at least 1 key changed
# after 300 sec (5 min) if at least 10 keys changed
# after 60 sec if at least 10000 keys changed
#
# Note: you can disable saving completely by commenting out all "save" lines.
#
# It is also possible to remove all the previously configured save
# points by adding a save directive with a single empty string argument
# like in the following example:
#
# save ""
# By default, Redis modifies the process title (as seen in 'top' and 'ps') to
# provide some runtime information. It is possible to disable this and leave
# the process name as executed by setting the following to no.
set-proc-title yes
save 900 1
save 300 10
save 60 10000
# When changing the process title, Redis uses the following template to construct
# the modified title.
#
# Template variables are specified in curly brackets. The following variables are
# supported:
#
# {title} Name of process as executed if parent, or type of child process.
# {listen-addr} Bind address or '*' followed by TCP or TLS port listening on, or
# Unix socket if only that's available.
# {server-mode} Special mode, i.e. "[sentinel]" or "[cluster]".
# {port} TCP port listening on, or 0.
# {tls-port} TLS port listening on, or 0.
# {unixsocket} Unix domain socket listening on, or "".
# {config-file} Name of configuration file used.
#
proc-title-template "{title} {listen-addr} {server-mode}"
################################ SNAPSHOTTING ################################
# Save the DB to disk.
#
# save <seconds> <changes>
#
# Redis will save the DB if both the given number of seconds and the given
# number of write operations against the DB occurred.
#
# Snapshotting can be completely disabled with a single empty string argument
# as in the following example:
#
# save ""
#
# Unless specified otherwise, by default Redis will save the DB:
# * After 3600 seconds (an hour) if at least 1 key changed
# * After 300 seconds (5 minutes) if at least 100 keys changed
# * After 60 seconds if at least 10000 keys changed
#
# You can set these explicitly by uncommenting the three following lines.
#
# save 3600 1
# save 300 100
# save 60 10000
# By default Redis will stop accepting writes if RDB snapshots are enabled
# (at least one save point) and the latest background save failed.
@ -1089,6 +1110,13 @@ replica-lazy-flush no
lazyfree-lazy-user-del no
# FLUSHDB, FLUSHALL, and SCRIPT FLUSH support both asynchronous and synchronous
# deletion, which can be controlled by passing the [SYNC|ASYNC] flags into the
# commands. When neither flag is passed, this directive will be used to determine
# if the data should be deleted asynchronously.
lazyfree-lazy-user-flush no
################################ THREADED I/O #################################
# Redis is mostly single threaded, however there are certain threaded

View File

@ -23,6 +23,7 @@ $TCLSH tests/test_helper.tcl \
--single unit/moduleapi/hooks \
--single unit/moduleapi/misc \
--single unit/moduleapi/blockonkeys \
--single unit/moduleapi/blockonbackground \
--single unit/moduleapi/scan \
--single unit/moduleapi/datatype \
--single unit/moduleapi/auth \
@ -31,4 +32,5 @@ $TCLSH tests/test_helper.tcl \
--single unit/moduleapi/getkeys \
--single unit/moduleapi/test_lazyfree \
--single unit/moduleapi/defrag \
--single unit/moduleapi/stream \
"${@}"

View File

@ -321,3 +321,21 @@ sentinel deny-scripts-reconfig yes
# is possible to just rename a command to itself:
#
# SENTINEL rename-command mymaster CONFIG CONFIG
# HOSTNAMES SUPPORT
#
# Normally Sentinel uses only IP addresses and requires SENTINEL MONITOR
# to specify an IP address. Also, it requires the Redis replica-announce-ip
# keyword to specify only IP addresses.
#
# You may enable hostnames support by enabling resolve-hostnames. Note
# that you must make sure your DNS is configured properly and that DNS
# resolution does not introduce very long delays.
#
SENTINEL resolve-hostnames no
# When resolve-hostnames is enabled, Sentinel still uses IP addresses
# when exposing instances to users, configuration files, etc. If you want
# to retain the hostnames when announced, enable announce-hostnames below.
#
SENTINEL announce-hostnames no

View File

@ -1024,8 +1024,8 @@ int ACLSetUser(user *u, const char *op, ssize_t oplen) {
/* Return a description of the error that occurred in ACLSetUser() according to
* the errno value set by the function on error. */
char *ACLSetUserStringError(void) {
char *errmsg = "Wrong format";
const char *ACLSetUserStringError(void) {
const char *errmsg = "Wrong format";
if (errno == ENOENT)
errmsg = "Unknown command or category name in ACL";
else if (errno == EINVAL)
@ -1454,7 +1454,7 @@ int ACLLoadConfiguredUsers(void) {
/* Load every rule defined for this user. */
for (int j = 1; aclrules[j]; j++) {
if (ACLSetUser(u,aclrules[j],sdslen(aclrules[j])) != C_OK) {
char *errmsg = ACLSetUserStringError();
const char *errmsg = ACLSetUserStringError();
serverLog(LL_WARNING,"Error loading ACL rule '%s' for "
"the user named '%s': %s",
aclrules[j],aclrules[0],errmsg);
@ -1587,7 +1587,7 @@ sds ACLLoadFromFile(const char *filename) {
for (j = 2; j < argc; j++) {
argv[j] = sdstrim(argv[j],"\t\r\n");
if (ACLSetUser(fakeuser,argv[j],sdslen(argv[j])) != C_OK) {
char *errmsg = ACLSetUserStringError();
const char *errmsg = ACLSetUserStringError();
errors = sdscatprintf(errors,
"%s:%d: %s. ",
server.acl_filename, linenum, errmsg);
@ -1908,7 +1908,7 @@ void aclCommand(client *c) {
for (int j = 3; j < c->argc; j++) {
if (ACLSetUser(tempu,c->argv[j]->ptr,sdslen(c->argv[j]->ptr)) != C_OK) {
char *errmsg = ACLSetUserStringError();
const char *errmsg = ACLSetUserStringError();
addReplyErrorFormat(c,
"Error in ACL SETUSER modifier '%s': %s",
(char*)c->argv[j]->ptr, errmsg);

View File

@ -31,6 +31,7 @@
*/
#include "ae.h"
#include "anet.h"
#include <stdio.h>
#include <sys/time.h>

View File

@ -51,6 +51,7 @@ static int aeApiCreate(aeEventLoop *eventLoop) {
zfree(state);
return -1;
}
anetCloexec(state->epfd);
eventLoop->apidata = state;
return 0;
}

View File

@ -82,6 +82,7 @@ static int aeApiCreate(aeEventLoop *eventLoop) {
zfree(state);
return -1;
}
anetCloexec(state->portfd);
state->npending = 0;

View File

@ -53,6 +53,7 @@ static int aeApiCreate(aeEventLoop *eventLoop) {
zfree(state);
return -1;
}
anetCloexec(state->kqfd);
eventLoop->apidata = state;
return 0;
}

View File

@ -69,6 +69,11 @@ int anetSetBlock(char *err, int fd, int non_block) {
return ANET_ERR;
}
/* Check if this flag has been set or unset, if so,
* then there is no need to call fcntl to set/unset it again. */
if (!!(flags & O_NONBLOCK) == !!non_block)
return ANET_OK;
if (non_block)
flags |= O_NONBLOCK;
else
@ -89,6 +94,29 @@ int anetBlock(char *err, int fd) {
return anetSetBlock(err,fd,0);
}
/* Set the FD_CLOEXEC descriptor flag on 'fd' so the descriptor is not
 * inherited across fork + execve. Both fcntl() calls are retried while
 * they fail with EINTR.
 *
 * Return value:
 *   -1                    if reading or writing the descriptor flags failed,
 *   the current flag word if FD_CLOEXEC was already set (nothing is written),
 *   the F_SETFD result    otherwise. */
int anetCloexec(int fd) {
    int current, rc;

    /* Read the descriptor flags, retrying if a signal interrupts the call. */
    for (;;) {
        current = fcntl(fd, F_GETFD);
        if (current != -1 || errno != EINTR) break;
    }

    if (current == -1) return -1;

    /* Already close-on-exec: skip the second system call. */
    if (current & FD_CLOEXEC) return current;

    /* Write the flags back with FD_CLOEXEC added, again retrying on EINTR. */
    for (;;) {
        rc = fcntl(fd, F_SETFD, current | FD_CLOEXEC);
        if (rc != -1 || errno != EINTR) break;
    }

    return rc;
}
/* Set TCP keep alive option to detect dead peers. The interval option
* is only used for Linux as we are using Linux-specific APIs to set
* the probe send time, interval, and count. */
@ -207,14 +235,13 @@ int anetRecvTimeout(char *err, int fd, long long ms) {
return ANET_OK;
}
/* anetGenericResolve() is called by anetResolve() and anetResolveIP() to
* do the actual work. It resolves the hostname "host" and set the string
* representation of the IP address into the buffer pointed by "ipbuf".
/* Resolve the hostname "host" and set the string representation of the
* IP address into the buffer pointed by "ipbuf".
*
* If flags is set to ANET_IP_ONLY the function only resolves hostnames
* that are actually already IPv4 or IPv6 addresses. This turns the function
* into a validating / normalizing function. */
int anetGenericResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len,
int anetResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len,
int flags)
{
struct addrinfo hints, *info;
@ -241,14 +268,6 @@ int anetGenericResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len,
return ANET_OK;
}
int anetResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len) {
return anetGenericResolve(err,host,ipbuf,ipbuf_len,ANET_NONE);
}
int anetResolveIP(char *err, char *host, char *ipbuf, size_t ipbuf_len) {
return anetGenericResolve(err,host,ipbuf,ipbuf_len,ANET_IP_ONLY);
}
static int anetSetReuseAddr(char *err, int fd) {
int yes = 1;
/* Make sure connection-intensive things like the redis benchmark

View File

@ -60,8 +60,7 @@ int anetTcpNonBlockBestEffortBindConnect(char *err, const char *addr, int port,
int anetUnixConnect(char *err, const char *path);
int anetUnixNonBlockConnect(char *err, const char *path);
int anetRead(int fd, char *buf, int count);
int anetResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len);
int anetResolveIP(char *err, char *host, char *ipbuf, size_t ipbuf_len);
int anetResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len, int flags);
int anetTcpServer(char *err, int port, char *bindaddr, int backlog);
int anetTcp6Server(char *err, int port, char *bindaddr, int backlog);
int anetUnixServer(char *err, char *path, mode_t perm, int backlog);
@ -70,6 +69,7 @@ int anetUnixAccept(char *err, int serversock);
int anetWrite(int fd, char *buf, int count);
int anetNonBlock(char *err, int fd);
int anetBlock(char *err, int fd);
int anetCloexec(int fd);
int anetEnableTcpNoDelay(char *err, int fd);
int anetDisableTcpNoDelay(char *err, int fd);
int anetTcpKeepAlive(char *err, int fd);

View File

@ -235,6 +235,8 @@ void stopAppendOnly(void) {
serverAssert(server.aof_state != AOF_OFF);
flushAppendOnlyFile(1);
redis_fsync(server.aof_fd);
server.aof_fsync_offset = server.aof_current_size;
server.aof_last_fsync = server.unixtime;
close(server.aof_fd);
server.aof_fd = -1;
@ -242,6 +244,8 @@ void stopAppendOnly(void) {
server.aof_state = AOF_OFF;
server.aof_rewrite_scheduled = 0;
killAppendOnlyChild();
sdsfree(server.aof_buf);
server.aof_buf = sdsempty();
}
/* Called when the user switches from "appendonly no" to "appendonly yes"
@ -285,6 +289,12 @@ int startAppendOnly(void) {
server.aof_state = AOF_WAIT_REWRITE;
server.aof_last_fsync = server.unixtime;
server.aof_fd = newfd;
/* If AOF was in error state, we just ignore it and log the event. */
if (server.aof_last_write_status == C_ERR) {
serverLog(LL_WARNING,"AOF reopen, just ignore the last error.");
server.aof_last_write_status = C_OK;
}
return C_OK;
}
@ -451,10 +461,11 @@ void flushAppendOnlyFile(int force) {
/* Handle the AOF write error. */
if (server.aof_fsync == AOF_FSYNC_ALWAYS) {
/* We can't recover when the fsync policy is ALWAYS since the
* reply for the client is already in the output buffers, and we
* have the contract with the user that on acknowledged write data
* is synced on disk. */
/* We can't recover when the fsync policy is ALWAYS since the reply
* for the client is already in the output buffers (both writes and
* reads), and the changes to the db can't be rolled back. Since we
* have a contract with the user that acknowledged or observed
* writes are synced on disk, we must exit. */
serverLog(LL_WARNING,"Can't recover from AOF write error when the AOF fsync policy is 'always'. Exiting...");
exit(1);
} else {
@ -502,7 +513,14 @@ try_fsync:
/* redis_fsync is defined as fdatasync() for Linux in order to avoid
* flushing metadata. */
latencyStartMonitor(latency);
redis_fsync(server.aof_fd); /* Let's try to get this data on the disk */
/* Let's try to get this data on the disk. To guarantee data safety when
* the AOF fsync policy is 'always', we should exit if we fail to fsync the
* AOF (see comment next to the exit(1) after write error above). */
if (redis_fsync(server.aof_fd) == -1) {
serverLog(LL_WARNING,"Can't persist AOF for fsync error when the "
"AOF fsync policy is 'always': %s. Exiting...", strerror(errno));
exit(1);
}
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("aof-fsync-always",latency);
server.aof_fsync_offset = server.aof_current_size;
@ -581,8 +599,6 @@ sds catAppendOnlyExpireAtCommand(sds buf, struct redisCommand *cmd, robj *key, r
void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {
sds buf = sdsempty();
robj *tmpargv[3];
/* The DB this command was targeting is not the same as the last command
* we appended. To issue a SELECT command is needed. */
if (dictid != server.aof_selected_db) {
@ -598,32 +614,31 @@ void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int a
cmd->proc == expireatCommand) {
/* Translate EXPIRE/PEXPIRE/EXPIREAT into PEXPIREAT */
buf = catAppendOnlyExpireAtCommand(buf,cmd,argv[1],argv[2]);
} else if (cmd->proc == setexCommand || cmd->proc == psetexCommand) {
/* Translate SETEX/PSETEX to SET and PEXPIREAT */
tmpargv[0] = createStringObject("SET",3);
tmpargv[1] = argv[1];
tmpargv[2] = argv[3];
buf = catAppendOnlyGenericCommand(buf,3,tmpargv);
decrRefCount(tmpargv[0]);
buf = catAppendOnlyExpireAtCommand(buf,cmd,argv[1],argv[2]);
} else if (cmd->proc == setCommand && argc > 3) {
int i;
robj *exarg = NULL, *pxarg = NULL;
for (i = 3; i < argc; i ++) {
if (!strcasecmp(argv[i]->ptr, "ex")) exarg = argv[i+1];
if (!strcasecmp(argv[i]->ptr, "px")) pxarg = argv[i+1];
robj *pxarg = NULL;
/* When SET is used with EX/PX argument setGenericCommand propagates them with PX millisecond argument.
* So since the command arguments are re-written there, we can rely here on the index of PX being 3. */
if (!strcasecmp(argv[3]->ptr, "px")) {
pxarg = argv[4];
}
serverAssert(!(exarg && pxarg));
/* For AOF we convert SET key value relative time in milliseconds to SET key value absolute time in
* milliseconds. Whenever the condition is true it implies that original SET has been transformed
* to SET PX with millisecond time argument so we do not need to worry about unit here. */
if (pxarg) {
robj *millisecond = getDecodedObject(pxarg);
long long when = strtoll(millisecond->ptr,NULL,10);
when += mstime();
if (exarg || pxarg) {
/* Translate SET [EX seconds][PX milliseconds] to SET and PEXPIREAT */
buf = catAppendOnlyGenericCommand(buf,3,argv);
if (exarg)
buf = catAppendOnlyExpireAtCommand(buf,server.expireCommand,argv[1],
exarg);
if (pxarg)
buf = catAppendOnlyExpireAtCommand(buf,server.pexpireCommand,argv[1],
pxarg);
decrRefCount(millisecond);
robj *newargs[5];
newargs[0] = argv[0];
newargs[1] = argv[1];
newargs[2] = argv[2];
newargs[3] = shared.pxat;
newargs[4] = createStringObjectFromLongLong(when);
buf = catAppendOnlyGenericCommand(buf,5,newargs);
decrRefCount(newargs[4]);
} else {
buf = catAppendOnlyGenericCommand(buf,argc,argv);
}
@ -1852,6 +1867,20 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
}
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("aof-rewrite-diff-write",latency);
if (server.aof_fsync == AOF_FSYNC_EVERYSEC) {
aof_background_fsync(newfd);
} else if (server.aof_fsync == AOF_FSYNC_ALWAYS) {
latencyStartMonitor(latency);
if (redis_fsync(newfd) == -1) {
serverLog(LL_WARNING,
"Error trying to fsync the parent diff to the rewritten AOF: %s", strerror(errno));
close(newfd);
goto cleanup;
}
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("aof-rewrite-done-fsync",latency);
}
serverLog(LL_NOTICE,
"Residual parent diff successfully flushed to the rewritten AOF (%.2f MB)", (double) aofRewriteBufferSize() / (1024*1024));
@ -1919,14 +1948,11 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
/* AOF enabled, replace the old fd with the new one. */
oldfd = server.aof_fd;
server.aof_fd = newfd;
if (server.aof_fsync == AOF_FSYNC_ALWAYS)
redis_fsync(newfd);
else if (server.aof_fsync == AOF_FSYNC_EVERYSEC)
aof_background_fsync(newfd);
server.aof_selected_db = -1; /* Make sure SELECT is re-issued */
aofUpdateCurrentSize();
server.aof_rewrite_base_size = server.aof_current_size;
server.aof_fsync_offset = server.aof_current_size;
server.aof_last_fsync = server.unixtime;
/* Clear regular AOF buffer since its contents was just written to
* the new AOF from the background rewrite buffer. */

View File

@ -61,6 +61,9 @@
*/
#include "server.h"
#include "slowlog.h"
#include "latency.h"
#include "monotonic.h"
int serveClientBlockedOnList(client *receiver, robj *key, robj *dstkey, redisDb *db, robj *value, int wherefrom, int whereto);
int getListPositionFromObjectOrReply(client *c, robj *arg, int *position);
@ -97,6 +100,20 @@ void blockClient(client *c, int btype) {
}
}
/* Account for a blocking command once its client has been served.
 *
 * The total duration (c->duration + blocked_us + reply_us, in microseconds)
 * is added to the per-command 'microseconds' counter of c->lastcmd.
 * Unless the command is flagged CMD_SKIP_SLOWLOG, the same total is also
 * offered to the Slow log and the reply time alone (converted to
 * milliseconds) is offered to the latency monitor as a "command-unblocking"
 * sample. */
void updateStatsOnUnblock(client *c, long blocked_us, long reply_us){
    const ustime_t elapsed = c->duration + blocked_us + reply_us;

    c->lastcmd->microseconds += elapsed;

    /* Commands excluded from the Slow log are excluded from the latency
     * sample as well. */
    if (c->lastcmd->flags & CMD_SKIP_SLOWLOG) return;

    slowlogPushEntryIfNeeded(c,c->argv,c->argc,elapsed);
    latencyAddSampleIfNeeded("command-unblocking",reply_us/1000);
}
/* This function is called in the beforeSleep() function of the event loop
* in order to process the pending input buffer of clients that were
* unblocked after a blocking operation. */
@ -264,6 +281,8 @@ void serveClientsBlockedOnListKey(robj *o, readyList *rl) {
if (dstkey) incrRefCount(dstkey);
unblockClient(receiver);
monotime replyTimer;
elapsedStart(&replyTimer);
if (serveClientBlockedOnList(receiver,
rl->key,dstkey,rl->db,value,
wherefrom, whereto) == C_ERR)
@ -272,6 +291,7 @@ void serveClientsBlockedOnListKey(robj *o, readyList *rl) {
* to also undo the POP operation. */
listTypePush(o,value,wherefrom);
}
updateStatsOnUnblock(receiver, 0, elapsedUs(replyTimer));
if (dstkey) decrRefCount(dstkey);
decrRefCount(value);
@ -316,7 +336,10 @@ void serveClientsBlockedOnSortedSetKey(robj *o, readyList *rl) {
receiver->lastcmd->proc == bzpopminCommand)
? ZSET_MIN : ZSET_MAX;
unblockClient(receiver);
monotime replyTimer;
elapsedStart(&replyTimer);
genericZpopCommand(receiver,&rl->key,1,where,1,NULL);
updateStatsOnUnblock(receiver, 0, elapsedUs(replyTimer));
zcard--;
/* Replicate the command. */
@ -406,6 +429,8 @@ void serveClientsBlockedOnStreamKey(robj *o, readyList *rl) {
}
}
monotime replyTimer;
elapsedStart(&replyTimer);
/* Emit the two elements sub-array consisting of
* the name of the stream and the data we
* extracted from it. Wrapped in a single-item
@ -425,6 +450,7 @@ void serveClientsBlockedOnStreamKey(robj *o, readyList *rl) {
streamReplyWithRange(receiver,s,&start,NULL,
receiver->bpop.xread_count,
0, group, consumer, noack, &pi);
updateStatsOnUnblock(receiver, 0, elapsedUs(replyTimer));
/* Note that after we unblock the client, 'gt'
* and other receiver->bpop stuff are no longer
@ -471,7 +497,10 @@ void serveClientsBlockedOnKeyByModule(readyList *rl) {
* different modules with different triggers to consider if a key
* is ready or not. This means we can't exit the loop but need
* to continue after the first failure. */
monotime replyTimer;
elapsedStart(&replyTimer);
if (!moduleTryServeClientBlockedOnKey(receiver, rl->key)) continue;
updateStatsOnUnblock(receiver, 0, elapsedUs(replyTimer));
moduleUnblockClient(receiver);
}
@ -684,10 +713,20 @@ static int getBlockedTypeByType(int type) {
void signalKeyAsReady(redisDb *db, robj *key, int type) {
readyList *rl;
/* If no clients are blocked on this type, just return */
/* Quick returns. */
int btype = getBlockedTypeByType(type);
if (btype == BLOCKED_NONE || !server.blocked_clients_by_type[btype])
if (btype == BLOCKED_NONE) {
/* The type can never block. */
return;
}
if (!server.blocked_clients_by_type[btype] &&
!server.blocked_clients_by_type[BLOCKED_MODULE]) {
/* No clients block on this type. Note: Blocked modules are represented
* by BLOCKED_MODULE, even if the intention is to wake up by normal
* types (list, zset, stream), so we need to check that there are no
* blocked modules before we do a quick return here. */
return;
}
/* No clients blocking for this key? No need to queue it. */
if (dictFind(db->blocking_keys,key) == NULL) return;

View File

@ -398,7 +398,7 @@ int clusterLockConfig(char *filename) {
/* To lock it, we need to open the file in a way it is created if
* it does not exist, otherwise there is a race condition with other
* processes. */
int fd = open(filename,O_WRONLY|O_CREAT,0644);
int fd = open(filename,O_WRONLY|O_CREAT|O_CLOEXEC,0644);
if (fd == -1) {
serverLog(LL_WARNING,
"Can't open %s in order to acquire a lock: %s",
@ -509,8 +509,7 @@ void clusterInit(void) {
serverLog(LL_WARNING, "Redis port number too high. "
"Cluster communication port is 10,000 port "
"numbers higher than your Redis port. "
"Your Redis port number must be "
"lower than 55535.");
"Your Redis port number must be 55535 or less.");
exit(1);
}
if (listenToPort(port+CLUSTER_PORT_INCR,
@ -779,6 +778,7 @@ clusterNode *createClusterNode(char *nodename, int flags) {
node->configEpoch = 0;
node->flags = flags;
memset(node->slots,0,sizeof(node->slots));
node->slots_info = NULL;
node->numslots = 0;
node->numslaves = 0;
node->slaves = NULL;
@ -4144,8 +4144,8 @@ sds clusterGenNodeDescription(clusterNode *node) {
sds ci;
/* Node coordinates */
ci = sdscatprintf(sdsempty(),"%.40s %s:%d@%d ",
node->name,
ci = sdscatlen(sdsempty(),node->name,CLUSTER_NAMELEN);
ci = sdscatfmt(ci," %s:%i@%i ",
node->ip,
node->port,
node->cport);
@ -4154,40 +4154,46 @@ sds clusterGenNodeDescription(clusterNode *node) {
ci = representClusterNodeFlags(ci, node->flags);
/* Slave of... or just "-" */
ci = sdscatlen(ci," ",1);
if (node->slaveof)
ci = sdscatprintf(ci," %.40s ",node->slaveof->name);
ci = sdscatlen(ci,node->slaveof->name,CLUSTER_NAMELEN);
else
ci = sdscatlen(ci," - ",3);
ci = sdscatlen(ci,"-",1);
unsigned long long nodeEpoch = node->configEpoch;
if (nodeIsSlave(node) && node->slaveof) {
nodeEpoch = node->slaveof->configEpoch;
}
/* Latency from the POV of this node, config epoch, link status */
ci = sdscatprintf(ci,"%lld %lld %llu %s",
ci = sdscatfmt(ci," %I %I %U %s",
(long long) node->ping_sent,
(long long) node->pong_received,
nodeEpoch,
(node->link || node->flags & CLUSTER_NODE_MYSELF) ?
"connected" : "disconnected");
/* Slots served by this instance */
start = -1;
for (j = 0; j < CLUSTER_SLOTS; j++) {
int bit;
/* Slots served by this instance. If we already have slots info,
* append it directly; otherwise, generate it only if the node has slots. */
if (node->slots_info) {
ci = sdscatsds(ci, node->slots_info);
} else if (node->numslots > 0) {
start = -1;
for (j = 0; j < CLUSTER_SLOTS; j++) {
int bit;
if ((bit = clusterNodeGetSlotBit(node,j)) != 0) {
if (start == -1) start = j;
}
if (start != -1 && (!bit || j == CLUSTER_SLOTS-1)) {
if (bit && j == CLUSTER_SLOTS-1) j++;
if (start == j-1) {
ci = sdscatprintf(ci," %d",start);
} else {
ci = sdscatprintf(ci," %d-%d",start,j-1);
if ((bit = clusterNodeGetSlotBit(node,j)) != 0) {
if (start == -1) start = j;
}
if (start != -1 && (!bit || j == CLUSTER_SLOTS-1)) {
if (bit && j == CLUSTER_SLOTS-1) j++;
if (start == j-1) {
ci = sdscatfmt(ci," %i",start);
} else {
ci = sdscatfmt(ci," %i-%i",start,j-1);
}
start = -1;
}
start = -1;
}
}
@ -4208,6 +4214,41 @@ sds clusterGenNodeDescription(clusterNode *node) {
return ci;
}
/* Build the textual slot ranges for every node in a single pass over the
 * slot table and store them in each node's 'slots_info' string.
 *
 * The table server.cluster->slots[] is scanned as a sequence of runs of
 * consecutive slots that point to the same node. Each run is appended to
 * its owner's slots_info as " <slot>" (single slot) or " <first>-<last>".
 * Unassigned slots (NULL entries) are skipped, and nodes whose flags
 * intersect 'filter' get no slots_info at all.
 *
 * This lets clusterGenNodesDescription() avoid walking the whole slot space
 * once per node. */
void clusterGenNodesSlotsInfo(int filter) {
    int slot = 0;

    while (slot < CLUSTER_SLOTS) {
        clusterNode *owner = server.cluster->slots[slot];

        /* Unassigned slot: nothing to record, move on. */
        if (owner == NULL) {
            slot++;
            continue;
        }

        /* Extend the run while the following slots share the same owner. */
        int first = slot;
        do {
            slot++;
        } while (slot < CLUSTER_SLOTS && server.cluster->slots[slot] == owner);
        int last = slot - 1;

        /* Filtered nodes are not described, so don't build their info. */
        if (owner->flags & filter) continue;

        if (owner->slots_info == NULL) owner->slots_info = sdsempty();
        if (first == last) {
            owner->slots_info = sdscatfmt(owner->slots_info," %i",first);
        } else {
            owner->slots_info = sdscatfmt(owner->slots_info," %i-%i",first,last);
        }
    }
}
/* Generate a csv-alike representation of the nodes we are aware of,
* including the "myself" node, and return an SDS string containing the
* representation (it is up to the caller to free it).
@ -4225,6 +4266,9 @@ sds clusterGenNodesDescription(int filter) {
dictIterator *di;
dictEntry *de;
/* First generate the slots info for all nodes. */
clusterGenNodesSlotsInfo(filter);
di = dictGetSafeIterator(server.cluster->nodes);
while((de = dictNext(di)) != NULL) {
clusterNode *node = dictGetVal(de);
@ -4234,6 +4278,12 @@ sds clusterGenNodesDescription(int filter) {
ci = sdscatsds(ci,ni);
sdsfree(ni);
ci = sdscatlen(ci,"\n",1);
/* Release slots info. */
if (node->slots_info) {
sdsfree(node->slots_info);
node->slots_info = NULL;
}
}
dictReleaseIterator(di);
return ci;

View File

@ -118,6 +118,7 @@ typedef struct clusterNode {
int flags; /* CLUSTER_NODE_... */
uint64_t configEpoch; /* Last configEpoch observed for this node */
unsigned char slots[CLUSTER_SLOTS/8]; /* slots handled by this node */
sds slots_info; /* Slots info represented by string. */
int numslots; /* Number of slots handled by this node */
int numslaves; /* Number of slave nodes, if this is a master */
struct clusterNode **slaves; /* pointers to slave nodes */

View File

@ -153,15 +153,15 @@ int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT] = { 0, 200, 800 };
typedef struct boolConfigData {
int *config; /* The pointer to the server config this value is stored in */
const int default_value; /* The default value of the config on rewrite */
int (*is_valid_fn)(int val, char **err); /* Optional function to check validity of new value (generic doc above) */
int (*update_fn)(int val, int prev, char **err); /* Optional function to apply new value at runtime (generic doc above) */
int (*is_valid_fn)(int val, const char **err); /* Optional function to check validity of new value (generic doc above) */
int (*update_fn)(int val, int prev, const char **err); /* Optional function to apply new value at runtime (generic doc above) */
} boolConfigData;
typedef struct stringConfigData {
char **config; /* Pointer to the server config this value is stored in. */
const char *default_value; /* Default value of the config on rewrite. */
int (*is_valid_fn)(char* val, char **err); /* Optional function to check validity of new value (generic doc above) */
int (*update_fn)(char* val, char* prev, char **err); /* Optional function to apply new value at runtime (generic doc above) */
int (*is_valid_fn)(char* val, const char **err); /* Optional function to check validity of new value (generic doc above) */
int (*update_fn)(char* val, char* prev, const char **err); /* Optional function to apply new value at runtime (generic doc above) */
int convert_empty_to_null; /* Boolean indicating if empty strings should
be stored as a NULL value. */
} stringConfigData;
@ -169,8 +169,8 @@ typedef struct stringConfigData {
typedef struct sdsConfigData {
sds *config; /* Pointer to the server config this value is stored in. */
const char *default_value; /* Default value of the config on rewrite. */
int (*is_valid_fn)(sds val, char **err); /* Optional function to check validity of new value (generic doc above) */
int (*update_fn)(sds val, sds prev, char **err); /* Optional function to apply new value at runtime (generic doc above) */
int (*is_valid_fn)(sds val, const char **err); /* Optional function to check validity of new value (generic doc above) */
int (*update_fn)(sds val, sds prev, const char **err); /* Optional function to apply new value at runtime (generic doc above) */
int convert_empty_to_null; /* Boolean indicating if empty SDS strings should
be stored as a NULL value. */
} sdsConfigData;
@ -179,8 +179,8 @@ typedef struct enumConfigData {
int *config; /* The pointer to the server config this value is stored in */
configEnum *enum_value; /* The underlying enum type this data represents */
const int default_value; /* The default value of the config on rewrite */
int (*is_valid_fn)(int val, char **err); /* Optional function to check validity of new value (generic doc above) */
int (*update_fn)(int val, int prev, char **err); /* Optional function to apply new value at runtime (generic doc above) */
int (*is_valid_fn)(int val, const char **err); /* Optional function to check validity of new value (generic doc above) */
int (*update_fn)(int val, int prev, const char **err); /* Optional function to apply new value at runtime (generic doc above) */
} enumConfigData;
typedef enum numericType {
@ -214,8 +214,8 @@ typedef struct numericConfigData {
long long lower_bound; /* The lower bound of this numeric value */
long long upper_bound; /* The upper bound of this numeric value */
const long long default_value; /* The default value of the config on rewrite */
int (*is_valid_fn)(long long val, char **err); /* Optional function to check validity of new value (generic doc above) */
int (*update_fn)(long long val, long long prev, char **err); /* Optional function to apply new value at runtime (generic doc above) */
int (*is_valid_fn)(long long val, const char **err); /* Optional function to check validity of new value (generic doc above) */
int (*update_fn)(long long val, long long prev, const char **err); /* Optional function to apply new value at runtime (generic doc above) */
} numericConfigData;
typedef union typeData {
@ -230,10 +230,10 @@ typedef struct typeInterface {
/* Called on server start, to init the server with default value */
void (*init)(typeData data);
/* Called on server start, should return 1 on success, 0 on error and should set err */
int (*load)(typeData data, sds *argc, int argv, char **err);
int (*load)(typeData data, sds *argc, int argv, const char **err);
/* Called on server startup and CONFIG SET, returns 1 on success, 0 on error
* and can set a verbose err string, update is true when called from CONFIG SET */
int (*set)(typeData data, sds value, int update, char **err);
int (*set)(typeData data, sds value, int update, const char **err);
/* Called on CONFIG GET, required to add output to the client */
void (*get)(client *c, typeData data);
/* Called on CONFIG REWRITE, required to rewrite the config state */
@ -325,7 +325,7 @@ void queueLoadModule(sds path, sds *argv, int argc) {
* server.oom_score_adj_values if valid.
*/
static int updateOOMScoreAdjValues(sds *args, char **err, int apply) {
static int updateOOMScoreAdjValues(sds *args, const char **err, int apply) {
int i;
int values[CONFIG_OOM_COUNT];
@ -385,7 +385,7 @@ void initConfigValues() {
}
void loadServerConfigFromString(char *config) {
char *err = NULL;
const char *err = NULL;
int linenum = 0, totlines, i;
int slaveof_linenum = 0;
sds *lines;
@ -608,7 +608,7 @@ void loadServerConfigFromString(char *config) {
int argc_err;
if (ACLAppendUserForLoading(argv,argc,&argc_err) == C_ERR) {
char buf[1024];
char *errmsg = ACLSetUserStringError();
const char *errmsg = ACLSetUserStringError();
snprintf(buf,sizeof(buf),"Error in user declaration '%s': %s",
argv[argc_err],errmsg);
err = buf;
@ -624,8 +624,7 @@ void loadServerConfigFromString(char *config) {
err = "sentinel directive while not in sentinel mode";
goto loaderr;
}
err = sentinelHandleConfiguration(argv+1,argc-1);
if (err) goto loaderr;
queueSentinelConfig(argv+1,argc-1,linenum,lines[i]);
}
} else {
err = "Bad directive or wrong number of arguments"; goto loaderr;
@ -730,7 +729,7 @@ void configSetCommand(client *c) {
robj *o;
long long ll;
int err;
char *errstr = NULL;
const char *errstr = NULL;
serverAssertWithInfo(c,c->argv[2],sdsEncodedObject(c->argv[2]));
serverAssertWithInfo(c,c->argv[3],sdsEncodedObject(c->argv[3]));
o = c->argv[3];
@ -1221,7 +1220,16 @@ struct rewriteConfigState *rewriteConfigReadOldFile(char *path) {
sdsfree(argv[0]);
argv[0] = alt;
}
rewriteConfigAddLineNumberToOption(state,argv[0],linenum);
/* If this is sentinel config, we use sentinel "sentinel <config>" as option
to avoid messing up the sequence. */
if (server.sentinel_mode && argc > 1 && !strcasecmp(argv[0],"sentinel")) {
sds sentinelOption = sdsempty();
sentinelOption = sdscatfmt(sentinelOption,"%S %S",argv[0],argv[1]);
rewriteConfigAddLineNumberToOption(state,sentinelOption,linenum);
sdsfree(sentinelOption);
} else {
rewriteConfigAddLineNumberToOption(state,argv[0],linenum);
}
sdsfreesplitres(argv,argc);
}
fclose(fp);
@ -1683,7 +1691,7 @@ int rewriteConfigOverwriteFile(char *configfile, sds content) {
if (fsync(fd))
serverLog(LL_WARNING, "Could not sync tmp config file to disk (%s)", strerror(errno));
else if (fchmod(fd, 0644) == -1)
else if (fchmod(fd, 0644 & ~server.umask) == -1)
serverLog(LL_WARNING, "Could not chmod config file (%s)", strerror(errno));
else if (rename(tmp_conffile, configfile) == -1)
serverLog(LL_WARNING, "Could not rename tmp config file (%s)", strerror(errno));
@ -1795,7 +1803,7 @@ static void boolConfigInit(typeData data) {
*data.yesno.config = data.yesno.default_value;
}
static int boolConfigSet(typeData data, sds value, int update, char **err) {
static int boolConfigSet(typeData data, sds value, int update, const char **err) {
int yn = yesnotoi(value);
if (yn == -1) {
*err = "argument must be 'yes' or 'no'";
@ -1836,7 +1844,7 @@ static void stringConfigInit(typeData data) {
*data.string.config = (data.string.convert_empty_to_null && !data.string.default_value) ? NULL : zstrdup(data.string.default_value);
}
static int stringConfigSet(typeData data, sds value, int update, char **err) {
static int stringConfigSet(typeData data, sds value, int update, const char **err) {
if (data.string.is_valid_fn && !data.string.is_valid_fn(value, err))
return 0;
char *prev = *data.string.config;
@ -1863,7 +1871,7 @@ static void sdsConfigInit(typeData data) {
*data.sds.config = (data.sds.convert_empty_to_null && !data.sds.default_value) ? NULL: sdsnew(data.sds.default_value);
}
static int sdsConfigSet(typeData data, sds value, int update, char **err) {
static int sdsConfigSet(typeData data, sds value, int update, const char **err) {
if (data.sds.is_valid_fn && !data.sds.is_valid_fn(value, err))
return 0;
sds prev = *data.sds.config;
@ -1922,7 +1930,7 @@ static void enumConfigInit(typeData data) {
*data.enumd.config = data.enumd.default_value;
}
static int enumConfigSet(typeData data, sds value, int update, char **err) {
static int enumConfigSet(typeData data, sds value, int update, const char **err) {
int enumval = configEnumGetValue(data.enumd.enum_value, value);
if (enumval == INT_MIN) {
sds enumerr = sdsnew("argument must be one of the following: ");
@ -2028,7 +2036,7 @@ static void numericConfigInit(typeData data) {
SET_NUMERIC_TYPE(data.numeric.default_value)
}
static int numericBoundaryCheck(typeData data, long long ll, char **err) {
static int numericBoundaryCheck(typeData data, long long ll, const char **err) {
if (data.numeric.numeric_type == NUMERIC_TYPE_ULONG_LONG ||
data.numeric.numeric_type == NUMERIC_TYPE_UINT ||
data.numeric.numeric_type == NUMERIC_TYPE_SIZE_T) {
@ -2058,7 +2066,7 @@ static int numericBoundaryCheck(typeData data, long long ll, char **err) {
return 1;
}
static int numericConfigSet(typeData data, sds value, int update, char **err) {
static int numericConfigSet(typeData data, sds value, int update, const char **err) {
long long ll, prev = 0;
if (data.numeric.is_memory) {
int memerr;
@ -2196,7 +2204,7 @@ static void numericConfigRewrite(typeData data, const char *name, struct rewrite
} \
}
static int isValidActiveDefrag(int val, char **err) {
static int isValidActiveDefrag(int val, const char **err) {
#ifndef HAVE_DEFRAG
if (val) {
*err = "Active defragmentation cannot be enabled: it "
@ -2212,7 +2220,7 @@ static int isValidActiveDefrag(int val, char **err) {
return 1;
}
static int isValidDBfilename(char *val, char **err) {
static int isValidDBfilename(char *val, const char **err) {
if (!pathIsBaseName(val)) {
*err = "dbfilename can't be a path, just a filename";
return 0;
@ -2220,7 +2228,7 @@ static int isValidDBfilename(char *val, char **err) {
return 1;
}
static int isValidAOFfilename(char *val, char **err) {
static int isValidAOFfilename(char *val, const char **err) {
if (!pathIsBaseName(val)) {
*err = "appendfilename can't be a path, just a filename";
return 0;
@ -2228,7 +2236,26 @@ static int isValidAOFfilename(char *val, char **err) {
return 1;
}
static int updateHZ(long long val, long long prev, char **err) {
/* Validity hook for a proc-title-template string: the value is accepted only
 * when validateProcTitleTemplate() approves it.
 *
 * Returns 1 when the template is accepted. On rejection returns 0 and points
 * *err at a static, human readable explanation. */
static int isValidProcTitleTemplate(char *val, const char **err) {
    if (validateProcTitleTemplate(val)) return 1;

    *err = "template format is invalid or contains unknown variables";
    return 0;
}
/* Runtime-apply hook for the proc title template: re-applies the process
 * title by calling redisSetProcTitle(NULL). Neither the new value nor the
 * previous one is inspected here.
 *
 * Returns 1 on success. If redisSetProcTitle() reports C_ERR, returns 0 and
 * points *err at a static error message. */
static int updateProcTitleTemplate(char *val, char *prev, const char **err) {
    UNUSED(val);
    UNUSED(prev);
    if (redisSetProcTitle(NULL) != C_ERR) return 1;

    *err = "failed to set process title";
    return 0;
}
static int updateHZ(long long val, long long prev, const char **err) {
UNUSED(prev);
UNUSED(err);
/* Hz is more a hint from the user, so we accept values out of range
@ -2240,14 +2267,14 @@ static int updateHZ(long long val, long long prev, char **err) {
return 1;
}
static int updateJemallocBgThread(int val, int prev, char **err) {
static int updateJemallocBgThread(int val, int prev, const char **err) {
UNUSED(prev);
UNUSED(err);
set_jemalloc_bg_thread(val);
return 1;
}
static int updateReplBacklogSize(long long val, long long prev, char **err) {
static int updateReplBacklogSize(long long val, long long prev, const char **err) {
/* resizeReplicationBacklog sets server.repl_backlog_size, and relies on
* being able to tell when the size changes, so restore prev before calling it. */
UNUSED(err);
@ -2256,7 +2283,7 @@ static int updateReplBacklogSize(long long val, long long prev, char **err) {
return 1;
}
static int updateMaxmemory(long long val, long long prev, char **err) {
static int updateMaxmemory(long long val, long long prev, const char **err) {
UNUSED(prev);
UNUSED(err);
if (val) {
@ -2269,7 +2296,7 @@ static int updateMaxmemory(long long val, long long prev, char **err) {
return 1;
}
static int updateGoodSlaves(long long val, long long prev, char **err) {
static int updateGoodSlaves(long long val, long long prev, const char **err) {
UNUSED(val);
UNUSED(prev);
UNUSED(err);
@ -2277,7 +2304,7 @@ static int updateGoodSlaves(long long val, long long prev, char **err) {
return 1;
}
static int updateAppendonly(int val, int prev, char **err) {
static int updateAppendonly(int val, int prev, const char **err) {
UNUSED(prev);
if (val == 0 && server.aof_state != AOF_OFF) {
stopAppendOnly();
@ -2290,7 +2317,7 @@ static int updateAppendonly(int val, int prev, char **err) {
return 1;
}
static int updateSighandlerEnabled(int val, int prev, char **err) {
static int updateSighandlerEnabled(int val, int prev, const char **err) {
UNUSED(err);
UNUSED(prev);
if (val)
@ -2300,7 +2327,7 @@ static int updateSighandlerEnabled(int val, int prev, char **err) {
return 1;
}
static int updateMaxclients(long long val, long long prev, char **err) {
static int updateMaxclients(long long val, long long prev, const char **err) {
/* Try to check if the OS is capable of supporting so many FDs. */
if (val > prev) {
adjustOpenFilesLimit();
@ -2328,7 +2355,7 @@ static int updateMaxclients(long long val, long long prev, char **err) {
return 1;
}
static int updateOOMScoreAdj(int val, int prev, char **err) {
static int updateOOMScoreAdj(int val, int prev, const char **err) {
UNUSED(prev);
if (val) {
@ -2342,7 +2369,7 @@ static int updateOOMScoreAdj(int val, int prev, char **err) {
}
#ifdef USE_OPENSSL
static int updateTlsCfg(char *val, char *prev, char **err) {
static int updateTlsCfg(char *val, char *prev, const char **err) {
UNUSED(val);
UNUSED(prev);
UNUSED(err);
@ -2355,13 +2382,13 @@ static int updateTlsCfg(char *val, char *prev, char **err) {
}
return 1;
}
static int updateTlsCfgBool(int val, int prev, char **err) {
static int updateTlsCfgBool(int val, int prev, const char **err) {
UNUSED(val);
UNUSED(prev);
return updateTlsCfg(NULL, NULL, err);
}
static int updateTlsCfgInt(long long val, long long prev, char **err) {
static int updateTlsCfgInt(long long val, long long prev, const char **err) {
UNUSED(val);
UNUSED(prev);
return updateTlsCfg(NULL, NULL, err);
@ -2380,11 +2407,13 @@ standardConfig configs[] = {
createBoolConfig("rdb-del-sync-files", NULL, MODIFIABLE_CONFIG, server.rdb_del_sync_files, 0, NULL, NULL),
createBoolConfig("activerehashing", NULL, MODIFIABLE_CONFIG, server.activerehashing, 1, NULL, NULL),
createBoolConfig("stop-writes-on-bgsave-error", NULL, MODIFIABLE_CONFIG, server.stop_writes_on_bgsave_err, 1, NULL, NULL),
createBoolConfig("set-proc-title", NULL, IMMUTABLE_CONFIG, server.set_proc_title, 1, NULL, NULL), /* Should setproctitle be used? */
createBoolConfig("dynamic-hz", NULL, MODIFIABLE_CONFIG, server.dynamic_hz, 1, NULL, NULL), /* Adapt hz to # of clients.*/
createBoolConfig("lazyfree-lazy-eviction", NULL, MODIFIABLE_CONFIG, server.lazyfree_lazy_eviction, 0, NULL, NULL),
createBoolConfig("lazyfree-lazy-expire", NULL, MODIFIABLE_CONFIG, server.lazyfree_lazy_expire, 0, NULL, NULL),
createBoolConfig("lazyfree-lazy-server-del", NULL, MODIFIABLE_CONFIG, server.lazyfree_lazy_server_del, 0, NULL, NULL),
createBoolConfig("lazyfree-lazy-user-del", NULL, MODIFIABLE_CONFIG, server.lazyfree_lazy_user_del , 0, NULL, NULL),
createBoolConfig("lazyfree-lazy-user-flush", NULL, MODIFIABLE_CONFIG, server.lazyfree_lazy_user_flush , 0, NULL, NULL),
createBoolConfig("repl-disable-tcp-nodelay", NULL, MODIFIABLE_CONFIG, server.repl_disable_tcp_nodelay, 0, NULL, NULL),
createBoolConfig("repl-diskless-sync", NULL, MODIFIABLE_CONFIG, server.repl_diskless_sync, 0, NULL, NULL),
createBoolConfig("gopher-enabled", NULL, MODIFIABLE_CONFIG, server.gopher_enabled, 0, NULL, NULL),
@ -2425,6 +2454,7 @@ standardConfig configs[] = {
createStringConfig("aof_rewrite_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.aof_rewrite_cpulist, NULL, NULL, NULL),
createStringConfig("bgsave_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, server.bgsave_cpulist, NULL, NULL, NULL),
createStringConfig("ignore-warnings", NULL, MODIFIABLE_CONFIG, ALLOW_EMPTY_STRING, server.ignore_warnings, "", NULL, NULL),
createStringConfig("proc-title-template", NULL, MODIFIABLE_CONFIG, ALLOW_EMPTY_STRING, server.proc_title_template, CONFIG_DEFAULT_PROC_TITLE_TEMPLATE, isValidProcTitleTemplate, updateProcTitleTemplate),
/* SDS Configs */
createSDSConfig("masterauth", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, server.masterauth, NULL, NULL, NULL),

View File

@ -226,7 +226,7 @@ void dbOverwrite(redisDb *db, robj *key, robj *val) {
/* Although the key is not really deleted from the database, we regard
overwrite as two steps of unlink+add, so we still need to call the unlink
callback of the module. */
moduleNotifyKeyUnlink(key,val);
moduleNotifyKeyUnlink(key,old);
dictSetVal(db->dict, de, val);
if (server.lazyfree_lazy_server_del) {
@ -595,21 +595,23 @@ void signalFlushedDb(int dbid, int async) {
/* Return the set of flags to use for the emptyDb() call for FLUSHALL
* and FLUSHDB commands.
*
* Currently the command just attempts to parse the "ASYNC" option. It
* also checks if the command arity is wrong.
* sync: flushes the database in a sync manner.
* async: flushes the database in an async manner.
* no option: determine sync or async according to the value of lazyfree-lazy-user-flush.
*
* On success C_OK is returned and the flags are stored in *flags, otherwise
* C_ERR is returned and the function sends an error to the client. */
int getFlushCommandFlags(client *c, int *flags) {
/* Parse the optional ASYNC option. */
if (c->argc > 1) {
if (c->argc > 2 || strcasecmp(c->argv[1]->ptr,"async")) {
addReplyErrorObject(c,shared.syntaxerr);
return C_ERR;
}
*flags = EMPTYDB_ASYNC;
} else {
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"sync")) {
*flags = EMPTYDB_NO_FLAGS;
} else if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"async")) {
*flags = EMPTYDB_ASYNC;
} else if (c->argc == 1) {
*flags = server.lazyfree_lazy_user_flush ? EMPTYDB_ASYNC : EMPTYDB_NO_FLAGS;
} else {
addReplyErrorObject(c,shared.syntaxerr);
return C_ERR;
}
return C_OK;
}
@ -951,7 +953,7 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) {
int filter = 0;
/* Filter element if it does not match the pattern. */
if (!filter && use_pattern) {
if (use_pattern) {
if (sdsEncodedObject(kobj)) {
if (!stringmatchlen(pat, patlen, kobj->ptr, sdslen(kobj->ptr), 0))
filter = 1;

View File

@ -367,7 +367,7 @@ long activeDefragSdsListAndDict(list *l, dict *d, int dict_val_type) {
} else if (dict_val_type == DEFRAG_SDS_DICT_VAL_VOID_PTR) {
void *newptr, *ptr = dictGetVal(de);
if ((newptr = activeDefragAlloc(ptr)))
ln->value = newptr, defragged++;
de->v.val = newptr, defragged++;
}
defragged += dictIterDefragEntry(di);
}

View File

@ -53,15 +53,19 @@
* to the function to avoid too many gettimeofday() syscalls. */
int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) {
long long t = dictGetSignedIntegerVal(de);
mstime_t expire_latency;
if (now > t) {
sds key = dictGetKey(de);
robj *keyobj = createStringObject(key,sdslen(key));
propagateExpire(db,keyobj,server.lazyfree_lazy_expire);
latencyStartMonitor(expire_latency);
if (server.lazyfree_lazy_expire)
dbAsyncDelete(db,keyobj);
else
dbSyncDelete(db,keyobj);
latencyEndMonitor(expire_latency);
latencyAddSampleIfNeeded("expire-del",expire_latency);
notifyKeyspaceEvent(NOTIFY_EXPIRED,
"expired",keyobj,db->id);
signalModifiedKey(NULL, db, keyobj);
@ -224,7 +228,7 @@ void activeExpireCycle(int type) {
/* When there are less than 1% filled slots, sampling the key
* space is expensive, so stop here waiting for better times...
* The dictionary will be resized asap. */
if (num && slots > DICT_HT_INITIAL_SIZE &&
if (slots > DICT_HT_INITIAL_SIZE &&
(num*100/slots < 1)) break;
/* The main collection cycle. Sample random keys among keys

View File

@ -459,12 +459,12 @@ struct commandHelp {
0,
"1.2.0" },
{ "FLUSHALL",
"[ASYNC]",
"[ASYNC|SYNC]",
"Remove all keys from all databases",
9,
"1.0.0" },
{ "FLUSHDB",
"[ASYNC]",
"[ASYNC|SYNC]",
"Remove all keys from the current database",
9,
"1.0.0" },
@ -518,6 +518,16 @@ struct commandHelp {
"Returns the bit value at offset in the string value stored at key",
1,
"2.2.0" },
{ "GETDEL",
"key",
"Get the value of a key and delete the key",
1,
"6.2.0" },
{ "GETEX",
"key [EX seconds|PX milliseconds|EXAT timestamp|PXAT milliseconds-timestamp|PERSIST]",
"Get the value of a key and optionally set its expiration",
1,
"6.2.0" },
{ "GETRANGE",
"key start end",
"Get a substring of the string stored at a key",
@ -583,6 +593,11 @@ struct commandHelp {
"Set multiple hash fields to multiple values",
5,
"2.0.0" },
{ "HRANDFIELD",
"key [count [WITHVALUES]]",
"Get one or multiple random fields from a hash",
5,
"6.2.0" },
{ "HSCAN",
"key cursor [MATCH pattern] [COUNT count]",
"Incrementally iterate hash fields and associated values",
@ -989,7 +1004,7 @@ struct commandHelp {
10,
"2.6.0" },
{ "SCRIPT FLUSH",
"-",
"[ASYNC|SYNC]",
"Remove all the scripts from the script cache.",
10,
"2.6.0" },
@ -1019,7 +1034,7 @@ struct commandHelp {
8,
"1.0.0" },
{ "SET",
"key value [EX seconds|PX milliseconds|KEEPTTL] [NX|XX] [GET]",
"key value [EX seconds|PX milliseconds|EXAT timestamp|PXAT milliseconds-timestamp|KEEPTTL] [NX|XX] [GET]",
"Set the string value of a key",
1,
"1.0.0" },
@ -1323,6 +1338,11 @@ struct commandHelp {
"Remove and return members with the lowest scores in a sorted set",
4,
"5.0.0" },
{ "ZRANDMEMBER",
"key [count [WITHSCORES]]",
"Get one or multiple random elements from a sorted set",
4,
"6.2.0" },
{ "ZRANGE",
"key min max [BYSCORE|BYLEX] [REV] [LIMIT offset count] [WITHSCORES]",
"Return a range of members in a sorted set",

View File

@ -49,6 +49,14 @@ void lazyFreeTrackingTable(void *args[]) {
atomicIncr(lazyfreed_objects,len);
}
/* Release a Lua scripts dict passed as args[0].
 *
 * The number of entries is sampled before the dict is released, and that
 * same count is then subtracted from lazyfree_objects and added to
 * lazyfreed_objects. */
void lazyFreeLuaScripts(void *args[]) {
    dict *scripts = args[0];
    long long count = dictSize(scripts);

    dictRelease(scripts);
    atomicDecr(lazyfree_objects,count);
    atomicIncr(lazyfreed_objects,count);
}
/* Return the number of currently pending objects to free. */
size_t lazyfreeGetPendingObjectsCount(void) {
size_t aux;
@ -212,3 +220,13 @@ void freeTrackingRadixTreeAsync(rax *tracking) {
atomicIncr(lazyfree_objects,tracking->numele);
bioCreateLazyFreeJob(lazyFreeTrackingTable,1,tracking);
}
/* Release the lua_scripts dict.
 *
 * A dict holding at most LAZYFREE_THRESHOLD entries is released inline.
 * A larger one is first accounted in lazyfree_objects and then handed to a
 * lazy-free job whose callback is lazyFreeLuaScripts(). */
void freeLuaScriptsAsync(dict *lua_scripts) {
    if (dictSize(lua_scripts) <= LAZYFREE_THRESHOLD) {
        dictRelease(lua_scripts);
        return;
    }

    atomicIncr(lazyfree_objects,dictSize(lua_scripts));
    bioCreateLazyFreeJob(lazyFreeLuaScripts,1,lua_scripts);
}

File diff suppressed because it is too large Load Diff

View File

@ -4,16 +4,26 @@
# Convert the C comment to markdown
def markdown(s)
s = s.gsub(/\*\/$/,"")
s = s.gsub(/^ \* {0,1}/,"")
s = s.gsub(/^\/\* /,"")
s = s.gsub(/^ ?\* ?/,"")
s = s.gsub(/^\/\*\*? ?/,"")
s.chop! while s[-1] == "\n" || s[-1] == " "
lines = s.split("\n")
newlines = []
# Fix some markdown, except in code blocks indented by 4 spaces.
lines.each{|l|
if l[0] != ' '
l = l.gsub(/RM_[A-z()]+/){|x| "`#{x}`"}
l = l.gsub(/RedisModule_[A-z()]+/){|x| "`#{x}`"}
l = l.gsub(/REDISMODULE_[A-z]+/){|x| "`#{x}`"}
if not l.start_with?(' ')
# Rewrite RM_Xyz() to `RedisModule_Xyz()`. The () suffix is
# optional. Even RM_Xyz*() with * as wildcard is handled.
l = l.gsub(/(?<!`)RM_([A-z]+(?:\*?\(\))?)/, '`RedisModule_\1`')
# Add backquotes around RedisModule functions and type where missing.
l = l.gsub(/(?<!`)RedisModule[A-z]+(?:\*?\(\))?/){|x| "`#{x}`"}
# Add backquotes around c functions like malloc() where missing.
l = l.gsub(/(?<![`A-z])[a-z_]+\(\)/, '`\0`')
# Add backquotes around macro and var names containing underscores.
l = l.gsub(/(?<![`A-z\*])[A-Za-z]+_[A-Za-z0-9_]+/){|x| "`#{x}`"}
# Link URLs preceded by space (i.e. when not already linked)
l = l.gsub(/ (https?:\/\/[A-Za-z0-9_\/\.\-]+[A-Za-z0-9\/])/,
' [\1](\1)')
end
newlines << l
}
@ -41,6 +51,7 @@ def docufy(src,i)
end
puts "# Modules API reference\n\n"
puts "<!-- This file is generated from module.c using gendoc.rb -->\n\n"
src = File.open("../module.c").to_a
src.each_with_index{|line,i|
if line =~ /RM_/ && line[0] != ' ' && line[0] != '#' && line[0] != '/'

View File

@ -1104,6 +1104,7 @@ void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
"Accepting client connection: %s", server.neterr);
return;
}
anetCloexec(cfd);
serverLog(LL_VERBOSE,"Accepted %s:%d", cip, cport);
acceptCommonHandler(connCreateAcceptedSocket(cfd),0,cip);
}
@ -1124,6 +1125,7 @@ void acceptTLSHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
"Accepting client connection: %s", server.neterr);
return;
}
anetCloexec(cfd);
serverLog(LL_VERBOSE,"Accepted %s:%d", cip, cport);
acceptCommonHandler(connCreateAcceptedTLS(cfd, server.tls_auth_clients),0,cip);
}
@ -1143,6 +1145,7 @@ void acceptUnixHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
"Accepting client connection: %s", server.neterr);
return;
}
anetCloexec(cfd);
serverLog(LL_VERBOSE,"Accepted connection to %s", server.unixsocket);
acceptCommonHandler(connCreateAcceptedSocket(cfd),CLIENT_UNIX_SOCKET,NULL);
}
@ -1707,7 +1710,7 @@ int processInlineBuffer(client *c) {
}
/* Handle the \r\n case. */
if (newline && newline != c->querybuf+c->qb_pos && *(newline-1) == '\r')
if (newline != c->querybuf+c->qb_pos && *(newline-1) == '\r')
newline--, linefeed_chars++;
/* Split the input buffer up to the \r\n */
@ -2436,8 +2439,10 @@ void clientCommand(client *c) {
" Kill connection made from <ip:port>.",
"KILL <option> <value> [<option> <value> [...]]",
" Kill connections. Options are:",
" * ADDR <ip:port>",
" Kill connection made from <ip:port>",
" * ADDR (<ip:port>|<unixsocket>:0)",
" Kill connections made from the specified address",
" * LADDR (<ip:port>|<unixsocket>:0)",
" Kill connections made to specified local address",
" * TYPE (normal|master|replica|pubsub)",
" Kill connections by type.",
" * USER <username>",
@ -2675,7 +2680,7 @@ NULL
c->argc == 4))
{
/* CLIENT PAUSE TIMEOUT [WRITE|ALL] */
long long duration;
mstime_t end;
int type = CLIENT_PAUSE_ALL;
if (c->argc == 4) {
if (!strcasecmp(c->argv[3]->ptr,"write")) {
@ -2689,9 +2694,9 @@ NULL
}
}
if (getTimeoutFromObjectOrReply(c,c->argv[2],&duration,
if (getTimeoutFromObjectOrReply(c,c->argv[2],&end,
UNIT_MILLISECONDS) != C_OK) return;
pauseClients(duration, type);
pauseClients(end, type);
addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"tracking") && c->argc >= 3) {
/* CLIENT TRACKING (on|off) [REDIRECT <id>] [BCAST] [PREFIX first]
@ -3355,8 +3360,6 @@ void processEventsWhileBlocked(void) {
* Threaded I/O
* ========================================================================== */
int tio_debug = 0;
#define IO_THREADS_MAX_NUM 128
#define IO_THREADS_OP_READ 0
#define IO_THREADS_OP_WRITE 1
@ -3407,8 +3410,6 @@ void *IOThreadMain(void *myid) {
serverAssert(getIOPendingCount(id) != 0);
if (tio_debug) printf("[%ld] %d to handle\n", id, (int)listLength(io_threads_list[id]));
/* Process: note that the main thread will never touch our list
* before we drop the pending count to 0. */
listIter li;
@ -3426,8 +3427,6 @@ void *IOThreadMain(void *myid) {
}
listEmpty(io_threads_list[id]);
setIOPendingCount(id, 0);
if (tio_debug) printf("[%ld] Done\n", id);
}
}
@ -3482,8 +3481,6 @@ void killIOThreads(void) {
}
void startThreadedIO(void) {
if (tio_debug) { printf("S"); fflush(stdout); }
if (tio_debug) printf("--- STARTING THREADED IO ---\n");
serverAssert(server.io_threads_active == 0);
for (int j = 1; j < server.io_threads_num; j++)
pthread_mutex_unlock(&io_threads_mutex[j]);
@ -3494,10 +3491,6 @@ void stopThreadedIO(void) {
/* We may have still clients with pending reads when this function
* is called: handle them before stopping the threads. */
handleClientsWithPendingReadsUsingThreads();
if (tio_debug) { printf("E"); fflush(stdout); }
if (tio_debug) printf("--- STOPPING THREADED IO [R%d] [W%d] ---\n",
(int) listLength(server.clients_pending_read),
(int) listLength(server.clients_pending_write));
serverAssert(server.io_threads_active == 1);
for (int j = 1; j < server.io_threads_num; j++)
pthread_mutex_lock(&io_threads_mutex[j]);
@ -3540,8 +3533,6 @@ int handleClientsWithPendingWritesUsingThreads(void) {
/* Start threads if needed. */
if (!server.io_threads_active) startThreadedIO();
if (tio_debug) printf("%d TOTAL WRITE pending clients\n", processed);
/* Distribute the clients across N different lists. */
listIter li;
listNode *ln;
@ -3586,7 +3577,6 @@ int handleClientsWithPendingWritesUsingThreads(void) {
pending += getIOPendingCount(j);
if (pending == 0) break;
}
if (tio_debug) printf("I/O WRITE All threads finshed\n");
/* Run the list of clients again to install the write handler where
* needed. */
@ -3639,8 +3629,6 @@ int handleClientsWithPendingReadsUsingThreads(void) {
int processed = listLength(server.clients_pending_read);
if (processed == 0) return 0;
if (tio_debug) printf("%d TOTAL READ pending clients\n", processed);
/* Distribute the clients across N different lists. */
listIter li;
listNode *ln;
@ -3676,7 +3664,6 @@ int handleClientsWithPendingReadsUsingThreads(void) {
pending += getIOPendingCount(j);
if (pending == 0) break;
}
if (tio_debug) printf("I/O READ All threads finshed\n");
/* Run the list of clients again to process the new buffers. */
while(listLength(server.clients_pending_read)) {

View File

@ -5301,7 +5301,7 @@ static clusterManagerNode *clusterNodeForResharding(char *id,
clusterManagerLogErr(invalid_node_msg, id);
*raise_err = 1;
return NULL;
} else if (node != NULL && target != NULL) {
} else if (target != NULL) {
if (!strcmp(node->name, target->name)) {
clusterManagerLogErr( "*** It is not possible to use "
"the target node as "
@ -6940,6 +6940,10 @@ void sendCapa() {
sendReplconf("capa", "eof");
}
/* Send the "rdb-only" option with value "1" through sendReplconf().
 * NOTE(review): presumably this tells the server that this client only wants
 * the RDB payload -- confirm against the server-side REPLCONF handling. */
void sendRdbOnly(void) {
sendReplconf("rdb-only", "1");
}
/* Read raw bytes through a redisContext. The read operation is not greedy
* and may not fill the buffer entirely.
*/
@ -7137,7 +7141,6 @@ static void getRDB(clusterManagerNode *node) {
node->context = NULL;
fsync(fd);
close(fd);
fprintf(stderr,"Transfer finished with success.\n");
if (node) {
sdsfree(filename);
return;
@ -8258,6 +8261,7 @@ int main(int argc, char **argv) {
if (config.getrdb_mode) {
if (cliConnect(0) == REDIS_ERR) exit(1);
sendCapa();
sendRdbOnly();
getRDB(NULL);
}

View File

@ -69,6 +69,20 @@
#define REDISMODULE_HASH_CFIELDS (1<<2)
#define REDISMODULE_HASH_EXISTS (1<<3)
/* StreamID type: a stream entry ID expressed as a pair of unsigned 64 bit
 * integers. */
typedef struct RedisModuleStreamID {
uint64_t ms; /* NOTE(review): presumably the millisecond-time part of the ID -- confirm. */
uint64_t seq; /* NOTE(review): presumably the sequence part of the ID -- confirm. */
} RedisModuleStreamID;
/* StreamAdd() flags. */
#define REDISMODULE_STREAM_ADD_AUTOID (1<<0)
/* StreamIteratorStart() flags. */
#define REDISMODULE_STREAM_ITERATOR_EXCLUSIVE (1<<0)
#define REDISMODULE_STREAM_ITERATOR_REVERSE (1<<1)
/* StreamIteratorTrim*() flags. */
#define REDISMODULE_STREAM_TRIM_APPROX (1<<0)
/* Context Flags: Info about the current context returned by
* RM_GetContextFlags(). */
@ -216,9 +230,8 @@ typedef uint64_t RedisModuleTimerID;
#define REDISMODULE_EVENT_LOADING_PROGRESS 10
#define REDISMODULE_EVENT_SWAPDB 11
#define REDISMODULE_EVENT_REPL_BACKUP 12
/* Next event flag, should be updated if a new event added. */
#define _REDISMODULE_EVENT_NEXT 13
#define REDISMODULE_EVENT_FORK_CHILD 13
#define _REDISMODULE_EVENT_NEXT 14 /* Next event flag, should be updated if a new event added. */
typedef struct RedisModuleEvent {
uint64_t id; /* REDISMODULE_EVENT_... defines. */
@ -281,6 +294,10 @@ static const RedisModuleEvent
RedisModuleEvent_ReplBackup = {
REDISMODULE_EVENT_REPL_BACKUP,
1
},
RedisModuleEvent_ForkChild = {
REDISMODULE_EVENT_FORK_CHILD,
1
};
/* Those are values that are used for the 'subevent' callback argument. */
@ -331,6 +348,10 @@ static const RedisModuleEvent
#define REDISMODULE_SUBEVENT_REPL_BACKUP_DISCARD 2
#define _REDISMODULE_SUBEVENT_REPL_BACKUP_NEXT 3
#define REDISMODULE_SUBEVENT_FORK_CHILD_BORN 0
#define REDISMODULE_SUBEVENT_FORK_CHILD_DIED 1
#define _REDISMODULE_SUBEVENT_FORK_CHILD_NEXT 2
#define _REDISMODULE_SUBEVENT_SHUTDOWN_NEXT 0
#define _REDISMODULE_SUBEVENT_CRON_LOOP_NEXT 0
#define _REDISMODULE_SUBEVENT_SWAPDB_NEXT 0
@ -578,6 +599,7 @@ REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromLongLong)(Redi
REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromDouble)(RedisModuleCtx *ctx, double d) REDISMODULE_ATTR;
REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromLongDouble)(RedisModuleCtx *ctx, long double ld, int humanfriendly) REDISMODULE_ATTR;
REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromString)(RedisModuleCtx *ctx, const RedisModuleString *str) REDISMODULE_ATTR;
REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromStreamID)(RedisModuleCtx *ctx, const RedisModuleStreamID *id) REDISMODULE_ATTR;
REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringPrintf)(RedisModuleCtx *ctx, const char *fmt, ...) REDISMODULE_ATTR_PRINTF(2,3) REDISMODULE_ATTR;
REDISMODULE_API void (*RedisModule_FreeString)(RedisModuleCtx *ctx, RedisModuleString *str) REDISMODULE_ATTR;
REDISMODULE_API const char * (*RedisModule_StringPtrLen)(const RedisModuleString *str, size_t *len) REDISMODULE_ATTR;
@ -599,6 +621,7 @@ REDISMODULE_API int (*RedisModule_ReplyWithCallReply)(RedisModuleCtx *ctx, Redis
REDISMODULE_API int (*RedisModule_StringToLongLong)(const RedisModuleString *str, long long *ll) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_StringToDouble)(const RedisModuleString *str, double *d) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_StringToLongDouble)(const RedisModuleString *str, long double *d) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_StringToStreamID)(const RedisModuleString *str, RedisModuleStreamID *id) REDISMODULE_ATTR;
REDISMODULE_API void (*RedisModule_AutoMemory)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_Replicate)(RedisModuleCtx *ctx, const char *cmdname, const char *fmt, ...) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_ReplicateVerbatim)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
@ -629,6 +652,15 @@ REDISMODULE_API int (*RedisModule_ZsetRangePrev)(RedisModuleKey *key) REDISMODUL
REDISMODULE_API int (*RedisModule_ZsetRangeEndReached)(RedisModuleKey *key) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_HashSet)(RedisModuleKey *key, int flags, ...) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_HashGet)(RedisModuleKey *key, int flags, ...) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_StreamAdd)(RedisModuleKey *key, int flags, RedisModuleStreamID *id, RedisModuleString **argv, int64_t numfields) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_StreamDelete)(RedisModuleKey *key, RedisModuleStreamID *id) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_StreamIteratorStart)(RedisModuleKey *key, int flags, RedisModuleStreamID *startid, RedisModuleStreamID *endid) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_StreamIteratorStop)(RedisModuleKey *key) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_StreamIteratorNextID)(RedisModuleKey *key, RedisModuleStreamID *id, long *numfields) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_StreamIteratorNextField)(RedisModuleKey *key, RedisModuleString **field_ptr, RedisModuleString **value_ptr) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_StreamIteratorDelete)(RedisModuleKey *key) REDISMODULE_ATTR;
REDISMODULE_API long long (*RedisModule_StreamTrimByLength)(RedisModuleKey *key, int flags, long long length) REDISMODULE_ATTR;
REDISMODULE_API long long (*RedisModule_StreamTrimByID)(RedisModuleKey *key, int flags, RedisModuleStreamID *id) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_IsKeysPositionRequest)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
REDISMODULE_API void (*RedisModule_KeyAtPos)(RedisModuleCtx *ctx, int pos) REDISMODULE_ATTR;
REDISMODULE_API unsigned long long (*RedisModule_GetClientId)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
@ -744,6 +776,8 @@ REDISMODULE_API int (*RedisModule_IsBlockedTimeoutRequest)(RedisModuleCtx *ctx)
REDISMODULE_API void * (*RedisModule_GetBlockedClientPrivateData)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
REDISMODULE_API RedisModuleBlockedClient * (*RedisModule_GetBlockedClientHandle)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_AbortBlock)(RedisModuleBlockedClient *bc) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_BlockedClientMeasureTimeStart)(RedisModuleBlockedClient *bc) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_BlockedClientMeasureTimeEnd)(RedisModuleBlockedClient *bc) REDISMODULE_ATTR;
REDISMODULE_API RedisModuleCtx * (*RedisModule_GetThreadSafeContext)(RedisModuleBlockedClient *bc) REDISMODULE_ATTR;
REDISMODULE_API RedisModuleCtx * (*RedisModule_GetDetachedThreadSafeContext)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
REDISMODULE_API void (*RedisModule_FreeThreadSafeContext)(RedisModuleCtx *ctx) REDISMODULE_ATTR;
@ -842,6 +876,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int
REDISMODULE_GET_API(StringToLongLong);
REDISMODULE_GET_API(StringToDouble);
REDISMODULE_GET_API(StringToLongDouble);
REDISMODULE_GET_API(StringToStreamID);
REDISMODULE_GET_API(Call);
REDISMODULE_GET_API(CallReplyProto);
REDISMODULE_GET_API(FreeCallReply);
@ -856,6 +891,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int
REDISMODULE_GET_API(CreateStringFromDouble);
REDISMODULE_GET_API(CreateStringFromLongDouble);
REDISMODULE_GET_API(CreateStringFromString);
REDISMODULE_GET_API(CreateStringFromStreamID);
REDISMODULE_GET_API(CreateStringPrintf);
REDISMODULE_GET_API(FreeString);
REDISMODULE_GET_API(StringPtrLen);
@ -887,6 +923,15 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int
REDISMODULE_GET_API(ZsetRangeEndReached);
REDISMODULE_GET_API(HashSet);
REDISMODULE_GET_API(HashGet);
REDISMODULE_GET_API(StreamAdd);
REDISMODULE_GET_API(StreamDelete);
REDISMODULE_GET_API(StreamIteratorStart);
REDISMODULE_GET_API(StreamIteratorStop);
REDISMODULE_GET_API(StreamIteratorNextID);
REDISMODULE_GET_API(StreamIteratorNextField);
REDISMODULE_GET_API(StreamIteratorDelete);
REDISMODULE_GET_API(StreamTrimByLength);
REDISMODULE_GET_API(StreamTrimByID);
REDISMODULE_GET_API(IsKeysPositionRequest);
REDISMODULE_GET_API(KeyAtPos);
REDISMODULE_GET_API(GetClientId);
@ -1006,6 +1051,8 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int
REDISMODULE_GET_API(GetBlockedClientPrivateData);
REDISMODULE_GET_API(GetBlockedClientHandle);
REDISMODULE_GET_API(AbortBlock);
REDISMODULE_GET_API(BlockedClientMeasureTimeStart);
REDISMODULE_GET_API(BlockedClientMeasureTimeEnd);
REDISMODULE_GET_API(SetDisconnectCallback);
REDISMODULE_GET_API(SubscribeToKeyspaceEvents);
REDISMODULE_GET_API(NotifyKeyspaceEvent);

View File

@ -200,6 +200,16 @@ void feedReplicationBacklogWithObject(robj *o) {
feedReplicationBacklog(p,len);
}
/* Tell whether the replication stream may be appended to the output buffer
 * of 'replica'. Returns 1 if it may, 0 if the replica has to be skipped. */
int canFeedReplicaReplBuffer(client *replica) {
    /* Replicas flagged CLIENT_REPL_RDBONLY only want the RDB. */
    int rdb_only = (replica->flags & CLIENT_REPL_RDBONLY) != 0;
    /* Replicas still waiting for the BGSAVE to start are not fed either. */
    int waiting_bgsave = (replica->replstate == SLAVE_STATE_WAIT_BGSAVE_START);

    return !(rdb_only || waiting_bgsave);
}
/* Propagate write commands to slaves, and populate the replication backlog
* as well. This function is used if the instance is a master: we use
* the commands received by our clients in order to create the replication
@ -249,7 +259,8 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
listRewind(slaves,&li);
while((ln = listNext(&li))) {
client *slave = ln->value;
if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) continue;
if (!canFeedReplicaReplBuffer(slave)) continue;
addReply(slave,selectcmd);
}
@ -290,8 +301,7 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
while((ln = listNext(&li))) {
client *slave = ln->value;
/* Don't feed slaves that are still waiting for BGSAVE to start. */
if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) continue;
if (!canFeedReplicaReplBuffer(slave)) continue;
/* Feed slaves that are waiting for the initial SYNC (so these commands
* are queued in the output buffer until the initial SYNC completes),
@ -363,8 +373,7 @@ void replicationFeedSlavesFromMasterStream(list *slaves, char *buf, size_t bufle
while((ln = listNext(&li))) {
client *slave = ln->value;
/* Don't feed slaves that are still waiting for BGSAVE to start. */
if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) continue;
if (!canFeedReplicaReplBuffer(slave)) continue;
addReplyProto(slave,buf,buflen);
}
}
@ -712,6 +721,36 @@ void syncCommand(client *c) {
/* ignore SYNC if already slave or in monitor mode */
if (c->flags & CLIENT_SLAVE) return;
/* Check if this is a failover request to a replica with the same replid and
* become a master if so. */
if (c->argc > 3 && !strcasecmp(c->argv[0]->ptr,"psync") &&
!strcasecmp(c->argv[3]->ptr,"failover"))
{
serverLog(LL_WARNING, "Failover request received for replid %s.",
(unsigned char *)c->argv[1]->ptr);
if (!server.masterhost) {
addReplyError(c, "PSYNC FAILOVER can't be sent to a master.");
return;
}
if (!strcasecmp(c->argv[1]->ptr,server.replid)) {
replicationUnsetMaster();
sds client = catClientInfoString(sdsempty(),c);
serverLog(LL_NOTICE,
"MASTER MODE enabled (failover request from '%s')",client);
sdsfree(client);
} else {
addReplyError(c, "PSYNC FAILOVER replid must match my replid.");
return;
}
}
/* Don't let replicas sync with us while we're failing over */
if (server.failover_state != NO_FAILOVER) {
addReplyError(c,"-NOMASTERLINK Can't SYNC while failing over");
return;
}
/* Refuse SYNC requests if we are a slave but the link with our master
* is not ok... */
if (server.masterhost && server.repl_state != REPL_STATE_CONNECTED) {
@ -799,14 +838,20 @@ void syncCommand(client *c) {
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
slave = ln->value;
if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END) break;
/* If the client needs a buffer of commands, we can't use
* a replica without replication buffer. */
if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END &&
(!(slave->flags & CLIENT_REPL_RDBONLY) ||
(c->flags & CLIENT_REPL_RDBONLY)))
break;
}
/* To attach this slave, we check that it has at least all the
* capabilities of the slave that triggered the current BGSAVE. */
if (ln && ((c->slave_capa & slave->slave_capa) == slave->slave_capa)) {
/* Perfect, the server is already registering differences for
* another slave. Set the right state, and copy the buffer. */
copyClientOutputBuffer(c,slave);
* another slave. Set the right state, and copy the buffer.
* We don't copy buffer if clients don't want. */
if (!(c->flags & CLIENT_REPL_RDBONLY)) copyClientOutputBuffer(c,slave);
replicationSetupSlaveForFullResync(c,slave->psync_initial_offset);
serverLog(LL_NOTICE,"Waiting for end of BGSAVE for SYNC");
} else {
@ -925,6 +970,15 @@ void replconfCommand(client *c) {
* to the slave. */
if (server.masterhost && server.master) replicationSendAck();
return;
} else if (!strcasecmp(c->argv[j]->ptr,"rdb-only")) {
/* REPLCONF RDB-ONLY is used to identify the client only wants
* RDB snapshot without replication buffer. */
long rdb_only = 0;
if (getRangeLongFromObjectOrReply(c,c->argv[j+1],
0,1,&rdb_only,NULL) != C_OK)
return;
if (rdb_only == 1) c->flags |= CLIENT_REPL_RDBONLY;
else c->flags &= ~CLIENT_REPL_RDBONLY;
} else {
addReplyErrorFormat(c,"Unrecognized REPLCONF option: %s",
(char*)c->argv[j]->ptr);
@ -939,19 +993,28 @@ void replconfCommand(client *c) {
* we are finally ready to send the incremental stream of commands.
*
* It does a few things:
*
* 1) Put the slave in ONLINE state. Note that the function may also be called
* 1) Close the replica's connection async if it doesn't need replication
* commands buffer stream, since it actually isn't a valid replica.
* 2) Put the slave in ONLINE state. Note that the function may also be called
* for a replicas that are already in ONLINE state, but having the flag
* repl_put_online_on_ack set to true: we still have to install the write
* handler in that case. This function will take care of that.
* 2) Make sure the writable event is re-installed, since calling the SYNC
* 3) Make sure the writable event is re-installed, since calling the SYNC
* command disables it, so that we can accumulate output buffer without
* sending it to the replica.
* 3) Update the count of "good replicas". */
* 4) Update the count of "good replicas". */
void putSlaveOnline(client *slave) {
slave->replstate = SLAVE_STATE_ONLINE;
slave->repl_put_online_on_ack = 0;
slave->repl_ack_time = server.unixtime; /* Prevent false timeout. */
if (slave->flags & CLIENT_REPL_RDBONLY) {
serverLog(LL_NOTICE,
"Close the connection with replica %s as RDB transfer is complete",
replicationGetSlaveName(slave));
freeClientAsync(slave);
return;
}
if (connSetWriteHandler(slave->conn, sendReplyToClient) == C_ERR) {
serverLog(LL_WARNING,"Unable to register writable event for replica bulk transfer: %s", strerror(errno));
freeClient(slave);
@ -1998,8 +2061,15 @@ int slaveTryPartialResynchronization(connection *conn, int read_reply) {
memcpy(psync_offset,"-1",3);
}
/* Issue the PSYNC command */
reply = sendCommand(conn,"PSYNC",psync_replid,psync_offset,NULL);
/* Issue the PSYNC command, if this is a master with a failover in
* progress then send the failover argument to the replica to cause it
* to become a master */
if (server.failover_state == FAILOVER_IN_PROGRESS) {
reply = sendCommand(conn,"PSYNC",psync_replid,psync_offset,"FAILOVER",NULL);
} else {
reply = sendCommand(conn,"PSYNC",psync_replid,psync_offset,NULL);
}
if (reply != NULL) {
serverLog(LL_WARNING,"Unable to send PSYNC to master: %s",reply);
sdsfree(reply);
@ -2323,6 +2393,7 @@ void syncWithMaster(connection *conn) {
if (server.repl_state == REPL_STATE_SEND_PSYNC) {
if (slaveTryPartialResynchronization(conn,0) == PSYNC_WRITE_ERROR) {
err = sdsnew("Write error sending the PSYNC command.");
abortFailover("Write error to failover target");
goto write_error;
}
server.repl_state = REPL_STATE_RECEIVE_PSYNC_REPLY;
@ -2340,6 +2411,18 @@ void syncWithMaster(connection *conn) {
psync_result = slaveTryPartialResynchronization(conn,1);
if (psync_result == PSYNC_WAIT_REPLY) return; /* Try again later... */
/* Check the status of the planned failover. We expect PSYNC_CONTINUE,
* but there is nothing technically wrong with a full resync which
* could happen in edge cases. */
if (server.failover_state == FAILOVER_IN_PROGRESS) {
if (psync_result == PSYNC_CONTINUE || psync_result == PSYNC_FULLRESYNC) {
clearFailoverState();
} else {
abortFailover("Failover target rejected psync request");
return;
}
}
/* If the master is in an transient error, we should try to PSYNC
* from scratch later, so go to the error path. This happens when
* the server is loading the dataset or is not connected with its
@ -2645,6 +2728,11 @@ void replicaofCommand(client *c) {
return;
}
if (server.failover_state != NO_FAILOVER) {
addReplyError(c,"REPLICAOF not allowed while failing over.");
return;
}
/* The special host/port combination "NO" "ONE" turns the instance
* into a master. Otherwise the new master address is set. */
if (!strcasecmp(c->argv[1]->ptr,"no") &&
@ -3178,6 +3266,10 @@ long long replicationGetSlaveOffset(void) {
void replicationCron(void) {
static long long replication_cron_loops = 0;
/* Check failover status first, to see if we need to start
* handling the failover. */
updateFailoverStatus();
/* Non blocking connection timeout? */
if (server.masterhost &&
(server.repl_state == REPL_STATE_CONNECTING ||
@ -3235,8 +3327,9 @@ void replicationCron(void) {
* alter the replication offsets of master and slave, and will no longer
* match the one stored into 'mf_master_offset' state. */
int manual_failover_in_progress =
server.cluster_enabled &&
server.cluster->mf_end &&
((server.cluster_enabled &&
server.cluster->mf_end) ||
server.failover_end_time) &&
checkClientPauseTimeoutAndReturnIfPaused();
if (!manual_failover_in_progress) {
@ -3390,3 +3483,271 @@ void replicationStartPendingFork(void) {
}
}
}
/* Scan server.slaves for the replica whose address matches 'host'
 * (case-insensitively) and whose slave_listening_port equals 'port'.
 * Returns the matching client, or NULL when there is none. */
static client *findReplica(char *host, int port) {
    listIter iter;
    listNode *node;

    listRewind(server.slaves,&iter);
    for (node = listNext(&iter); node != NULL; node = listNext(&iter)) {
        client *candidate = node->value;
        char peer[NET_IP_STR_LEN];
        char *addr = candidate->slave_ip;

        /* When slave_ip is empty use the peer address of the connection
         * instead; a replica whose peer address can't be obtained is
         * skipped. */
        if (addr[0] == '\0') {
            if (connPeerToString(candidate->conn, peer, sizeof(peer), NULL) == -1)
                continue;
            addr = peer;
        }

        if (port != candidate->slave_listening_port) continue;
        if (strcasecmp(host, addr) == 0) return candidate;
    }
    return NULL;
}
/* Map server.failover_state to a static, human readable name.
 * Any value that is not one of the three known states yields "unknown". */
const char *getFailoverStateString() {
    if (server.failover_state == NO_FAILOVER)
        return "no-failover";
    if (server.failover_state == FAILOVER_IN_PROGRESS)
        return "failover-in-progress";
    if (server.failover_state == FAILOVER_WAIT_FOR_SYNC)
        return "waiting-for-sync";
    return "unknown";
}
/* Bring the failover bookkeeping back to its idle values. It has to be
 * called when a failover ends, successfully or not, because it is also
 * the place where the clients paused for the failover are unpaused. */
void clearFailoverState() {
    /* Forget the target replica, releasing the host string (it is a
     * zstrdup()ed copy owned by the server struct). */
    zfree(server.target_replica_host);
    server.target_replica_host = NULL;
    server.target_replica_port = 0;

    /* Drop the deadline and the FORCE request, then go back to idle. */
    server.force_failover = 0;
    server.failover_end_time = 0;
    server.failover_state = NO_FAILOVER;

    unpauseClients();
}
/* Abort an ongoing failover if one is going on. */
void abortFailover(const char *err) {
if (server.failover_state == NO_FAILOVER) return;
if (server.target_replica_host) {
serverLog(LL_NOTICE,"FAILOVER to %s:%d aborted: %s",
server.target_replica_host,server.target_replica_port,err);
} else {
serverLog(LL_NOTICE,"FAILOVER to any replica aborted: %s",err);
}
if (server.failover_state == FAILOVER_IN_PROGRESS) {
replicationUnsetMaster();
}
clearFailoverState();
}
/*
 * FAILOVER [TO <HOST> <PORT> [FORCE]] [ABORT] [TIMEOUT <timeout>]
 *
 * This command will coordinate a failover between the master and one
 * of its replicas. The happy path contains the following steps:
 * 1) The master will initiate a client pause write, to stop replication
 *    traffic.
 * 2) The master will periodically check if any of its replicas has
 *    consumed the entire replication stream through acks.
 * 3) Once any replica has caught up, the master will itself become a replica.
 * 4) The master will send a PSYNC FAILOVER request to the target replica,
 *    which if accepted will cause the replica to become the new master and
 *    start a sync.
 *
 * FAILOVER ABORT is the only way to abort a failover command, as replicaof
 * will be disabled. This may be needed if the failover is unable to progress.
 *
 * The optional arguments [TO <HOST> <PORT>] allow designating a specific
 * replica to be failed over to.
 *
 * FORCE flag indicates that even if the target replica is not caught up,
 * failover to it anyway. This must be specified with a timeout and a target
 * HOST and PORT.
 *
 * TIMEOUT <timeout> indicates how long, in milliseconds, the primary should
 * wait for a replica to sync up before aborting. If not specified, the
 * failover will attempt forever and must be manually aborted.
 */
void failoverCommand(client *c) {
    if (server.cluster_enabled) {
        addReplyError(c,"FAILOVER not allowed in cluster mode. "
                        "Use CLUSTER FAILOVER command instead.");
        return;
    }

    /* Handle special case for abort */
    if ((c->argc == 2) && !strcasecmp(c->argv[1]->ptr,"abort")) {
        if (server.failover_state == NO_FAILOVER) {
            addReplyError(c, "No failover in progress.");
            return;
        }

        abortFailover("Failover manually aborted");
        addReply(c,shared.ok);
        return;
    }

    long timeout_in_ms = 0;
    int force_flag = 0;
    long port = 0;
    char *host = NULL;

    /* Parse the command for syntax and arguments. Every option may be given
     * at most once: a repeated option, or an option missing its values,
     * falls through to the syntax error branch. */
    for (int j = 1; j < c->argc; j++) {
        if (!strcasecmp(c->argv[j]->ptr,"timeout") && (j + 1 < c->argc) &&
            timeout_in_ms == 0)
        {
            if (getLongFromObjectOrReply(c,c->argv[j + 1],
                        &timeout_in_ms,NULL) != C_OK) return;
            if (timeout_in_ms <= 0) {
                addReplyError(c,"FAILOVER timeout must be greater than 0");
                return;
            }
            j++;
        } else if (!strcasecmp(c->argv[j]->ptr,"to") && (j + 2 < c->argc) &&
                   !host)
        {
            /* The port is later narrowed to an int (findReplica() takes an
             * int), so refuse values that are not valid port numbers
             * instead of letting them be silently truncated. */
            if (getRangeLongFromObjectOrReply(c,c->argv[j + 2],
                        0,65535,&port,NULL) != C_OK)
                return;
            host = c->argv[j + 1]->ptr;
            j += 2;
        } else if (!strcasecmp(c->argv[j]->ptr,"force") && !force_flag) {
            force_flag = 1;
        } else {
            addReplyErrorObject(c,shared.syntaxerr);
            return;
        }
    }

    if (server.failover_state != NO_FAILOVER) {
        addReplyError(c,"FAILOVER already in progress.");
        return;
    }

    if (server.masterhost) {
        addReplyError(c,"FAILOVER is not valid when server is a replica.");
        return;
    }

    if (listLength(server.slaves) == 0) {
        addReplyError(c,"FAILOVER requires connected replicas.");
        return;
    }

    if (force_flag && (!timeout_in_ms || !host)) {
        addReplyError(c,"FAILOVER with force option requires both a timeout "
                        "and target HOST and PORT.");
        return;
    }

    /* If a replica address was provided, validate that it is connected. */
    if (host) {
        client *replica = findReplica(host, port);

        if (replica == NULL) {
            addReplyError(c,"FAILOVER target HOST and PORT is not "
                            "a replica.");
            return;
        }

        /* Check if requested replica is online */
        if (replica->replstate != SLAVE_STATE_ONLINE) {
            addReplyError(c,"FAILOVER target replica is not online.");
            return;
        }

        /* Keep a private copy: 'host' points into the command arguments. */
        server.target_replica_host = zstrdup(host);
        server.target_replica_port = port;
        serverLog(LL_NOTICE,"FAILOVER requested to %s:%ld.",host,port);
    } else {
        serverLog(LL_NOTICE,"FAILOVER requested to any replica.");
    }

    mstime_t now = mstime();
    if (timeout_in_ms) {
        server.failover_end_time = now + timeout_in_ms;
    }

    server.force_failover = force_flag;
    server.failover_state = FAILOVER_WAIT_FOR_SYNC;
    /* Pause writes without a deadline: the pause is lifted by
     * clearFailoverState() once the failover completes or is aborted. */
    pauseClients(LLONG_MAX,CLIENT_PAUSE_WRITE);
    addReply(c,shared.ok);
}
/* Failover cron function, checks coordinated failover state.
 *
 * It only acts while the state is FAILOVER_WAIT_FOR_SYNC:
 * - If the failover deadline has passed, either force the failover to the
 *   designated replica (FORCE was given) or abort it.
 * - Otherwise, if the designated replica (or, when no target was given, any
 *   replica) has acknowledged the whole replication stream, start the
 *   failover towards it.
 *
 * Implementation note: The current implementation calls replicationSetMaster()
 * to start the failover request, this has some unintended side effects if the
 * failover doesn't work like blocked clients will be unblocked and replicas will
 * be disconnected. This could be optimized further.
 */
void updateFailoverStatus(void) {
    if (server.failover_state != FAILOVER_WAIT_FOR_SYNC) return;
    mstime_t now = server.mstime;

    /* Check if failover operation has timed out */
    if (server.failover_end_time && server.failover_end_time <= now) {
        if (server.force_failover) {
            serverLog(LL_NOTICE,
                "FAILOVER to %s:%d time out exceeded, failing over.",
                server.target_replica_host, server.target_replica_port);
            server.failover_state = FAILOVER_IN_PROGRESS;
            /* If timeout has expired force a failover if requested. */
            replicationSetMaster(server.target_replica_host,
                server.target_replica_port);
            return;
        } else {
            /* Force was not requested, so timeout. */
            abortFailover("Replica never caught up before timeout");
            return;
        }
    }

    /* Check to see if the replica has caught up so failover can start */
    client *replica = NULL;
    if (server.target_replica_host) {
        replica = findReplica(server.target_replica_host,
                              server.target_replica_port);
    } else {
        listIter li;
        listNode *ln;

        listRewind(server.slaves,&li);
        /* Find any replica that has matched our repl_offset */
        while((ln = listNext(&li))) {
            client *candidate = ln->value;
            if (candidate->repl_ack_off == server.master_repl_offset) {
                char ip[NET_IP_STR_LEN], *replicaip = candidate->slave_ip;

                if (replicaip[0] == '\0') {
                    if (connPeerToString(candidate->conn,ip,sizeof(ip),NULL) == -1)
                        continue;
                    replicaip = ip;
                }

                /* We are now failing over to this specific node. 'replica'
                 * is set only here, once the target address is recorded, so
                 * that a caught-up replica whose address could not be
                 * obtained is never used with a NULL target host below. */
                server.target_replica_host = zstrdup(replicaip);
                server.target_replica_port = candidate->slave_listening_port;
                replica = candidate;
                break;
            }
        }
    }

    /* We've found a replica that is caught up */
    if (replica && (replica->repl_ack_off == server.master_repl_offset)) {
        server.failover_state = FAILOVER_IN_PROGRESS;
        serverLog(LL_NOTICE,
                "Failover target %s:%d is synced, failing over.",
                server.target_replica_host, server.target_replica_port);
        /* Designated replica is caught up, failover to it. */
        replicationSetMaster(server.target_replica_host,
            server.target_replica_port);
    }
}

View File

@ -1282,14 +1282,17 @@ void scriptingInit(int setup) {
/* Release resources related to Lua scripting.
* This function is used in order to reset the scripting environment. */
void scriptingRelease(void) {
dictRelease(server.lua_scripts);
void scriptingRelease(int async) {
if (async)
freeLuaScriptsAsync(server.lua_scripts);
else
dictRelease(server.lua_scripts);
server.lua_scripts_mem = 0;
lua_close(server.lua);
}
void scriptingReset(void) {
scriptingRelease();
void scriptingReset(int async) {
scriptingRelease(async);
scriptingInit(0);
}
@ -1711,8 +1714,12 @@ void scriptCommand(client *c) {
" Set the debug mode for subsequent scripts executed.",
"EXISTS <sha1> [<sha1> ...]",
" Return information about the existence of the scripts in the script cache.",
"FLUSH",
"FLUSH [ASYNC|SYNC]",
" Flush the Lua scripts cache. Very dangerous on replicas.",
" When called without the optional mode argument, the behavior is determined by the",
" lazyfree-lazy-user-flush configuration directive. Valid modes are:",
" * ASYNC: Asynchronously flush the scripts cache.",
" * SYNC: Synchronously flush the scripts cache.",
"KILL",
" Kill the currently executing Lua script.",
"LOAD <script>",
@ -1720,8 +1727,19 @@ void scriptCommand(client *c) {
NULL
};
addReplyHelp(c, help);
} else if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"flush")) {
scriptingReset();
} else if (c->argc >= 2 && !strcasecmp(c->argv[1]->ptr,"flush")) {
int async = 0;
if (c->argc == 3 && !strcasecmp(c->argv[2]->ptr,"sync")) {
async = 0;
} else if (c->argc == 3 && !strcasecmp(c->argv[2]->ptr,"async")) {
async = 1;
} else if (c->argc == 2) {
async = server.lazyfree_lazy_user_flush ? 1 : 0;
} else {
addReplyError(c,"SCRIPT FLUSH only support SYNC|ASYNC option");
return;
}
scriptingReset(async);
addReply(c,shared.ok);
replicationScriptCacheFlush();
server.dirty++; /* Propagating this command is a good idea. */

View File

@ -1157,12 +1157,80 @@ void *sds_malloc(size_t size) { return s_malloc(size); }
void *sds_realloc(void *ptr, size_t size) { return s_realloc(ptr,size); }
void sds_free(void *ptr) { s_free(ptr); }
/* Perform expansion of a template string and return the result as a newly
 * allocated sds.
 *
 * Template variables are specified using curly brackets, e.g. {variable}.
 * An opening bracket can be quoted by repeating it twice. Closing brackets
 * found outside of a variable are copied as they are.
 *
 * For every variable, cb_func is called with the variable name (a temporary
 * sds that is freed right after the call) and cb_arg. The sds it returns is
 * appended to the result and then freed by this function.
 *
 * Returns NULL, after releasing the partial result, when the template ends
 * right after an opening bracket, when a variable has no closing bracket,
 * or when the callback returns NULL.
 */
sds sdstemplate(const char *template, sdstemplate_callback_t cb_func, void *cb_arg)
{
    sds res = sdsempty();
    const char *p = template;

    while (*p) {
        /* Find next variable, copy everything until there */
        const char *sv = strchr(p, '{');
        if (!sv) {
            /* Not found: copy till rest of template and stop */
            res = sdscat(res, p);
            break;
        } else if (sv > p) {
            /* Found: copy anything up to the beginning of the variable */
            res = sdscatlen(res, p, sv - p);
        }

        /* Skip into variable name, handle premature end or quoting */
        sv++;
        if (!*sv) goto error;       /* Premature end of template */
        if (*sv == '{') {
            /* Quoted '{' */
            p = sv + 1;
            res = sdscat(res, "{");
            continue;
        }

        /* Find end of variable name, handle premature end of template */
        const char *ev = strchr(sv, '}');
        if (!ev) goto error;

        /* Pass variable name to callback and obtain value. If callback failed,
         * abort. */
        sds varname = sdsnewlen(sv, ev - sv);
        sds value = cb_func(varname, cb_arg);
        sdsfree(varname);
        if (!value) goto error;

        /* Append value to result and continue. The value is an sds, so use
         * its stored length rather than strlen(): this keeps values that
         * contain NUL bytes intact. */
        res = sdscatsds(res, value);
        sdsfree(value);
        p = ev + 1;
    }

    return res;

error:
    sdsfree(res);
    return NULL;
}
#ifdef REDIS_TEST
#include <stdio.h>
#include <limits.h>
#include "testhelp.h"
#define UNUSED(x) (void)(x)
/* sdstemplate() callback used by the tests: expands "variable1" to "value1"
 * and "variable2" to "value2". Any other name returns NULL, which
 * sdstemplate() treats as an error. 'arg' is not used. */
static sds sdsTestTemplateCallback(sds varname, void *arg) {
    UNUSED(arg);

    if (strcmp(varname, "variable1") == 0) return sdsnew("value1");
    if (strcmp(varname, "variable2") == 0) return sdsnew("value2");
    return NULL;
}
int sdsTest(int argc, char **argv) {
UNUSED(argc);
UNUSED(argv);
@ -1342,6 +1410,30 @@ int sdsTest(int argc, char **argv) {
sdsfree(x);
}
/* Simple template */
x = sdstemplate("v1={variable1} v2={variable2}", sdsTestTemplateCallback, NULL);
test_cond("sdstemplate() normal flow",
memcmp(x,"v1=value1 v2=value2",19) == 0);
sdsfree(x);
/* Template with callback error */
x = sdstemplate("v1={variable1} v3={doesnotexist}", sdsTestTemplateCallback, NULL);
test_cond("sdstemplate() with callback error", x == NULL);
/* Template with empty var name */
x = sdstemplate("v1={", sdsTestTemplateCallback, NULL);
test_cond("sdstemplate() with empty var name", x == NULL);
/* Template with truncated var name */
x = sdstemplate("v1={start", sdsTestTemplateCallback, NULL);
test_cond("sdstemplate() with truncated var name", x == NULL);
/* Template with quoting */
x = sdstemplate("v1={{{variable1}} {{} v2={variable2}", sdsTestTemplateCallback, NULL);
test_cond("sdstemplate() with quoting",
memcmp(x,"v1={value1} {} v2=value2",24) == 0);
sdsfree(x);
}
test_report();
return 0;

View File

@ -253,6 +253,14 @@ sds sdsmapchars(sds s, const char *from, const char *to, size_t setlen);
sds sdsjoin(char **argv, int argc, char *sep);
sds sdsjoinsds(sds *argv, int argc, const char *sep, size_t seplen);
/* Callback for sdstemplate. The function gets called by sdstemplate
* every time a variable needs to be expanded. The variable name is
* provided as variable, and the callback is expected to return a
* substitution value. Returning a NULL indicates an error.
*/
typedef sds (*sdstemplate_callback_t)(const sds variable, void *arg);
sds sdstemplate(const char *template, sdstemplate_callback_t cb_func, void *cb_arg);
/* Low level functions exposed to the user API */
sds sdsMakeRoomFor(sds s, size_t addlen);
void sdsIncrLen(sds s, ssize_t incr);

File diff suppressed because it is too large Load Diff

View File

@ -201,6 +201,14 @@ struct redisCommand redisCommandTable[] = {
"read-only fast @string",
0,NULL,1,1,1,0,0,0},
{"getex",getexCommand,-2,
"write fast @string",
0,NULL,1,1,1,0,0,0},
{"getdel",getdelCommand,2,
"write fast @string",
0,NULL,1,1,1,0,0,0},
/* Note that we can't flag set as fast, since it may perform an
* implicit DEL of a large key. */
{"set",setCommand,-3,
@ -449,15 +457,15 @@ struct redisCommand redisCommandTable[] = {
{"zunionstore",zunionstoreCommand,-4,
"write use-memory @sortedset",
0,zunionInterDiffStoreGetKeys,0,0,0,0,0,0},
0,zunionInterDiffStoreGetKeys,1,1,1,0,0,0},
{"zinterstore",zinterstoreCommand,-4,
"write use-memory @sortedset",
0,zunionInterDiffStoreGetKeys,0,0,0,0,0,0},
0,zunionInterDiffStoreGetKeys,1,1,1,0,0,0},
{"zdiffstore",zdiffstoreCommand,-4,
"write use-memory @sortedset",
0,zunionInterDiffStoreGetKeys,0,0,0,0,0,0},
0,zunionInterDiffStoreGetKeys,1,1,1,0,0,0},
{"zunion",zunionCommand,-3,
"read-only @sortedset",
@ -547,6 +555,10 @@ struct redisCommand redisCommandTable[] = {
"write no-script fast @sortedset @blocking",
0,NULL,1,-2,1,0,0,0},
{"zrandmember",zrandmemberCommand,-2,
"read-only random @sortedset",
0,NULL,1,1,1,0,0,0},
{"hset",hsetCommand,-4,
"write use-memory fast @hash",
0,NULL,1,1,1,0,0,0},
@ -603,6 +615,10 @@ struct redisCommand redisCommandTable[] = {
"read-only fast @hash",
0,NULL,1,1,1,0,0,0},
{"hrandfield",hrandfieldCommand,-2,
"read-only random @hash",
0,NULL,1,1,1,0,0,0},
{"hscan",hscanCommand,-3,
"read-only random @hash",
0,NULL,1,1,1,0,0,0},
@ -744,7 +760,7 @@ struct redisCommand redisCommandTable[] = {
"admin no-script",
0,NULL,0,0,0,0,0,0},
{"psync",syncCommand,3,
{"psync",syncCommand,-3,
"admin no-script",
0,NULL,0,0,0,0,0,0},
@ -941,7 +957,7 @@ struct redisCommand redisCommandTable[] = {
{"georadius_ro",georadiusroCommand,-6,
"read-only @geo",
0,georadiusGetKeys,1,1,1,0,0,0},
0,NULL,1,1,1,0,0,0},
{"georadiusbymember",georadiusbymemberCommand,-5,
"write use-memory @geo",
@ -949,7 +965,7 @@ struct redisCommand redisCommandTable[] = {
{"georadiusbymember_ro",georadiusbymemberroCommand,-5,
"read-only @geo",
0,georadiusGetKeys,1,1,1,0,0,0},
0,NULL,1,1,1,0,0,0},
{"geohash",geohashCommand,-2,
"read-only @geo",
@ -1016,11 +1032,11 @@ struct redisCommand redisCommandTable[] = {
{"xread",xreadCommand,-4,
"read-only @stream @blocking",
0,xreadGetKeys,1,1,1,0,0,0},
0,xreadGetKeys,0,0,0,0,0,0},
{"xreadgroup",xreadCommand,-7,
"write @stream @blocking",
0,xreadGetKeys,1,1,1,0,0,0},
0,xreadGetKeys,0,0,0,0,0,0},
{"xgroup",xgroupCommand,-2,
"write use-memory @stream",
@ -1084,6 +1100,10 @@ struct redisCommand redisCommandTable[] = {
{"reset",resetCommand,1,
"no-script ok-stale ok-loading fast @connection",
0,NULL,0,0,0,0,0,0},
{"failover",failoverCommand,-1,
"admin no-script ok-stale",
0,NULL,0,0,0,0,0,0}
};
@ -1444,6 +1464,17 @@ dictType hashDictType = {
NULL /* allow to expand */
};
/* Dict type without destructor.
 *
 * Keys are hashed with dictSdsHash and compared with dictSdsKeyCompare.
 * Both the key and the value destructor are NULL, so the dict never frees
 * what is stored in it: the code that removes entries or releases the dict
 * is responsible for the key/value memory. hrandfieldWithCountCommand()
 * builds a temporary dict of this type and calls sdsfree() itself on the
 * entries it unlinks. */
dictType sdsReplyDictType = {
dictSdsHash, /* hash function */
NULL, /* key dup */
NULL, /* val dup */
dictSdsKeyCompare, /* key compare */
NULL, /* key destructor */
NULL, /* val destructor */
NULL /* allow to expand */
};
/* Keylist hash table type has unencoded redis objects as keys and
* lists as values. It's used for blocking operations (BLPOP) and to
* map swapped keys to a list of clients waiting for this keys to be loaded. */
@ -1592,6 +1623,9 @@ void resetChildState() {
server.stat_current_cow_bytes = 0;
updateDictResizePolicy();
closeChildInfoPipe();
moduleFireServerEvent(REDISMODULE_EVENT_FORK_CHILD,
REDISMODULE_SUBEVENT_FORK_CHILD_DIED,
NULL);
}
/* Return if child type is mutual exclusive with other fork children */
@ -2159,14 +2193,15 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
/* AOF postponed flush: Try at every cron cycle if the slow fsync
* completed. */
if (server.aof_flush_postponed_start) flushAppendOnlyFile(0);
if (server.aof_state == AOF_ON && server.aof_flush_postponed_start)
flushAppendOnlyFile(0);
/* AOF write errors: in this case we have a buffer to flush as well and
* clear the AOF error in case of success to make the DB writable again,
* however to try every second is enough in case of 'hz' is set to
* a higher frequency. */
run_with_period(1000) {
if (server.aof_last_write_status == C_ERR)
if (server.aof_state == AOF_ON && server.aof_last_write_status == C_ERR)
flushAppendOnlyFile(0);
}
@ -2174,8 +2209,15 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
checkClientPauseTimeoutAndReturnIfPaused();
/* Replication cron function -- used to reconnect to master,
* detect transfer failures, start background RDB transfers and so forth. */
run_with_period(1000) replicationCron();
* detect transfer failures, start background RDB transfers and so forth.
*
* If Redis is trying to failover then run the replication cron faster so
* progress on the handshake happens more quickly. */
if (server.failover_state != NO_FAILOVER) {
run_with_period(100) replicationCron();
} else {
run_with_period(1000) replicationCron();
}
/* Run the Redis Cluster cron. */
run_with_period(100) {
@ -2386,12 +2428,18 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
server.get_ack_from_slaves = 0;
}
/* We may have received updates from clients about their current offset. NOTE:
* this can't be done where the ACK is received since failover will disconnect
* our clients. */
updateFailoverStatus();
/* Send the invalidation messages to clients participating to the
* client side caching protocol in broadcasting (BCAST) mode. */
trackingBroadcastInvalidationMessages();
/* Write the AOF buffer on disk */
flushAppendOnlyFile(0);
if (server.aof_state == AOF_ON)
flushAppendOnlyFile(0);
/* Handle writes with pending output buffers. */
handleClientsWithPendingWritesUsingThreads();
@ -2532,6 +2580,12 @@ void createSharedObjects(void) {
/* Used in the LMOVE/BLMOVE commands */
shared.left = createStringObject("left",4);
shared.right = createStringObject("right",5);
shared.pexpireat = createStringObject("PEXPIREAT",9);
shared.pexpire = createStringObject("PEXPIRE",7);
shared.persist = createStringObject("PERSIST",7);
shared.set = createStringObject("SET",3);
shared.pxat = createStringObject("PXAT", 4);
shared.px = createStringObject("PX",2);
for (j = 0; j < OBJ_SHARED_INTEGERS; j++) {
shared.integers[j] =
makeObjectShared(createObject(OBJ_STRING,(void*)(long)j));
@ -2634,6 +2688,13 @@ void initServerConfig(void) {
server.repl_backlog_off = 0;
server.repl_no_slaves_since = time(NULL);
/* Failover related */
server.failover_end_time = 0;
server.force_failover = 0;
server.target_replica_host = NULL;
server.target_replica_port = 0;
server.failover_state = NO_FAILOVER;
/* Client output buffer limits */
for (j = 0; j < CLIENT_TYPE_OBUF_COUNT; j++)
server.client_obuf_limits[j] = clientBufferLimitsDefaults[j];
@ -2957,6 +3018,7 @@ int listenToPort(int port, int *fds, int *count) {
return C_ERR;
}
anetNonBlock(NULL,fds[*count]);
anetCloexec(fds[*count]);
(*count)++;
}
return C_OK;
@ -3095,6 +3157,7 @@ void initServer(void) {
exit(1);
}
anetNonBlock(NULL,server.sofd);
anetCloexec(server.sofd);
}
/* Abort if there are no listening sockets at all. */
@ -3557,7 +3620,7 @@ void preventCommandReplication(client *c) {
*/
void call(client *c, int flags) {
long long dirty;
ustime_t start, duration;
monotime call_timer;
int client_old_flags = c->flags;
struct redisCommand *real_cmd = c->cmd;
static long long prev_err_count;
@ -3583,9 +3646,10 @@ void call(client *c, int flags) {
dirty = server.dirty;
prev_err_count = server.stat_total_error_replies;
updateCachedTime(0);
start = server.ustime;
elapsedStart(&call_timer);
c->cmd->proc(c);
duration = ustime()-start;
const long duration = elapsedUs(call_timer);
c->duration = duration;
dirty = server.dirty-dirty;
if (dirty < 0) dirty = 0;
@ -3629,7 +3693,10 @@ void call(client *c, int flags) {
* arguments. */
robj **argv = c->original_argv ? c->original_argv : c->argv;
int argc = c->original_argv ? c->original_argc : c->argc;
slowlogPushEntryIfNeeded(c,argv,argc,duration);
/* If the client is blocked we will handle slowlog when it is unblocked. */
if (!(c->flags & CLIENT_BLOCKED)) {
slowlogPushEntryIfNeeded(c,argv,argc,duration);
}
}
freeClientOriginalArgv(c);
@ -4682,7 +4749,7 @@ sds genRedisInfoString(const char *section) {
"aof_last_cow_size:%zu\r\n"
"module_fork_in_progress:%d\r\n"
"module_fork_last_cow_size:%zu\r\n",
server.loading,
(int)server.loading,
server.stat_current_cow_bytes,
server.dirty,
server.child_type == CHILD_TYPE_RDB,
@ -4972,6 +5039,7 @@ sds genRedisInfoString(const char *section) {
}
}
info = sdscatprintf(info,
"master_failover_state:%s\r\n"
"master_replid:%s\r\n"
"master_replid2:%s\r\n"
"master_repl_offset:%lld\r\n"
@ -4980,6 +5048,7 @@ sds genRedisInfoString(const char *section) {
"repl_backlog_size:%lld\r\n"
"repl_backlog_first_byte_offset:%lld\r\n"
"repl_backlog_histlen:%lld\r\n",
getFailoverStateString(),
server.replid,
server.replid2,
server.master_repl_offset,
@ -5184,7 +5253,7 @@ static int smapsGetSharedDirty(unsigned long addr) {
FILE *f;
f = fopen("/proc/self/smaps", "r");
serverAssert(f);
if (!f) return -1;
while (1) {
if (!fgets(buf, sizeof(buf), f))
@ -5195,8 +5264,8 @@ static int smapsGetSharedDirty(unsigned long addr) {
in_mapping = from <= addr && addr < to;
if (in_mapping && !memcmp(buf, "Shared_Dirty:", 13)) {
ret = sscanf(buf, "%*s %d", &val);
serverAssert(ret == 1);
sscanf(buf, "%*s %d", &val);
/* If parsing fails, we remain with val == -1 */
break;
}
}
@ -5210,23 +5279,33 @@ static int smapsGetSharedDirty(unsigned long addr) {
* kernel is affected.
* The bug was fixed in commit ff1712f953e27f0b0718762ec17d0adb15c9fd0b
* titled: "arm64: pgtable: Ensure dirty bit is preserved across pte_wrprotect()"
* Return 1 if the kernel seems to be affected, and 0 otherwise. */
* Return -1 on unexpected test failure, 1 if the kernel seems to be affected,
* and 0 otherwise. */
int linuxMadvFreeForkBugCheck(void) {
int ret, pipefd[2];
int ret, pipefd[2] = { -1, -1 };
pid_t pid;
char *p, *q, bug_found = 0;
const long map_size = 3 * 4096;
char *p = NULL, *q;
int bug_found = 0;
long page_size = sysconf(_SC_PAGESIZE);
long map_size = 3 * page_size;
/* Create a memory map that's in our full control (not one used by the allocator). */
p = mmap(NULL, map_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
serverAssert(p != MAP_FAILED);
if (p == MAP_FAILED) {
serverLog(LL_WARNING, "Failed to mmap(): %s", strerror(errno));
return -1;
}
q = p + 4096;
q = p + page_size;
/* Split the memory map in 3 pages by setting their protection as RO|RW|RO to prevent
* Linux from merging this memory map with adjacent VMAs. */
ret = mprotect(q, 4096, PROT_READ | PROT_WRITE);
serverAssert(!ret);
ret = mprotect(q, page_size, PROT_READ | PROT_WRITE);
if (ret < 0) {
serverLog(LL_WARNING, "Failed to mprotect(): %s", strerror(errno));
bug_found = -1;
goto exit;
}
/* Write to the page once to make it resident */
*(volatile char*)q = 0;
@ -5235,8 +5314,16 @@ int linuxMadvFreeForkBugCheck(void) {
#ifndef MADV_FREE
#define MADV_FREE 8
#endif
ret = madvise(q, 4096, MADV_FREE);
serverAssert(!ret);
ret = madvise(q, page_size, MADV_FREE);
if (ret < 0) {
/* MADV_FREE is not available on older kernels that are presumably
* not affected. */
if (errno == EINVAL) goto exit;
serverLog(LL_WARNING, "Failed to madvise(): %s", strerror(errno));
bug_found = -1;
goto exit;
}
/* Write to the page after being marked for freeing, this is supposed to take
* ownership of that page again. */
@ -5244,37 +5331,47 @@ int linuxMadvFreeForkBugCheck(void) {
/* Create a pipe for the child to return the info to the parent. */
ret = pipe(pipefd);
serverAssert(!ret);
if (ret < 0) {
serverLog(LL_WARNING, "Failed to create pipe: %s", strerror(errno));
bug_found = -1;
goto exit;
}
/* Fork the process. */
pid = fork();
serverAssert(pid >= 0);
if (!pid) {
/* Child: check if the page is marked as dirty, expecing 4 (kB).
if (pid < 0) {
serverLog(LL_WARNING, "Failed to fork: %s", strerror(errno));
bug_found = -1;
goto exit;
} else if (!pid) {
/* Child: check if the page is marked as dirty, page_size in kb.
* A value of 0 means the kernel is affected by the bug. */
if (!smapsGetSharedDirty((unsigned long)q))
ret = smapsGetSharedDirty((unsigned long) q);
if (!ret)
bug_found = 1;
else if (ret == -1) /* Failed to read */
bug_found = -1;
ret = write(pipefd[1], &bug_found, 1);
serverAssert(ret == 1);
if (write(pipefd[1], &bug_found, sizeof(bug_found)) < 0)
serverLog(LL_WARNING, "Failed to write to parent: %s", strerror(errno));
exit(0);
} else {
/* Read the result from the child. */
ret = read(pipefd[0], &bug_found, 1);
serverAssert(ret == 1);
ret = read(pipefd[0], &bug_found, sizeof(bug_found));
if (ret < 0) {
serverLog(LL_WARNING, "Failed to read from child: %s", strerror(errno));
bug_found = -1;
}
/* Reap the child pid. */
serverAssert(waitpid(pid, NULL, 0) == pid);
waitpid(pid, NULL, 0);
}
exit:
/* Cleanup */
ret = close(pipefd[0]);
serverAssert(!ret);
ret = close(pipefd[1]);
serverAssert(!ret);
ret = munmap(p, map_size);
serverAssert(!ret);
if (pipefd[0] != -1) close(pipefd[0]);
if (pipefd[1] != -1) close(pipefd[1]);
if (p != NULL) munmap(p, map_size);
return bug_found;
}
@ -5470,7 +5567,7 @@ void setupChildSignalHandlers(void) {
* of the parent process, e.g. fd(socket or flock) etc.
* should close the resources not used by the child process, so that if the
* parent restarts it can bind/lock despite the child possibly still running. */
void closeClildUnusedResourceAfterFork() {
void closeChildUnusedResourceAfterFork() {
closeListeningSockets(0);
if (server.cluster_enabled && server.cluster_config_file_lock_fd != -1)
close(server.cluster_config_file_lock_fd); /* don't care if this fails */
@ -5497,7 +5594,7 @@ int redisFork(int purpose) {
server.in_fork_child = purpose;
setOOMScoreAdj(CONFIG_OOM_BGCHILD);
setupChildSignalHandlers();
closeClildUnusedResourceAfterFork();
closeChildUnusedResourceAfterFork();
} else {
/* Parent */
server.stat_total_forks++;
@ -5523,6 +5620,9 @@ int redisFork(int purpose) {
}
updateDictResizePolicy();
moduleFireServerEvent(REDISMODULE_EVENT_FORK_CHILD,
REDISMODULE_SUBEVENT_FORK_CHILD_BORN,
NULL);
}
return childpid;
}
@ -5533,7 +5633,7 @@ void sendChildCOWInfo(int ptype, int on_exit, char *pname) {
if (private_dirty) {
serverLog(on_exit ? LL_NOTICE : LL_VERBOSE,
"%s: %zu MB of memory used by copy-on-write",
pname, private_dirty);
pname, private_dirty/(1024*1024));
}
sendChildInfo(ptype, on_exit, private_dirty);
@ -5598,20 +5698,68 @@ void redisOutOfMemoryHandler(size_t allocation_size) {
allocation_size);
}
void redisSetProcTitle(char *title) {
#ifdef USE_SETPROCTITLE
char *server_mode = "";
if (server.cluster_enabled) server_mode = " [cluster]";
else if (server.sentinel_mode) server_mode = " [sentinel]";
/* Callback for sdstemplate on proc-title-template. See redis.conf for
 * supported variables.
 *
 * 'varname' is the name of the template variable to expand and 'arg' is the
 * opaque pointer handed to sdstemplate() by expandProcTitleTemplate(), i.e.
 * the process title string.
 *
 * Returns an sds holding the value of the variable, or NULL if the variable
 * name is not one of the supported ones. */
static sds redisProcTitleGetVariable(const sds varname, void *arg)
{
    if (!strcmp(varname, "title")) {
        return sdsnew(arg);
    } else if (!strcmp(varname, "listen-addr")) {
        if (server.port || server.tls_port)
            return sdscatprintf(sdsempty(), "%s:%u",
                                server.bindaddr_count ? server.bindaddr[0] : "*",
                                server.port ? server.port : server.tls_port);
        else
            /* No TCP/TLS port: fall back to the unix socket path. The path
             * may still be unset (e.g. while a template is being validated),
             * and passing NULL to %s is undefined behavior, so substitute an
             * empty string in that case. */
            return sdscatprintf(sdsempty(), "unixsocket:%s",
                                server.unixsocket ? server.unixsocket : "");
    } else if (!strcmp(varname, "server-mode")) {
        if (server.cluster_enabled) return sdsnew("[cluster]");
        else if (server.sentinel_mode) return sdsnew("[sentinel]");
        else return sdsempty();
    } else if (!strcmp(varname, "config-file")) {
        return sdsnew(server.configfile ? server.configfile : "-");
    } else if (!strcmp(varname, "port")) {
        return sdscatprintf(sdsempty(), "%u", server.port);
    } else if (!strcmp(varname, "tls-port")) {
        return sdscatprintf(sdsempty(), "%u", server.tls_port);
    } else if (!strcmp(varname, "unixsocket")) {
        /* Same NULL guard as for "listen-addr" and "config-file". */
        return sdsnew(server.unixsocket ? server.unixsocket : "");
    } else
        return NULL; /* Unknown variable name */
}
setproctitle("%s %s:%d%s",
title,
server.bindaddr_count ? server.bindaddr[0] : "*",
server.port ? server.port : server.tls_port,
server_mode);
/* Expand 'template' using redisProcTitleGetVariable() as the variable
 * resolver and 'title' as its argument. On success the expanded string is
 * returned with leading/trailing spaces trimmed; NULL is returned when the
 * template cannot be expanded. */
static sds expandProcTitleTemplate(const char *template, const char *title) {
    sds expanded = sdstemplate(template, redisProcTitleGetVariable, (void *) title);
    return expanded ? sdstrim(expanded, " ") : NULL;
}
/* Validate the specified template, returns 1 if valid or 0 otherwise. */
int validateProcTitleTemplate(const char *template) {
int ok = 1;
sds res = expandProcTitleTemplate(template, "");
if (!res)
return 0;
if (sdslen(res) == 0) ok = 0;
sdsfree(res);
return ok;
}
/* Set the process title by expanding server.proc_title_template.
 *
 * 'title' is the value made available to the template expansion; when it is
 * NULL, server.exec_argv[0] is used instead.
 *
 * Returns C_ERR if the template could not be expanded, C_OK otherwise.
 * When USE_SETPROCTITLE is not defined the function does nothing and
 * returns C_OK. */
int redisSetProcTitle(char *title) {
#ifdef USE_SETPROCTITLE
if (!title) title = server.exec_argv[0];
sds proc_title = expandProcTitleTemplate(server.proc_title_template, title);
if (!proc_title) return C_ERR; /* Not likely, proc_title_template is validated */
/* Pass the expanded string as an argument, never as the format itself. */
setproctitle("%s", proc_title);
sdsfree(proc_title);
#else
UNUSED(title);
#endif
return C_OK;
}
void redisSetCpuAffinity(const char *cpulist) {
@ -5751,6 +5899,12 @@ int main(int argc, char **argv) {
init_genrand64(((long long) tv.tv_sec * 1000000 + tv.tv_usec) ^ getpid());
crc64_init();
/* Store umask value. Because umask(2) only offers a set-and-get API we have
* to reset it and restore it back. We do this early to avoid a potential
* race condition with threads that could be creating files or directories.
*/
umask(server.umask = umask(0777));
uint8_t hashseed[16];
getRandomBytes(hashseed,sizeof(hashseed));
dictSetHashFunctionSeed(hashseed);
@ -5843,6 +5997,7 @@ int main(int argc, char **argv) {
exit(1);
}
loadServerConfig(server.configfile, config_from_stdin, options);
if (server.sentinel_mode) loadSentinelConfigFromQueue();
sdsfree(options);
}
@ -5868,7 +6023,7 @@ int main(int argc, char **argv) {
readOOMScoreAdj();
initServer();
if (background || server.pidfile) createPidFile();
redisSetProcTitle(argv[0]);
if (server.set_proc_title) redisSetProcTitle(NULL);
redisAsciiArt();
checkTcpBacklogSettings();
@ -5878,10 +6033,17 @@ int main(int argc, char **argv) {
#ifdef __linux__
linuxMemoryWarnings();
#if defined (__arm64__)
if (linuxMadvFreeForkBugCheck()) {
serverLog(LL_WARNING,"WARNING Your kernel has a bug that could lead to data corruption during background save. Please upgrade to the latest stable kernel.");
int ret;
if ((ret = linuxMadvFreeForkBugCheck())) {
if (ret == 1)
serverLog(LL_WARNING,"WARNING Your kernel has a bug that could lead to data corruption during background save. "
"Please upgrade to the latest stable kernel.");
else
serverLog(LL_WARNING, "Failed to test the kernel for a bug that could lead to data corruption during background save. "
"Your system could be affected, please report this error.");
if (!checkIgnoreWarning("ARM64-COW-BUG")) {
serverLog(LL_WARNING,"Redis will now exit to prevent data corruption. Note that it is possible to suppress this warning by setting the following config: ignore-warnings ARM64-COW-BUG");
serverLog(LL_WARNING,"Redis will now exit to prevent data corruption. "
"Note that it is possible to suppress this warning by setting the following config: ignore-warnings ARM64-COW-BUG");
exit(1);
}
}

View File

@ -115,6 +115,7 @@ typedef long long ustime_t; /* microsecond time type. */
#define NET_ADDR_STR_LEN (NET_IP_STR_LEN+32) /* Must be enough for ip:port */
#define CONFIG_BINDADDR_MAX 16
#define CONFIG_MIN_RESERVED_FDS 32
#define CONFIG_DEFAULT_PROC_TITLE_TEMPLATE "{title} {listen-addr} {server-mode}"
#define ACTIVE_EXPIRE_CYCLE_SLOW 0
#define ACTIVE_EXPIRE_CYCLE_FAST 1
@ -270,6 +271,8 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT];
#define CLIENT_DENY_BLOCKING (1ULL<<41) /* Indicate that the client should not be blocked.
currently, turned on inside MULTI, Lua, RM_Call,
and AOF client */
#define CLIENT_REPL_RDBONLY (1ULL<<42) /* This client is a replica that only wants
RDB without replication buffer. */
/* Client block type (btype field in client structure)
* if CLIENT_BLOCKED flag is set. */
@ -317,6 +320,14 @@ typedef enum {
REPL_STATE_CONNECTED, /* Connected to master */
} repl_state;
/* The state of an in progress coordinated failover.
 * The current value is stored in server.failover_state, which is
 * initialized to NO_FAILOVER; serverCron() runs replicationCron() more
 * frequently while the state is anything other than NO_FAILOVER. */
typedef enum {
NO_FAILOVER = 0, /* No failover in progress */
FAILOVER_WAIT_FOR_SYNC, /* Waiting for target replica to catch up */
FAILOVER_IN_PROGRESS /* Waiting for target replica to accept
* PSYNC FAILOVER request. */
} failover_state;
/* State of slaves from the POV of the master. Used in client->replstate.
* In SEND_BULK and ONLINE state the slave receives new updates
* in its output queue. In the WAIT_BGSAVE states instead the server is waiting
@ -870,6 +881,7 @@ typedef struct client {
size_t sentlen; /* Amount of bytes already sent in the current
buffer or object being sent. */
time_t ctime; /* Client creation time. */
long duration; /* Current command duration. Used for measuring latency of blocking/non-blocking cmds */
time_t lastinteraction; /* Time of the last interaction, used for timeout */
time_t obuf_soft_limit_reached_time;
uint64_t flags; /* Client flags: CLIENT_* macros. */
@ -942,6 +954,19 @@ struct moduleLoadQueueEntry {
robj **argv;
};
/* One queued sentinel configuration directive.
 * NOTE(review): the fields mirror the parameters of
 * queueSentinelConfig(argv, argc, linenum, line); presumably that function
 * creates these entries -- confirm in sentinel.c. */
struct sentinelLoadQueueEntry {
int argc; /* Number of elements in 'argv'. */
sds *argv; /* The directive split into arguments. */
int linenum; /* Presumably the config file line number -- confirm. */
sds line; /* Presumably the original, unsplit config line -- confirm. */
};
/* Sentinel configuration queued for loading; server.sentinel_config points
 * to an instance of this struct ("sentinel config to load at startup time").
 * NOTE(review): judging by the field names the three lists group directives
 * by their position relative to the "monitor" directives -- confirm against
 * queueSentinelConfig() / loadSentinelConfigFromQueue(). */
struct sentinelConfig {
list *pre_monitor_cfg;
list *monitor_cfg;
list *post_monitor_cfg;
};
struct sharedObjectsStruct {
robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *pong, *space,
*colon, *queued, *null[4], *nullarray[4], *emptymap[4], *emptyset[4],
@ -951,7 +976,8 @@ struct sharedObjectsStruct {
*busykeyerr, *oomerr, *plus, *messagebulk, *pmessagebulk, *subscribebulk,
*unsubscribebulk, *psubscribebulk, *punsubscribebulk, *del, *unlink,
*rpop, *lpop, *lpush, *rpoplpush, *lmove, *blmove, *zpopmin, *zpopmax,
*emptyscan, *multi, *exec, *left, *right,
*emptyscan, *multi, *exec, *left, *right, *persist, *set, *pexpireat,
*pexpire, *pxat, *px,
*select[PROTO_SHARED_SELECT_CMDS],
*integers[OBJ_SHARED_INTEGERS],
*mbulkhdr[OBJ_SHARED_BULKHDR_LEN], /* "*<value>\r\n" */
@ -1124,6 +1150,7 @@ struct redisServer {
int config_hz; /* Configured HZ value. May be different than
the actual 'hz' field value if dynamic-hz
is enabled. */
mode_t umask; /* The umask value of the process on startup */
int hz; /* serverCron() calls frequency in hertz */
int in_fork_child; /* indication that this is a fork child */
redisDb *db;
@ -1280,6 +1307,8 @@ struct redisServer {
int supervised; /* 1 if supervised, 0 otherwise. */
int supervised_mode; /* See SUPERVISED_* */
int daemonize; /* True if running as a daemon */
int set_proc_title; /* True if change proc title */
char *proc_title_template; /* Process title template format */
clientBufferLimitsConfig client_obuf_limits[CLIENT_TYPE_OBUF_COUNT];
/* AOF persistence */
int aof_enabled; /* AOF configuration */
@ -1530,6 +1559,7 @@ struct redisServer {
int lazyfree_lazy_expire;
int lazyfree_lazy_server_del;
int lazyfree_lazy_user_del;
int lazyfree_lazy_user_flush;
/* Latency monitor */
long long latency_monitor_threshold;
dict *latency_events;
@ -1554,6 +1584,16 @@ struct redisServer {
char *bio_cpulist; /* cpu affinity list of bio thread. */
char *aof_rewrite_cpulist; /* cpu affinity list of aof rewrite process. */
char *bgsave_cpulist; /* cpu affinity list of bgsave process. */
/* Sentinel config */
struct sentinelConfig *sentinel_config; /* sentinel config to load at startup time. */
/* Coordinate failover info */
mstime_t failover_end_time; /* Deadline for failover command. */
int force_failover; /* If true then failover will be forced at the
* deadline, otherwise failover is aborted. */
char *target_replica_host; /* Failover target host. If null during a
* failover then any replica can be used. */
int target_replica_port; /* Failover target port */
int failover_state; /* Failover state */
};
typedef struct pubsubPattern {
@ -1679,6 +1719,7 @@ extern dictType hashDictType;
extern dictType replScriptCacheDictType;
extern dictType dbExpiresDictType;
extern dictType modulesDictType;
extern dictType sdsReplyDictType;
/*-----------------------------------------------------------------------------
* Functions prototypes
@ -1728,7 +1769,8 @@ void getRandomBytes(unsigned char *p, size_t len);
uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l);
void exitFromChild(int retcode);
size_t redisPopcount(void *s, long count);
void redisSetProcTitle(char *title);
int redisSetProcTitle(char *title);
int validateProcTitleTemplate(const char *template);
int redisCommunicateSystemd(const char *sd_notify_msg);
void redisSetCpuAffinity(const char *cpulist);
@ -1973,6 +2015,10 @@ void feedReplicationBacklog(void *ptr, size_t len);
void showLatestBacklog(void);
void rdbPipeReadHandler(struct aeEventLoop *eventLoop, int fd, void *clientData, int mask);
void rdbPipeWriteHandlerConnRemoved(struct connection *conn);
void clearFailoverState(void);
void updateFailoverStatus(void);
void abortFailover(const char *err);
const char *getFailoverStateString();
/* Generic persistence functions */
void startLoadingFile(FILE* fp, char* filename, int rdbflags);
@ -2042,7 +2088,7 @@ int ACLSetUser(user *u, const char *op, ssize_t oplen);
sds ACLDefaultUserFirstPassword(void);
uint64_t ACLGetCommandCategoryFlagByName(const char *name);
int ACLAppendUserForLoading(sds *argv, int argc, int *argc_err);
char *ACLSetUserStringError(void);
const char *ACLSetUserStringError(void);
int ACLLoadConfiguredUsers(void);
sds ACLDescribeUser(user *u);
void ACLLoadUsersAtStartup(void);
@ -2236,6 +2282,7 @@ void appendServerSaveParams(time_t seconds, int changes);
void resetServerSaveParams(void);
struct rewriteConfigState; /* Forward declaration to export API. */
void rewriteConfigRewriteLine(struct rewriteConfigState *state, const char *option, sds line, int force);
void rewriteConfigMarkAsProcessed(struct rewriteConfigState *state, const char *option);
int rewriteConfig(char *path, int force_all);
void initConfigValues();
@ -2330,7 +2377,9 @@ int clusterSendModuleMessageToTarget(const char *target, uint64_t module_id, uin
void initSentinelConfig(void);
void initSentinel(void);
void sentinelTimer(void);
char *sentinelHandleConfiguration(char **argv, int argc);
const char *sentinelHandleConfiguration(char **argv, int argc);
void queueSentinelConfig(sds *argv, int argc, int linenum, sds line);
void loadSentinelConfigFromQueue(void);
void sentinelIsRunning(void);
/* redis-check-rdb & aof */
@ -2344,6 +2393,7 @@ int ldbRemoveChild(pid_t pid);
void ldbKillForkedSessions(void);
int ldbPendingChildren(void);
sds luaCreateFunction(client *c, lua_State *lua, robj *body);
void freeLuaScriptsAsync(dict *lua_scripts);
/* Blocked clients */
void processUnblockedClients(void);
@ -2356,6 +2406,7 @@ void disconnectAllBlockedClients(void);
void handleClientsBlockedOnKeys(void);
void signalKeyAsReady(redisDb *db, robj *key, int type);
void blockForKeys(client *c, int btype, robj **keys, int numkeys, mstime_t timeout, robj *target, struct listPos *listpos, streamID *ids);
void updateStatsOnUnblock(client *c, long blocked_us, long reply_us);
/* timeout.c -- Blocked clients timeout and connections timeout. */
void addClientToTimeoutTable(client *c);
@ -2403,6 +2454,8 @@ void setnxCommand(client *c);
void setexCommand(client *c);
void psetexCommand(client *c);
void getCommand(client *c);
void getexCommand(client *c);
void getdelCommand(client *c);
void delCommand(client *c);
void unlinkCommand(client *c);
void existsCommand(client *c);
@ -2505,6 +2558,7 @@ void zpopminCommand(client *c);
void zpopmaxCommand(client *c);
void bzpopminCommand(client *c);
void bzpopmaxCommand(client *c);
void zrandmemberCommand(client *c);
void multiCommand(client *c);
void execCommand(client *c);
void discardCommand(client *c);
@ -2538,6 +2592,7 @@ void hvalsCommand(client *c);
void hgetallCommand(client *c);
void hexistsCommand(client *c);
void hscanCommand(client *c);
void hrandfieldCommand(client *c);
void configCommand(client *c);
void hincrbyCommand(client *c);
void hincrbyfloatCommand(client *c);
@ -2607,6 +2662,7 @@ void lolwutCommand(client *c);
void aclCommand(client *c);
void stralgoCommand(client *c);
void resetCommand(client *c);
void failoverCommand(client *c);
#if defined(__GNUC__)
void *calloc(size_t count, size_t size) __attribute__ ((deprecated));

View File

@ -108,6 +108,7 @@ size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end
void streamIteratorStart(streamIterator *si, stream *s, streamID *start, streamID *end, int rev);
int streamIteratorGetID(streamIterator *si, streamID *id, int64_t *numfields);
void streamIteratorGetField(streamIterator *si, unsigned char **fieldptr, unsigned char **valueptr, int64_t *fieldlen, int64_t *valuelen);
void streamIteratorRemoveEntry(streamIterator *si, streamID *current);
void streamIteratorStop(streamIterator *si);
streamCG *streamLookupCG(stream *s, sds groupname);
streamConsumer *streamLookupConsumer(streamCG *cg, sds name, int flags, int *created);
@ -121,5 +122,11 @@ int streamDecrID(streamID *id);
void streamPropagateConsumerCreation(client *c, robj *key, robj *groupname, sds consumername);
robj *streamDup(robj *o);
int streamValidateListpackIntegrity(unsigned char *lp, size_t size, int deep);
int streamParseID(const robj *o, streamID *id);
robj *createObjectFromStreamID(streamID *id);
int streamAppendItem(stream *s, robj **argv, int64_t numfields, streamID *added_id, streamID *use_id);
int streamDeleteItem(stream *s, streamID *id);
int64_t streamTrimByLength(stream *s, long long maxlen, int approx);
int64_t streamTrimByID(stream *s, streamID minid, int approx);
#endif

View File

@ -598,6 +598,42 @@ int hashZiplistValidateIntegrity(unsigned char *zl, size_t size, int deep) {
return ret;
}
/* Build a new sds string holding the content of the ziplist entry 'e':
 * the string payload when 'sval' is set, otherwise the decimal
 * representation of the integer payload 'lval'. */
sds hashSdsFromZiplistEntry(ziplistEntry *e) {
    if (e->sval) {
        return sdsnewlen(e->sval, e->slen);
    }
    return sdsfromlonglong(e->lval);
}
/* Emit the ziplist entry 'e' to client 'c' as a bulk reply, using the
 * string payload when present and the integer payload otherwise. */
void hashReplyFromZiplistEntry(client *c, ziplistEntry *e) {
    if (!e->sval) {
        addReplyBulkLongLong(c, e->lval);
        return;
    }
    addReplyBulkCBuffer(c, e->sval, e->slen);
}
/* Pick a random field from a non empty hash.
 * On return 'key' (and 'val', unless it is NULL) reference the chosen
 * element. They point into memory owned by the hash, so the caller must
 * neither free nor modify it. 'hashsize' is forwarded to
 * ziplistRandomPair() for ziplist encoded hashes. */
void hashTypeRandomElement(robj *hashobj, unsigned long hashsize, ziplistEntry *key, ziplistEntry *val) {
    if (hashobj->encoding == OBJ_ENCODING_ZIPLIST) {
        ziplistRandomPair(hashobj->ptr, hashsize, key, val);
        return;
    }
    if (hashobj->encoding != OBJ_ENCODING_HT) {
        serverPanic("Unknown hash encoding");
        return;
    }

    /* Hash table encoding: expose the sds field/value through the entries. */
    dictEntry *entry = dictGetFairRandomKey(hashobj->ptr);
    sds field = dictGetKey(entry);
    key->sval = (unsigned char*)field;
    key->slen = sdslen(field);
    if (val != NULL) {
        sds value = dictGetVal(entry);
        val->sval = (unsigned char*)value;
        val->slen = sdslen(value);
    }
}
/*-----------------------------------------------------------------------------
* Hash type commands
*----------------------------------------------------------------------------*/
@ -922,3 +958,220 @@ void hscanCommand(client *c) {
checkType(c,o,OBJ_HASH)) return;
scanGenericCommand(c,o,cursor);
}
/* How many times bigger should be the hash compared to the requested size
* for us to not use the "remove elements" strategy? Read later in the
* implementation for more info. */
#define HRANDFIELD_SUB_STRATEGY_MUL 3
/* Maximum number of ziplistEntry slots allocated at once when sampling with
 * repetitions from a ziplist encoded hash. The requested count comes from the
 * client, so the sampling is done in batches of at most this many elements
 * instead of allocating 'count' entries in one go. */
#define HRANDFIELD_RANDOM_SAMPLE_LIMIT 1000

/* Implementation of HRANDFIELD when a count is given.
 *
 * 'c' is the client (the key is read from c->argv[1]), 'l' is the parsed
 * count and 'withvalues' is non zero when values must be returned together
 * with the fields.
 *
 * A non negative 'l' asks for up to 'l' distinct fields; a negative 'l' asks
 * for exactly -l fields, where the same field may be returned multiple times.
 * With RESP2 field/value pairs are emitted as a flat array, with RESP3 as an
 * array of two-element arrays.
 *
 * NOTE(review): count*2 is passed to addReplyArrayLen() for RESP2 with
 * values; this assumes the caller rejects counts large enough to overflow
 * that multiplication -- confirm in hrandfieldCommand(). */
void hrandfieldWithCountCommand(client *c, long l, int withvalues) {
    unsigned long count, size;
    int uniq = 1;
    robj *hash;

    if ((hash = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp]))
        == NULL || checkType(c,hash,OBJ_HASH)) return;
    size = hashTypeLength(hash);

    if (l >= 0) {
        count = (unsigned long) l;
    } else {
        /* Negate using unsigned arithmetic: "-l" on a signed long is
         * undefined behavior when l is LONG_MIN. */
        count = 0UL - (unsigned long) l;
        uniq = 0;
    }

    /* If count is zero, serve it ASAP to avoid special cases later. */
    if (count == 0) {
        addReply(c,shared.emptyarray);
        return;
    }

    /* CASE 1: The count was negative, so the extraction method is just:
     * "return N random elements" sampling the whole set every time.
     * This case is trivial and can be served without auxiliary data
     * structures. This case is the only one that also needs to return the
     * elements in random order. */
    if (!uniq || count == 1) {
        if (withvalues && c->resp == 2)
            addReplyArrayLen(c, count*2);
        else
            addReplyArrayLen(c, count);
        if (hash->encoding == OBJ_ENCODING_HT) {
            sds key, value;
            while (count--) {
                dictEntry *de = dictGetRandomKey(hash->ptr);
                key = dictGetKey(de);
                value = dictGetVal(de);
                if (withvalues && c->resp > 2)
                    addReplyArrayLen(c,2);
                addReplyBulkCBuffer(c, key, sdslen(key));
                if (withvalues)
                    addReplyBulkCBuffer(c, value, sdslen(value));
            }
        } else if (hash->encoding == OBJ_ENCODING_ZIPLIST) {
            ziplistEntry *keys, *vals = NULL;
            unsigned long limit, sample_count;

            /* Sample in bounded batches so that the size of the temporary
             * buffers does not depend on the client supplied count. */
            limit = count > HRANDFIELD_RANDOM_SAMPLE_LIMIT ?
                    HRANDFIELD_RANDOM_SAMPLE_LIMIT : count;
            keys = zmalloc(sizeof(ziplistEntry)*limit);
            if (withvalues)
                vals = zmalloc(sizeof(ziplistEntry)*limit);
            while (count) {
                sample_count = count > limit ? limit : count;
                count -= sample_count;
                ziplistRandomPairs(hash->ptr, sample_count, keys, vals);
                for (unsigned long i = 0; i < sample_count; i++) {
                    if (withvalues && c->resp > 2)
                        addReplyArrayLen(c,2);
                    hashReplyFromZiplistEntry(c, &keys[i]);
                    if (withvalues)
                        hashReplyFromZiplistEntry(c, &vals[i]);
                }
            }
            zfree(keys);
            zfree(vals);
        }
        return;
    }

    /* Initiate reply count, RESP3 responds with nested array, RESP2 with flat one. */
    long reply_size = count < size ? count : size;
    if (withvalues && c->resp == 2)
        addReplyArrayLen(c, reply_size*2);
    else
        addReplyArrayLen(c, reply_size);

    /* CASE 2:
     * The number of requested elements is greater than or equal to the
     * number of elements inside the hash: simply return the whole hash. */
    if (count >= size) {
        hashTypeIterator *hi = hashTypeInitIterator(hash);
        while (hashTypeNext(hi) != C_ERR) {
            if (withvalues && c->resp > 2)
                addReplyArrayLen(c,2);
            addHashIteratorCursorToReply(c, hi, OBJ_HASH_KEY);
            if (withvalues)
                addHashIteratorCursorToReply(c, hi, OBJ_HASH_VALUE);
        }
        hashTypeReleaseIterator(hi);
        return;
    }

    /* CASE 3:
     * The number of elements inside the hash is not greater than
     * HRANDFIELD_SUB_STRATEGY_MUL times the number of requested elements.
     * In this case we create a hash from scratch with all the elements, and
     * subtract random elements to reach the requested number of elements.
     *
     * This is done because if the number of requested elements is just
     * a bit less than the number of elements in the hash, the natural approach
     * used in CASE 4 is highly inefficient. */
    if (count*HRANDFIELD_SUB_STRATEGY_MUL > size) {
        /* sdsReplyDictType has no destructors: the sds strings stored in
         * the dict are released explicitly below. */
        dict *d = dictCreate(&sdsReplyDictType, NULL);
        hashTypeIterator *hi = hashTypeInitIterator(hash);

        /* Add all the elements into the temporary dictionary. */
        while ((hashTypeNext(hi)) != C_ERR) {
            int ret = DICT_ERR;
            sds key, value = NULL;

            key = hashTypeCurrentObjectNewSds(hi,OBJ_HASH_KEY);
            if (withvalues)
                value = hashTypeCurrentObjectNewSds(hi,OBJ_HASH_VALUE);
            ret = dictAdd(d, key, value);
            serverAssert(ret == DICT_OK);
        }
        serverAssert(dictSize(d) == size);
        hashTypeReleaseIterator(hi);

        /* Remove random elements to reach the right count. */
        while (size > count) {
            dictEntry *de;
            de = dictGetRandomKey(d);
            dictUnlink(d,dictGetKey(de));
            sdsfree(dictGetKey(de));
            sdsfree(dictGetVal(de));
            dictFreeUnlinkedEntry(d,de);
            size--;
        }

        /* Reply with what's in the dict and release memory */
        dictIterator *di;
        dictEntry *de;
        di = dictGetIterator(d);
        while ((de = dictNext(di)) != NULL) {
            sds key = dictGetKey(de);
            sds value = dictGetVal(de);
            if (withvalues && c->resp > 2)
                addReplyArrayLen(c,2);
            addReplyBulkSds(c, key);
            if (withvalues)
                addReplyBulkSds(c, value);
        }

        dictReleaseIterator(di);
        dictRelease(d);
    }

    /* CASE 4: We have a big hash compared to the requested number of elements.
     * In this case we can simply get random elements from the hash and add
     * to the temporary hash, trying to eventually get enough unique elements
     * to reach the specified count. */
    else {
        unsigned long added = 0;
        ziplistEntry key, value;
        dict *d = dictCreate(&hashDictType, NULL);
        while (added < count) {
            hashTypeRandomElement(hash, size, &key, withvalues? &value : NULL);

            /* Try to add the object to the dictionary. If it already exists
             * free it, otherwise increment the number of objects we have
             * in the result dictionary. */
            sds skey = hashSdsFromZiplistEntry(&key);
            if (dictAdd(d,skey,NULL) != DICT_OK) {
                sdsfree(skey);
                continue;
            }
            added++;

            /* We can reply right away, so that we don't need to store the value in the dict. */
            if (withvalues && c->resp > 2)
                addReplyArrayLen(c,2);
            hashReplyFromZiplistEntry(c, &key);
            if (withvalues)
                hashReplyFromZiplistEntry(c, &value);
        }

        /* Release memory */
        dictRelease(d);
    }
}
/* HRANDFIELD key [<count> [WITHVALUES]]
 *
 * Without <count> a single random element of the hash is replied (or the
 * protocol-specific null when the key is missing). With <count> the work is
 * delegated to hrandfieldWithCountCommand(). */
void hrandfieldCommand(client *c) {
    if (c->argc < 3) {
        /* Variant without <count> argument. Reply with simple bulk string */
        robj *hash = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp]);
        if (hash == NULL) return;
        if (checkType(c,hash,OBJ_HASH)) return;

        ziplistEntry ele;
        hashTypeRandomElement(hash,hashTypeLength(hash),&ele,NULL);
        hashReplyFromZiplistEntry(c, &ele);
        return;
    }

    /* Variant with <count>: the count is parsed before the optional
     * WITHVALUES token is validated. */
    long count;
    if (getLongFromObjectOrReply(c,c->argv[2],&count,NULL) != C_OK) return;

    int withvalues = (c->argc == 4);
    if (c->argc > 4 ||
        (withvalues && strcasecmp(c->argv[3]->ptr,"withvalues") != 0))
    {
        addReplyErrorObject(c,shared.syntaxerr);
        return;
    }
    hrandfieldWithCountCommand(c, count, withvalues);
}

View File

@ -690,8 +690,9 @@ void srandmemberWithCountCommand(client *c) {
/* CASE 1: The count was negative, so the extraction method is just:
* "return N random elements" sampling the whole set every time.
* This case is trivial and can be served without auxiliary data
* structures. */
if (!uniq) {
* structures. This case is the only one that also needs to return the
* elements in random order. */
if (!uniq || count == 1) {
addReplySetLen(c,count);
while(count--) {
encoding = setTypeRandomElement(set,&ele,&llele);
@ -713,7 +714,7 @@ void srandmemberWithCountCommand(client *c) {
}
/* For CASE 3 and CASE 4 we need an auxiliary dictionary. */
d = dictCreate(&objectKeyPointerValueDictType,NULL);
d = dictCreate(&sdsReplyDictType,NULL);
/* CASE 3:
* The number of elements inside the set is not greater than
@ -729,13 +730,13 @@ void srandmemberWithCountCommand(client *c) {
/* Add all the elements into the temporary dictionary. */
si = setTypeInitIterator(set);
while((encoding = setTypeNext(si,&ele,&llele)) != -1) {
while ((encoding = setTypeNext(si,&ele,&llele)) != -1) {
int retval = DICT_ERR;
if (encoding == OBJ_ENCODING_INTSET) {
retval = dictAdd(d,createStringObjectFromLongLong(llele),NULL);
retval = dictAdd(d,sdsfromlonglong(llele),NULL);
} else {
retval = dictAdd(d,createStringObject(ele,sdslen(ele)),NULL);
retval = dictAdd(d,sdsdup(ele),NULL);
}
serverAssert(retval == DICT_OK);
}
@ -743,11 +744,12 @@ void srandmemberWithCountCommand(client *c) {
serverAssert(dictSize(d) == size);
/* Remove random elements to reach the right count. */
while(size > count) {
while (size > count) {
dictEntry *de;
de = dictGetRandomKey(d);
dictDelete(d,dictGetKey(de));
dictUnlink(d,dictGetKey(de));
sdsfree(dictGetKey(de));
dictFreeUnlinkedEntry(d,de);
size--;
}
}
@ -758,22 +760,22 @@ void srandmemberWithCountCommand(client *c) {
* to reach the specified count. */
else {
unsigned long added = 0;
robj *objele;
sds sdsele;
while(added < count) {
while (added < count) {
encoding = setTypeRandomElement(set,&ele,&llele);
if (encoding == OBJ_ENCODING_INTSET) {
objele = createStringObjectFromLongLong(llele);
sdsele = sdsfromlonglong(llele);
} else {
objele = createStringObject(ele,sdslen(ele));
sdsele = sdsdup(ele);
}
/* Try to add the object to the dictionary. If it already exists
* free it, otherwise increment the number of objects we have
* in the result dictionary. */
if (dictAdd(d,objele,NULL) == DICT_OK)
if (dictAdd(d,sdsele,NULL) == DICT_OK)
added++;
else
decrRefCount(objele);
sdsfree(sdsele);
}
}
@ -785,12 +787,13 @@ void srandmemberWithCountCommand(client *c) {
addReplySetLen(c,count);
di = dictGetIterator(d);
while((de = dictNext(di)) != NULL)
addReplyBulk(c,dictGetKey(de));
addReplyBulkSds(c,dictGetKey(de));
dictReleaseIterator(di);
dictRelease(d);
}
}
/* SRANDMEMBER [<count>] */
void srandmemberCommand(client *c) {
robj *set;
sds ele;
@ -805,6 +808,7 @@ void srandmemberCommand(client *c) {
return;
}
/* Handle variant without <count> argument. Reply with simple bulk string */
if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp]))
== NULL || checkType(c,set,OBJ_SET)) return;

View File

@ -818,6 +818,28 @@ int64_t streamTrim(stream *s, streamAddTrimArgs *args) {
return deleted;
}
/* Trims a stream by length. Returns the number of deleted items.
 *
 * Builds a MAXLEN trim request and hands it to streamTrim(). When 'approx'
 * is non-zero the request carries a limit of
 * 100 * server.stream_node_max_entries, otherwise the limit is 0. */
int64_t streamTrimByLength(stream *s, long long maxlen, int approx) {
    streamAddTrimArgs trim = {
        .maxlen = maxlen,
        .approx_trim = approx,
        .trim_strategy = TRIM_STRATEGY_MAXLEN,
        .limit = 0
    };
    if (approx) trim.limit = 100 * server.stream_node_max_entries;
    return streamTrim(s, &trim);
}
/* Trims a stream by minimum ID. Returns the number of deleted items.
 *
 * Builds a MINID trim request and hands it to streamTrim(). When 'approx'
 * is non-zero the request carries a limit of
 * 100 * server.stream_node_max_entries, otherwise the limit is 0. */
int64_t streamTrimByID(stream *s, streamID minid, int approx) {
    streamAddTrimArgs trim = {
        .minid = minid,
        .approx_trim = approx,
        .trim_strategy = TRIM_STRATEGY_MINID,
        .limit = 0
    };
    if (approx) trim.limit = 100 * server.stream_node_max_entries;
    return streamTrim(s, &trim);
}
/* Parse the arguments of XADD/XTRIM.
*
* See streamAddTrimArgs for more details about the arguments handled.
@ -1625,7 +1647,7 @@ robj *streamTypeLookupWriteOrCreate(client *c, robj *key, int no_create) {
* treated as an invalid ID.
*
* If 'c' is set to NULL, no reply is sent to the client. */
int streamGenericParseIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq, int strict) {
int streamGenericParseIDOrReply(client *c, const robj *o, streamID *id, uint64_t missing_seq, int strict) {
char buf[128];
if (sdslen(o->ptr) > sizeof(buf)-1) goto invalid;
memcpy(buf,o->ptr,sdslen(o->ptr)+1);
@ -1661,6 +1683,11 @@ invalid:
return C_ERR;
}
/* Wrapper for streamGenericParseIDOrReply() used by module API.
 * No client is passed, so no reply is sent on failure; both 'missing_seq'
 * and 'strict' are passed as 0. */
int streamParseID(const robj *o, streamID *id) {
    client *no_client = NULL;
    uint64_t missing_seq = 0;
    int strict = 0;
    return streamGenericParseIDOrReply(no_client, o, id, missing_seq, strict);
}
/* Wrapper for streamGenericParseIDOrReply() with 'strict' argument set to
* 0, to be used when - and + are acceptable IDs. */
int streamParseIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq) {

View File

@ -61,13 +61,16 @@ static int checkStringLength(client *c, long long size) {
* If ok_reply is NULL "+OK" is used.
* If abort_reply is NULL, "$-1" is used. */
#define OBJ_SET_NO_FLAGS 0
#define OBJ_NO_FLAGS 0
#define OBJ_SET_NX (1<<0) /* Set if key not exists. */
#define OBJ_SET_XX (1<<1) /* Set if key exists. */
#define OBJ_SET_EX (1<<2) /* Set if time in seconds is given */
#define OBJ_SET_PX (1<<3) /* Set if time in ms in given */
#define OBJ_SET_KEEPTTL (1<<4) /* Set and keep the ttl */
#define OBJ_EX (1<<2) /* Set if time in seconds is given */
#define OBJ_PX (1<<3) /* Set if time in ms in given */
#define OBJ_KEEPTTL (1<<4) /* Set and keep the ttl */
#define OBJ_SET_GET (1<<5) /* Set if want to get key before set */
#define OBJ_EXAT (1<<6) /* Set if timestamp in second is given */
#define OBJ_PXAT (1<<7) /* Set if timestamp in ms is given */
#define OBJ_PERSIST (1<<8) /* Set if we need to remove the ttl */
void setGenericCommand(client *c, int flags, robj *key, robj *val, robj *expire, int unit, robj *ok_reply, robj *abort_reply) {
long long milliseconds = 0; /* initialized to avoid any harmness warning */
@ -93,77 +96,41 @@ void setGenericCommand(client *c, int flags, robj *key, robj *val, robj *expire,
if (getGenericCommand(c) == C_ERR) return;
}
genericSetKey(c,c->db,key,val,flags & OBJ_SET_KEEPTTL,1);
genericSetKey(c,c->db,key, val,flags & OBJ_KEEPTTL,1);
server.dirty++;
if (expire) setExpire(c,c->db,key,mstime()+milliseconds);
notifyKeyspaceEvent(NOTIFY_STRING,"set",key,c->db->id);
if (expire) notifyKeyspaceEvent(NOTIFY_GENERIC,
"expire",key,c->db->id);
if (expire) {
robj *exp = shared.pxat;
if ((flags & OBJ_PX) || (flags & OBJ_EX)) {
setExpire(c,c->db,key,milliseconds + mstime());
exp = shared.px;
} else {
setExpire(c,c->db,key,milliseconds);
}
notifyKeyspaceEvent(NOTIFY_GENERIC,"expire",key,c->db->id);
/* Propagate as SET Key Value PXAT millisecond-timestamp if there is EXAT/PXAT or
* propagate as SET Key Value PX millisecond if there is EX/PX flag.
*
* Additionally when we propagate the SET with PX (relative millisecond) we translate
* it again to SET with PXAT for the AOF.
*
* Additional care is required while modifying the argument order. AOF relies on the
* exp argument being at index 3. (see feedAppendOnlyFile)
* */
robj *millisecondObj = createStringObjectFromLongLong(milliseconds);
rewriteClientCommandVector(c,5,shared.set,key,val,exp,millisecondObj);
decrRefCount(millisecondObj);
}
if (!(flags & OBJ_SET_GET)) {
addReply(c, ok_reply ? ok_reply : shared.ok);
}
}
/* SET key value [NX] [XX] [KEEPTTL] [GET] [EX <seconds>] [PX <milliseconds>] */
void setCommand(client *c) {
int j;
robj *expire = NULL;
int unit = UNIT_SECONDS;
int flags = OBJ_SET_NO_FLAGS;
for (j = 3; j < c->argc; j++) {
char *a = c->argv[j]->ptr;
robj *next = (j == c->argc-1) ? NULL : c->argv[j+1];
if ((a[0] == 'n' || a[0] == 'N') &&
(a[1] == 'x' || a[1] == 'X') && a[2] == '\0' &&
!(flags & OBJ_SET_XX) && !(flags & OBJ_SET_GET))
{
flags |= OBJ_SET_NX;
} else if ((a[0] == 'x' || a[0] == 'X') &&
(a[1] == 'x' || a[1] == 'X') && a[2] == '\0' &&
!(flags & OBJ_SET_NX))
{
flags |= OBJ_SET_XX;
} else if ((a[0] == 'g' || a[0] == 'G') &&
(a[1] == 'e' || a[1] == 'E') &&
(a[2] == 't' || a[2] == 'T') && a[3] == '\0' &&
!(flags & OBJ_SET_NX)) {
flags |= OBJ_SET_GET;
} else if (!strcasecmp(c->argv[j]->ptr,"KEEPTTL") &&
!(flags & OBJ_SET_EX) && !(flags & OBJ_SET_PX))
{
flags |= OBJ_SET_KEEPTTL;
} else if ((a[0] == 'e' || a[0] == 'E') &&
(a[1] == 'x' || a[1] == 'X') && a[2] == '\0' &&
!(flags & OBJ_SET_KEEPTTL) &&
!(flags & OBJ_SET_PX) && next)
{
flags |= OBJ_SET_EX;
unit = UNIT_SECONDS;
expire = next;
j++;
} else if ((a[0] == 'p' || a[0] == 'P') &&
(a[1] == 'x' || a[1] == 'X') && a[2] == '\0' &&
!(flags & OBJ_SET_KEEPTTL) &&
!(flags & OBJ_SET_EX) && next)
{
flags |= OBJ_SET_PX;
unit = UNIT_MILLISECONDS;
expire = next;
j++;
} else {
addReplyErrorObject(c,shared.syntaxerr);
return;
}
}
c->argv[2] = tryObjectEncoding(c->argv[2]);
setGenericCommand(c,flags,c->argv[1],c->argv[2],expire,unit,NULL,NULL);
/* Propagate without the GET argument */
if (flags & OBJ_SET_GET) {
/* Propagate without the GET argument (Isn't needed if we had expire since in that case we completely re-written the command argv) */
if ((flags & OBJ_SET_GET) && !expire) {
int argc = 0;
int j;
robj **argv = zmalloc((c->argc-1)*sizeof(robj*));
for (j=0; j < c->argc; j++) {
char *a = c->argv[j]->ptr;
@ -180,6 +147,123 @@ void setCommand(client *c) {
}
}
#define COMMAND_GET 0
#define COMMAND_SET 1
/*
 * The parseExtendedStringArgumentsOrReply() function performs the common validation for extended
 * string arguments used in the SET and GETEX commands.
 *
 * GET-only options    - PERSIST
 * SET-only options    - XX/NX/GET/KEEPTTL
 * Common options      - EX/EXAT/PX/PXAT
 *
 * Parameters:
 *   c            - the client; options are read from c->argv starting at index 2 for
 *                  COMMAND_GET and index 3 for COMMAND_SET.
 *   flags        - in/out bitmask; the OBJ_* bit of every accepted option is OR-ed in.
 *   unit         - out; set to UNIT_MILLISECONDS when PX/PXAT is given. EX/EXAT do NOT
 *                  write it, so the caller must initialise it to UNIT_SECONDS.
 *   expire       - out; set to the argument following EX/EXAT/PX/PXAT (not parsed here).
 *   command_type - COMMAND_GET or COMMAND_SET.
 *
 * Each branch also checks the flags already set, so conflicting or repeated-incompatible
 * options (e.g. NX with XX, KEEPTTL with any expire option, two different expire options)
 * fall through to the final else and are reported as a syntax error.
 *
 * Returns C_OK on success. On any syntax violation a syntax error is replied to the client
 * and C_ERR is returned.
 */
int parseExtendedStringArgumentsOrReply(client *c, int *flags, int *unit, robj **expire, int command_type) {
/* First option index: GETEX has no value argument, SET has one. */
int j = command_type == COMMAND_GET ? 2 : 3;
for (; j < c->argc; j++) {
char *opt = c->argv[j]->ptr;
/* Argument following the option, or NULL when the option is the last one. */
robj *next = (j == c->argc-1) ? NULL : c->argv[j+1];
/* NX: SET only, incompatible with XX and GET. */
if ((opt[0] == 'n' || opt[0] == 'N') &&
(opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' &&
!(*flags & OBJ_SET_XX) && !(*flags & OBJ_SET_GET) && (command_type == COMMAND_SET))
{
*flags |= OBJ_SET_NX;
/* XX: SET only, incompatible with NX. */
} else if ((opt[0] == 'x' || opt[0] == 'X') &&
(opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' &&
!(*flags & OBJ_SET_NX) && (command_type == COMMAND_SET))
{
*flags |= OBJ_SET_XX;
/* GET: SET only, incompatible with NX. */
} else if ((opt[0] == 'g' || opt[0] == 'G') &&
(opt[1] == 'e' || opt[1] == 'E') &&
(opt[2] == 't' || opt[2] == 'T') && opt[3] == '\0' &&
!(*flags & OBJ_SET_NX) && (command_type == COMMAND_SET))
{
*flags |= OBJ_SET_GET;
/* KEEPTTL: SET only, incompatible with every expire option. */
} else if (!strcasecmp(opt, "KEEPTTL") && !(*flags & OBJ_PERSIST) &&
!(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) &&
!(*flags & OBJ_PX) && !(*flags & OBJ_PXAT) && (command_type == COMMAND_SET))
{
*flags |= OBJ_KEEPTTL;
/* PERSIST: GET only, incompatible with every expire option. */
} else if (!strcasecmp(opt,"PERSIST") && (command_type == COMMAND_GET) &&
!(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) &&
!(*flags & OBJ_PX) && !(*flags & OBJ_PXAT) &&
!(*flags & OBJ_KEEPTTL))
{
*flags |= OBJ_PERSIST;
/* EX <seconds>: requires a following argument; *unit is left untouched. */
} else if ((opt[0] == 'e' || opt[0] == 'E') &&
(opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' &&
!(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) &&
!(*flags & OBJ_EXAT) && !(*flags & OBJ_PX) &&
!(*flags & OBJ_PXAT) && next)
{
*flags |= OBJ_EX;
*expire = next;
j++;
/* PX <milliseconds>: requires a following argument. */
} else if ((opt[0] == 'p' || opt[0] == 'P') &&
(opt[1] == 'x' || opt[1] == 'X') && opt[2] == '\0' &&
!(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) &&
!(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) &&
!(*flags & OBJ_PXAT) && next)
{
*flags |= OBJ_PX;
*unit = UNIT_MILLISECONDS;
*expire = next;
j++;
/* EXAT <seconds-timestamp>: requires a following argument; *unit is left untouched. */
} else if ((opt[0] == 'e' || opt[0] == 'E') &&
(opt[1] == 'x' || opt[1] == 'X') &&
(opt[2] == 'a' || opt[2] == 'A') &&
(opt[3] == 't' || opt[3] == 'T') && opt[4] == '\0' &&
!(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) &&
!(*flags & OBJ_EX) && !(*flags & OBJ_PX) &&
!(*flags & OBJ_PXAT) && next)
{
*flags |= OBJ_EXAT;
*expire = next;
j++;
/* PXAT <milliseconds-timestamp>: requires a following argument. */
} else if ((opt[0] == 'p' || opt[0] == 'P') &&
(opt[1] == 'x' || opt[1] == 'X') &&
(opt[2] == 'a' || opt[2] == 'A') &&
(opt[3] == 't' || opt[3] == 'T') && opt[4] == '\0' &&
!(*flags & OBJ_KEEPTTL) && !(*flags & OBJ_PERSIST) &&
!(*flags & OBJ_EX) && !(*flags & OBJ_EXAT) &&
!(*flags & OBJ_PX) && next)
{
*flags |= OBJ_PXAT;
*unit = UNIT_MILLISECONDS;
*expire = next;
j++;
/* Unknown option, option not valid for this command type, conflicting
 * option, or expire option without its value. */
} else {
addReplyErrorObject(c,shared.syntaxerr);
return C_ERR;
}
}
return C_OK;
}
/* SET key value [NX] [XX] [KEEPTTL] [GET] [EX <seconds>] [PX <milliseconds>]
 *     [EXAT <seconds-timestamp>][PXAT <milliseconds-timestamp>]
 *
 * Option parsing is delegated to parseExtendedStringArgumentsOrReply(), which
 * replies with the error itself on failure; the actual work is done by
 * setGenericCommand(). */
void setCommand(client *c) {
    int flags = OBJ_NO_FLAGS;
    int unit = UNIT_SECONDS; /* Only overridden by PX/PXAT. */
    robj *expire = NULL;

    int parsed = parseExtendedStringArgumentsOrReply(c,&flags,&unit,&expire,COMMAND_SET);
    if (parsed == C_OK) {
        c->argv[2] = tryObjectEncoding(c->argv[2]);
        setGenericCommand(c,flags,c->argv[1],c->argv[2],expire,unit,NULL,NULL);
    }
}
void setnxCommand(client *c) {
c->argv[2] = tryObjectEncoding(c->argv[2]);
setGenericCommand(c,OBJ_SET_NX,c->argv[1],c->argv[2],NULL,0,shared.cone,shared.czero);
@ -187,12 +271,12 @@ void setnxCommand(client *c) {
void setexCommand(client *c) {
c->argv[3] = tryObjectEncoding(c->argv[3]);
setGenericCommand(c,OBJ_SET_NO_FLAGS,c->argv[1],c->argv[3],c->argv[2],UNIT_SECONDS,NULL,NULL);
setGenericCommand(c,OBJ_EX,c->argv[1],c->argv[3],c->argv[2],UNIT_SECONDS,NULL,NULL);
}
void psetexCommand(client *c) {
c->argv[3] = tryObjectEncoding(c->argv[3]);
setGenericCommand(c,OBJ_SET_NO_FLAGS,c->argv[1],c->argv[3],c->argv[2],UNIT_MILLISECONDS,NULL,NULL);
setGenericCommand(c,OBJ_PX,c->argv[1],c->argv[3],c->argv[2],UNIT_MILLISECONDS,NULL,NULL);
}
int getGenericCommand(client *c) {
@ -213,6 +297,112 @@ void getCommand(client *c) {
getGenericCommand(c);
}
/*
 * GETEX <key> [PERSIST][EX seconds][PX milliseconds][EXAT seconds-timestamp][PXAT milliseconds-timestamp]
 *
 * The getexCommand() function implements extended options and variants of the GET command. Unlike
 * the GET command, this command is not read-only.
 *
 * The default behavior when no options are specified is the same as GET and does not alter any TTL.
 *
 * Only one of the below options can be used at a given time.
 *
 * 1. PERSIST removes any TTL associated with the key.
 * 2. EX   Set expiry TTL in seconds.
 * 3. PX   Set expiry TTL in milliseconds.
 * 4. EXAT Same as EX, but instead of the number of seconds representing the TTL
 *         (time to live) it takes an absolute Unix timestamp in seconds.
 * 5. PXAT Same as PX, but instead of the number of milliseconds representing the TTL
 *         (time to live) it takes an absolute Unix timestamp in milliseconds.
 *
 * The command replies with the bulk string, an error, or nil.
 *
 * An expire value that is not positive, or that would overflow a signed 64 bit
 * millisecond timestamp (after the seconds->milliseconds conversion, or after
 * adding the current time for EX/PX), is rejected with "invalid expire time".
 */
void getexCommand(client *c) {
    robj *expire = NULL;
    int unit = UNIT_SECONDS;
    int flags = OBJ_NO_FLAGS;

    if (parseExtendedStringArgumentsOrReply(c,&flags,&unit,&expire,COMMAND_GET) != C_OK) {
        return;
    }

    robj *o;
    if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp])) == NULL)
        return;
    if (checkType(c,o,OBJ_STRING)) {
        return;
    }

    long long milliseconds = 0; /* Value as given by the user, converted to ms. */
    long long when = 0;         /* Absolute expire time in ms. */
    int relative = (flags & OBJ_PX) || (flags & OBJ_EX);

    /* Validate the expiration time value first */
    if (expire) {
        /* Largest signed 64 bit value, computed locally so that the block
         * does not depend on <limits.h> being included. */
        const long long ll_max = (long long)(~0ULL >> 1);
        int valid = 1;

        if (getLongLongFromObjectOrReply(c, expire, &milliseconds, NULL) != C_OK)
            return;

        if (milliseconds <= 0) {
            valid = 0;
        } else if (unit == UNIT_SECONDS && milliseconds > ll_max / 1000) {
            /* The multiplication below would overflow. */
            valid = 0;
        }

        if (valid) {
            if (unit == UNIT_SECONDS) milliseconds *= 1000;
            when = milliseconds;
            if (relative) {
                long long now = mstime();
                if (milliseconds > ll_max - now) {
                    /* now + milliseconds would overflow. */
                    valid = 0;
                } else {
                    when = now + milliseconds;
                }
            }
        }

        if (!valid) {
            addReplyErrorFormat(c,"invalid expire time in %s",c->cmd->name);
            return;
        }
    }

    /* We need to do this before we expire the key or delete it */
    addReplyBulk(c,o);

    /* This command is never propagated as is. It is either propagated as PEXPIRE[AT],DEL,UNLINK or PERSIST.
     * This is why it doesn't need special handling in feedAppendOnlyFile to convert relative expire time
     * to an absolute one. */
    if (((flags & OBJ_PXAT) || (flags & OBJ_EXAT)) && checkAlreadyExpired(milliseconds)) {
        /* When PXAT/EXAT absolute timestamp is specified, there can be a chance that timestamp
         * has already elapsed so delete the key in that case. */
        int deleted = server.lazyfree_lazy_expire ? dbAsyncDelete(c->db, c->argv[1]) :
                      dbSyncDelete(c->db, c->argv[1]);
        serverAssert(deleted);
        robj *aux = server.lazyfree_lazy_expire ? shared.unlink : shared.del;
        rewriteClientCommandVector(c,2,aux,c->argv[1]);
        signalModifiedKey(c, c->db, c->argv[1]);
        notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id);
        server.dirty++;
    } else if (expire) {
        /* EX/PX propagate as PEXPIRE with the relative milliseconds,
         * EXAT/PXAT as PEXPIREAT with the absolute timestamp. */
        robj *exp = relative ? shared.pexpire : shared.pexpireat;
        setExpire(c,c->db,c->argv[1],when);

        robj* millisecondObj = createStringObjectFromLongLong(milliseconds);
        rewriteClientCommandVector(c,3,exp,c->argv[1],millisecondObj);
        decrRefCount(millisecondObj);
        signalModifiedKey(c, c->db, c->argv[1]);
        notifyKeyspaceEvent(NOTIFY_GENERIC,"expire",c->argv[1],c->db->id);
        server.dirty++;
    } else if (flags & OBJ_PERSIST) {
        /* Only counts as a write when a TTL was actually removed. */
        if (removeExpire(c->db, c->argv[1])) {
            signalModifiedKey(c, c->db, c->argv[1]);
            rewriteClientCommandVector(c, 2, shared.persist, c->argv[1]);
            notifyKeyspaceEvent(NOTIFY_GENERIC,"persist",c->argv[1],c->db->id);
            server.dirty++;
        }
    }
}
/* GETDEL <key>: reply like GET, then delete the key.
 * When a key was actually deleted the command is propagated as DEL, or as
 * UNLINK if server.lazyfree_lazy_user_del is set. */
void getdelCommand(client *c) {
    if (getGenericCommand(c) == C_ERR) return;

    int deleted;
    robj *propagate_as;
    if (server.lazyfree_lazy_user_del) {
        deleted = dbAsyncDelete(c->db, c->argv[1]);
        propagate_as = shared.unlink;
    } else {
        deleted = dbSyncDelete(c->db, c->argv[1]);
        propagate_as = shared.del;
    }
    if (!deleted) return;

    /* Propagate as DEL/UNLINK command */
    rewriteClientCommandVector(c,2,propagate_as,c->argv[1]);
    signalModifiedKey(c, c->db, c->argv[1]);
    notifyKeyspaceEvent(NOTIFY_GENERIC, "del", c->argv[1], c->db->id);
    server.dirty++;
}
void getsetCommand(client *c) {
if (getGenericCommand(c) == C_ERR) return;
c->argv[2] = tryObjectEncoding(c->argv[2]);
@ -221,9 +411,7 @@ void getsetCommand(client *c) {
server.dirty++;
/* Propagate as SET command */
robj *setcmd = createStringObject("SET",3);
rewriteClientCommandArgument(c,0,setcmd);
decrRefCount(setcmd);
rewriteClientCommandArgument(c,0,shared.set);
}
void setrangeCommand(client *c) {
@ -443,7 +631,7 @@ void decrbyCommand(client *c) {
void incrbyfloatCommand(client *c) {
long double incr, value;
robj *o, *new, *aux1, *aux2;
robj *o, *new, *aux;
o = lookupKeyWrite(c->db,c->argv[1]);
if (checkType(c,o,OBJ_STRING)) return;
@ -469,13 +657,11 @@ void incrbyfloatCommand(client *c) {
/* Always replicate INCRBYFLOAT as a SET command with the final value
* in order to make sure that differences in float precision or formatting
* will not create differences in replicas or after an AOF restart. */
aux1 = createStringObject("SET",3);
rewriteClientCommandArgument(c,0,aux1);
decrRefCount(aux1);
rewriteClientCommandArgument(c,0,shared.set);
rewriteClientCommandArgument(c,2,new);
aux2 = createStringObject("KEEPTTL",7);
rewriteClientCommandArgument(c,3,aux2);
decrRefCount(aux2);
aux = createStringObject("KEEPTTL",7);
rewriteClientCommandArgument(c,3,aux);
decrRefCount(aux);
}
void appendCommand(client *c) {

View File

@ -721,20 +721,26 @@ zskiplistNode *zslLastInLexRange(zskiplist *zsl, zlexrangespec *range) {
* Ziplist-backed sorted set API
*----------------------------------------------------------------------------*/
/* Convert the first 'vlen' bytes of 'vstr' (not NUL terminated) to a double
 * using strtod().
 *
 * The bytes are copied into a local 128 byte buffer and NUL terminated
 * there; input longer than 127 bytes is truncated to 127 bytes so that the
 * terminator always fits inside the buffer. */
double zzlStrtod(unsigned char *vstr, unsigned int vlen) {
    char buf[128];
    /* Leave room for the terminating NUL byte. */
    if (vlen > sizeof(buf)-1)
        vlen = sizeof(buf)-1;
    memcpy(buf,vstr,vlen);
    buf[vlen] = '\0';
    return strtod(buf,NULL);
}
double zzlGetScore(unsigned char *sptr) {
unsigned char *vstr;
unsigned int vlen;
long long vlong;
char buf[128];
double score;
serverAssert(sptr != NULL);
serverAssert(ziplistGet(sptr,&vstr,&vlen,&vlong));
if (vstr) {
memcpy(buf,vstr,vlen);
buf[vlen] = '\0';
score = strtod(buf,NULL);
score = zzlStrtod(vstr,vlen);
} else {
score = vlong;
}
@ -1653,6 +1659,48 @@ int zsetZiplistValidateIntegrity(unsigned char *zl, size_t size, int deep) {
return ret;
}
/* Create a new sds string from the ziplist entry: a copy of the string
 * payload when 'sval' is set, otherwise the decimal form of 'lval'. */
sds zsetSdsFromZiplistEntry(ziplistEntry *e) {
    if (e->sval) {
        return sdsnewlen(e->sval, e->slen);
    }
    return sdsfromlonglong(e->lval);
}
/* Reply with bulk string from the ziplist entry: the string payload when
 * 'sval' is set, otherwise the integer 'lval'. */
void zsetReplyFromZiplistEntry(client *c, ziplistEntry *e) {
    if (!e->sval) {
        addReplyBulkLongLong(c, e->lval);
        return;
    }
    addReplyBulkCBuffer(c, e->sval, e->slen);
}
/* Return random element from a non empty zset.
 * 'key' will be set to hold the element and, when 'score' is not NULL,
 * '*score' will be set to its score.
 * The memory in `key` is not to be freed or modified by the caller.
 * 'score' can be NULL in which case it's not extracted. */
void zsetTypeRandomElement(robj *zsetobj, unsigned long zsetsize, ziplistEntry *key, double *score) {
    if (zsetobj->encoding == OBJ_ENCODING_ZIPLIST) {
        ziplistEntry scoreval;
        ziplistRandomPair(zsetobj->ptr, zsetsize, key, &scoreval);
        if (score) {
            /* The score is stored either as a string or as an integer. */
            *score = scoreval.sval ?
                zzlStrtod(scoreval.sval,scoreval.slen) :
                (double)scoreval.lval;
        }
    } else if (zsetobj->encoding == OBJ_ENCODING_SKIPLIST) {
        zset *zs = zsetobj->ptr;
        dictEntry *picked = dictGetFairRandomKey(zs->dict);
        sds member = dictGetKey(picked);
        /* Only the string fields of 'key' are filled in; they point at the
         * dictionary's own key. */
        key->sval = (unsigned char*)member;
        key->slen = sdslen(member);
        if (score)
            *score = *(double*)dictGetVal(picked);
    } else {
        serverPanic("Unknown zset encoding");
    }
}
/*-----------------------------------------------------------------------------
* Sorted set commands
*----------------------------------------------------------------------------*/
@ -2543,7 +2591,9 @@ void zunionInterDiffGenericCommand(client *c, robj *dstkey, int numkeysIndex, in
/* read keys to be used for input */
src = zcalloc(sizeof(zsetopsrc) * setnum);
for (i = 0, j = numkeysIndex+1; i < setnum; i++, j++) {
robj *obj = lookupKeyWrite(c->db,c->argv[j]);
robj *obj = dstkey ?
lookupKeyWrite(c->db,c->argv[j]) :
lookupKeyRead(c->db,c->argv[j]);
if (obj != NULL) {
if (obj->type != OBJ_ZSET && obj->type != OBJ_SET) {
zfree(src);
@ -2749,6 +2799,9 @@ void zunionInterDiffGenericCommand(client *c, robj *dstkey, int numkeysIndex, in
unsigned long length = dstzset->zsl->length;
zskiplist *zsl = dstzset->zsl;
zskiplistNode *zn = zsl->header->level[0].forward;
/* In case of WITHSCORES, respond with a single array in RESP2, and
* nested arrays in RESP3. We can't use a map response type since the
* client library needs to know to respect the order. */
if (withscores && c->resp == 2)
addReplyArrayLen(c, length*2);
else
@ -2866,6 +2919,9 @@ static void zrangeResultEmitLongLongToClient(zrange_result_handler *handler,
static void zrangeResultFinalizeClient(zrange_result_handler *handler,
size_t result_count)
{
/* In case of WITHSCORES, respond with a single array in RESP2, and
* nested arrays in RESP3. We can't use a map response type since the
* client library needs to know to respect the order. */
if (handler->withscores && (handler->client->resp == 2)) {
result_count *= 2;
}
@ -3071,8 +3127,8 @@ void zrevrangeCommand(client *c) {
/* This command implements ZRANGEBYSCORE, ZREVRANGEBYSCORE. */
void genericZrangebyscoreCommand(zrange_result_handler *handler,
zrangespec *range, robj *zobj, int withscores, long offset,
long limit, int reverse) {
zrangespec *range, robj *zobj, long offset, long limit,
int reverse) {
client *c = handler->client;
unsigned long rangelen = 0;
@ -3172,8 +3228,7 @@ void genericZrangebyscoreCommand(zrange_result_handler *handler,
}
rangelen++;
handler->emitResultFromCBuffer(handler, ln->ele, sdslen(ln->ele),
((withscores) ? ln->score : ln->score));
handler->emitResultFromCBuffer(handler, ln->ele, sdslen(ln->ele), ln->score);
/* Move to next node */
if (reverse) {
@ -3605,11 +3660,16 @@ void zrangeGenericCommand(zrange_result_handler *handler, int argc_start, int st
}
/* Step 3: Lookup the key and get the range. */
if (((zobj = lookupKeyReadOrReply(c, key, shared.emptyarray)) == NULL)
|| checkType(c, zobj, OBJ_ZSET)) {
zobj = handler->dstkey ?
lookupKeyWrite(c->db,key) :
lookupKeyRead(c->db,key);
if (zobj == NULL) {
addReply(c,shared.emptyarray);
goto cleanup;
}
if (checkType(c,zobj,OBJ_ZSET)) goto cleanup;
/* Step 4: Pass this to the command-specific handler. */
switch (rangetype) {
case ZRANGE_AUTO:
@ -3619,8 +3679,8 @@ void zrangeGenericCommand(zrange_result_handler *handler, int argc_start, int st
break;
case ZRANGE_SCORE:
genericZrangebyscoreCommand(handler, &range, zobj, opt_withscores || store,
opt_offset, opt_limit, direction == ZRANGE_DIRECTION_REVERSE);
genericZrangebyscoreCommand(handler, &range, zobj, opt_offset,
opt_limit, direction == ZRANGE_DIRECTION_REVERSE);
break;
case ZRANGE_LEX:
@ -3895,3 +3955,216 @@ void bzpopminCommand(client *c) {
void bzpopmaxCommand(client *c) {
blockingGenericZpopCommand(c,ZSET_MAX);
}
/* How many times bigger should be the zset compared to the requested size
* for us to not use the "remove elements" strategy? Read later in the
* implementation for more info. */
#define ZRANDMEMBER_SUB_STRATEGY_MUL 3
/* Implements ZRANDMEMBER when a <count> argument is given.
 *
 * 'l' is the count as given by the user: a non-negative value asks for up to
 * 'l' distinct elements, a negative value asks for exactly -l elements where
 * repetitions are allowed. 'withscores' is non-zero when the score of every
 * element must be replied too (flat array in RESP2, nested two-element
 * arrays in RESP3).
 *
 * Counts whose magnitude cannot be used as a reply length (or whose double
 * cannot, when scores are requested) are rejected with an error instead of
 * overflowing. */
void zrandmemberWithCountCommand(client *c, long l, int withscores) {
    unsigned long count, size;
    int uniq = 1;
    robj *zsetobj;
    /* Largest count still representable as a non-negative long; computed
     * locally so the block does not depend on <limits.h>. */
    const unsigned long max_count = ~0UL >> 1;

    if ((zsetobj = lookupKeyReadOrReply(c, c->argv[1], shared.null[c->resp]))
        == NULL || checkType(c, zsetobj, OBJ_ZSET)) return;
    size = zsetLength(zsetobj);

    if (l >= 0) {
        count = (unsigned long) l;
    } else {
        /* Negate in unsigned arithmetic: -l is undefined for LONG_MIN. */
        count = 0UL - (unsigned long) l;
        uniq = 0;
    }

    /* Make sure neither 'count' nor 'count*2' (the flat reply length used
     * with scores) can wrap around, otherwise the announced array length
     * would not match the number of elements actually emitted. */
    if (count > max_count || (withscores && count > max_count/2)) {
        addReplyErrorFormat(c,"value is out of range");
        return;
    }

    /* If count is zero, serve it ASAP to avoid special cases later. */
    if (count == 0) {
        addReply(c,shared.emptyarray);
        return;
    }

    /* CASE 1: The count was negative, so the extraction method is just:
     * "return N random elements" sampling the whole set every time.
     * This case is trivial and can be served without auxiliary data
     * structures. This case is the only one that also needs to return the
     * elements in random order. */
    if (!uniq || count == 1) {
        if (withscores && c->resp == 2)
            addReplyArrayLen(c, count*2);
        else
            addReplyArrayLen(c, count);
        if (zsetobj->encoding == OBJ_ENCODING_SKIPLIST) {
            zset *zs = zsetobj->ptr;
            while (count--) {
                dictEntry *de = dictGetFairRandomKey(zs->dict);
                sds key = dictGetKey(de);
                if (withscores && c->resp > 2)
                    addReplyArrayLen(c,2);
                addReplyBulkCBuffer(c, key, sdslen(key));
                if (withscores)
                    addReplyDouble(c, dictGetDoubleVal(de));
            }
        } else if (zsetobj->encoding == OBJ_ENCODING_ZIPLIST) {
            ziplistEntry *keys, *vals = NULL;
            keys = zmalloc(sizeof(ziplistEntry)*count);
            if (withscores)
                vals = zmalloc(sizeof(ziplistEntry)*count);
            ziplistRandomPairs(zsetobj->ptr, count, keys, vals);
            for (unsigned long i = 0; i < count; i++) {
                if (withscores && c->resp > 2)
                    addReplyArrayLen(c,2);
                if (keys[i].sval)
                    addReplyBulkCBuffer(c, keys[i].sval, keys[i].slen);
                else
                    addReplyBulkLongLong(c, keys[i].lval);
                if (withscores) {
                    if (vals[i].sval) {
                        addReplyDouble(c, zzlStrtod(vals[i].sval,vals[i].slen));
                    } else
                        addReplyDouble(c, vals[i].lval);
                }
            }
            zfree(keys);
            zfree(vals);
        }
        return;
    }

    /* NOTE(review): the iterator initialised here is never explicitly
     * released in this function - confirm that is a no-op for zsets. */
    zsetopsrc src;
    zsetopval zval;
    src.subject = zsetobj;
    src.type = zsetobj->type;
    src.encoding = zsetobj->encoding;
    zuiInitIterator(&src);
    memset(&zval, 0, sizeof(zval));

    /* Initiate reply count, RESP3 responds with nested array, RESP2 with flat one. */
    long reply_size = count < size ? count : size;
    if (withscores && c->resp == 2)
        addReplyArrayLen(c, reply_size*2);
    else
        addReplyArrayLen(c, reply_size);

    /* CASE 2:
     * The number of requested elements is greater than the number of
     * elements inside the zset: simply return the whole zset. */
    if (count >= size) {
        while (zuiNext(&src, &zval)) {
            if (withscores && c->resp > 2)
                addReplyArrayLen(c,2);
            addReplyBulkSds(c, zuiNewSdsFromValue(&zval));
            if (withscores)
                addReplyDouble(c, zval.score);
        }
        return;
    }

    /* CASE 3:
     * The number of elements inside the zset is not greater than
     * ZRANDMEMBER_SUB_STRATEGY_MUL times the number of requested elements.
     * In this case we create a dict from scratch with all the elements, and
     * subtract random elements to reach the requested number of elements.
     *
     * This is done because if the number of requested elements is just
     * a bit less than the number of elements in the set, the natural approach
     * used into CASE 4 is highly inefficient. */
    if (count*ZRANDMEMBER_SUB_STRATEGY_MUL > size) {
        dict *d = dictCreate(&sdsReplyDictType, NULL);
        /* Add all the elements into the temporary dictionary. */
        while (zuiNext(&src, &zval)) {
            sds key = zuiNewSdsFromValue(&zval);
            dictEntry *de = dictAddRaw(d, key, NULL);
            serverAssert(de);
            if (withscores)
                dictSetDoubleVal(de, zval.score);
        }
        serverAssert(dictSize(d) == size);

        /* Remove random elements to reach the right count. */
        while (size > count) {
            dictEntry *de;
            de = dictGetRandomKey(d);
            dictUnlink(d,dictGetKey(de));
            sdsfree(dictGetKey(de));
            dictFreeUnlinkedEntry(d,de);
            size--;
        }

        /* Reply with what's in the dict and release memory */
        dictIterator *di;
        dictEntry *de;
        di = dictGetIterator(d);
        while ((de = dictNext(di)) != NULL) {
            if (withscores && c->resp > 2)
                addReplyArrayLen(c,2);
            addReplyBulkSds(c, dictGetKey(de));
            if (withscores)
                addReplyDouble(c, dictGetDoubleVal(de));
        }

        dictReleaseIterator(di);
        dictRelease(d);
    }

    /* CASE 4: We have a big zset compared to the requested number of elements.
     * In this case we can simply get random elements from the zset and add
     * to the temporary set, trying to eventually get enough unique elements
     * to reach the specified count. */
    else {
        unsigned long added = 0;
        dict *d = dictCreate(&hashDictType, NULL);

        while (added < count) {
            ziplistEntry key;
            double score;
            zsetTypeRandomElement(zsetobj, size, &key, withscores ? &score: NULL);

            /* Try to add the object to the dictionary. If it already exists
             * free it, otherwise increment the number of objects we have
             * in the result dictionary. */
            sds skey = zsetSdsFromZiplistEntry(&key);
            if (dictAdd(d,skey,NULL) != DICT_OK) {
                sdsfree(skey);
                continue;
            }
            added++;

            /* Reply right away; the dict only tracks which members were
             * already emitted. */
            if (withscores && c->resp > 2)
                addReplyArrayLen(c,2);
            zsetReplyFromZiplistEntry(c, &key);
            if (withscores)
                addReplyDouble(c, score);
        }

        /* Release memory */
        dictRelease(d);
    }
}
/* ZRANDMEMBER key [<count> [WITHSCORES]]
 *
 * Without <count> the reply is a single random member as a bulk string (or a
 * null reply when the key is missing). With <count> the work is delegated to
 * zrandmemberWithCountCommand(). */
void zrandmemberCommand(client *c) {
    if (c->argc < 3) {
        /* Variant without <count>: reply with a simple bulk string. */
        ziplistEntry picked;
        robj *zobj = lookupKeyReadOrReply(c,c->argv[1],shared.null[c->resp]);
        if (zobj == NULL || checkType(c,zobj,OBJ_ZSET)) return;

        zsetTypeRandomElement(zobj, zsetLength(zobj), &picked, NULL);
        zsetReplyFromZiplistEntry(c,&picked);
        return;
    }

    /* Variant with <count>: the count is parsed first, then the optional
     * trailing WITHSCORES token is validated. */
    long count;
    if (getLongFromObjectOrReply(c,c->argv[2],&count,NULL) != C_OK) return;

    int has_opt = (c->argc == 4);
    if (c->argc > 4 ||
        (has_opt && strcasecmp(c->argv[3]->ptr,"withscores") != 0))
    {
        addReplyErrorObject(c,shared.syntaxerr);
        return;
    }
    zrandmemberWithCountCommand(c, count, has_opt);
}

View File

@ -749,7 +749,7 @@ sds getAbsolutePath(char *filename) {
* Gets the proper timezone in a more portable fashion
* i.e timezone variables are linux specific.
*/
unsigned long getTimeZone(void) {
long getTimeZone(void) {
#if defined(__linux__) || defined(__sun)
return timezone;
#else
@ -758,7 +758,7 @@ unsigned long getTimeZone(void) {
gettimeofday(&tv, &tz);
return tz.tz_minuteswest * 60UL;
return tz.tz_minuteswest * 60L;
#endif
}

View File

@ -60,7 +60,7 @@ int string2d(const char *s, size_t slen, double *dp);
int d2string(char *buf, size_t len, double value);
int ld2string(char *buf, size_t len, long double value, ld2string_mode mode);
sds getAbsolutePath(char *filename);
unsigned long getTimeZone(void);
long getTimeZone(void);
int pathIsBaseName(char *path);
#ifdef REDIS_TEST

View File

@ -1,2 +1,2 @@
#define REDIS_VERSION "6.1.241"
#define REDIS_VERSION_NUM 0x000601f1
#define REDIS_VERSION "6.1.242"
#define REDIS_VERSION_NUM 0x000601f2

View File

@ -1498,6 +1498,89 @@ int ziplistValidateIntegrity(unsigned char *zl, size_t size, int deep,
return 1;
}
/* Pick one random key/value pair from a ziplist that stores keys and values
 * in alternating positions.
 * 'total_count' is the number of pairs (ziplist length / 2), pre-computed by
 * the caller so that ziplistLen does not have to be called here.
 * The selected key is written to 'key'. The element following it is written
 * to 'val', unless 'val' is NULL, in which case the value is not fetched. */
void ziplistRandomPair(unsigned char *zl, unsigned long total_count, ziplistEntry *key, ziplistEntry *val) {
    /* A zero pair count (corrupt ziplist) would divide by zero below. */
    assert(total_count);

    /* Keys live at even positions: choose a pair number and double it. */
    int pos = (rand() % total_count) * 2;
    unsigned char *entry = ziplistIndex(zl, pos);
    int found = ziplistGet(entry, &key->sval, &key->slen, &key->lval);
    assert(found != 0);

    if (val != NULL) {
        entry = ziplistNext(zl, entry);
        found = ziplistGet(entry, &val->sval, &val->slen, &val->lval);
        assert(found != 0);
    }
}
/* Three-way int comparison for qsort(): returns a negative, zero or positive
 * value when *a is respectively less than, equal to or greater than *b.
 * Comparisons are used instead of 'x - y' because the subtraction overflows
 * (undefined behavior) when the operands are far apart, e.g. INT_MIN and 1,
 * which can yield a result with the wrong sign. */
int intCompare(const void *a, const void *b) {
    int x = *(const int *) a;
    int y = *(const int *) b;
    return (x > y) - (x < y);
}
/* Helper that fills 'dest' with one ziplist element: 'val' and 'len' become
 * the string fields, 'lval' becomes the integer field. */
static inline void ziplistSaveValue(unsigned char *val, unsigned int len, long long lval, ziplistEntry *dest) {
    *dest = (ziplistEntry){ .sval = val, .slen = len, .lval = lval };
}
/* Randomly select 'count' key/value pairs and store them into the 'keys' and
 * 'vals' arrays, which must each have room for 'count' entries.
 * Every pick is drawn independently, so the same pair may appear more than
 * once in the output. The order of the output entries is the (random) order
 * in which they were picked, not their order inside the ziplist.
 * The 'vals' arg can be NULL, in which case values are not stored.
 * A 'count' of zero or less stores nothing. */
void ziplistRandomPairs(unsigned char *zl, int count, ziplistEntry *keys, ziplistEntry *vals) {
    unsigned char *p, *key = NULL, *value = NULL;
    unsigned int klen = 0, vlen = 0;
    long long klval = 0, vlval = 0;
    int ret;

    /* Note: 'index' must remain the first member, because intCompare reads
     * the start of each element as an int when the picks are sorted. */
    typedef struct {
        int index;
        int order;
    } rand_pick;

    unsigned long total_size = ziplistLen(zl)/2;

    /* Avoid div by zero on corrupt ziplist */
    assert(total_size);

    /* Nothing to pick. This also keeps a negative count from being converted
     * into a huge allocation size below. */
    if (count <= 0) return;

    rand_pick *picks = zmalloc(sizeof(rand_pick)*count);

    /* Create a pool of random indexes (some may be duplicate). */
    for (int i = 0; i < count; i++) {
        picks[i].index = (rand() % total_size) * 2; /* Generate even indexes */
        /* Keep track of the order we picked them */
        picks[i].order = i;
    }

    /* Sort by indexes, so one forward pass over the ziplist serves all picks. */
    qsort(picks, count, sizeof(rand_pick), intCompare);

    /* Fetch the elements from the ziplist into the output arrays, restoring
     * the original pick order through 'order'. */
    int zipindex = 0, pickindex = 0;
    p = ziplistIndex(zl, 0);
    while (ziplistGet(p, &key, &klen, &klval) && pickindex < count) {
        p = ziplistNext(zl, p);
        /* Every key must be followed by a value. Without this check a
         * malformed list with a dangling key would store stale data. */
        ret = ziplistGet(p, &value, &vlen, &vlval);
        assert(ret != 0);
        while (pickindex < count && zipindex == picks[pickindex].index) {
            int storeorder = picks[pickindex].order;
            ziplistSaveValue(key, klen, klval, &keys[storeorder]);
            if (vals)
                ziplistSaveValue(value, vlen, vlval, &vals[storeorder]);
            pickindex++;
        }
        zipindex += 2;
        p = ziplistNext(zl, p);
    }

    zfree(picks);
}
#ifdef REDIS_TEST
#include <sys/time.h>
#include "adlist.h"

View File

@ -34,6 +34,15 @@
#define ZIPLIST_HEAD 0
#define ZIPLIST_TAIL 1
/* Each entry in the ziplist is either a string or an integer.
 * This struct is the decoded view of a single entry; it is the output type
 * of ziplistRandomPair() and ziplistRandomPairs().
 * NOTE(review): 'sval' presumably points into the ziplist buffer itself and
 * is only valid while the ziplist is not modified -- confirm. */
typedef struct {
/* When string is used, it is provided with the length (slen). */
unsigned char *sval;
unsigned int slen;
/* When integer is used, 'sval' is NULL, and lval holds the value. */
long long lval;
} ziplistEntry;
unsigned char *ziplistNew(void);
unsigned char *ziplistMerge(unsigned char **first, unsigned char **second);
unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where);
@ -52,6 +61,8 @@ void ziplistRepr(unsigned char *zl);
typedef int (*ziplistValidateEntryCB)(unsigned char* p, void* userdata);
int ziplistValidateIntegrity(unsigned char *zl, size_t size, int deep,
ziplistValidateEntryCB entry_cb, void *cb_userdata);
void ziplistRandomPair(unsigned char *zl, unsigned long total_count, ziplistEntry *key, ziplistEntry *val);
void ziplistRandomPairs(unsigned char *zl, int count, ziplistEntry *keys, ziplistEntry *vals);
#ifdef REDIS_TEST
int ziplistTest(int argc, char *argv[]);

View File

@ -0,0 +1,62 @@
# Tests for the optimized CLUSTER NODES command, which generates the slot topology of all the nodes first.
source "../tests/includes/init-tests.tcl"
# Assign all the 16384 hash slots to the first $n instances using contiguous
# ranges: every node gets 16384/n slots, and when the division leaves a
# remainder the extra slots are given to the last node.
proc cluster_allocate_with_continuous_slots {n} {
    set per_node [expr {16384 / $n}]
    for {set slot 16383} {$slot >= 0} {incr slot -1} {
        set owner [expr {$slot / $per_node}]
        if {$owner >= $n} {
            set owner [expr {$n - 1}]
        }
        lappend slots_$owner $slot
    }
    for {set idx 0} {$idx < $n} {incr idx} {
        R $idx cluster addslots {*}[set slots_${idx}]
    }
}
# Build a cluster whose masters own contiguous slot ranges. Replicas are
# attached only when a non-zero replica count is requested. The proc ends by
# asserting that the cluster state is ok.
proc cluster_create_with_continuous_slots {masters slaves} {
    cluster_allocate_with_continuous_slots $masters
    if {$slaves} {cluster_allocate_slaves $masters $slaves}
    assert_cluster_state ok
}
# Two masters with contiguous slot ranges, plus the replicas requested by the
# second argument.
test "Create a 2 nodes cluster" {
cluster_create_with_continuous_slots 2 2
}
test "Cluster should start ok" {
assert_cluster_state ok
}
# Clients for instances 0 and 1, which received the slots in the allocation above.
set master1 [Rn 0]
set master2 [Rn 1]
# Each master's CLUSTER NODES output must show its slots collapsed into ranges.
# Deleting one slot from the middle of a range must split it into two ranges.
test "Continuous slots distribution" {
assert_match "* 0-8191*" [$master1 CLUSTER NODES]
assert_match "* 8192-16383*" [$master2 CLUSTER NODES]
$master1 CLUSTER DELSLOTS 4096
assert_match "* 0-4095 4097-8191*" [$master1 CLUSTER NODES]
$master2 CLUSTER DELSLOTS 12288
assert_match "* 8192-12287 12289-16383*" [$master2 CLUSTER NODES]
}
# Isolated slots must be printed as single numbers, not as ranges, whether they
# sit in the middle, at the head or at the tail of the slot space.
test "Discontinuous slots distribution" {
# Remove middle slots
$master1 CLUSTER DELSLOTS 4092 4094
assert_match "* 0-4091 4093 4095 4097-8191*" [$master1 CLUSTER NODES]
$master2 CLUSTER DELSLOTS 12284 12286
assert_match "* 8192-12283 12285 12287 12289-16383*" [$master2 CLUSTER NODES]
# Remove head slots
$master1 CLUSTER DELSLOTS 0 2
assert_match "* 1 3-4091 4093 4095 4097-8191*" [$master1 CLUSTER NODES]
# Remove tail slots
$master2 CLUSTER DELSLOTS 16380 16382 16383
assert_match "* 8192-12283 12285 12287 12289-16379 16381*" [$master2 CLUSTER NODES]
}

View File

@ -24,9 +24,11 @@ set ::simulate_error 0
set ::failed 0
set ::sentinel_instances {}
set ::redis_instances {}
set ::global_config {}
set ::sentinel_base_port 20000
set ::redis_base_port 30000
set ::redis_port_count 1024
set ::host "127.0.0.1"
set ::pids {} ; # We kill everything at exit
set ::dirs {} ; # We remove all the temp dirs at exit
set ::run_matching {} ; # If non empty, only tests matching pattern are run.
@ -58,10 +60,9 @@ proc exec_instance {type dirname cfgfile} {
}
# Spawn a redis or sentinel instance, depending on 'type'.
proc spawn_instance {type base_port count {conf {}}} {
proc spawn_instance {type base_port count {conf {}} {base_conf_file ""}} {
for {set j 0} {$j < $count} {incr j} {
set port [find_available_port $base_port $::redis_port_count]
# Create a directory for this instance.
set dirname "${type}_${j}"
lappend ::dirs $dirname
@ -70,7 +71,13 @@ proc spawn_instance {type base_port count {conf {}}} {
# Write the instance config file.
set cfgfile [file join $dirname $type.conf]
set cfg [open $cfgfile w]
if {$base_conf_file ne ""} {
file copy -- $base_conf_file $cfgfile
set cfg [open $cfgfile a+]
} else {
set cfg [open $cfgfile w]
}
if {$::tls} {
puts $cfg "tls-port $port"
puts $cfg "tls-replication yes"
@ -92,6 +99,9 @@ proc spawn_instance {type base_port count {conf {}}} {
foreach directive $conf {
puts $cfg $directive
}
dict for {name val} $::global_config {
puts $cfg "$name $val"
}
close $cfg
# Finally exec it and remember the pid for later cleanup.
@ -119,18 +129,18 @@ proc spawn_instance {type base_port count {conf {}}} {
}
# Check availability finally
if {[server_is_up 127.0.0.1 $port 100] == 0} {
if {[server_is_up $::host $port 100] == 0} {
set logfile [file join $dirname log.txt]
puts [exec tail $logfile]
abort_sentinel_test "Problems starting $type #$j: ping timeout, maybe server start failed, check $logfile"
}
# Push the instance into the right list
set link [redis 127.0.0.1 $port 0 $::tls]
set link [redis $::host $port 0 $::tls]
$link reconnect 1
lappend ::${type}_instances [list \
pid $pid \
host 127.0.0.1 \
host $::host \
port $port \
link $link \
]
@ -232,6 +242,9 @@ proc parse_options {} {
set ::simulate_error 1
} elseif {$opt eq {--valgrind}} {
set ::valgrind 1
} elseif {$opt eq {--host}} {
incr j
set ::host ${val}
} elseif {$opt eq {--tls}} {
package require tls 1.6
::tls::init \
@ -239,6 +252,10 @@ proc parse_options {} {
-certfile "$::tlsdir/client.crt" \
-keyfile "$::tlsdir/client.key"
set ::tls 1
} elseif {$opt eq {--config}} {
set val2 [lindex $::argv [expr $j+2]]
dict set ::global_config $val $val2
incr j 2
} elseif {$opt eq "--help"} {
puts "--single <pattern> Only runs tests specified by pattern."
puts "--dont-clean Keep log files on exit."
@ -246,6 +263,8 @@ proc parse_options {} {
puts "--fail Simulate a test failure."
puts "--valgrind Run with valgrind."
puts "--tls Run tests in TLS mode."
puts "--host <host> Use hostname instead of 127.0.0.1."
puts "--config <k> <v> Extra config argument(s)."
puts "--help Shows this help."
exit 0
} else {
@ -391,6 +410,11 @@ proc check_leaks instance_types {
# Execute all the units inside the 'tests' directory.
proc run_tests {} {
set sentinel_fd_leaks_file "sentinel_fd_leaks"
if { [file exists $sentinel_fd_leaks_file] } {
file delete $sentinel_fd_leaks_file
}
set tests [lsort [glob ../tests/*]]
foreach test $tests {
if {$::run_matching ne {} && [string match $::run_matching $test] == 0} {
@ -405,7 +429,15 @@ proc run_tests {} {
# Print a message and exists with 0 / 1 according to zero or more failures.
proc end_tests {} {
if {$::failed == 0} {
set sentinel_fd_leaks_file "sentinel_fd_leaks"
if { [file exists $sentinel_fd_leaks_file] } {
# temporarily disabling this error from failing the tests until leaks are fixed.
#puts [colorstr red "WARNING: sentinel test(s) failed, there are leaked fds in sentinel:"]
#puts [exec cat $sentinel_fd_leaks_file]
#exit 1
}
if {$::failed == 0 } {
puts "GOOD! No errors."
exit 0
} else {

View File

@ -272,4 +272,15 @@ tags {"aof"} {
}
}
}
# Start a server with AOF enabled and check that a GETEX call without any
# option leaves the AOF file size unchanged.
start_server {overrides {appendonly {yes} appendfilename {appendonly.aof}}} {
test {GETEX should not append to AOF} {
# Locate the AOF file inside the directory reported by CONFIG GET dir.
set aof [file join [lindex [r config get dir] 1] appendonly.aof]
r set foo bar
# Size after the SET; the GETEX below must not grow the file.
set before [file size $aof]
r getex foo
set after [file size $aof]
assert_equal $before $after
}
}
}

View File

@ -507,5 +507,16 @@ test {corrupt payload: fuzzer findings - valgrind invalid read} {
}
}
# Restore a crafted payload with checksum validation skipped, then run
# HRANDFIELD on it. The server log is expected to contain exactly one
# "ASSERTION FAILED" message and no "crashed by signal" message.
test {corrupt payload: fuzzer findings - HRANDFIELD on bad ziplist} {
start_server [list overrides [list loglevel verbose use-exit-on-panic yes crash-memcheck-enabled no] ] {
r config set sanitize-dump-payload yes
r debug set-skip-checksum-validation 1
r RESTORE _int 0 "\x04\xC0\x01\x09\x00\xF6\x8A\xB6\x7A\x85\x87\x72\x4D"
# The command is expected to fail, so its error is swallowed here and the
# outcome is checked through the log messages below.
catch {r HRANDFIELD _int}
assert_equal [count_log_message 0 "crashed by signal"] 0
assert_equal [count_log_message 0 "ASSERTION FAILED"] 1
}
}
} ;# tags

View File

@ -0,0 +1,290 @@
start_server {tags {"failover"}} {
start_server {} {
start_server {} {
# Client handle, host, port and pid of each of the three servers started above.
# NOTE(review): presumably srv level 0 is the innermost start_server and the
# negative levels walk outwards -- confirm against the test framework.
set node_0 [srv 0 client]
set node_0_host [srv 0 host]
set node_0_port [srv 0 port]
set node_0_pid [srv 0 pid]
set node_1 [srv -1 client]
set node_1_host [srv -1 host]
set node_1_port [srv -1 port]
set node_1_pid [srv -1 pid]
set node_2 [srv -2 client]
set node_2_host [srv -2 host]
set node_2_port [srv -2 port]
set node_2_pid [srv -2 pid]
# Assert that the three given clients all report the same DEBUG DIGEST value.
# Equality is checked pairwise: n1 against n2, then n2 against n3.
proc assert_digests_match {n1 n2 n3} {
assert_equal [$n1 debug digest] [$n2 debug digest]
assert_equal [$n2 debug digest] [$n3 debug digest]
}
# Runs before replication is configured, so node 1 is not yet a replica of
# node 0 and the FAILOVER request must be answered with an error.
test {failover command fails without connected replica} {
catch { $node_0 failover to $node_1_host $node_1_port } err
if {! [string match "ERR*" $err]} {
fail "failover command succeeded when replica not connected"
}
}
# From here on node 0 is the master, with node 1 and node 2 as its replicas.
test {setup replication for following tests} {
$node_1 replicaof $node_0_host $node_0_port
$node_2 replicaof $node_0_host $node_0_port
wait_for_sync $node_1
wait_for_sync $node_2
}
# The tests below send malformed or disallowed FAILOVER requests. Each one only
# checks that the reply is an error starting with ERR.
test {failover command fails with invalid host} {
catch { $node_0 failover to invalidhost $node_1_port } err
assert_match "ERR*" $err
}
test {failover command fails with invalid port} {
catch { $node_0 failover to $node_1_host invalidport } err
assert_match "ERR*" $err
}
# FORCE and TIMEOUT given without a TO <host> <port> target.
test {failover command fails with just force and timeout} {
catch { $node_0 FAILOVER FORCE TIMEOUT 100} err
assert_match "ERR*" $err
}
# Node 1 is a replica at this point, so it must refuse to start a failover.
test {failover command fails when sent to a replica} {
catch { $node_1 failover to $node_1_host $node_1_port } err
assert_match "ERR*" $err
}
# FORCE given without a TIMEOUT.
test {failover command fails with force without timeout} {
catch { $node_0 failover to $node_1_host $node_1_port FORCE } err
assert_match "ERR*" $err
}
# Fail over from node 0 to node 1 while a write load is running, then check
# that both other nodes attached to the new master with partial syncs only.
test {failover command to specific replica works} {
    # Baseline sync counters of node 1, the future master.
    set initial_psyncs [s -1 sync_partial_ok]
    set initial_syncs [s -1 sync_full]

    # Generate a delta between primary and replica
    set load_handler [start_write_load $node_0_host $node_0_port 5]
    exec kill -SIGSTOP [srv -1 pid]
    wait_for_condition 50 100 {
        [s 0 total_commands_processed] > 100
    } else {
        fail "Node 0 did not accept writes"
    }
    exec kill -SIGCONT [srv -1 pid]

    # Execute the failover
    $node_0 failover to $node_1_host $node_1_port

    # Wait for failover to end
    wait_for_condition 50 100 {
        [s 0 master_failover_state] == "no-failover"
    } else {
        fail "Failover from node 0 to node 1 did not finish"
    }
    stop_write_load $load_handler

    $node_2 replicaof $node_1_host $node_1_port
    wait_for_sync $node_0
    wait_for_sync $node_2

    assert_match *slave* [$node_0 role]
    assert_match *master* [$node_1 role]
    assert_match *slave* [$node_2 role]

    # We should accept psyncs from both nodes, and no full sync at all.
    # The full sync delta must be computed against the sync_full baseline,
    # not against the partial sync baseline.
    assert_equal [expr [s -1 sync_partial_ok] - $initial_psyncs] 2
    assert_equal [expr [s -1 sync_full] - $initial_syncs] 0
    assert_digests_match $node_0 $node_1 $node_2
}
# Fail over from node 1 without naming a target. Node 0 is paused, so node 2
# is the only replica able to take over.
test {failover command to any replica works} {
    # Baseline sync counters of node 2, the future master.
    set initial_psyncs [s -2 sync_partial_ok]
    set initial_syncs [s -2 sync_full]

    wait_for_ofs_sync $node_1 $node_2
    # We stop node 0 to make sure node 2 is selected
    exec kill -SIGSTOP $node_0_pid
    $node_1 set CASE 1
    $node_1 FAILOVER

    # Wait for failover to end
    wait_for_condition 50 100 {
        [s -1 master_failover_state] == "no-failover"
    } else {
        fail "Failover from node 1 to node 2 did not finish"
    }
    exec kill -SIGCONT $node_0_pid
    $node_0 replicaof $node_2_host $node_2_port

    wait_for_sync $node_0
    wait_for_sync $node_1

    assert_match *slave* [$node_0 role]
    assert_match *slave* [$node_1 role]
    assert_match *master* [$node_2 role]

    # We should accept psyncs from both nodes, and no full sync at all.
    # Both counters are read from node 2 (the new master) and compared with
    # the matching baseline taken at the start of the test.
    assert_equal [expr [s -2 sync_partial_ok] - $initial_psyncs] 2
    assert_equal [expr [s -2 sync_full] - $initial_syncs] 0
    assert_digests_match $node_0 $node_1 $node_2
}
# Node 2 is the master here. It is asked to fail over to node 0 with a short
# TIMEOUT and FORCE while node 0 is paused, so the timeout must expire and the
# failover must proceed anyway.
test {failover to a replica with force works} {
# Baseline sync counters of node 0, the future master.
set initial_psyncs [s 0 sync_partial_ok]
set initial_syncs [s 0 sync_full]
exec kill -SIGSTOP $node_0_pid
# node 0 will never acknowledge this write
$node_2 set case 2
$node_2 failover to $node_0_host $node_0_port TIMEOUT 100 FORCE
# Wait for node 2 to give up waiting for node 0 to catch up and to start
# the failover
wait_for_condition 50 100 {
[s -2 master_failover_state] == "failover-in-progress"
} else {
fail "Failover from node 2 to node 0 did not timeout"
}
# Quick check that everyone is a replica, we never want a
# state where there are two masters.
assert_match *slave* [$node_1 role]
assert_match *slave* [$node_2 role]
exec kill -SIGCONT $node_0_pid
# Wait for failover to end
wait_for_condition 50 100 {
[s -2 master_failover_state] == "no-failover"
} else {
fail "Failover from node 2 to node 0 did not finish"
}
$node_1 replicaof $node_0_host $node_0_port
wait_for_sync $node_1
wait_for_sync $node_2
assert_match *master* [$node_0 role]
assert_match *slave* [$node_1 role]
assert_match *slave* [$node_2 role]
# The old master (node 2) must have logged the forced failover exactly once.
assert_equal [count_log_message -2 "time out exceeded, failing over."] 1
# We should accept both psyncs, although this is the case where a psync might
# be refused, since the target had not caught up.
assert_equal [expr [s 0 sync_partial_ok] - $initial_psyncs] 2
assert_equal [expr [s 0 sync_full] - $initial_syncs] 0
assert_digests_match $node_0 $node_1 $node_2
}
# Without FORCE, a failover whose target never catches up within TIMEOUT must
# be abandoned, leaving node 0 as the master.
test {failover with timeout aborts if replica never catches up} {
# Baseline sync counters of node 0, which must remain the master.
set initial_psyncs [s 0 sync_partial_ok]
set initial_syncs [s 0 sync_full]
# Stop replica so it never catches up
exec kill -SIGSTOP [srv -1 pid]
$node_0 SET CASE 1
$node_0 failover to [srv -1 host] [srv -1 port] TIMEOUT 500
# Wait for failover to end
wait_for_condition 50 20 {
[s 0 master_failover_state] == "no-failover"
} else {
fail "Failover from node_0 to replica did not finish"
}
exec kill -SIGCONT [srv -1 pid]
# We need to make sure the nodes actually sync back up
wait_for_ofs_sync $node_0 $node_1
wait_for_ofs_sync $node_0 $node_2
assert_match *master* [$node_0 role]
assert_match *slave* [$node_1 role]
assert_match *slave* [$node_2 role]
# Since we never caught up, there should be no syncs
assert_equal [expr [s 0 sync_partial_ok] - $initial_psyncs] 0
assert_equal [expr [s 0 sync_full] - $initial_syncs] 0
assert_digests_match $node_0 $node_1 $node_2
}
# Start a failover towards a paused replica with a long timeout, then cancel
# it with FAILOVER ABORT and check that nothing changed in the topology.
test {failovers can be aborted} {
    # Baseline sync counters of node 0, which must remain the master.
    set initial_psyncs [s 0 sync_partial_ok]
    set initial_syncs [s 0 sync_full]

    # Stop replica so it never catches up
    exec kill -SIGSTOP [srv -1 pid]
    $node_0 SET CASE 2

    $node_0 failover to [srv -1 host] [srv -1 port] TIMEOUT 60000
    # assert_match takes the pattern first and the value second, as in the
    # role checks below.
    assert_match "waiting-for-sync" [s 0 master_failover_state]

    # Sanity check that read commands are still accepted
    $node_0 GET CASE

    $node_0 failover abort
    assert_match "no-failover" [s 0 master_failover_state]

    exec kill -SIGCONT [srv -1 pid]

    # Just make sure everything is still synced
    wait_for_ofs_sync $node_0 $node_1
    wait_for_ofs_sync $node_0 $node_2

    assert_match *master* [$node_0 role]
    assert_match *slave* [$node_1 role]
    assert_match *slave* [$node_2 role]

    # Since we never caught up, there should be no syncs
    assert_equal [expr [s 0 sync_partial_ok] - $initial_psyncs] 0
    assert_equal [expr [s 0 sync_full] - $initial_syncs] 0
    assert_digests_match $node_0 $node_1 $node_2
}
# The target replica (node 1) is configured to refuse PSYNC, so the failover
# started by node 0 must be abandoned and node 0 must stay the master.
test {failover aborts if target rejects sync request} {
# Baseline sync counters of node 0, which must remain the master.
set initial_psyncs [s 0 sync_partial_ok]
set initial_syncs [s 0 sync_full]
# We block psync, so the failover will fail
$node_1 acl setuser default -psync
# We pause the target long enough to send a write command
# during the pause. This write will not be interrupted.
exec kill -SIGSTOP [srv -1 pid]
# Deferring client: the SET is sent now and its reply is read further down.
set rd [redis_deferring_client]
$rd SET FOO BAR
$node_0 failover to $node_1_host $node_1_port
exec kill -SIGCONT [srv -1 pid]
# Wait for failover to end
wait_for_condition 50 100 {
[s 0 master_failover_state] == "no-failover"
} else {
fail "Failover from node_0 to replica did not finish"
}
# The write sent before the failover must have completed successfully.
assert_equal [$rd read] "OK"
$rd close
# restore access to psync
$node_1 acl setuser default +psync
# We need to make sure the nodes actually sync back up
wait_for_sync $node_1
wait_for_sync $node_2
assert_match *master* [$node_0 role]
assert_match *slave* [$node_1 role]
assert_match *slave* [$node_2 role]
# We will cycle all of our replicas here and force a psync.
assert_equal [expr [s 0 sync_partial_ok] - $initial_psyncs] 2
assert_equal [expr [s 0 sync_full] - $initial_syncs] 0
# Node 0 must have logged the rejection exactly once.
assert_equal [count_log_message 0 "Failover target rejected psync request"] 1
assert_digests_match $node_0 $node_1 $node_2
}
}
}
}

View File

@ -1,3 +1,5 @@
tags {"rdb"} {
set server_path [tmpdir "server.rdb-encoding-test"]
# Copy RDB with different encodings in server path
@ -289,3 +291,5 @@ start_server {overrides {save ""}} {
}
}
} ;# system_name
} ;# tags

View File

@ -5,7 +5,7 @@ proc cmdstat {cmd} {
return [cmdrstat $cmd r]
}
start_server {tags {"benchmark"}} {
start_server {tags {"benchmark network"}} {
start_server {} {
set master_host [srv 0 host]
set master_port [srv 0 port]

View File

@ -1,4 +1,4 @@
start_server {tags {"repl"}} {
start_server {tags {"repl network"}} {
start_server {} {
set master [srv -1 client]

View File

@ -5,7 +5,7 @@ proc log_file_matches {log pattern} {
string match $pattern $content
}
start_server {tags {"repl"}} {
start_server {tags {"repl network"}} {
set slave [srv 0 client]
set slave_host [srv 0 host]
set slave_port [srv 0 port]

View File

@ -19,6 +19,7 @@ TEST_MODULES = \
misc.so \
hooks.so \
blockonkeys.so \
blockonbackground.so \
scan.so \
datatype.so \
auth.so \
@ -27,7 +28,8 @@ TEST_MODULES = \
getkeys.so \
test_lazyfree.so \
timer.so \
defragtest.so
defragtest.so \
stream.so
.PHONY: all

View File

@ -0,0 +1,220 @@
#define REDISMODULE_EXPERIMENTAL_API
#include "redismodule.h"
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <time.h>
#include "assert.h"
#define UNUSED(x) (void)(x)
/* Reply callback shared by the blocking commands of this module: sends back
 * the integer that the worker thread stored as the blocked client private
 * data. */
int HelloBlock_Reply(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
    UNUSED(argc);
    UNUSED(argv);
    const int *stored = RedisModule_GetBlockedClientPrivateData(ctx);
    long long value = *stored;
    return RedisModule_ReplyWithLongLong(ctx,value);
}
/* Timeout callback shared by the blocking commands of this module: closes
 * any running background time measurement and replies with a status string.
 *
 * The result of RedisModule_BlockedClientMeasureTimeEnd() is deliberately
 * not asserted. This callback is also registered by the no-tracking command,
 * whose thread never starts a measurement, and a timeout may fire before a
 * tracking thread had the chance to start one. In those cases there is
 * nothing to end, and that must not abort the server.
 * Keeping the call outside assert() also keeps it when NDEBUG is defined. */
int HelloBlock_Timeout(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
    UNUSED(argv);
    UNUSED(argc);
    RedisModuleBlockedClient *bc = RedisModule_GetBlockedClientHandle(ctx);
    (void)RedisModule_BlockedClientMeasureTimeEnd(bc);
    return RedisModule_ReplyWithSimpleString(ctx,"Request timedout");
}
/* Free-privdata callback registered by the blocking commands of this module:
 * releases the private data handed over when the client was unblocked. */
void HelloBlock_FreeData(RedisModuleCtx *ctx, void *privdata) {
    (void)ctx; /* The context is not needed to release the data. */
    RedisModule_Free(privdata);
}
/* Thread entry point that executes the blocking part of the commands
 * implemented by HelloBlock_RedisCommand and
 * HelloBlockNoTracking_RedisCommand.
 *
 * 'arg' is a RedisModule_Alloc'ed array of three pointers, freed here:
 *   [0] the blocked client handle,
 *   [1] the delay in milliseconds,
 *   [2] non-zero to measure the time spent in this thread.
 *
 * The thread sleeps for the delay and then unblocks the client, passing a
 * heap allocated random int as private data. Always returns NULL. */
void *BlockDebug_ThreadMain(void *arg) {
    void **targ = arg;
    RedisModuleBlockedClient *bc = targ[0];
    long long delay = (unsigned long)targ[1];
    long long enable_time_track = (unsigned long)targ[2];
    int rc = REDISMODULE_OK;

    /* The API calls are kept outside assert(): an expression placed inside
     * assert() is not evaluated at all when NDEBUG is defined. */
    if (enable_time_track) {
        rc = RedisModule_BlockedClientMeasureTimeStart(bc);
        assert(rc == REDISMODULE_OK);
    }
    RedisModule_Free(targ);

    struct timespec ts;
    ts.tv_sec = delay / 1000;
    ts.tv_nsec = (delay % 1000) * 1000000;
    nanosleep(&ts, NULL);

    int *r = RedisModule_Alloc(sizeof(int));
    *r = rand();

    if (enable_time_track) {
        rc = RedisModule_BlockedClientMeasureTimeEnd(bc);
        assert(rc == REDISMODULE_OK);
    }
    (void)rc; /* Only read by assert(). */

    RedisModule_UnblockClient(bc,r);
    return NULL;
}
/* Thread entry point that executes the blocking part of the command
 * BLOCK.DOUBLE_DEBUG.
 *
 * 'arg' is a RedisModule_Alloc'ed array of two pointers, freed here:
 *   [0] the blocked client handle,
 *   [1] the delay in milliseconds.
 *
 * The thread runs two measured sleeps of 'delay' milliseconds each, then
 * unblocks the client passing a heap allocated random int as private data.
 * Always returns NULL. */
void *DoubleBlock_ThreadMain(void *arg) {
    void **targ = arg;
    RedisModuleBlockedClient *bc = targ[0];
    long long delay = (unsigned long)targ[1];
    int rc;

    /* The API calls are kept outside assert(): an expression placed inside
     * assert() is not evaluated at all when NDEBUG is defined. */
    rc = RedisModule_BlockedClientMeasureTimeStart(bc);
    assert(rc == REDISMODULE_OK);
    RedisModule_Free(targ);

    struct timespec ts;
    ts.tv_sec = delay / 1000;
    ts.tv_nsec = (delay % 1000) * 1000000;
    nanosleep(&ts, NULL);

    int *r = RedisModule_Alloc(sizeof(int));
    *r = rand();
    RedisModule_BlockedClientMeasureTimeEnd(bc);

    /* Call RedisModule_BlockedClientMeasureTimeStart() and
     * RedisModule_BlockedClientMeasureTimeEnd() a second time, so that the
     * total measured execution time is 2x the delay. */
    rc = RedisModule_BlockedClientMeasureTimeStart(bc);
    assert(rc == REDISMODULE_OK);
    (void)rc; /* Only read by assert(). */
    nanosleep(&ts, NULL);
    RedisModule_BlockedClientMeasureTimeEnd(bc);

    RedisModule_UnblockClient(bc,r);
    return NULL;
}
/* Disconnection callback: logs, at warning level, the handle of the blocked
 * client that went away. */
void HelloBlock_Disconnected(RedisModuleCtx *ctx, RedisModuleBlockedClient *bc) {
    void *handle = (void*)bc;
    RedisModule_Log(ctx,"warning","Blocked client %p disconnected!",handle);
}
/* BLOCK.DEBUG <delay_ms> <timeout_ms> -- Block for <delay_ms> milliseconds,
 * then reply with a random number. <timeout_ms> is the command timeout, so
 * that you can test what happens when the delay is greater than the timeout.
 * The time spent in the background thread is tracked.
 *
 * Replies with an error when an argument is not an integer or when the
 * worker thread cannot be started. */
int HelloBlock_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
    if (argc != 3) return RedisModule_WrongArity(ctx);
    long long delay;
    long long timeout;

    if (RedisModule_StringToLongLong(argv[1],&delay) != REDISMODULE_OK) {
        return RedisModule_ReplyWithError(ctx,"ERR invalid count");
    }

    if (RedisModule_StringToLongLong(argv[2],&timeout) != REDISMODULE_OK) {
        return RedisModule_ReplyWithError(ctx,"ERR invalid count");
    }

    pthread_t tid;
    RedisModuleBlockedClient *bc = RedisModule_BlockClient(ctx,HelloBlock_Reply,HelloBlock_Timeout,HelloBlock_FreeData,timeout);

    /* Here we set a disconnection handler, however since this module will
     * block in sleep() in a thread, there is not much we can do in the
     * callback, so this is just to show you the API. */
    RedisModule_SetDisconnectCallback(bc,HelloBlock_Disconnected);

    /* Now that we setup a blocking client, we need to pass the control
     * to the thread. However we need to pass arguments to the thread:
     * the delay, a reference to the blocked client handle and the time
     * tracking flag. The thread owns and frees this array. */
    void **targ = RedisModule_Alloc(sizeof(void*)*3);
    targ[0] = bc;
    targ[1] = (void*)(unsigned long) delay;
    /* Pass 1 as flag to enable time tracking. */
    targ[2] = (void*)(unsigned long) 1;

    if (pthread_create(&tid,NULL,BlockDebug_ThreadMain,targ) != 0) {
        /* No thread took ownership of the arguments: release them here. */
        RedisModule_Free(targ);
        RedisModule_AbortBlock(bc);
        return RedisModule_ReplyWithError(ctx,"-ERR Can't start thread");
    }
    return REDISMODULE_OK;
}
/* BLOCK.DEBUG_NO_TRACK <delay_ms> <timeout_ms> -- Block for <delay_ms>
 * milliseconds, then reply with a random number. <timeout_ms> is the command
 * timeout, so that you can test what happens when the delay is greater than
 * the timeout.
 * This command does not track background time, so the background time should
 * not appear in the stats.
 *
 * Replies with an error when an argument is not an integer or when the
 * worker thread cannot be started. */
int HelloBlockNoTracking_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
    if (argc != 3) return RedisModule_WrongArity(ctx);
    long long delay;
    long long timeout;

    if (RedisModule_StringToLongLong(argv[1],&delay) != REDISMODULE_OK) {
        return RedisModule_ReplyWithError(ctx,"ERR invalid count");
    }

    if (RedisModule_StringToLongLong(argv[2],&timeout) != REDISMODULE_OK) {
        return RedisModule_ReplyWithError(ctx,"ERR invalid count");
    }

    pthread_t tid;
    RedisModuleBlockedClient *bc = RedisModule_BlockClient(ctx,HelloBlock_Reply,HelloBlock_Timeout,HelloBlock_FreeData,timeout);

    /* Here we set a disconnection handler, however since this module will
     * block in sleep() in a thread, there is not much we can do in the
     * callback, so this is just to show you the API. */
    RedisModule_SetDisconnectCallback(bc,HelloBlock_Disconnected);

    /* Now that we setup a blocking client, we need to pass the control
     * to the thread. However we need to pass arguments to the thread:
     * the delay, a reference to the blocked client handle and the time
     * tracking flag. The thread owns and frees this array. */
    void **targ = RedisModule_Alloc(sizeof(void*)*3);
    targ[0] = bc;
    targ[1] = (void*)(unsigned long) delay;
    /* Pass 0 as flag to disable time tracking. */
    targ[2] = (void*)(unsigned long) 0;

    if (pthread_create(&tid,NULL,BlockDebug_ThreadMain,targ) != 0) {
        /* No thread took ownership of the arguments: release them here. */
        RedisModule_Free(targ);
        RedisModule_AbortBlock(bc);
        return RedisModule_ReplyWithError(ctx,"-ERR Can't start thread");
    }
    return REDISMODULE_OK;
}
/* BLOCK.DOUBLE_DEBUG <delay_ms> -- Block for 2 x <delay_ms> milliseconds,
 * then reply with a random number.
 * This command is used to test multiple calls to RedisModule_BlockedClientMeasureTimeStart()
 * and RedisModule_BlockedClientMeasureTimeEnd() within the same execution.
 *
 * The command takes no timeout argument, so the client is blocked with a
 * timeout of 0.
 * Replies with an error when the argument is not an integer or when the
 * worker thread cannot be started. */
int HelloDoubleBlock_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
    if (argc != 2) return RedisModule_WrongArity(ctx);
    long long delay;
    /* Explicit value: this used to be passed to RedisModule_BlockClient()
     * uninitialized. NOTE(review): 0 is documented as "no timeout" -- confirm
     * against the modules API reference. */
    long long timeout = 0;

    if (RedisModule_StringToLongLong(argv[1],&delay) != REDISMODULE_OK) {
        return RedisModule_ReplyWithError(ctx,"ERR invalid count");
    }

    pthread_t tid;
    RedisModuleBlockedClient *bc = RedisModule_BlockClient(ctx,HelloBlock_Reply,HelloBlock_Timeout,HelloBlock_FreeData,timeout);

    /* Now that we setup a blocking client, we need to pass the control
     * to the thread. However we need to pass arguments to the thread:
     * the delay and a reference to the blocked client handle. The thread
     * owns and frees this array. */
    void **targ = RedisModule_Alloc(sizeof(void*)*2);
    targ[0] = bc;
    targ[1] = (void*)(unsigned long) delay;

    if (pthread_create(&tid,NULL,DoubleBlock_ThreadMain,targ) != 0) {
        /* No thread took ownership of the arguments: release them here. */
        RedisModule_Free(targ);
        RedisModule_AbortBlock(bc);
        return RedisModule_ReplyWithError(ctx,"-ERR Can't start thread");
    }
    return REDISMODULE_OK;
}
/* Module entry point: registers the module under the name "block" and
 * creates its three commands. Returns REDISMODULE_ERR as soon as the
 * initialization or one of the registrations fails, REDISMODULE_OK
 * otherwise. */
int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
    UNUSED(argc);
    UNUSED(argv);

    if (RedisModule_Init(ctx,"block",1,REDISMODULE_APIVER_1) == REDISMODULE_ERR)
        return REDISMODULE_ERR;

    /* Commands are registered in this order, all with empty flags and no
     * key positions. */
    static const struct {
        const char *name;
        RedisModuleCmdFunc handler;
    } commands[] = {
        {"block.debug", HelloBlock_RedisCommand},
        {"block.double_debug", HelloDoubleBlock_RedisCommand},
        {"block.debug_no_track", HelloBlockNoTracking_RedisCommand},
    };

    for (size_t j = 0; j < sizeof(commands)/sizeof(commands[0]); j++) {
        if (RedisModule_CreateCommand(ctx,commands[j].name,
                                      commands[j].handler,"",0,0,0) == REDISMODULE_ERR)
            return REDISMODULE_ERR;
    }
    return REDISMODULE_OK;
}

View File

@ -2,6 +2,7 @@
#include "redismodule.h"
#include <string.h>
#include <strings.h>
#include <assert.h>
#include <unistd.h>
@ -65,6 +66,8 @@ int get_fsl(RedisModuleCtx *ctx, RedisModuleString *keyname, int mode, int creat
RedisModule_CloseKey(key);
if (reply_on_failure)
RedisModule_ReplyWithError(ctx, REDISMODULE_ERRORMSG_WRONGTYPE);
RedisModuleCallReply *reply = RedisModule_Call(ctx, "INCR", "c", "fsl_wrong_type");
RedisModule_FreeCallReply(reply);
return 0;
}
@ -298,6 +301,154 @@ int fsl_getall(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
return REDISMODULE_OK;
}
/* Reply callback for BLOCKONKEYS.POPALL: when the key holds a list, every
 * element is popped from the head and sent back in a single array reply,
 * otherwise an error is replied. Always returns REDISMODULE_OK. */
int blockonkeys_popall_reply_callback(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
    REDISMODULE_NOT_USED(argc);
    RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE);

    if (RedisModule_KeyType(key) != REDISMODULE_KEYTYPE_LIST) {
        RedisModule_ReplyWithError(ctx, "ERR Not a list");
        RedisModule_CloseKey(key);
        return REDISMODULE_OK;
    }

    /* The number of elements is only known once the list is drained, so the
     * array length is set after the elements were emitted. */
    long popped = 0;
    RedisModule_ReplyWithArray(ctx, REDISMODULE_POSTPONED_ARRAY_LEN);
    for (;;) {
        RedisModuleString *item = RedisModule_ListPop(key, REDISMODULE_LIST_HEAD);
        if (item == NULL) break;
        popped++;
        RedisModule_ReplyWithString(ctx, item);
        RedisModule_FreeString(ctx, item);
    }
    RedisModule_ReplySetArrayLength(ctx, popped);

    RedisModule_CloseKey(key);
    return REDISMODULE_OK;
}
/* Timeout callback for BLOCKONKEYS.POPALL: replies with a timeout error. */
int blockonkeys_popall_timeout_callback(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
    REDISMODULE_NOT_USED(argc);
    REDISMODULE_NOT_USED(argv);
    int rc = RedisModule_ReplyWithError(ctx, "ERR Timeout");
    return rc;
}
/* BLOCKONKEYS.POPALL key
 *
 * Blocks on an empty key for up to 3 seconds. When unblocked by a list
 * operation like LPUSH, all the elements are popped and returned. Fails with
 * an error on timeout, or immediately when the key is not empty. */
int blockonkeys_popall(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
    if (argc != 2) return RedisModule_WrongArity(ctx);

    RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_READ);
    int is_empty = (RedisModule_KeyType(key) == REDISMODULE_KEYTYPE_EMPTY);

    if (!is_empty) {
        RedisModule_ReplyWithError(ctx, "ERR Key not empty");
    } else {
        /* Block on the single key passed as argument, with no private data. */
        RedisModule_BlockClientOnKeys(ctx, blockonkeys_popall_reply_callback,
                                      blockonkeys_popall_timeout_callback,
                                      NULL, 3000, &argv[1], 1, NULL);
    }
    RedisModule_CloseKey(key);
    return REDISMODULE_OK;
}
/* BLOCKONKEYS.LPUSH key val [val ..]
 * BLOCKONKEYS.LPUSH_UNBLOCK key val [val ..]
 *
 * A module equivalent of LPUSH. If the name LPUSH_UNBLOCK is used,
 * RM_SignalKeyAsReady() is also called.
 *
 * Exactly one reply is sent: a WRONGTYPE error when the key holds something
 * other than a list, "ERR Push failed" when a push is rejected, or "OK". */
int blockonkeys_lpush(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
    if (argc < 3)
        return RedisModule_WrongArity(ctx);

    RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE);
    int keytype = RedisModule_KeyType(key);
    if (keytype != REDISMODULE_KEYTYPE_EMPTY &&
        keytype != REDISMODULE_KEYTYPE_LIST) {
        /* Return right away: falling through would send a second ("OK")
         * reply after the error. */
        RedisModule_CloseKey(key);
        return RedisModule_ReplyWithError(ctx, REDISMODULE_ERRORMSG_WRONGTYPE);
    }

    for (int i = 2; i < argc; i++) {
        if (RedisModule_ListPush(key, REDISMODULE_LIST_HEAD,
                                 argv[i]) != REDISMODULE_OK) {
            RedisModule_CloseKey(key);
            return RedisModule_ReplyWithError(ctx, "ERR Push failed");
        }
    }
    RedisModule_CloseKey(key);

    /* Signal key as ready only if the command is lpush_unblock. The length
     * must match as well: comparing just the first `len` bytes would treat
     * "blockonkeys.lpush" as a match, since it is a prefix of the longer
     * name. */
    static const char unblock_name[] = "blockonkeys.lpush_unblock";
    size_t len;
    const char *str = RedisModule_StringPtrLen(argv[0], &len);
    if (len == sizeof(unblock_name) - 1 &&
        !strncasecmp(str, unblock_name, len)) {
        RedisModule_SignalKeyAsReady(ctx, argv[1]);
    }
    return RedisModule_ReplyWithSimpleString(ctx, "OK");
}
/* Reply callback registered by BLOCKONKEYS.BLPOPN.
 *
 * Returns REDISMODULE_OK after popping argv[2] elements from the head of the
 * list at argv[1], or after replying WRONGTYPE for a non-list key. Returns
 * REDISMODULE_ERR (the original code marks this "continue blocking") when
 * the key is empty or the list is still shorter than requested. */
int blockonkeys_blpopn_reply_callback(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
    REDISMODULE_NOT_USED(argc);
    long long wanted;
    RedisModule_StringToLongLong(argv[2], &wanted);

    RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE);
    int type = RedisModule_KeyType(key);
    int status;

    if (type != REDISMODULE_KEYTYPE_LIST && type != REDISMODULE_KEYTYPE_EMPTY) {
        status = RedisModule_ReplyWithError(ctx, REDISMODULE_ERRORMSG_WRONGTYPE);
    } else if (type == REDISMODULE_KEYTYPE_LIST &&
               RedisModule_ValueLength(key) >= (size_t)wanted) {
        RedisModule_ReplyWithArray(ctx, wanted);
        for (long k = 0; k < wanted; k++) {
            RedisModuleString *item = RedisModule_ListPop(key, REDISMODULE_LIST_HEAD);
            RedisModule_ReplyWithString(ctx, item);
            RedisModule_FreeString(ctx, item);
        }
        status = REDISMODULE_OK;
    } else {
        /* continue blocking */
        status = REDISMODULE_ERR;
    }

    RedisModule_CloseKey(key);
    return status;
}
/* Timeout callback registered by BLOCKONKEYS.BLPOPN: replies "ERR Timeout". */
int blockonkeys_blpopn_timeout_callback(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
    REDISMODULE_NOT_USED(argc);
    REDISMODULE_NOT_USED(argv);
    int status = RedisModule_ReplyWithError(ctx, "ERR Timeout");
    return status;
}
/* BLOCKONKEYS.BLPOPN key N
 *
 * Pops N elements from the head of the list right away when it already holds
 * at least N. Otherwise the client is blocked on the key with a 3000 ms
 * timeout. A key of any non-list type yields a WRONGTYPE error.
 */
int blockonkeys_blpopn(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
    if (argc < 3) return RedisModule_WrongArity(ctx);

    long long wanted;
    if (RedisModule_StringToLongLong(argv[2], &wanted) != REDISMODULE_OK)
        return RedisModule_ReplyWithError(ctx, "ERR Invalid N");

    RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE);
    int type = RedisModule_KeyType(key);
    int is_list = (type == REDISMODULE_KEYTYPE_LIST);

    if (!is_list && type != REDISMODULE_KEYTYPE_EMPTY) {
        RedisModule_ReplyWithError(ctx, REDISMODULE_ERRORMSG_WRONGTYPE);
    } else if (is_list && RedisModule_ValueLength(key) >= (size_t)wanted) {
        /* Enough elements are available: serve the request immediately. */
        RedisModule_ReplyWithArray(ctx, wanted);
        for (long k = 0; k < wanted; k++) {
            RedisModuleString *item = RedisModule_ListPop(key, REDISMODULE_LIST_HEAD);
            RedisModule_ReplyWithString(ctx, item);
            RedisModule_FreeString(ctx, item);
        }
    } else {
        RedisModule_BlockClientOnKeys(ctx,
                                      blockonkeys_blpopn_reply_callback,
                                      blockonkeys_blpopn_timeout_callback,
                                      NULL, 3000, &argv[1], 1, NULL);
    }

    RedisModule_CloseKey(key);
    return REDISMODULE_OK;
}
int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
REDISMODULE_NOT_USED(argv);
REDISMODULE_NOT_USED(argc);
@ -334,5 +485,21 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
if (RedisModule_CreateCommand(ctx,"fsl.getall",fsl_getall,"",0,0,0) == REDISMODULE_ERR)
return REDISMODULE_ERR;
if (RedisModule_CreateCommand(ctx, "blockonkeys.popall", blockonkeys_popall,
"", 1, 1, 1) == REDISMODULE_ERR)
return REDISMODULE_ERR;
if (RedisModule_CreateCommand(ctx, "blockonkeys.lpush", blockonkeys_lpush,
"", 1, 1, 1) == REDISMODULE_ERR)
return REDISMODULE_ERR;
if (RedisModule_CreateCommand(ctx, "blockonkeys.lpush_unblock", blockonkeys_lpush,
"", 1, 1, 1) == REDISMODULE_ERR)
return REDISMODULE_ERR;
if (RedisModule_CreateCommand(ctx, "blockonkeys.blpopn", blockonkeys_blpopn,
"", 1, 1, 1) == REDISMODULE_ERR)
return REDISMODULE_ERR;
return REDISMODULE_OK;
}

258
tests/modules/stream.c Normal file
View File

@ -0,0 +1,258 @@
#include "redismodule.h"
#include <string.h>
#include <strings.h>
#include <assert.h>
#include <unistd.h>
#include <errno.h>
/* STREAM.ADD key field1 value1 [ field2 value2 ... ]
 *
 * Adds one stream entry with an automatically generated ID, like XADD *.
 * Replies with the ID of the new entry, or with "ERR StreamAdd failed".
 */
int stream_add(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
    if (argc < 2 || argc % 2 != 0) {
        RedisModule_WrongArity(ctx);
        return REDISMODULE_OK;
    }

    RedisModuleKey *stream = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE);
    RedisModuleStreamID new_id;
    int status = RedisModule_StreamAdd(stream, REDISMODULE_STREAM_ADD_AUTOID,
                                       &new_id, &argv[2], (argc-2)/2);

    if (status != REDISMODULE_OK) {
        RedisModule_ReplyWithError(ctx, "ERR StreamAdd failed");
    } else {
        RedisModuleString *text = RedisModule_CreateStringFromStreamID(ctx, &new_id);
        RedisModule_ReplyWithString(ctx, text);
        RedisModule_FreeString(ctx, text);
    }

    RedisModule_CloseKey(stream);
    return REDISMODULE_OK;
}
/* STREAM.ADDN key N field1 value1 [ field2 value2 ... ]
 *
 * Adds the same entry (with automatic IDs) up to N times, stopping at the
 * first failed add. Replies with the number of entries actually added.
 */
int stream_addn(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
    if (argc < 3 || argc % 2 == 0) {
        RedisModule_WrongArity(ctx);
        return REDISMODULE_OK;
    }

    long long n;
    if (RedisModule_StringToLongLong(argv[2], &n) == REDISMODULE_ERR) {
        RedisModule_ReplyWithError(ctx, "N must be a number");
        return REDISMODULE_OK;
    }

    RedisModuleKey *stream = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE);
    long long added = 0;
    while (added < n &&
           RedisModule_StreamAdd(stream, REDISMODULE_STREAM_ADD_AUTOID, NULL,
                                 &argv[3], (argc-3)/2) != REDISMODULE_ERR) {
        added++;
    }

    RedisModule_ReplyWithLongLong(ctx, added);
    RedisModule_CloseKey(stream);
    return REDISMODULE_OK;
}
/* STREAM.DELETE key stream-id
 *
 * Deletes a single entry. Replies "OK" on success, "Invalid stream ID" when
 * the ID cannot be parsed, and "ERR StreamDelete failed" otherwise. */
int stream_delete(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
    if (argc != 3) return RedisModule_WrongArity(ctx);

    RedisModuleStreamID target;
    if (RedisModule_StringToStreamID(argv[2], &target) != REDISMODULE_OK)
        return RedisModule_ReplyWithError(ctx, "Invalid stream ID");

    RedisModuleKey *stream = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE);
    int deleted = (RedisModule_StreamDelete(stream, &target) == REDISMODULE_OK);
    if (!deleted) {
        RedisModule_ReplyWithError(ctx, "ERR StreamDelete failed");
    } else {
        RedisModule_ReplyWithSimpleString(ctx, "OK");
    }
    RedisModule_CloseKey(stream);
    return REDISMODULE_OK;
}
/* STREAM.RANGE key start-id end-id
 *
 * Returns an array of stream items. Each item is an array on the form
 * [stream-id, [field1, value1, field2, value2, ...]].
 *
 * If start-id is greater than end-id, the two are swapped and the stream is
 * iterated in reverse.
 *
 * A funny side-effect used for testing RM_StreamIteratorDelete() is that if any
 * entry has a field named exactly "selfdestruct", the stream entry is deleted.
 * It is however included in the results of this command.
 *
 * The API calls being checked are made outside of assert(), so they still run
 * when assertions are compiled out (NDEBUG); only the checks disappear.
 */
int stream_range(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
    if (argc != 4) {
        RedisModule_WrongArity(ctx);
        return REDISMODULE_OK;
    }

    RedisModuleStreamID startid, endid;
    if (RedisModule_StringToStreamID(argv[2], &startid) != REDISMODULE_OK ||
        RedisModule_StringToStreamID(argv[3], &endid) != REDISMODULE_OK) {
        RedisModule_ReplyWithError(ctx, "Invalid stream ID");
        return REDISMODULE_OK;
    }

    /* If startid > endid, we swap and set the reverse flag. */
    int flags = 0;
    if (startid.ms > endid.ms ||
        (startid.ms == endid.ms && startid.seq > endid.seq)) {
        RedisModuleStreamID tmp = startid;
        startid = endid;
        endid = tmp;
        flags |= REDISMODULE_STREAM_ITERATOR_REVERSE;
    }

    /* Open key and start iterator. */
    int openflags = REDISMODULE_READ | REDISMODULE_WRITE;
    RedisModuleKey *key = RedisModule_OpenKey(ctx, argv[1], openflags);
    if (RedisModule_StreamIteratorStart(key, flags,
                                        &startid, &endid) != REDISMODULE_OK) {
        /* Key is not a stream, etc. */
        RedisModule_ReplyWithError(ctx, "ERR StreamIteratorStart failed");
        RedisModule_CloseKey(key);
        return REDISMODULE_OK;
    }

    int rc;

    /* Check error handling: Delete current entry when no current entry. */
    rc = RedisModule_StreamIteratorDelete(key);
    (void)rc; /* silences "set but unused" when assert() is compiled out */
    assert(rc == REDISMODULE_ERR);
    assert(errno == ENOENT);

    /* Check error handling: Fetch fields when no current entry. */
    rc = RedisModule_StreamIteratorNextField(key, NULL, NULL);
    assert(rc == REDISMODULE_ERR);
    assert(errno == ENOENT);

    /* Return array. */
    RedisModule_ReplyWithArray(ctx, REDISMODULE_POSTPONED_ARRAY_LEN);
    RedisModule_AutoMemory(ctx);
    RedisModuleStreamID id;
    long numfields;
    long len = 0;
    static const char selfdestruct[] = "selfdestruct";
    while (RedisModule_StreamIteratorNextID(key, &id,
                                            &numfields) == REDISMODULE_OK) {
        RedisModule_ReplyWithArray(ctx, 2);
        RedisModuleString *id_str = RedisModule_CreateStringFromStreamID(ctx, &id);
        RedisModule_ReplyWithString(ctx, id_str);
        RedisModule_ReplyWithArray(ctx, numfields * 2);
        int delete = 0;
        RedisModuleString *field, *value;
        for (long i = 0; i < numfields; i++) {
            rc = RedisModule_StreamIteratorNextField(key, &field, &value);
            assert(rc == REDISMODULE_OK);
            RedisModule_ReplyWithString(ctx, field);
            RedisModule_ReplyWithString(ctx, value);
            /* Check if this is a "selfdestruct" field. The length is compared
             * too, so that an empty name or a mere prefix such as "self" does
             * not count as a match. */
            size_t field_len;
            const char *field_str = RedisModule_StringPtrLen(field, &field_len);
            if (field_len == sizeof(selfdestruct) - 1 &&
                !memcmp(field_str, selfdestruct, field_len)) delete = 1;
        }
        if (delete) {
            rc = RedisModule_StreamIteratorDelete(key);
            assert(rc == REDISMODULE_OK);
        }
        /* check error handling: no more fields to fetch */
        rc = RedisModule_StreamIteratorNextField(key, &field, &value);
        assert(rc == REDISMODULE_ERR);
        assert(errno == ENOENT);
        len++;
    }
    RedisModule_ReplySetArrayLength(ctx, len);
    RedisModule_StreamIteratorStop(key);
    RedisModule_CloseKey(key);
    return REDISMODULE_OK;
}
/*
 * STREAM.TRIM key (MAXLEN (=|~) length | MINID (=|~) id)
 *
 * Trims the stream either by length or by minimum ID. The mark selects exact
 * ("=") or approximate ("~") trimming. Replies with the value returned by the
 * trim call, or with "ERR Trimming failed" when that value is negative.
 */
int stream_trim(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
    if (argc != 5) {
        RedisModule_WrongArity(ctx);
        return REDISMODULE_OK;
    }

    /* Strategy keyword (argv[2]) and its operand (argv[4]). */
    long long maxlen;
    RedisModuleStreamID minid;
    size_t token_len;
    const char *strategy = RedisModule_StringPtrLen(argv[2], &token_len);
    int by_minid = !strcasecmp(strategy, "minid");
    if (by_minid) {
        if (RedisModule_StringToStreamID(argv[4], &minid) != REDISMODULE_OK) {
            RedisModule_ReplyWithError(ctx, "ERR Invalid stream ID");
            return REDISMODULE_OK;
        }
    } else if (!strcasecmp(strategy, "maxlen")) {
        if (RedisModule_StringToLongLong(argv[4], &maxlen) == REDISMODULE_ERR) {
            RedisModule_ReplyWithError(ctx, "ERR Maxlen must be a number");
            return REDISMODULE_OK;
        }
    } else {
        RedisModule_ReplyWithError(ctx, "ERR Invalid arguments");
        return REDISMODULE_OK;
    }

    /* Approx-or-exact mark (argv[3]): a single '~' or '=' character. */
    const char *mark = RedisModule_StringPtrLen(argv[3], &token_len);
    if (token_len != 1 || (mark[0] != '~' && mark[0] != '=')) {
        RedisModule_ReplyWithError(ctx, "ERR Invalid approx-or-exact mark");
        return REDISMODULE_OK;
    }
    int flags = (mark[0] == '~') ? REDISMODULE_STREAM_TRIM_APPROX : 0;

    /* Trim and report. */
    RedisModuleKey *stream = RedisModule_OpenKey(ctx, argv[1], REDISMODULE_WRITE);
    long long removed = by_minid
        ? RedisModule_StreamTrimByID(stream, flags, &minid)
        : RedisModule_StreamTrimByLength(stream, flags, maxlen);

    if (removed >= 0) {
        RedisModule_ReplyWithLongLong(ctx, removed);
    } else {
        RedisModule_ReplyWithError(ctx, "ERR Trimming failed");
    }
    RedisModule_CloseKey(stream);
    return REDISMODULE_OK;
}
/* Module entry point: initializes the "stream" module and registers its five
 * commands, all with empty flags and key spec (1, 1, 1). Returns
 * REDISMODULE_ERR as soon as initialization or any registration fails. */
int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
    REDISMODULE_NOT_USED(argv);
    REDISMODULE_NOT_USED(argc);

    if (RedisModule_Init(ctx, "stream", 1, REDISMODULE_APIVER_1) == REDISMODULE_ERR)
        return REDISMODULE_ERR;

    /* Registration table, in the order the commands are created. */
    static const struct {
        const char *name;
        int (*handler)(RedisModuleCtx *, RedisModuleString **, int);
    } commands[] = {
        {"stream.add",    stream_add},
        {"stream.addn",   stream_addn},
        {"stream.delete", stream_delete},
        {"stream.range",  stream_range},
        {"stream.trim",   stream_trim},
    };

    for (size_t i = 0; i < sizeof(commands) / sizeof(commands[0]); i++) {
        if (RedisModule_CreateCommand(ctx, commands[i].name, commands[i].handler,
                                      "", 1, 1, 1) == REDISMODULE_ERR)
            return REDISMODULE_ERR;
    }

    return REDISMODULE_OK;
}

View File

@ -10,7 +10,7 @@ set ::tlsdir "../../tls"
proc main {} {
parse_options
spawn_instance sentinel $::sentinel_base_port $::instances_count
spawn_instance sentinel $::sentinel_base_port $::instances_count [list "sentinel deny-scripts-reconfig no"] "../tests/includes/sentinel.conf"
spawn_instance redis $::redis_base_port $::instances_count
run_tests
cleanup

View File

@ -1,5 +1,5 @@
# Check the basic monitoring and failover capabilities.
source "../tests/includes/start-init-tests.tcl"
source "../tests/includes/init-tests.tcl"
if {$::simulate_error} {

View File

@ -0,0 +1,67 @@
# Set replica-announce-ip to $addr on every Redis instance.
proc set_redis_announce_ip {addr} {
    foreach_redis_id redis_id {
        R $redis_id config set replica-announce-ip $addr
    }
}
# Apply "SENTINEL CONFIG SET <keyword> <value>" on every sentinel instance.
proc set_sentinel_config {keyword value} {
    foreach_sentinel_id sentinel_id {
        S $sentinel_id sentinel config set $keyword $value
    }
}
# Record $hostname as the "host" attribute of every sentinel and every Redis
# instance known to the test framework.
proc set_all_instances_hostname {hostname} {
    foreach_sentinel_id sentinel_id {
        set_instance_attrib sentinel $sentinel_id host $hostname
    }
    foreach_redis_id redis_id {
        set_instance_attrib redis $redis_id host $hostname
    }
}
# Switch all instances and sentinels to hostname-based addressing. This runs
# before the shared init tests, which are sourced right after this block.
test "(pre-init) Configure instances and sentinel for hostname use" {
set ::host "localhost"
restart_killed_instances
# Make the framework, the Redis instances and the sentinels all use the
# hostname instead of an IP address.
set_all_instances_hostname $::host
set_redis_announce_ip $::host
set_sentinel_config resolve-hostnames yes
set_sentinel_config announce-hostnames yes
}
source "../tests/includes/init-tests.tcl"
# Return 1 if every sentinel reports the master address and the "ip" of each
# replica of "mymaster" as exactly $hostname; return 0 at the first mismatch.
proc verify_hostname_announced {hostname} {
    foreach_sentinel_id id {
        # Master is reported with its hostname
        set master_addr [S $id SENTINEL GET-MASTER-ADDR-BY-NAME mymaster]
        if {[string equal [lindex $master_addr 0] $hostname] == 0} {
            return 0
        }

        # Replicas are reported with their hostnames
        foreach replica [S $id SENTINEL REPLICAS mymaster] {
            set replica_ip [dict get $replica ip]
            if {[string equal $replica_ip $hostname] == 0} {
                return 0
            }
        }
    }
    return 1
}
test "Sentinel announces hostnames" {
    # Check initial state. verify_hostname_announced only returns 0 or 1, so
    # its result has to be asserted for this check to have any effect.
    assert {[verify_hostname_announced $::host]}

    # Disable announce-hostnames and confirm IPs are used
    set_sentinel_config announce-hostnames no
    assert {[verify_hostname_announced "127.0.0.1"] || [verify_hostname_announced "::1"]}
}
# We need to revert any special configuration because all tests currently
# share the same instances. This undoes each setting applied by the
# "(pre-init)" test above: host attribute, replica-announce-ip and the two
# sentinel hostname options.
test "(post-cleanup) Configure instances and sentinel for IPs" {
set ::host "127.0.0.1"
set_all_instances_hostname $::host
set_redis_announce_ip $::host
set_sentinel_config resolve-hostnames no
set_sentinel_config announce-hostnames no
}

View File

@ -0,0 +1,50 @@
source "../tests/includes/init-tests.tcl"
set ::user "testuser"
set ::password "secret"
# On every sentinel: create $::user with password $::password and all
# permissions, turn the default user off, kill connections of the default
# user (including our own), then authenticate as the new user.
proc setup_acl {} {
    foreach_sentinel_id sid {
        assert_equal {OK} [S $sid ACL SETUSER $::user >$::password +@all on]
        assert_equal {OK} [S $sid ACL SETUSER default off]

        S $sid CLIENT KILL USER default SKIPME no
        assert_equal {OK} [S $sid AUTH $::user $::password]
    }
}
# On every sentinel: turn the default user back on, delete $::user and clear
# the sentinel-user / sentinel-pass credentials.
proc teardown_acl {} {
    foreach_sentinel_id sid {
        assert_equal {OK} [S $sid ACL SETUSER default on]
        assert_equal {1} [S $sid ACL DELUSER $::user]

        S $sid SENTINEL CONFIG SET sentinel-user ""
        S $sid SENTINEL CONFIG SET sentinel-pass ""
    }
}
# Apply the ACL setup on all sentinels and confirm that the connection to
# sentinel 1 is now authenticated as the test user.
test "(post-init) Set up ACL configuration" {
setup_acl
assert_equal $::user [S 1 ACL WHOAMI]
}
# With the default user disabled and no sentinel credentials configured, the
# quorum check is expected to fail. After sentinel-user / sentinel-pass are
# set at runtime on every sentinel, it is expected to succeed again.
test "SENTINEL CONFIG SET handles on-the-fly credentials reconfiguration" {
# Make sure we're starting with a broken state...
# (fixed 5 second wait before checking the quorum)
after 5000
catch {S 1 SENTINEL CKQUORUM mymaster} err
assert_match {*NOQUORUM*} $err
foreach_sentinel_id id {
assert_equal {OK} [S $id SENTINEL CONFIG SET sentinel-user $::user]
assert_equal {OK} [S $id SENTINEL CONFIG SET sentinel-pass $::password]
}
# Fixed 5 second wait before the quorum is checked again.
after 5000
assert_match {*OK*} [S 1 SENTINEL CKQUORUM mymaster]
}
# Undo the ACL setup so that later tests see the default user again.
test "(post-cleanup) Tear down ACL configuration" {
teardown_acl
}

View File

@ -1,6 +1,6 @@
# Initialization tests -- most units will start including this.
test "(init) Restart killed instances" {
proc restart_killed_instances {} {
foreach type {redis sentinel} {
foreach_${type}_id id {
if {[get_instance_attrib $type $id pid] == -1} {
@ -12,6 +12,10 @@ test "(init) Restart killed instances" {
}
}
test "(init) Restart killed instances" {
restart_killed_instances
}
test "(init) Remove old master entry from sentinels" {
foreach_sentinel_id id {
catch {S $id SENTINEL REMOVE mymaster}
@ -37,6 +41,8 @@ test "(init) Sentinels can start monitoring a master" {
S $id SENTINEL SET mymaster down-after-milliseconds 2000
S $id SENTINEL SET mymaster failover-timeout 20000
S $id SENTINEL SET mymaster parallel-syncs 10
S $id SENTINEL SET mymaster notification-script ../../tests/includes/notify.sh
S $id SENTINEL SET mymaster client-reconfig-script ../../tests/includes/notify.sh
}
}

View File

@ -0,0 +1,21 @@
#!/usr/bin/env bash

# Hook script that the sentinel tests install as both notification-script and
# client-reconfig-script. It checks whether the process that spawned it leaked
# file descriptors into it, and records the evidence once in
# ../sentinel_fd_leaks. The check relies on /proc, so it is skipped on
# anything other than Linux.

OS=$(uname -s)
if [ "${OS}" != "Linux" ]
then
    exit 0
fi

# fd 3 is meant to catch the actual access to /proc/pid/fd,
# in case there's an fd leak by the sentinel,
# it can take 3, but then the access to /proc will take another fd, and we'll catch that.
# The pattern is a plain character range: inside a bracket expression '|' is
# a literal character, not an alternation.
leaked_fd_count=$(ls /proc/self/fd | grep -vE '^[0-3]$' | wc -l)
if [ "$leaked_fd_count" -gt 0 ]
then
    sentinel_fd_leaks_file="../sentinel_fd_leaks"
    # Only the first detected leak is recorded.
    if [ ! -f "$sentinel_fd_leaks_file" ]
    then
        ls -l /proc/self/fd | cat >> "$sentinel_fd_leaks_file"
        lsof -p $$ | cat >> "$sentinel_fd_leaks_file"
    fi
fi

View File

@ -0,0 +1,11 @@
# assume master is down after being unresponsive for 20s
sentinel down-after-milliseconds setmaster 20000
# reconfigure one slave at a time
sentinel parallel-syncs setmaster 2
# wait for 4m before assuming failover went wrong
sentinel failover-timeout setmaster 240000
# monitoring set
sentinel monitor setmaster 10.0.0.1 30000 2

View File

@ -0,0 +1,18 @@
# After SENTINEL FLUSHCONFIG, every line of the template config must still be
# present, in the same order, at the top of each sentinel's rewritten config.
test "(start-init) Flush config and compare rewrite config file lines" {
    foreach_sentinel_id id {
        assert_match "OK" [S $id SENTINEL FLUSHCONFIG]
        set file1 ../tests/includes/sentinel.conf
        set file2 [file join "sentinel_${id}" "sentinel.conf"]
        set fh1 [open $file1 r]
        set fh2 [open $file2 r]
        # [gets] returns the number of characters read: 0 for a blank line
        # and -1 only at end of file. Testing for >= 0 keeps the loop running
        # across blank lines and stops it at the real end of the template.
        while {[gets $fh1 line1] >= 0} {
            if {[gets $fh2 line2] >= 0} {
                assert [string equal $line1 $line2]
            } else {
                fail "sentinel config file rewrite sequence changed"
            }
        }
        close $fh1
        close $fh2
    }
}

View File

@ -244,6 +244,7 @@ proc ::redis::redis_read_reply {id fd} {
_ {redis_read_null $fd}
: -
+ {redis_read_line $fd}
, {expr {double([redis_read_line $fd])}}
- {return -code error [redis_read_line $fd]}
$ {redis_bulk_read $fd}
> -

View File

@ -152,20 +152,48 @@ proc server_is_up {host port retrynum} {
return 0
}
# Decide whether the current ::tags may run given ::allowtags and ::denytags.
# When ::allowtags is non-empty, at least one of its tags must be in ::tags.
# No tag from ::denytags may be in ::tags. Returns 1 when acceptable, else 0
# with the reason stored in the caller's variable named by err_return.
proc tags_acceptable {err_return} {
    upvar $err_return err

    # Allow-list: require at least one hit when the list is in use.
    if {[llength $::allowtags] > 0} {
        set hit 0
        foreach allowed $::allowtags {
            if {[lsearch $::tags $allowed] >= 0} {
                set hit 1
                break
            }
        }
        if {!$hit} {
            set err "Tag: none of the tags allowed"
            return 0
        }
    }

    # Deny-list: the first denied tag found rejects the run.
    foreach denied $::denytags {
        if {[lsearch $::tags $denied] < 0} continue
        set err "Tag: $denied denied"
        return 0
    }

    return 1
}
# doesn't really belong here, but highly coupled to code in start_server
proc tags {tags code} {
# If 'tags' contains multiple tags, quoted and separated by spaces,
# we want to get rid of the quotes in order to have a proper list
set tags [string map { \" "" } $tags]
set ::tags [concat $::tags $tags]
# We skip unwanted tags
foreach tag $::denytags {
if {[lsearch $::tags $tag] >= 0} {
incr ::num_aborted
send_data_packet $::test_server_fd ignore "Tag: $tag"
set ::tags [lrange $::tags 0 end-[llength $tags]]
return
}
if {![tags_acceptable err]} {
incr ::num_aborted
send_data_packet $::test_server_fd ignore $err
set ::tags [lrange $::tags 0 end-[llength $tags]]
return
}
uplevel 1 $code
set ::tags [lrange $::tags 0 end-[llength $tags]]
@ -267,13 +295,11 @@ proc start_server {options {code undefined}} {
}
# We skip unwanted tags
foreach tag $::denytags {
if {[lsearch $::tags $tag] >= 0} {
incr ::num_aborted
send_data_packet $::test_server_fd ignore "Tag: $tag"
set ::tags [lrange $::tags 0 end-[llength $tags]]
return
}
if {![tags_acceptable err]} {
incr ::num_aborted
send_data_packet $::test_server_fd ignore $err
set ::tags [lrange $::tags 0 end-[llength $tags]]
return
}
# If we are running against an external server, we just push the

View File

@ -12,7 +12,11 @@ proc randstring {min max {type binary}} {
set maxval 52
}
while {$len} {
append output [format "%c" [expr {$minval+int(rand()*($maxval-$minval+1))}]]
set rr [expr {$minval+int(rand()*($maxval-$minval+1))}]
if {$type eq {alpha} && $rr eq 92} {
set rr 90; # avoid putting '\' char in the string, it can mess up TCL processing
}
append output [format "%c" $rr]
incr len -1
}
return $output
@ -86,12 +90,10 @@ proc waitForBgrewriteaof r {
}
proc wait_for_sync r {
while 1 {
if {[status $r master_link_status] eq "down"} {
after 10
} else {
break
}
wait_for_condition 50 100 {
[status $r master_link_status] eq "up"
} else {
fail "replica didn't sync in time"
}
}
@ -571,8 +573,8 @@ proc generate_fuzzy_traffic_on_key {key duration} {
# Commands per type, blocking commands removed
# TODO: extract these from help.h or elsewhere, and improve to include other types
set string_commands {APPEND BITCOUNT BITFIELD BITOP BITPOS DECR DECRBY GET GETBIT GETRANGE GETSET INCR INCRBY INCRBYFLOAT MGET MSET MSETNX PSETEX SET SETBIT SETEX SETNX SETRANGE STRALGO STRLEN}
set hash_commands {HDEL HEXISTS HGET HGETALL HINCRBY HINCRBYFLOAT HKEYS HLEN HMGET HMSET HSCAN HSET HSETNX HSTRLEN HVALS}
set zset_commands {ZADD ZCARD ZCOUNT ZINCRBY ZINTERSTORE ZLEXCOUNT ZPOPMAX ZPOPMIN ZRANGE ZRANGEBYLEX ZRANGEBYSCORE ZRANK ZREM ZREMRANGEBYLEX ZREMRANGEBYRANK ZREMRANGEBYSCORE ZREVRANGE ZREVRANGEBYLEX ZREVRANGEBYSCORE ZREVRANK ZSCAN ZSCORE ZUNIONSTORE}
set hash_commands {HDEL HEXISTS HGET HGETALL HINCRBY HINCRBYFLOAT HKEYS HLEN HMGET HMSET HSCAN HSET HSETNX HSTRLEN HVALS HRANDFIELD}
set zset_commands {ZADD ZCARD ZCOUNT ZINCRBY ZINTERSTORE ZLEXCOUNT ZPOPMAX ZPOPMIN ZRANGE ZRANGEBYLEX ZRANGEBYSCORE ZRANK ZREM ZREMRANGEBYLEX ZREMRANGEBYRANK ZREMRANGEBYSCORE ZREVRANGE ZREVRANGEBYLEX ZREVRANGEBYSCORE ZREVRANK ZSCAN ZSCORE ZUNIONSTORE ZRANDMEMBER}
set list_commands {LINDEX LINSERT LLEN LPOP LPOS LPUSH LPUSHX LRANGE LREM LSET LTRIM RPOP RPOPLPUSH RPUSH RPUSHX}
set set_commands {SADD SCARD SDIFF SDIFFSTORE SINTER SINTERSTORE SISMEMBER SMEMBERS SMOVE SPOP SRANDMEMBER SREM SSCAN SUNION SUNIONSTORE}
set stream_commands {XACK XADD XCLAIM XDEL XGROUP XINFO XLEN XPENDING XRANGE XREAD XREADGROUP XREVRANGE XTRIM}

View File

@ -52,6 +52,7 @@ set ::all_tests {
integration/psync2
integration/psync2-reg
integration/psync2-pingoff
integration/failover
integration/redis-cli
integration/redis-benchmark
unit/pubsub
@ -717,6 +718,7 @@ if {[llength $filtered_tests] < [llength $::all_tests]} {
}
proc attach_to_replication_stream {} {
r config set repl-ping-replica-period 3600
if {$::tls} {
set s [::tls::socket [srv 0 "host"] [srv 0 "port"]]
} else {
@ -774,6 +776,7 @@ proc assert_replication_stream {s patterns} {
proc close_replication_stream {s} {
close $s
r config set repl-ping-replica-period 10
}
# With the parallel test running multiple Redis instances at the same time

View File

@ -12,7 +12,7 @@ start_server {tags {"dump"}} {
r del foo
r restore foo 5000 $encoded
set ttl [r pttl foo]
assert {$ttl >= 3000 && $ttl <= 5000}
assert_range $ttl 3000 5000
r get foo
} {bar}
@ -22,7 +22,7 @@ start_server {tags {"dump"}} {
r del foo
r restore foo 2569591501 $encoded
set ttl [r pttl foo]
assert {$ttl >= (2569591501-3000) && $ttl <= 2569591501}
assert_range $ttl (2569591501-3000) 2569591501
r get foo
} {bar}
@ -33,7 +33,7 @@ start_server {tags {"dump"}} {
set now [clock milliseconds]
r restore foo [expr $now+3000] $encoded absttl
set ttl [r pttl foo]
assert {$ttl >= 2900 && $ttl <= 3100}
assert_range $ttl 2000 3100
r get foo
} {bar}

View File

@ -209,19 +209,101 @@ start_server {tags {"expire"}} {
set e
} {*not an integer*}
test {SET - use EX/PX option, TTL should not be reseted after loadaof} {
r config set appendonly yes
r set foo bar EX 100
after 2000
r debug loadaof
set ttl [r ttl foo]
assert {$ttl <= 98 && $ttl > 90}
test {EXPIRE and SET/GETEX EX/PX/EXAT/PXAT option, TTL should not be reset after loadaof} {
# This test makes sure that expire times are propagated as absolute
# times to the AOF file and not as relative time, so that when the AOF
# is reloaded the TTLs are not being shifted forward to the future.
# We want the time to logically pass when the server is restarted!
r config set appendonly yes
r set foo1 bar EX 100
r set foo2 bar PX 100000
r set foo3 bar
r set foo4 bar
r expire foo3 100
r pexpire foo4 100000
r setex foo5 100 bar
r psetex foo6 100000 bar
r set foo7 bar EXAT [expr [clock seconds] + 100]
r set foo8 bar PXAT [expr [clock milliseconds] + 100000]
r set foo9 bar
r getex foo9 EX 100
r set foo10 bar
r getex foo10 PX 100000
r set foo11 bar
r getex foo11 EXAT [expr [clock seconds] + 100]
r set foo12 bar
r getex foo12 PXAT [expr [clock milliseconds] + 100000]
r set foo bar PX 100000
after 2000
r debug loadaof
set ttl [r ttl foo]
assert {$ttl <= 98 && $ttl > 90}
assert_range [r ttl foo1] 90 98
assert_range [r ttl foo2] 90 98
assert_range [r ttl foo3] 90 98
assert_range [r ttl foo4] 90 98
assert_range [r ttl foo5] 90 98
assert_range [r ttl foo6] 90 98
assert_range [r ttl foo7] 90 98
assert_range [r ttl foo8] 90 98
assert_range [r ttl foo9] 90 98
assert_range [r ttl foo10] 90 98
assert_range [r ttl foo11] 90 98
assert_range [r ttl foo12] 90 98
}
test {EXPIRE relative and absolute propagation to replicas} {
    # Make sure that relative and absolute expire commands are propagated
    # "as is" to replicas.
    # We want replicas to honor the same high level contract of expires that
    # the master has, that is, we want the time to be counted logically
    # starting from the moment the write was received. This usually provides
    # the most coherent behavior from the point of view of the external
    # users, with TTLs that are similar from the POV of the external observer.
    #
    # This test is here to stop some innocent / eager optimization or cleanup
    # from doing the wrong thing without proper discussion, see:
    # https://github.com/redis/redis/pull/5171#issuecomment-409553266
    set repl [attach_to_replication_stream]
    r set foo1 bar ex 200
    r set foo1 bar px 100000
    r set foo1 bar exat [expr [clock seconds]+100]
    r set foo1 bar pxat [expr [clock milliseconds]+10000]
    r setex foo1 100 bar
    r psetex foo1 100000 bar
    r set foo2 bar
    r expire foo2 100
    r pexpire foo2 100000
    r set foo3 bar
    r expireat foo3 [expr [clock seconds]+100]
    r pexpireat foo3 [expr [clock seconds]*1000+100000]
    r expireat foo3 [expr [clock seconds]-100]
    r set foo4 bar
    r getex foo4 ex 200
    r getex foo4 px 200000
    r getex foo4 exat [expr [clock seconds]+100]
    r getex foo4 pxat [expr [clock milliseconds]+10000]
    assert_replication_stream $repl {
        {select *}
        {set foo1 bar PX 200000}
        {set foo1 bar PX 100000}
        {set foo1 bar PXAT *}
        {set foo1 bar PXAT *}
        {set foo1 bar PX 100000}
        {set foo1 bar PX 100000}
        {set foo2 bar}
        {expire foo2 100}
        {pexpire foo2 100000}
        {set foo3 bar}
        {expireat foo3 *}
        {pexpireat foo3 *}
        {del foo3}
        {set foo4 bar}
        {pexpire foo4 200000}
        {pexpire foo4 200000}
        {pexpireat foo4 *}
        {pexpireat foo4 *}
    }
    # attach_to_replication_stream raises repl-ping-replica-period and opens
    # a socket; closing the stream restores the setting and releases the
    # socket so later tests are not affected.
    close_replication_stream $repl
}
test {SET command will remove expire} {
@ -246,4 +328,32 @@ start_server {tags {"expire"}} {
set ttl [r ttl foo]
assert {$ttl <= 98 && $ttl > 90}
}
# The value of the last command in the body (the TTL) is compared with the
# expected result given after the body: -1.
test {GETEX use of PERSIST option should remove TTL} {
r set foo bar EX 100
r getex foo PERSIST
r ttl foo
} {-1}
# Same as the previous test, but the TTL is read back only after a 2 second
# wait and a reload of the dataset from the AOF.
test {GETEX use of PERSIST option should remove TTL after loadaof} {
r set foo bar EX 100
r getex foo PERSIST
after 2000
r debug loadaof
r ttl foo
} {-1}
# Expected stream: GETEX PERSIST is propagated as PERSIST, a plain GETEX
# produces no entry, and GETEX with an EXAT time in the past is propagated
# as DEL.
test {GETEX propagate as to replica as PERSIST, DEL, or nothing} {
    set repl [attach_to_replication_stream]
    r set foo bar EX 100
    r getex foo PERSIST
    r getex foo
    r getex foo exat [expr [clock seconds]-100]
    assert_replication_stream $repl {
        {select *}
        {set foo bar PX 100000}
        {persist foo}
        {del foo}
    }
    # Restore repl-ping-replica-period and release the socket opened by
    # attach_to_replication_stream.
    close_replication_stream $repl
}
}

View File

@ -112,6 +112,7 @@ start_server {tags {"introspection"}} {
bio_cpulist
aof_rewrite_cpulist
bgsave_cpulist
set-proc-title
}
if {!$::tls} {

View File

@ -1,4 +1,4 @@
start_server {tags {"limits"} overrides {maxclients 10}} {
start_server {tags {"limits network"} overrides {maxclients 10}} {
if {$::tls} {
set expected_code "*I/O error*"
} else {

View File

@ -0,0 +1,67 @@
set testmodule [file normalize tests/modules/blockonbackground.so]
source tests/support/util.tcl
start_server {tags {"modules"}} {
r module load $testmodule
test { blocked clients time tracking - check blocked command that uses RedisModule_BlockedClientMeasureTimeStart() is tracking background time} {
    r slowlog reset
    r config set slowlog-log-slower-than 200000
    assert_equal [r slowlog len] 0
    r block.debug 0 10000
    assert_equal [r slowlog len] 0
    r config resetstat
    r block.debug 200 10000
    assert_equal [r slowlog len] 1

    set cmdstatline [cmdrstat block.debug r]

    # The first variable after the string receives the whole match, so it is
    # sent to a throwaway variable named "->". Without it, usec would hold
    # the full matched text and usec_per_call would hold the usec value.
    assert_equal 1 [regexp "calls=1,usec=(.*?),usec_per_call=(.*?),rejected_calls=0,failed_calls=0" $cmdstatline -> usec usec_per_call]
    assert {$usec >= 100000}
    assert {$usec_per_call >= 100000}
}
test { blocked clients time tracking - check blocked command that uses RedisModule_BlockedClientMeasureTimeStart() is tracking background time even in timeout } {
    r slowlog reset
    r config set slowlog-log-slower-than 200000
    assert_equal [r slowlog len] 0
    r block.debug 0 20000
    assert_equal [r slowlog len] 0
    r config resetstat
    r block.debug 20000 200
    assert_equal [r slowlog len] 1

    set cmdstatline [cmdrstat block.debug r]

    # The first variable after the string receives the whole match, so it is
    # sent to a throwaway variable named "->". Without it, usec would hold
    # the full matched text and usec_per_call would hold the usec value.
    assert_equal 1 [regexp "calls=1,usec=(.*?),usec_per_call=(.*?),rejected_calls=0,failed_calls=0" $cmdstatline -> usec usec_per_call]
    assert {$usec >= 100000}
    assert {$usec_per_call >= 100000}
}
test { blocked clients time tracking - check blocked command with multiple calls RedisModule_BlockedClientMeasureTimeStart()  is tracking the total background time } {
    r slowlog reset
    r config set slowlog-log-slower-than 200000
    assert_equal [r slowlog len] 0
    r block.double_debug 0
    assert_equal [r slowlog len] 0
    r config resetstat
    r block.double_debug 100
    assert_equal [r slowlog len] 1

    set cmdstatline [cmdrstat block.double_debug r]

    # The first variable after the string receives the whole match, so it is
    # sent to a throwaway variable named "->". Without it, usec would hold
    # the full matched text and usec_per_call would hold the usec value.
    assert_equal 1 [regexp "calls=1,usec=(.*?),usec_per_call=(.*?),rejected_calls=0,failed_calls=0" $cmdstatline -> usec usec_per_call]
    assert {$usec >= 60000}
    assert {$usec_per_call >= 60000}
}
# Counterpart of the tests above: block.debug_no_track is called with the
# same first argument (200) as the tracked case, but the command must not
# show up in the slowlog.
test { blocked clients time tracking - check blocked command without calling RedisModule_BlockedClientMeasureTimeStart() is not reporting background time } {
r slowlog reset
r config set slowlog-log-slower-than 200000
assert_equal [r slowlog len] 0
r block.debug_no_track 200 1000
# ensure slowlog is still empty
assert_equal [r slowlog len] 0
}
}

View File

@ -168,7 +168,7 @@ start_server {tags {"modules"}} {
assert_error "*unblocked*" {$rd read}
}
test {Module client blocked on keys does not wake up on wrong type} {
test {Module client re-blocked on keys after woke up on wrong type} {
r del k
set rd [redis_deferring_client]
$rd fsl.bpop k 0
@ -184,5 +184,56 @@ start_server {tags {"modules"}} {
r del k
r fsl.push k 34
assert_equal {34} [$rd read]
assert_equal {1} [r get fsl_wrong_type] ;# first lpush caused one wrong-type wake-up
}
# A client blocked in blockonkeys.popall on key k must be woken up by a plain
# LPUSH and receive every pushed element.
test {Module client blocked on keys woken up by LPUSH} {
r del k
set rd [redis_deferring_client]
$rd blockonkeys.popall k
# wait until client is actually blocked
wait_for_condition 50 100 {
[s 0 blocked_clients] eq {1}
} else {
fail "Client is not blocked"
}
r lpush k 42 squirrel banana
# The reply lists the elements starting from the most recently pushed one.
assert_equal {banana squirrel 42} [$rd read]
$rd close
}
# A client blocked in the native BLPOP must be served when the element is
# pushed through the module command blockonkeys.lpush.
test {Module client unblocks BLPOP} {
r del k
set rd [redis_deferring_client]
# BLPOP with a 3 second timeout
$rd blpop k 3
# wait until client is actually blocked
wait_for_condition 50 100 {
[s 0 blocked_clients] eq {1}
} else {
fail "Client is not blocked"
}
r blockonkeys.lpush k 42
# BLPOP replies with the key name followed by the popped value.
assert_equal {k 42} [$rd read]
$rd close
}
# A module client waiting for 5 elements on a list that already holds one
# must stay blocked until enough elements exist AND the key is signalled as
# ready by blockonkeys.lpush_unblock.
test {Module unblocks module blocked on non-empty list} {
r del k
r lpush k aa
# Module client blocks to pop 5 elements from list
set rd [redis_deferring_client]
$rd blockonkeys.blpopn k 5
# Wait until client is actually blocked
wait_for_condition 50 100 {
[s 0 blocked_clients] eq {1}
} else {
fail "Client is not blocked"
}
# Check that RM_SignalKeyAsReady() can wake up BLPOPN
r blockonkeys.lpush_unblock k bb cc ;# Not enough elements for BLPOPN
r lpush k dd ee ff ;# Doesn't unblock module
r blockonkeys.lpush_unblock k gg ;# Unblocks module
# The five most recently pushed elements are returned, newest first.
assert_equal {gg ff ee dd cc} [$rd read]
$rd close
}
}

View File

@ -0,0 +1,155 @@
set testmodule [file normalize tests/modules/stream.so]
start_server {tags {"modules"}} {
r module load $testmodule
# Exercises stream.add and stream.delete: adding to a missing and to an
# existing key, deleting an existing, a non-existing and a malformed ID, and
# the error replies when the key holds a non-stream value.
test {Module stream add and delete} {
r del mystream
# add to empty key
set streamid1 [r stream.add mystream item 1 value a]
# add to existing stream
set streamid2 [r stream.add mystream item 2 value b]
# check result
assert { [string match "*-*" $streamid1] }
set items [r XRANGE mystream - +]
assert_equal $items \
"{$streamid1 {item 1 value a}} {$streamid2 {item 2 value b}}"
# delete one of them and try deleting non-existing ID
assert_equal OK [r stream.delete mystream $streamid1]
assert_error "ERR StreamDelete*" {r stream.delete mystream 123-456}
assert_error "Invalid stream ID*" {r stream.delete mystream foo}
assert_equal "{$streamid2 {item 2 value b}}" [r XRANGE mystream - +]
# check error condition: wrong type
r del mystream
r set mystream mystring
assert_error "ERR StreamAdd*" {r stream.add mystream item 1 value a}
assert_error "ERR StreamDelete*" {r stream.delete mystream 123-456}
}
# An entry added through stream.add must wake up a client blocked in
# XREAD BLOCK, both when the key did not exist and when it already did.
test {Module stream add unblocks blocking xread} {
r del mystream
# Blocking XREAD on an empty key
set rd1 [redis_deferring_client]
$rd1 XREAD BLOCK 3000 STREAMS mystream $
# wait until client is actually blocked
wait_for_condition 50 100 {
[s 0 blocked_clients] eq {1}
} else {
fail "Client is not blocked"
}
set id [r stream.add mystream field 1 value a]
# The blocked reader receives exactly the entry that was just added.
assert_equal "{mystream {{$id {field 1 value a}}}}" [$rd1 read]
# Blocking XREAD on an existing stream
set rd2 [redis_deferring_client]
$rd2 XREAD BLOCK 3000 STREAMS mystream $
# wait until client is actually blocked
wait_for_condition 50 100 {
[s 0 blocked_clients] eq {1}
} else {
fail "Client is not blocked"
}
set id [r stream.add mystream field 2 value b]
assert_equal "{mystream {{$id {field 2 value b}}}}" [$rd2 read]
}
# Adds one million entries with a single stream.addn call and checks that
# the command reports that same number back.
test {Module stream add benchmark (1M stream add)} {
set n 1000000
r del mystream
set result [r stream.addn mystream $n field value]
assert_equal $result $n
}
# Compares stream.range with the native XRANGE / XREVRANGE replies for a full
# forward range, a full reverse range and two single-entry ranges.
test {Module stream iterator} {
r del mystream
set streamid1 [r xadd mystream * item 1 value a]
set streamid2 [r xadd mystream * item 2 value b]
# range result
set result1 [r stream.range mystream "-" "+"]
set expect1 [r xrange mystream "-" "+"]
assert_equal $result1 $expect1
# reverse range
set result_rev [r stream.range mystream "+" "-"]
set expect_rev [r xrevrange mystream "+" "-"]
assert_equal $result_rev $expect_rev
# only one item: range from the minimum ID up to the first entry
set result2 [r stream.range mystream "-" $streamid1]
assert_equal $result2 "{$streamid1 {item 1 value a}}"
assert_equal $result2 [list [list $streamid1 {item 1 value a}]]
# only one item: range with startid = endid
set result3 [r stream.range mystream $streamid2 $streamid2]
assert_equal $result3 "{$streamid2 {item 2 value b}}"
assert_equal $result3 [list [list $streamid2 {item 2 value b}]]
}
# Checks deletion through the iterator: the first stream.range call still
# returns the "selfdestruct" entry, the second call no longer does.
test {Module stream iterator delete} {
r del mystream
set id1 [r xadd mystream * normal item]
set id2 [r xadd mystream * selfdestruct yes]
set id3 [r xadd mystream * another item]
# stream.range deletes the "selfdestruct" item after returning it
assert_equal \
"{$id1 {normal item}} {$id2 {selfdestruct yes}} {$id3 {another item}}" \
[r stream.range mystream - +]
# now, the "selfdestruct" item is gone
assert_equal \
"{$id1 {normal item}} {$id3 {another item}}" \
[r stream.range mystream - +]
}
# Exercises stream.trim with MAXLEN in exact (=) and approximate (~) mode.
# Each assertion checks the number of entries the command reports as trimmed
# and, where relevant, the resulting stream length.
test {Module stream trim by length} {
r del mystream
# exact maxlen
r xadd mystream * item 1 value a
r xadd mystream * item 2 value b
r xadd mystream * item 3 value c
assert_equal 3 [r xlen mystream]
assert_equal 0 [r stream.trim mystream maxlen = 5]
assert_equal 3 [r xlen mystream]
assert_equal 2 [r stream.trim mystream maxlen = 1]
assert_equal 1 [r xlen mystream]
assert_equal 1 [r stream.trim mystream maxlen = 0]
# check that there is no limit for exact maxlen
r stream.addn mystream 20000 item x value y
assert_equal 20000 [r stream.trim mystream maxlen = 0]
# approx maxlen (100 items per node implies default limit 10K items)
r stream.addn mystream 20000 item x value y
assert_equal 20000 [r xlen mystream]
# Approximate trimming removes the entries over several calls and leaves
# 100 entries behind even though the requested length is 2.
assert_equal 10000 [r stream.trim mystream maxlen ~ 2]
assert_equal 9900 [r stream.trim mystream maxlen ~ 2]
assert_equal 0 [r stream.trim mystream maxlen ~ 2]
assert_equal 100 [r xlen mystream]
assert_equal 100 [r stream.trim mystream maxlen ~ 0]
assert_equal 0 [r xlen mystream]
}
# Exercises stream.trim with MINID in exact (=) and approximate (~) mode.
# Each assertion checks the number of entries the command reports as trimmed
# and, where relevant, the resulting stream length.
test {Module stream trim by ID} {
r del mystream
# exact minid
r xadd mystream * item 1 value a
r xadd mystream * item 2 value b
set minid [r xadd mystream * item 3 value c]
assert_equal 3 [r xlen mystream]
assert_equal 0 [r stream.trim mystream minid = -]
assert_equal 3 [r xlen mystream]
assert_equal 2 [r stream.trim mystream minid = $minid]
assert_equal 1 [r xlen mystream]
assert_equal 1 [r stream.trim mystream minid = +]
# check that there is no limit for exact minid
r stream.addn mystream 20000 item x value y
assert_equal 20000 [r stream.trim mystream minid = +]
# approx minid (100 items per node implies default limit 10K items)
# minid is the ID of entry number 19981 out of 20000.
r stream.addn mystream 19980 item x value y
set minid [r xadd mystream * item x value y]
r stream.addn mystream 19 item x value y
assert_equal 20000 [r xlen mystream]
assert_equal 10000 [r stream.trim mystream minid ~ $minid]
assert_equal 9900 [r stream.trim mystream minid ~ $minid]
assert_equal 0 [r stream.trim mystream minid ~ $minid]
assert_equal 100 [r xlen mystream]
assert_equal 100 [r stream.trim mystream minid ~ +]
assert_equal 0 [r xlen mystream]
}
}

View File

@ -39,7 +39,7 @@ if {$system_name eq {linux}} {
r bgsave
set child_pid [get_child_pid 0]
assert {[get_oom_score_adj $child_pid] == [expr $base + 30]}
assert_equal [get_oom_score_adj $child_pid] [expr $base + 30]
}
# Failed oom-score-adj tests can only run unprivileged

View File

@ -321,3 +321,47 @@ start_server {tags {"other"}} {
assert_match "*table size: 8192*" [r debug HTSTATS 9]
}
}
# Return up to the first 1024 characters of /proc/<pid>/cmdline, i.e. the
# command line (process title) of the process with the given pid.
proc read_proc_title {pid} {
    set path "/proc/$pid/cmdline"
    set chan [open $path "r"]
    set title [read $chan 1024]
    close $chan
    return $title
}
# Starts a dedicated server and checks that proc-title-template is expanded
# into the process title, and that an invalid template is rejected.
start_server {tags {"other"}} {
test {Process title set as expected} {
# Test only on Linux where it's easy to get cmdline without relying on tools.
# Skip valgrind as it messes up the arguments.
set os [exec uname]
if {$os == "Linux" && !$::valgrind} {
# Set a custom template
r config set "proc-title-template" "TEST {title} {listen-addr} {port} {tls-port} {unixsocket} {config-file}"
# The title is read back as a Tcl list: one element per template field.
set cmdline [read_proc_title [srv 0 pid]]
assert_equal "TEST" [lindex $cmdline 0]
assert_match "*/redis-server" [lindex $cmdline 1]
# In TLS mode the test expects the plain port to be 0 and the server port
# to show up as the TLS port; otherwise the other way around.
if {$::tls} {
set expect_port 0
set expect_tls_port [srv 0 port]
} else {
set expect_port [srv 0 port]
set expect_tls_port 0
}
set port [srv 0 port]
assert_equal "$::host:$port" [lindex $cmdline 2]
assert_equal $expect_port [lindex $cmdline 3]
assert_equal $expect_tls_port [lindex $cmdline 4]
assert_match "*/tests/tmp/server.*/socket" [lindex $cmdline 5]
assert_match "*/tests/tmp/redis.conf.*" [lindex $cmdline 6]
# Try setting a bad template
catch {r config set "proc-title-template" "{invalid-var}"} err
assert_match {*template format is invalid*} $err
}
}
}

View File

@ -1,4 +1,4 @@
start_server {tags {"pause"}} {
start_server {tags {"pause network"}} {
test "Test read commands are not blocked by client pause" {
r client PAUSE 100000000 WRITE
set rd [redis_deferring_client]

View File

@ -1,4 +1,4 @@
start_server {tags {"protocol"}} {
start_server {tags {"protocol network"}} {
test "Handle an empty query" {
reconnect
r write "\r\n"

View File

@ -1,4 +1,4 @@
start_server {tags {"pubsub"}} {
start_server {tags {"pubsub network"}} {
proc __consume_subscribe_messages {client type channels} {
set numsub -1
set counts {}

View File

@ -1,4 +1,4 @@
start_server {tags {"scan"}} {
start_server {tags {"scan network"}} {
test "SCAN basic" {
r flushdb
r debug populate 1000

View File

@ -330,6 +330,15 @@ start_server {tags {"scripting"}} {
set e
} {NOSCRIPT*}
# Loads 100 distinct scripts, then checks that SCRIPT FLUSH ASYNC brings the
# number_of_cached_scripts field of INFO Memory back to 0.
# NOTE(review): the first assertion expects exactly 100 cached scripts, so it
# presumably relies on the script cache being empty when this test starts.
test {SCRIPTING FLUSH ASYNC} {
for {set j 0} {$j < 100} {incr j} {
r script load "return $j"
}
assert { [string match "*number_of_cached_scripts:100*" [r info Memory]] }
r script flush async
assert { [string match "*number_of_cached_scripts:0*" [r info Memory]] }
}
test {SCRIPT EXISTS - can detect already defined scripts?} {
r eval "return 1+1" 0
r script exists a27e7e8a43702b7046d4f6a7ccf5b60cef6b9bd9 a27e7e8a43702b7046d4f6a7ccf5b60cef6b9bda

View File

@ -1,4 +1,4 @@
start_server {tags {"tracking"}} {
start_server {tags {"tracking network"}} {
# Create a deferred client we'll use to redirect invalidation
# messages to.
set rd_redirection [redis_deferring_client]

View File

@ -18,6 +18,181 @@ start_server {tags {"hash"}} {
assert_encoding ziplist smallhash
}
# Recreate the hash stored at key from a list of two-element entries:
# the key is deleted first, then every entry is written with HSET using its
# first element as the field and its second element as the value.
proc create_hash {key entries} {
    r del $key
    foreach pair $entries {
        set field [lindex $pair 0]
        set value [lindex $pair 1]
        r hset $key $field $value
    }
}
# Return the first element of every entry in l, preserving the order.
# Used to extract the field names from a list of field/value entries.
proc get_keys {l} {
    set names [list]
    foreach pair $l {
        lappend names [lindex $pair 0]
    }
    return $names
}
# Runs the single-field HRANDFIELD test once per hash encoding. The hashtable
# data set contains a random field name of 70 to 90 characters, longer than
# the hash-max-ziplist-value of 10 that is configured below.
foreach {type contents} "ziplist {{a 1} {b 2} {c 3}} hashtable {{a 1} {b 2} {[randstring 70 90 alpha] 3}}" {
# Save the current limit so it can be restored at the end of the iteration.
set original_max_value [lindex [r config get hash-max-ziplist-value] 1]
r config set hash-max-ziplist-value 10
create_hash myhash $contents
assert_encoding $type myhash
test "HRANDFIELD - $type" {
# myhash here is a local Tcl array used as a set of the returned fields.
unset -nocomplain myhash
array set myhash {}
for {set i 0} {$i < 100} {incr i} {
set key [r hrandfield myhash]
set myhash($key) 1
}
# After 100 draws every field of the hash is expected to have been seen.
assert_equal [lsort [get_keys $contents]] [lsort [array names myhash]]
}
r config set hash-max-ziplist-value $original_max_value
}
# Under RESP3, HRANDFIELD with WITHVALUES returns one two-element entry per
# field, while without WITHVALUES every entry is a single element.
# Uses the myhash key left behind by the preceding loop.
test "HRANDFIELD with RESP3" {
r hello 3
set res [r hrandfield myhash 3 withvalues]
assert_equal [llength $res] 3
assert_equal [llength [lindex $res 1]] 2
set res [r hrandfield myhash 3]
assert_equal [llength $res] 3
assert_equal [llength [lindex $res 1]] 1
}
# Switch the connection back to RESP2 for the tests that follow.
r hello 2
# A count of 0 must produce an empty reply.
test "HRANDFIELD count of 0 is handled correctly" {
r hrandfield myhash 0
} {}
# Asking for fields of a key that does not exist must produce an empty reply.
test "HRANDFIELD with <count> against non existing key" {
r hrandfield nonexisting_key 100
} {}
# Exercises HRANDFIELD with a <count> argument once per hash encoding.
# The hashtable data set contains a random field name of 70 to 90 characters,
# longer than the hash-max-ziplist-value of 10 configured inside the test.
# The original limit is saved inside the test body and restored after it.
foreach {type contents} "
hashtable {{a 1} {b 2} {c 3} {d 4} {e 5} {6 f} {7 g} {8 h} {9 i} {[randstring 70 90 alpha] 10}}
ziplist {{a 1} {b 2} {c 3} {d 4} {e 5} {6 f} {7 g} {8 h} {9 i} {10 j}} " {
    test "HRANDFIELD with <count> - $type" {
        set original_max_value [lindex [r config get hash-max-ziplist-value] 1]
        r config set hash-max-ziplist-value 10
        create_hash myhash $contents
        assert_encoding $type myhash

        # create a dict for easy lookup
        unset -nocomplain mydict
        foreach {k v} [r hgetall myhash] {
            dict append mydict $k $v
        }

        # We'll stress different parts of the code, see the implementation
        # of HRANDFIELD for more information, but basically there are
        # four different code paths.

        # PATH 1: Use negative count.

        # 1) Check that it returns repeated elements with and without values.
        set res [r hrandfield myhash -20]
        assert_equal [llength $res] 20
        # again with WITHVALUES
        set res [r hrandfield myhash -20 withvalues]
        assert_equal [llength $res] 40

        # 2) Check that all the elements actually belong to the original hash.
        foreach {key val} $res {
            assert {[dict exists $mydict $key]}
        }

        # 3) Check that eventually all the elements are returned.
        # Use both WITHVALUES and without
        unset -nocomplain auxset
        set iterations 1000
        while {$iterations != 0} {
            incr iterations -1
            if {[expr {$iterations % 2}] == 0} {
                set res [r hrandfield myhash -3 withvalues]
                foreach {key val} $res {
                    dict append auxset $key $val
                }
            } else {
                set res [r hrandfield myhash -3]
                foreach key $res {
                    # No value is returned on this path and only the keys of
                    # auxset are compared below, so just register the key
                    # instead of appending a stale value from an earlier loop.
                    dict append auxset $key
                }
            }
            if {[lsort [dict keys $mydict]] eq
                [lsort [dict keys $auxset]]} {
                break;
            }
        }
        # Reaching zero means some field was never returned in 1000 rounds.
        assert {$iterations != 0}

        # PATH 2: positive count (unique behavior) with requested size
        # equal or greater than set size.
        foreach size {10 20} {
            set res [r hrandfield myhash $size]
            assert_equal [llength $res] 10
            assert_equal [lsort $res] [lsort [dict keys $mydict]]

            # again with WITHVALUES
            set res [r hrandfield myhash $size withvalues]
            assert_equal [llength $res] 20
            assert_equal [lsort $res] [lsort $mydict]
        }

        # PATH 3: Ask for almost as many elements as there are in the set.
        # In this case the implementation will duplicate the original
        # set and will remove random elements up to the requested size.
        #
        # PATH 4: Ask a number of elements definitely smaller than
        # the set size.
        #
        # We can test both the code paths just changing the size but
        # using the same code.
        foreach size {8 2} {
            set res [r hrandfield myhash $size]
            assert_equal [llength $res] $size
            # again with WITHVALUES
            set res [r hrandfield myhash $size withvalues]
            assert_equal [llength $res] [expr {$size * 2}]

            # 1) Check that all the elements actually belong to the
            # original set.
            foreach ele [dict keys $res] {
                assert {[dict exists $mydict $ele]}
            }

            # 2) Check that eventually all the elements are returned.
            # Use both WITHVALUES and without
            unset -nocomplain auxset
            set iterations 1000
            while {$iterations != 0} {
                incr iterations -1
                if {[expr {$iterations % 2}] == 0} {
                    set res [r hrandfield myhash $size withvalues]
                    foreach {key value} $res {
                        dict append auxset $key $value
                    }
                } else {
                    set res [r hrandfield myhash $size]
                    foreach key $res {
                        dict append auxset $key
                    }
                }
                if {[lsort [dict keys $mydict]] eq
                    [lsort [dict keys $auxset]]} {
                    break;
                }
            }
            assert {$iterations != 0}
        }
    }
    r config set hash-max-ziplist-value $original_max_value
}
test {HSET/HLEN - Big hash creation} {
array set bighash {}
for {set i 0} {$i < 1024} {incr i} {

View File

@ -501,7 +501,7 @@ start_server {
set iterations 1000
while {$iterations != 0} {
incr iterations -1
set res [r srandmember myset -10]
set res [r srandmember myset $size]
foreach ele $res {
set auxset($ele) 1
}

View File

@ -102,6 +102,91 @@ start_server {tags {"string"}} {
assert_equal 20 [r get x]
}
# After GETEX foo EX 10 the key must have a TTL between 5 and 10 seconds.
test "GETEX EX option" {
r del foo
r set foo bar
r getex foo ex 10
assert_range [r ttl foo] 5 10
}
# After GETEX foo PX 10000 the key must have a PTTL between 5000 and 10000
# milliseconds.
test "GETEX PX option" {
r del foo
r set foo bar
r getex foo px 10000
assert_range [r pttl foo] 5000 10000
}
# GETEX foo EXAT <now + 10 seconds> must leave a TTL between 5 and 10 seconds.
test "GETEX EXAT option" {
r del foo
r set foo bar
r getex foo exat [expr [clock seconds] + 10]
assert_range [r ttl foo] 5 10
}
# GETEX foo PXAT <now + 10000 ms> must leave a PTTL between 5000 and 10000
# milliseconds.
test "GETEX PXAT option" {
r del foo
r set foo bar
r getex foo pxat [expr [clock milliseconds] + 10000]
assert_range [r pttl foo] 5000 10000
}
# GETEX foo PERSIST must remove an existing expire: TTL reports -1 afterwards.
test "GETEX PERSIST option" {
r del foo
r set foo bar ex 10
assert_range [r ttl foo] 5 10
r getex foo persist
assert_equal -1 [r ttl foo]
}
# GETEX without any option must simply return the stored value.
test "GETEX no option" {
r del foo
r set foo bar
# The reply of this first call is discarded; only the second one is checked.
r getex foo
assert_equal bar [r getex foo]
}
# An unknown GETEX option must be rejected with an error matching *syntax*.
test "GETEX syntax errors" {
set ex {}
catch {r getex foo non-existent-option} ex
set ex
} {*syntax*}
# GETEX without a key must be rejected with a wrong number of arguments error.
test "GETEX no arguments" {
set ex {}
catch {r getex} ex
set ex
} {*wrong number of arguments*}
# GETDEL returns the value and deletes the key: the second call on the same
# key gets an empty reply.
test "GETDEL command" {
r del foo
r set foo bar
assert_equal bar [r getdel foo ]
assert_equal {} [r getdel foo ]
}
# The replication stream must contain a DEL in place of the GETDEL that was
# executed on the master.
test {GETDEL propagate as DEL command to replica} {
set repl [attach_to_replication_stream]
r set foo bar
r getdel foo
assert_replication_stream $repl {
{select *}
{set foo bar}
{del foo}
}
}
# A GETEX without options must not show up in the replication stream: only
# the surrounding SET and DEL are expected there.
test {GETEX without argument does not propagate to replica} {
set repl [attach_to_replication_stream]
r set foo bar
r getex foo
r del foo
assert_replication_stream $repl {
{select *}
{set foo bar}
{del foo}
}
}
test {MGET} {
r flushdb
r set foo BAR
@ -437,6 +522,17 @@ start_server {tags {"string"}} {
assert {$ttl <= 10 && $ttl > 5}
}
# SET foo bar EXAT <now + 10 seconds> must leave a TTL between 5 and 10
# seconds.
test "Extended SET EXAT option" {
r del foo
r set foo bar exat [expr [clock seconds] + 10]
assert_range [r ttl foo] 5 10
}
# SET foo bar PXAT <now + 10000 ms> must leave a PTTL between 5000 and 10000
# milliseconds. The expire is checked with PTTL so that the millisecond
# option is verified at millisecond resolution, like the GETEX PXAT test.
test "Extended SET PXAT option" {
    r del foo
    r set foo bar pxat [expr [clock milliseconds] + 10000]
    assert_range [r pttl foo] 5000 10000
}
test {Extended SET using multiple options at once} {
r set foo val
assert {[r set foo bar xx px 10000] eq {OK}}

View File

@ -7,6 +7,8 @@ start_server {tags {"zset"}} {
}
proc basics {encoding} {
set original_max_entries [lindex [r config get zset-max-ziplist-entries] 1]
set original_max_value [lindex [r config get zset-max-ziplist-value] 1]
if {$encoding == "ziplist"} {
r config set zset-max-ziplist-entries 128
r config set zset-max-ziplist-value 64
@ -713,6 +715,12 @@ start_server {tags {"zset"}} {
assert_equal {b 3 c 5} [r zinter 2 zseta zsetb withscores]
}
# Under RESP3, ZINTER WITHSCORES returns one member/score entry per element,
# with the score formatted as a double.
test "ZINTER RESP3 - $encoding" {
r hello 3
assert_equal {{b 3.0} {c 5.0}} [r zinter 2 zseta zsetb withscores]
}
# Switch the connection back to RESP2 for the tests that follow.
r hello 2
test "ZINTERSTORE with weights - $encoding" {
assert_equal 2 [r zinterstore zsetc 2 zseta zsetb weights 2 3]
assert_equal {b 7 c 12} [r zrange zsetc 0 -1 withscores]
@ -919,6 +927,9 @@ start_server {tags {"zset"}} {
assert_equal 0 [r zcard z1]
assert_equal 1 [r zcard z2]
}
r config set zset-max-ziplist-entries $original_max_entries
r config set zset-max-ziplist-value $original_max_value
}
basics ziplist
@ -1016,6 +1027,8 @@ start_server {tags {"zset"}} {
}
proc stressers {encoding} {
set original_max_entries [lindex [r config get zset-max-ziplist-entries] 1]
set original_max_value [lindex [r config get zset-max-ziplist-value] 1]
if {$encoding == "ziplist"} {
# Little extra to allow proper fuzzing in the sorting stresser
r config set zset-max-ziplist-entries 256
@ -1440,6 +1453,8 @@ start_server {tags {"zset"}} {
r zadd zset 0 foo
assert_equal {zset foo 0} [$rd read]
}
r config set zset-max-ziplist-entries $original_max_entries
r config set zset-max-ziplist-value $original_max_value
}
tags {"slow"} {
@ -1481,6 +1496,12 @@ start_server {tags {"zset"}} {
r zrange z2 0 -1 withscores
} {a 1 b 2 c 3 d 4}
# Reads back z2 with ZRANGE WITHSCORES under RESP3: one member/score entry
# per element, with the score formatted as a double.
# NOTE(review): despite its name the test body only issues ZRANGE; it
# presumably relies on z2 having been filled by an earlier ZRANGESTORE test.
test {ZRANGESTORE RESP3} {
r hello 3
r zrange z2 0 -1 withscores
} {{a 1.0} {b 2.0} {c 3.0} {d 4.0}}
# Switch the connection back to RESP2 for the tests that follow.
r hello 2
test {ZRANGESTORE range} {
set res [r zrangestore z2 z1 1 2]
assert_equal $res 2
@ -1554,4 +1575,171 @@ start_server {tags {"zset"}} {
catch {r zrangebyscore z1 0 -1 REV} err
assert_match "*syntax*" $err
}
# Given a flat list of score/member pairs, return only the members,
# preserving their order.
proc get_keys {l} {
    set members [list]
    foreach {_score member} $l {
        lappend members $member
    }
    return $members
}
# Runs the single-member ZRANDMEMBER test once per sorted set encoding. The
# skiplist data set contains a random member of 70 to 90 characters, longer
# than the zset-max-ziplist-value of 10 that is configured below.
foreach {type contents} "ziplist {1 a 2 b 3 c} skiplist {1 a 2 b 3 [randstring 70 90 alpha]}" {
# Save the current limit so it can be restored at the end of the iteration.
set original_max_value [lindex [r config get zset-max-ziplist-value] 1]
r config set zset-max-ziplist-value 10
create_zset myzset $contents
assert_encoding $type myzset
test "ZRANDMEMBER - $type" {
# myzset here is a local Tcl array used as a set of the returned members.
unset -nocomplain myzset
array set myzset {}
for {set i 0} {$i < 100} {incr i} {
set key [r zrandmember myzset]
set myzset($key) 1
}
# After 100 draws every member is expected to have been seen.
assert_equal [lsort [get_keys $contents]] [lsort [array names myzset]]
}
r config set zset-max-ziplist-value $original_max_value
}
# Under RESP3, ZRANDMEMBER with WITHSCORES returns one two-element entry per
# member, while without WITHSCORES every entry is a single element.
# Uses the myzset key left behind by the preceding loop.
test "ZRANDMEMBER with RESP3" {
r hello 3
set res [r zrandmember myzset 3 withscores]
assert_equal [llength $res] 3
assert_equal [llength [lindex $res 1]] 2
set res [r zrandmember myzset 3]
assert_equal [llength $res] 3
assert_equal [llength [lindex $res 1]] 1
}
# Switch the connection back to RESP2 for the tests that follow.
r hello 2
# A count of 0 must produce an empty reply.
test "ZRANDMEMBER count of 0 is handled correctly" {
r zrandmember myzset 0
} {}
# Asking for members of a key that does not exist must produce an empty reply.
test "ZRANDMEMBER with <count> against non existing key" {
r zrandmember nonexisting_key 100
} {}
# Exercises ZRANDMEMBER with a <count> argument once per sorted set encoding.
# The skiplist data set contains a random member of 70 to 90 characters,
# longer than the zset-max-ziplist-value of 10 configured inside the test.
# The original limit is saved inside the test body and restored after it.
# NOTE(review): members g and h share score 7 in both data sets - possibly
# meant to be 7 and 8; the assertions below do not depend on it.
foreach {type contents} "
skiplist {1 a 2 b 3 c 4 d 5 e 6 f 7 g 7 h 9 i 10 [randstring 70 90 alpha]}
ziplist {1 a 2 b 3 c 4 d 5 e 6 f 7 g 7 h 9 i 10 j} " {
    test "ZRANDMEMBER with <count> - $type" {
        set original_max_value [lindex [r config get zset-max-ziplist-value] 1]
        r config set zset-max-ziplist-value 10
        create_zset myzset $contents
        assert_encoding $type myzset

        # create a dict for easy lookup
        unset -nocomplain mydict
        foreach {k v} [r zrange myzset 0 -1 withscores] {
            dict append mydict $k $v
        }

        # We'll stress different parts of the code, see the implementation
        # of ZRANDMEMBER for more information, but basically there are
        # four different code paths.

        # PATH 1: Use negative count.

        # 1) Check that it returns repeated elements with and without values.
        set res [r zrandmember myzset -20]
        assert_equal [llength $res] 20
        # again with WITHSCORES
        set res [r zrandmember myzset -20 withscores]
        assert_equal [llength $res] 40

        # 2) Check that all the elements actually belong to the original zset.
        foreach {key val} $res {
            assert {[dict exists $mydict $key]}
        }

        # 3) Check that eventually all the elements are returned.
        # Use both WITHSCORES and without
        unset -nocomplain auxset
        set iterations 1000
        while {$iterations != 0} {
            incr iterations -1
            if {[expr {$iterations % 2}] == 0} {
                set res [r zrandmember myzset -3 withscores]
                foreach {key val} $res {
                    dict append auxset $key $val
                }
            } else {
                set res [r zrandmember myzset -3]
                foreach key $res {
                    # No score is returned on this path and only the keys of
                    # auxset are compared below, so just register the key
                    # instead of appending a stale value from an earlier loop.
                    dict append auxset $key
                }
            }
            if {[lsort [dict keys $mydict]] eq
                [lsort [dict keys $auxset]]} {
                break;
            }
        }
        # Reaching zero means some member was never returned in 1000 rounds.
        assert {$iterations != 0}

        # PATH 2: positive count (unique behavior) with requested size
        # equal or greater than set size.
        foreach size {10 20} {
            set res [r zrandmember myzset $size]
            assert_equal [llength $res] 10
            assert_equal [lsort $res] [lsort [dict keys $mydict]]

            # again with WITHSCORES
            set res [r zrandmember myzset $size withscores]
            assert_equal [llength $res] 20
            assert_equal [lsort $res] [lsort $mydict]
        }

        # PATH 3: Ask for almost as many elements as there are in the set.
        # In this case the implementation will duplicate the original
        # set and will remove random elements up to the requested size.
        #
        # PATH 4: Ask a number of elements definitely smaller than
        # the set size.
        #
        # We can test both the code paths just changing the size but
        # using the same code.
        foreach size {8 2} {
            set res [r zrandmember myzset $size]
            assert_equal [llength $res] $size
            # again with WITHSCORES
            set res [r zrandmember myzset $size withscores]
            assert_equal [llength $res] [expr {$size * 2}]

            # 1) Check that all the elements actually belong to the
            # original set.
            foreach ele [dict keys $res] {
                assert {[dict exists $mydict $ele]}
            }

            # 2) Check that eventually all the elements are returned.
            # Use both WITHSCORES and without
            unset -nocomplain auxset
            set iterations 1000
            while {$iterations != 0} {
                incr iterations -1
                if {[expr {$iterations % 2}] == 0} {
                    set res [r zrandmember myzset $size withscores]
                    foreach {key value} $res {
                        dict append auxset $key $value
                    }
                } else {
                    set res [r zrandmember myzset $size]
                    foreach key $res {
                        dict append auxset $key
                    }
                }
                if {[lsort [dict keys $mydict]] eq
                    [lsort [dict keys $auxset]]} {
                    break;
                }
            }
            assert {$iterations != 0}
        }
    }
    r config set zset-max-ziplist-value $original_max_value
}
}

View File

@ -1,6 +1,6 @@
source tests/support/cli.tcl
start_server {tags {"wait"}} {
start_server {tags {"wait network"}} {
start_server {} {
set slave [srv 0 client]
set slave_host [srv 0 host]