Merge branch 'Multithread' into unstable

Former-commit-id: 8c074a7d013638e920606d139a6f1c4ff43c6511
This commit is contained in:
John Sully 2019-02-23 02:24:23 -05:00
commit 677391bd77
60 changed files with 3166 additions and 853 deletions

1
.gitignore vendored
View File

@ -1,4 +1,5 @@
.*.swp
core
*.o
*.log
dump.rdb

54
.vscode/settings.json vendored
View File

@ -1,6 +1,56 @@
{
"files.associations": {
"zmalloc.h": "c",
"stat.h": "c"
"stat.h": "c",
"array": "cpp",
"atomic": "cpp",
"*.tcc": "cpp",
"cctype": "cpp",
"chrono": "cpp",
"clocale": "cpp",
"cmath": "cpp",
"condition_variable": "cpp",
"cstdarg": "cpp",
"cstddef": "cpp",
"cstdint": "cpp",
"cstdio": "cpp",
"cstdlib": "cpp",
"cstring": "cpp",
"ctime": "cpp",
"cwchar": "cpp",
"cwctype": "cpp",
"deque": "cpp",
"list": "cpp",
"unordered_map": "cpp",
"vector": "cpp",
"exception": "cpp",
"fstream": "cpp",
"functional": "cpp",
"future": "cpp",
"initializer_list": "cpp",
"iomanip": "cpp",
"iosfwd": "cpp",
"iostream": "cpp",
"istream": "cpp",
"limits": "cpp",
"memory": "cpp",
"mutex": "cpp",
"new": "cpp",
"numeric": "cpp",
"optional": "cpp",
"ostream": "cpp",
"ratio": "cpp",
"scoped_allocator": "cpp",
"sstream": "cpp",
"stdexcept": "cpp",
"streambuf": "cpp",
"string_view": "cpp",
"system_error": "cpp",
"thread": "cpp",
"cinttypes": "cpp",
"tuple": "cpp",
"type_traits": "cpp",
"typeinfo": "cpp",
"utility": "cpp"
}
}
}

View File

@ -291,6 +291,17 @@ dir ./
# refuse the replica request.
#
# masterauth <master-password>
#
# However this is not enough if you are using Redis ACLs (for Redis version
# 6 or greater), and the default user is not capable of running the PSYNC
# command and/or other commands needed for replication. In this case it's
# better to configure a special user to use with replication, and specify the
# masteruser configuration as such:
#
# masteruser <username>
#
# When masteruser is specified, the replica will authenticate against its
# master using the new AUTH form: AUTH <username> <password>.
# When a replica loses its connection with the master, or when the replication
# is still in progress, the replica can act in two different ways:
@ -501,6 +512,94 @@ replica-priority 100
# can be easily a long string from /dev/urandom or whatever, so by using a
# long and unguessable password no brute force attack will be possible.
# Redis ACL users are defined in the following format:
#
# user <username> ... acl rules ...
#
# For example:
#
# user worker +@list +@connection ~jobs:* on >ffa9203c493aa99
#
# The special username "default" is used for new connections. If this user
# has the "nopass" rule, then new connections will be immediately authenticated
# as the "default" user without the need of any password provided via the
# AUTH command. Otherwise if the "default" user is not flagged with "nopass"
# the connections will start in not authenticated state, and will require
# AUTH (or the HELLO command AUTH option) in order to be authenticated and
# start to work.
#
# The ACL rules that describe what a user can do are the following:
#
# on Enable the user: it is possible to authenticate as this user.
# off Disable the user: it's no longer possible to authenticate
# with this user, however the already authenticated connections
# will still work.
# +<command> Allow the execution of that command
# -<command> Disallow the execution of that command
# +@<category> Allow the execution of all the commands in such category
# where valid categories are @admin, @set, @sortedset, ...
# and so forth; see the full list in the server.c file where
# the Redis command table is described and defined.
# The special category @all means all the commands, both the ones
# currently present in the server and the ones that will be loaded
# in the future via modules.
# +<command>|subcommand Allow a specific subcommand of an otherwise
# disabled command. Note that this form is not
# allowed as negative like -DEBUG|SEGFAULT, but
# only additive starting with "+".
# allcommands Alias for +@all. Note that it implies the ability to execute
# all the future commands loaded via the modules system.
# nocommands Alias for -@all.
# ~<pattern> Add a pattern of keys that can be mentioned as part of
# commands. For instance ~* allows all the keys. The pattern
# is a glob-style pattern like the one of KEYS.
# It is possible to specify multiple patterns.
# allkeys Alias for ~*
# resetkeys Flush the list of allowed keys patterns.
# ><password> Add this password to the list of valid passwords for the user.
# For example >mypass will add "mypass" to the list.
# This directive clears the "nopass" flag (see later).
# <<password> Remove this password from the list of valid passwords.
# nopass All the set passwords of the user are removed, and the user
# is flagged as requiring no password: it means that every
# password will work against this user. If this directive is
# used for the default user, every new connection will be
# immediately authenticated with the default user without
# any explicit AUTH command required. Note that the "resetpass"
# directive will clear this condition.
# resetpass Flush the list of allowed passwords. Moreover removes the
# "nopass" status. After "resetpass" the user has no associated
# passwords and there is no way to authenticate without adding
# some password (or setting it as "nopass" later).
# reset Performs the following actions: resetpass, resetkeys, off,
# -@all. The user returns to the same state it has immediately
# after its creation.
#
# ACL rules can be specified in any order: for instance you can start with
# passwords, then flags, or key patterns. However note that the additive
# and subtractive rules will CHANGE MEANING depending on the ordering.
# For instance see the following example:
#
# user alice on +@all -DEBUG ~* >somepassword
#
# This will allow "alice" to use all the commands with the exception of the
# DEBUG command, since +@all added all the commands to the set of the commands
# alice can use, and later DEBUG was removed. However if we invert the order
# of two ACL rules the result will be different:
#
# user alice on -DEBUG +@all ~* >somepassword
#
# Now DEBUG was removed when alice had yet no commands in the set of allowed
# commands, later all the commands are added, so the user will be able to
# execute everything.
#
# Basically ACL rules are processed left-to-right.
#
# For more information about ACL configuration please refer to
# the Redis web site at https://redis.io/topics/acl
# Using an external ACL file
#
# Instead of configuring users here in this file, it is possible to use
# a stand-alone file just listing users. The two methods cannot be mixed:
# if you configure users here and at the same time you activate the external
@ -1399,3 +1498,8 @@ rdb-save-incremental-fsync yes
# reduces memory requirements by storing rarely accessed data on disk
# instead of RAM. A temporary file will be created in this directory.
# scratch-file-path /tmp/
# Number of worker threads serving requests. This number should be related to the performance
# of your network hardware, not the number of cores on your machine. We don't recommend going
# above 4 at this time. By default this is set to 1.
server-threads 2

View File

@ -21,7 +21,7 @@ NODEPS:=clean distclean
# Default settings
STD=-std=c99 -pedantic -DREDIS_STATIC=''
CXX_STD=-std=c++14 -pedantic
CXX_STD=-std=c++14 -pedantic -fno-rtti
ifneq (,$(findstring clang,$(CC)))
ifneq (,$(findstring FreeBSD,$(uname_S)))
STD+=-Wno-c11-extensions
@ -39,7 +39,7 @@ MALLOC=libc
ifneq ($(uname_M),armv6l)
ifneq ($(uname_M),armv7l)
ifeq ($(uname_S),Linux)
MALLOC=memkind
MALLOC=jemalloc
endif
endif
endif
@ -134,23 +134,27 @@ FINAL_CXXFLAGS+= -I../deps/hiredis -I../deps/linenoise -I../deps/lua/src
ifeq ($(MALLOC),tcmalloc)
FINAL_CFLAGS+= -DUSE_TCMALLOC
FINAL_CXXFLAGS+= -DUSE_TCMALLOC
FINAL_LIBS+= -ltcmalloc
endif
ifeq ($(MALLOC),tcmalloc_minimal)
FINAL_CFLAGS+= -DUSE_TCMALLOC
FINAL_CXXFLAGS+= -DUSE_TCMALLOC
FINAL_LIBS+= -ltcmalloc_minimal
endif
ifeq ($(MALLOC),jemalloc)
DEPENDENCY_TARGETS+= jemalloc
FINAL_CFLAGS+= -DUSE_JEMALLOC -I../deps/jemalloc/include
FINAL_CXXFLAGS+= -DUSE_JEMALLOC -I../deps/jemalloc/include
FINAL_LIBS := ../deps/jemalloc/lib/libjemalloc.a $(FINAL_LIBS)
endif
ifeq ($(MALLOC),memkind)
DEPENDENCY_TARGETS+= memkind
FINAL_CFLAGS+= -DUSE_MEMKIND -I../deps/memkind/src/include
FINAL_CXXFLAGS+= -DUSE_MEMKIND -I../deps/memkind/src/include
FINAL_LIBS := ../deps/memkind/src/.libs/libmemkind.a -lnuma $(FINAL_LIBS)
endif

470
src/acl.c
View File

@ -28,6 +28,7 @@
*/
#include "server.h"
#include <fcntl.h>
/* =============================================================================
* Global state for ACLs
@ -90,6 +91,7 @@ struct ACLUserFlag {
void ACLResetSubcommandsForCommand(user *u, unsigned long id);
void ACLResetSubcommands(user *u);
void ACLAddAllowedSubcommand(user *u, unsigned long id, const char *sub);
/* =============================================================================
* Helper functions for the rest of the ACL implementation
@ -163,6 +165,11 @@ void ACLListFreeSds(void *item) {
sdsfree(item);
}
/* Method to duplicate list elements from ACL users password/ptterns lists. */
void *ACLListDupSds(void *item) {
return sdsdup(item);
}
/* Create a new user with the specified name, store it in the list
* of users (the Users global radix tree), and returns a reference to
* the structure representing the user.
@ -178,13 +185,32 @@ user *ACLCreateUser(const char *name, size_t namelen) {
u->patterns = listCreate();
listSetMatchMethod(u->passwords,ACLListMatchSds);
listSetFreeMethod(u->passwords,ACLListFreeSds);
listSetDupMethod(u->passwords,ACLListDupSds);
listSetMatchMethod(u->patterns,ACLListMatchSds);
listSetFreeMethod(u->patterns,ACLListFreeSds);
listSetDupMethod(u->patterns,ACLListDupSds);
memset(u->allowed_commands,0,sizeof(u->allowed_commands));
raxInsert(Users,(unsigned char*)name,namelen,u,NULL);
return u;
}
/* Build a "fake" user that is not reachable through the Users radix tree,
 * handy to validate ACL rules without touching any real user.
 *
 * The user is created through ACLCreateUser() under a generated name of the
 * form "__fakeuser:<n>__" (trying increasing values of <n> until creation
 * succeeds) and is immediately unlinked from the Users tree again. The caller
 * releases the returned user with ACLFreeUser(). */
user *ACLCreateUnlinkedUser(void) {
    char username[64];
    int seq = 0;

    while (1) {
        snprintf(username,sizeof(username),"__fakeuser:%d__",seq);
        seq++;

        user *fakeuser = ACLCreateUser(username,strlen(username));
        if (fakeuser != NULL) {
            /* Creation linked the user into Users: take it out again. */
            int removed = raxRemove(Users,(unsigned char*) username,
                                    strlen(username),NULL);
            serverAssert(removed != 0);
            return fakeuser;
        }
        /* NULL result: try the next candidate name. */
    }
}
/* Release the memory used by the user structure. Note that this function
* will not remove the user from the Users global radix tree. */
void ACLFreeUser(user *u) {
@ -195,6 +221,62 @@ void ACLFreeUser(user *u) {
zfree(u);
}
/* Free the user 'u' after scheduling the asynchronous release of every
 * client in server.clients that is currently associated with it. */
void ACLFreeUserAndKillClients(user *u) {
    listIter iter;
    listNode *node;

    listRewind(server.clients,&iter);
    for (node = listNext(&iter); node != NULL; node = listNext(&iter)) {
        client *conn = listNodeValue(node);
        if (conn->puser != u) continue;

        /* The connection is going to be freed asynchronously, so in
         * theory switching its user is not needed. However, should a bug
         * let the client run again before it is released, leaving it
         * attached to the deleted user could become a security hole:
         * it is more defensive to fall back to the default user in the
         * non authenticated state. */
        conn->puser = DefaultUser;
        conn->authenticated = 0;
        freeClientAsync(conn);
    }
    ACLFreeUser(u);
}
/* Make the ACL rules of 'dst' identical to the ones of 'src': passwords,
 * key patterns, allowed commands bitmap, flags and allowed subcommands are
 * all copied over. The name of 'dst' is left untouched. */
void ACLCopyUser(user *dst, user *src) {
    /* Swap each list of 'dst' for a listDup() copy of the source list. */
    listRelease(dst->passwords);
    dst->passwords = listDup(src->passwords);
    listRelease(dst->patterns);
    dst->patterns = listDup(src->patterns);

    dst->flags = src->flags;
    memcpy(dst->allowed_commands,src->allowed_commands,
           sizeof(dst->allowed_commands));

    /* Drop whatever subcommands 'dst' had, then re-add the ones of 'src'
     * one by one (array of NULL terminated arrays of SDS strings). */
    ACLResetSubcommands(dst);
    if (!src->allowed_subcommands) return;

    for (int id = 0; id < USER_COMMAND_BITS_COUNT; id++) {
        if (!src->allowed_subcommands[id]) continue;
        for (int k = 0; src->allowed_subcommands[id][k]; k++)
            ACLAddAllowedSubcommand(dst,id,src->allowed_subcommands[id][k]);
    }
}
/* Release the radix tree 'users' together with every user stored in it:
 * each element is handed to ACLFreeUserAndKillClients(). */
void ACLFreeUsersSet(rax *users) {
    void (*release_user)(void*) = (void(*)(void*))ACLFreeUserAndKillClients;
    raxFreeWithCallback(users,release_user);
}
/* Given a command ID, this function set by reference 'word' and 'bit'
* so that user->allowed_commands[word] will address the right word
* where the corresponding bit for the provided ID is stored, and
@ -256,6 +338,7 @@ int ACLSetUserCommandBitsForCategory(user *u, const char *category, int value) {
dictEntry *de;
while ((de = dictNext(di)) != NULL) {
struct redisCommand *cmd = dictGetVal(de);
if (cmd->flags & CMD_MODULE) continue; /* Ignore modules commands. */
if (cmd->flags & cflag) {
ACLSetUserCommandBit(u,cmd->id,value);
ACLResetSubcommandsForCommand(u,cmd->id);
@ -579,6 +662,7 @@ void ACLAddAllowedSubcommand(user *u, unsigned long id, const char *sub) {
* fully added.
* EEXIST: You are adding a key pattern after "*" was already added. This is
* almost surely an error on the user side.
* ENODEV: The password you are trying to remove from the user does not exist.
*/
int ACLSetUser(user *u, const char *op, ssize_t oplen) {
if (oplen == -1) oplen = strlen(op);
@ -623,8 +707,13 @@ int ACLSetUser(user *u, const char *op, ssize_t oplen) {
} else if (op[0] == '<') {
sds delpass = sdsnewlen(op+1,oplen-1);
listNode *ln = listSearchKey(u->passwords,delpass);
if (ln) listDelNode(u->passwords,ln);
sdsfree(delpass);
if (ln) {
listDelNode(u->passwords,ln);
} else {
errno = ENODEV;
return C_ERR;
}
} else if (op[0] == '~') {
if (u->flags & USER_FLAG_ALLKEYS) {
errno = EEXIST;
@ -728,6 +817,9 @@ char *ACLSetUserStringError(void) {
"'allkeys' flag) is not valid and does not have any "
"effect. Try 'resetkeys' to start with an empty "
"list of patterns";
else if (errno == ENODEV)
errmsg = "The password you are trying to remove from the user does "
"not exist";
return errmsg;
}
@ -741,10 +833,9 @@ sds ACLDefaultUserFirstPassword(void) {
return listNodeValue(first);
}
/* Initialization of the ACL subsystem. */
void ACLInit(void) {
Users = raxNew();
UsersToLoad = listCreate();
/* Initialize the default user, that will always exist for all the process
* lifetime. */
void ACLInitDefaultUser(void) {
DefaultUser = ACLCreateUser("default",7);
ACLSetUser(DefaultUser,"+@all",-1);
ACLSetUser(DefaultUser,"~*",-1);
@ -752,6 +843,13 @@ void ACLInit(void) {
ACLSetUser(DefaultUser,"nopass",-1);
}
/* Set up the ACL subsystem: allocate the list of users pending load and
 * the Users radix tree, then create the default user inside it. */
void ACLInit(void) {
    UsersToLoad = listCreate();
    Users = raxNew();
    /* Needs Users to exist, so it must come after raxNew(). */
    ACLInitDefaultUser();
}
/* Check the username and password pair and return C_OK if they are valid,
* otherwise C_ERR is returned and errno is set to:
*
@ -944,11 +1042,7 @@ int ACLAppendUserForLoading(sds *argv, int argc, int *argc_err) {
/* Try to apply the user rules in a fake user to see if they
* are actually valid. */
char *funame = "__fakeuser__";
user *fakeuser = ACLCreateUser(funame,strlen(funame));
serverAssert(fakeuser != NULL);
int retval = raxRemove(Users,(unsigned char*) funame,strlen(funame),NULL);
serverAssert(retval != 0);
user *fakeuser = ACLCreateUnlinkedUser();
for (int j = 2; j < argc; j++) {
if (ACLSetUser(fakeuser,argv[j],sdslen(argv[j])) == C_ERR) {
@ -1009,15 +1103,275 @@ int ACLLoadConfiguredUsers(void) {
return C_OK;
}
/* This function loads the ACL from the specified filename: every line
 * is validated and should be either empty or in the format used to specify
 * users in the redis.conf configuration or in the ACL file, that is:
 *
 * user <username> ... rules ...
 *
 * Note that this function considers comments starting with '#' as errors
 * because the ACL file is meant to be rewritten, and comments would be
 * lost after the rewrite. Yet empty lines are allowed to avoid being too
 * strict.
 *
 * One important part of implementing ACL LOAD, that uses this function, is
 * to avoid ending with broken rules if the ACL file is invalid for some
 * reason, so the function will attempt to validate the rules before loading
 * each user. For every line that will be found broken the function will
 * collect an error message.
 *
 * IMPORTANT: If there is at least a single error, nothing will be loaded
 * and the rules will remain exactly as they were.
 *
 * At the end of the process, if no errors were found in the whole file then
 * NULL is returned. Otherwise an SDS string describing in a single line
 * a description of all the issues found is returned. The caller owns the
 * returned string and must release it with sdsfree().
 *
 * NOTE(review): the per-line error messages are prefixed with
 * server.acl_filename rather than with the 'filename' argument; the two
 * are the same for the callers visible in this file. */
sds ACLLoadFromFile(const char *filename) {
FILE *fp;
char buf[1024];
/* Open the ACL file. */
if ((fp = fopen(filename,"r")) == NULL) {
sds errors = sdscatprintf(sdsempty(),
"Error loading ACLs, opening file '%s': %s",
filename, strerror(errno));
return errors;
}
/* Load the whole file as a single string in memory. */
sds acls = sdsempty();
while(fgets(buf,sizeof(buf),fp) != NULL)
acls = sdscat(acls,buf);
fclose(fp);
/* Split the file into lines and attempt to load each line. */
int totlines;
sds *lines, errors = sdsempty();
lines = sdssplitlen(acls,strlen(acls),"\n",1,&totlines);
sdsfree(acls);
/* We need a fake user to validate the rules before making changes
 * to the real user mentioned in the ACL line. */
user *fakeuser = ACLCreateUnlinkedUser();
/* We do all the loading in a fresh instance of the Users radix tree,
 * so if there are errors loading the ACL file we can rollback to the
 * old version. Note that ACLInitDefaultUser() also overwrites the
 * DefaultUser global: the old pointer is saved here and restored after
 * the loop. */
rax *old_users = Users;
user *old_default_user = DefaultUser;
Users = raxNew();
ACLInitDefaultUser();
/* Load each line of the file. */
for (int i = 0; i < totlines; i++) {
sds *argv;
int argc;
int linenum = i+1;
lines[i] = sdstrim(lines[i]," \t\r\n");
/* Skip blank lines */
if (lines[i][0] == '\0') continue;
/* Split into arguments */
argv = sdssplitargs(lines[i],&argc);
if (argv == NULL) {
errors = sdscatprintf(errors,
"%s:%d: unbalanced quotes in acl line. ",
server.acl_filename, linenum);
continue;
}
/* Skip this line if the resulting command vector is empty. */
if (argc == 0) {
sdsfreesplitres(argv,argc);
continue;
}
/* The line should start with the "user" keyword. */
if (strcmp(argv[0],"user") || argc < 2) {
errors = sdscatprintf(errors,
"%s:%d should start with user keyword followed "
"by the username. ", server.acl_filename,
linenum);
sdsfreesplitres(argv,argc);
continue;
}
/* Try to process the line using the fake user to validate if
 * the rules are able to apply cleanly. Every failing rule of the
 * line appends its own message, so several errors can be reported
 * for a single line. */
ACLSetUser(fakeuser,"reset",-1);
int j;
for (j = 2; j < argc; j++) {
if (ACLSetUser(fakeuser,argv[j],sdslen(argv[j])) != C_OK) {
char *errmsg = ACLSetUserStringError();
errors = sdscatprintf(errors,
"%s:%d: %s. ",
server.acl_filename, linenum, errmsg);
continue;
}
}
/* Apply the rule to the new users set only if so far there
 * are no errors, otherwise it's useless since we are going
 * to discard the new users set anyway. */
if (sdslen(errors) != 0) {
sdsfreesplitres(argv,argc);
continue;
}
/* We can finally lookup the user and apply the rule. If the
 * user already exists we always reset it to start. */
user *u = ACLCreateUser(argv[1],sdslen(argv[1]));
if (!u) {
u = ACLGetUserByName(argv[1],sdslen(argv[1]));
serverAssert(u != NULL);
ACLSetUser(u,"reset",-1);
}
/* Note that the same rules already applied to the fake user, so
 * we just assert that everything goes well: it should. */
for (j = 2; j < argc; j++)
serverAssert(ACLSetUser(u,argv[j],sdslen(argv[j])) == C_OK);
sdsfreesplitres(argv,argc);
}
ACLFreeUser(fakeuser);
sdsfreesplitres(lines,totlines);
DefaultUser = old_default_user; /* This pointer must never change. */
/* Check if we found errors and react accordingly. */
if (sdslen(errors) == 0) {
/* The default user pointer is referenced in different places: instead
 * of replacing such occurrences it is much simpler to copy the new
 * default user configuration in the old one. */
user *new = ACLGetUserByName("default",7);
serverAssert(new != NULL);
ACLCopyUser(DefaultUser,new);
ACLFreeUser(new);
/* Make the "default" key of the new tree refer to the long-lived
 * DefaultUser object (presumably raxInsert() replaces the value of the
 * already existing key -- confirm against rax.c), and take it out of
 * the old tree so that releasing the old users below does not free it. */
raxInsert(Users,(unsigned char*)"default",7,DefaultUser,NULL);
raxRemove(old_users,(unsigned char*)"default",7,NULL);
ACLFreeUsersSet(old_users);
sdsfree(errors);
return NULL;
} else {
/* Rollback: discard the freshly built set and reinstate the old one. */
ACLFreeUsersSet(Users);
Users = old_users;
errors = sdscat(errors,"WARNING: ACL errors detected, no change to the previously active ACL rules was performed");
return errors;
}
}
/* Generate a copy of the ACLs currently in memory in the specified filename.
 *
 * The new content is first written to a temporary file created next to
 * 'filename' (same path plus a ".tmp-<pid>-<mstime>" suffix) which is then
 * rename(2)d over 'filename', so the old file stays in place if something
 * fails along the way.
 *
 * Returns C_OK on success or C_ERR if there was an error during the I/O.
 * When C_ERR is returned a log is produced with hints about the issue and
 * the temporary file, if any, is removed. */
int ACLSaveToFile(const char *filename) {
    sds acl = sdsempty();
    int fd = -1;
    sds tmpfilename = NULL;
    int retval = C_ERR;

    /* Let's generate an SDS string containing the new version of the
     * ACL file: one "user <name> <rules>" line per registered user. */
    raxIterator ri;
    raxStart(&ri,Users);
    raxSeek(&ri,"^",NULL,0);
    while(raxNext(&ri)) {
        user *u = ri.data;
        /* Return information in the configuration file format. */
        sds user = sdsnew("user ");
        user = sdscatsds(user,u->name);
        user = sdscatlen(user," ",1);
        sds descr = ACLDescribeUser(u);
        user = sdscatsds(user,descr);
        sdsfree(descr);
        acl = sdscatsds(acl,user);
        acl = sdscatlen(acl,"\n",1);
        sdsfree(user);
    }
    raxStop(&ri);

    /* Create a temp file with the new content.
     *
     * The "%I" specifier of sdscatfmt() consumes a 64 bit signed integer
     * from the argument list, so the timestamp must be passed as a
     * long long: passing an int there is a varargs type mismatch. */
    tmpfilename = sdsnew(filename);
    tmpfilename = sdscatfmt(tmpfilename,".tmp-%i-%I",
        (int)getpid(),(long long)mstime());
    if ((fd = open(tmpfilename,O_WRONLY|O_CREAT,0644)) == -1) {
        serverLog(LL_WARNING,"Opening temp ACL file for ACL SAVE: %s",
            strerror(errno));
        goto cleanup;
    }

    /* Write it. A short write is treated as a failure. */
    if (write(fd,acl,sdslen(acl)) != (ssize_t)sdslen(acl)) {
        serverLog(LL_WARNING,"Writing ACL file for ACL SAVE: %s",
            strerror(errno));
        goto cleanup;
    }
    close(fd); fd = -1;

    /* Let's replace the new file with the old one. */
    if (rename(tmpfilename,filename) == -1) {
        serverLog(LL_WARNING,"Renaming ACL file for ACL SAVE: %s",
            strerror(errno));
        goto cleanup;
    }
    /* The temp file no longer exists under its own name: forget it so
     * that the cleanup code does not try to unlink it. */
    sdsfree(tmpfilename); tmpfilename = NULL;
    retval = C_OK; /* If we reached this point, everything is fine. */

cleanup:
    if (fd != -1) close(fd);
    if (tmpfilename) unlink(tmpfilename);
    sdsfree(tmpfilename);
    sdsfree(acl);
    return retval;
}
/* Startup-time ACL loading. Users come either from the list of users
 * collected while parsing redis.conf (UsersToLoad) or from the ACL file
 * named by server.acl_filename: using both at once is refused. Any failure
 * is logged and terminates the process with exit(1). */
void ACLLoadUsersAtStartup(void) {
    /* Users in redis.conf AND an ACL file: invalid setup. */
    if (listLength(UsersToLoad) != 0 && server.acl_filename[0] != '\0') {
        serverLog(LL_WARNING,
            "Configuring Redis with users defined in redis.conf and at "
            "the same setting an ACL file path is invalid. This setup "
            "is very likely to lead to configuration errors and security "
            "holes, please define either an ACL file or declare users "
            "directly in your redis.conf, but not both.");
        exit(1);
    }

    /* Users declared in the configuration file. */
    if (ACLLoadConfiguredUsers() == C_ERR) {
        serverLog(LL_WARNING,
            "Critical error while loading ACLs. Exiting.");
        exit(1);
    }

    /* Users stored in the external ACL file, when one is configured. */
    if (server.acl_filename[0] == '\0') return;

    sds errors = ACLLoadFromFile(server.acl_filename);
    if (errors == NULL) return;

    serverLog(LL_WARNING,
        "Aborting Redis startup because of ACL errors: %s", errors);
    sdsfree(errors);
    exit(1);
}
/* =============================================================================
* ACL related commands
* ==========================================================================*/
/* ACL -- show and modify the configuration of ACL users.
* ACL HELP
* ACL LOAD
* ACL LIST
* ACL SETUSER <username> ... user attribs ...
* ACL DELUSER <username>
* ACL USERS
* ACL CAT [<category>]
* ACL SETUSER <username> ... acl rules ...
* ACL DELUSER <username> [...]
* ACL GETUSER <username>
*/
void aclCommand(client *c) {
@ -1045,32 +1399,16 @@ void aclCommand(client *c) {
addReplyError(c,"The 'default' user cannot be removed");
return;
}
}
for (int j = 2; j < c->argc; j++) {
sds username = ptrFromObj(c->argv[j]);
user *u;
if (raxRemove(Users,(unsigned char*)username,
sdslen(username),
(void**)&u))
{
/* When a user is deleted we need to cycle the active
* connections in order to kill all the pending ones that
* are authenticated with such user. */
ACLFreeUser(u);
listIter li;
listNode *ln;
listRewind(server.clients,&li);
while ((ln = listNext(&li)) != NULL) {
client *c = listNodeValue(ln);
if (c->puser == u) {
/* We'll free the conenction asynchronously, so
* in theory to set a different user is not needed.
* However if there are bugs in Redis, soon or later
* this may result in some security hole: it's much
* more defensive to set the default user and put
* it in non authenticated mode. */
c->puser = DefaultUser;
c->authenticated = 0;
freeClientAsync(c);
}
}
ACLFreeUserAndKillClients(u);
deleted++;
}
}
@ -1151,19 +1489,69 @@ void aclCommand(client *c) {
}
}
raxStop(&ri);
} else if (!strcasecmp(sub,"whoami")) {
} else if (!strcasecmp(sub,"whoami") && c->argc == 2) {
if (c->puser != NULL) {
addReplyBulkCBuffer(c,c->puser->name,sdslen(c->puser->name));
} else {
addReplyNull(c);
}
} else if (server.acl_filename[0] == '\0' &&
(!strcasecmp(sub,"load") || !strcasecmp(sub,"save")))
{
addReplyError(c,"This Redis instance is not configured to use an ACL file. You may want to specify users via the ACL SETUSER command and then issue a CONFIG REWRITE (assuming you have a Redis configuration file set) in order to store users in the Redis configuration.");
return;
} else if (!strcasecmp(sub,"load") && c->argc == 2) {
sds errors = ACLLoadFromFile(server.acl_filename);
if (errors == NULL) {
addReply(c,shared.ok);
} else {
addReplyError(c,errors);
sdsfree(errors);
}
} else if (!strcasecmp(sub,"save") && c->argc == 2) {
if (ACLSaveToFile(server.acl_filename) == C_OK) {
addReply(c,shared.ok);
} else {
addReplyError(c,"There was an error trying to save the ACLs. "
"Please check the server logs for more "
"information");
}
} else if (!strcasecmp(sub,"cat") && c->argc == 2) {
void *dl = addReplyDeferredLen(c);
int j;
for (j = 0; ACLCommandCategories[j].flag != 0; j++)
addReplyBulkCString(c,ACLCommandCategories[j].name);
setDeferredArrayLen(c,dl,j);
} else if (!strcasecmp(sub,"cat") && c->argc == 3) {
uint64_t cflag = ACLGetCommandCategoryFlagByName(ptrFromObj(c->argv[2]));
if (cflag == 0) {
addReplyErrorFormat(c, "Unknown category '%s'", (char*)ptrFromObj(c->argv[2]));
return;
}
int arraylen = 0;
void *dl = addReplyDeferredLen(c);
dictIterator *di = dictGetIterator(server.orig_commands);
dictEntry *de;
while ((de = dictNext(di)) != NULL) {
struct redisCommand *cmd = dictGetVal(de);
if (cmd->flags & CMD_MODULE) continue;
if (cmd->flags & cflag) {
addReplyBulkCString(c,cmd->name);
arraylen++;
}
}
dictReleaseIterator(di);
setDeferredArrayLen(c,dl,arraylen);
} else if (!strcasecmp(sub,"help")) {
const char *help[] = {
"LOAD -- Reload users from the ACL file.",
"LIST -- Show user details in config file format.",
"USERS -- List all the registered usernames.",
"SETUSER <username> [attribs ...] -- Create or modify a user.",
"GETUSER <username> -- Get the user details.",
"DELUSER <username> -- Delete a user.",
"DELUSER <username> [...] -- Delete a list of users.",
"CAT -- List available categories.",
"CAT <category> -- List commands inside category.",
"WHOAMI -- Return the current connection username.",
NULL
};
@ -1172,3 +1560,15 @@ NULL
addReplySubcommandSyntaxError(c);
}
}
/* Reply to the client 'c' with a set holding one "@<category>" status
 * string for every entry of ACLCommandCategories whose flag is set in
 * cmd->flags. The set length is only known at the end, hence the deferred
 * length. */
void addReplyCommandCategories(client *c, struct redisCommand *cmd) {
    void *flaglen = addReplyDeferredLen(c);
    int flagcount = 0;
    int idx = 0;

    /* The categories table is terminated by an entry with flag == 0. */
    while (ACLCommandCategories[idx].flag != 0) {
        if (cmd->flags & ACLCommandCategories[idx].flag) {
            addReplyStatusFormat(c, "@%s", ACLCommandCategories[idx].name);
            flagcount++;
        }
        idx++;
    }
    setDeferredSetLen(c, flaglen, flagcount);
}

View File

@ -31,6 +31,10 @@
#ifndef __ADLIST_H__
#define __ADLIST_H__
#ifdef __cplusplus
extern "C" {
#endif
/* Node, List, and Iterator are the only data structures used currently. */
typedef struct listNode {
@ -92,4 +96,8 @@ void listJoin(list *l, list *o);
#define AL_START_HEAD 0
#define AL_START_TAIL 1
#ifdef __cplusplus
}
#endif
#endif /* __ADLIST_H__ */

View File

@ -30,7 +30,11 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <condition_variable>
#include <atomic>
#include <mutex>
#include <stdio.h>
#include <fcntl.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
@ -41,11 +45,49 @@
#include <errno.h>
#include "ae.h"
#include "fastlock.h"
extern "C" {
#include "zmalloc.h"
#include "config.h"
}
#ifdef USE_MUTEX
/* Number of times the calling thread currently holds a mutex_wrapper.
 * The counter is per thread, not per mutex_wrapper instance. */
thread_local int cOwnLock = 0;

/* std::recursive_mutex with the lock()/unlock()/try_lock() interface, which
 * additionally keeps cOwnLock up to date so that a thread can ask whether
 * it is currently inside the lock. */
class mutex_wrapper
{
    std::recursive_mutex m_mutex;

public:
    /* Block until the mutex is acquired, then account for it. */
    void lock() {
        m_mutex.lock();
        ++cOwnLock;
    }

    /* Drop one level of ownership; the counter is updated while the mutex
     * is still held. */
    void unlock() {
        --cOwnLock;
        m_mutex.unlock();
    }

    /* Non blocking acquire: the counter only moves on success. */
    bool try_lock() {
        const bool fAcquired = m_mutex.try_lock();
        if (fAcquired)
            ++cOwnLock;
        return fAcquired;
    }

    /* True when the calling thread holds the lock at least once. */
    bool fOwnLock() {
        return cOwnLock > 0;
    }
};
mutex_wrapper g_lock;
#else
fastlock g_lock;
#endif
thread_local aeEventLoop *g_eventLoopThisThread = NULL;
/* Minimal assertion for the event loop code: when the condition is false a
 * message is printed on stderr and the process is crashed on purpose by
 * writing through a null pointer.
 *
 * The whole expansion is wrapped in do { } while(0) so that the macro
 * behaves as a single statement: with the test outside of the loop, an
 * 'else' following "if (c) AE_ASSERT(x);" would silently bind to the
 * macro's own 'if'. */
#define AE_ASSERT(x) do { if (!(x)) { fprintf(stderr, "AE_ASSERT FAILURE\n"); *((volatile int*)0) = 1; } } while(0)
/* Include the best multiplexing layer supported by this system.
* The following should be ordered by performances, descending. */
#ifdef HAVE_EVPORT
@ -62,6 +104,178 @@ extern "C" {
#endif
#endif
/* Kinds of request that can be queued to an event loop through its command
 * pipe; aeProcessCmd() dispatches on this value. */
enum class AE_ASYNC_OP
{
PostFunction, /* call the C callback aeCommand::proc with clientData */
PostCppFunction, /* call, then delete, the std::function in aeCommand::pfn */
DeleteFileEvent, /* aeDeleteFileEvent() on aeCommand::fd / mask */
CreateFileEvent, /* aeCreateFileEvent() on aeCommand::fd / mask / fproc */
};
/* State shared between the thread that posts a synchronous command and the
 * event loop thread that executes it. It is allocated by the poster, which
 * also deletes it once it has been woken up. */
struct aeCommandControl
{
std::condition_variable cv; /* notified by aeProcessCmd() when the command is done */
std::atomic<int> rval; /* result stored by the CreateFileEvent handler, read by the poster */
std::mutex mutexcv; /* mutex paired with 'cv' */
};
/* One request for an event loop. The struct is written as raw bytes to
 * eventLoop->fdCmdWrite by the posting thread and read back, again as raw
 * bytes, by aeProcessCmd(). Which fields are meaningful depends on 'op'. */
struct aeCommand
{
AE_ASYNC_OP op; /* what to do */
int fd; /* target descriptor (Create/DeleteFileEvent) */
int mask; /* event mask (Create/DeleteFileEvent) */
/* Callback to use; the active member is selected by 'op'. */
union {
aePostFunctionProc *proc; /* PostFunction */
aeFileProc *fproc; /* CreateFileEvent */
std::function<void()> *pfn; /* PostCppFunction: heap allocated, deleted by aeProcessCmd() */
};
void *clientData; /* argument for proc / fproc */
aeCommandControl *pctl; /* non-null only when the poster waits for completion */
};
/* File event handler for the read side of an event loop command pipe (it is
 * registered on eventLoop->fdCmdRead by aeCreateEventLoop()).
 *
 * Reads aeCommand records from 'fd' one at a time and executes each of them
 * on the calling thread, until a read does not return a whole record. The
 * last two parameters (client data and mask) are unused. */
void aeProcessCmd(aeEventLoop *eventLoop, int fd, void *, int )
{
aeCommand cmd;
for (;;)
{
auto cb = read(fd, &cmd, sizeof(aeCommand));
if (cb != sizeof(cmd))
{
/* The only expected way out is "nothing more to read" on the
 * non-blocking pipe.
 * NOTE(review): a short read or a 0 return does not set errno, so
 * in those cases this checks whatever value errno had before. */
AE_ASSERT(errno == EAGAIN);
break;
}
switch (cmd.op)
{
case AE_ASYNC_OP::DeleteFileEvent:
aeDeleteFileEvent(eventLoop, cmd.fd, cmd.mask);
break;
case AE_ASYNC_OP::CreateFileEvent:
{
if (cmd.pctl != nullptr)
{
/* Synchronous request: publish the result and wake the poster
 * while holding the mutex paired with the condition variable. */
cmd.pctl->mutexcv.lock();
std::atomic_store(&cmd.pctl->rval, aeCreateFileEvent(eventLoop, cmd.fd, cmd.mask, cmd.fproc, cmd.clientData));
cmd.pctl->cv.notify_all();
cmd.pctl->mutexcv.unlock();
}
else
{
aeCreateFileEvent(eventLoop, cmd.fd, cmd.mask, cmd.fproc, cmd.clientData);
}
}
break;
case AE_ASYNC_OP::PostFunction:
{
/* The callback runs with the global lock held; the lock is released
 * when 'ulock' goes out of scope at the end of this case block. */
std::unique_lock<decltype(g_lock)> ulock(g_lock);
((aePostFunctionProc*)cmd.proc)(cmd.clientData);
break;
}
case AE_ASYNC_OP::PostCppFunction:
{
/* Lock order: the control mutex (if any) first, then the global
 * lock. The global lock stays held until the end of this case
 * block, i.e. also while the poster is notified and while the
 * std::function is deleted. */
if (cmd.pctl != nullptr)
cmd.pctl->mutexcv.lock();
std::unique_lock<decltype(g_lock)> ulock(g_lock);
(*cmd.pfn)();
if (cmd.pctl != nullptr)
{
cmd.pctl->cv.notify_all();
cmd.pctl->mutexcv.unlock();
}
/* The std::function was heap allocated by the poster. */
delete cmd.pfn;
}
break;
}
}
}
/* Register a file event on 'eventLoop' from any thread.
 *
 * When called from the thread that owns 'eventLoop' this is a plain
 * aeCreateFileEvent(). Otherwise a CreateFileEvent command is written to the
 * loop's command pipe and executed later by aeProcessCmd() on the owning
 * thread.
 *
 * fSynchronous: when non-zero the call blocks until the owning thread has
 * processed the command and returns the result of its aeCreateFileEvent().
 * When zero the call returns as soon as the command is queued.
 *
 * Returns AE_ERR if the command could not be queued (the pipe write did not
 * accept the whole record), otherwise AE_OK or, in synchronous mode, the
 * remote result. */
int aeCreateRemoteFileEvent(aeEventLoop *eventLoop, int fd, int mask,
        aeFileProc *proc, void *clientData, int fSynchronous)
{
    if (eventLoop == g_eventLoopThisThread)
        return aeCreateFileEvent(eventLoop, fd, mask, proc, clientData);

    int ret = AE_OK;

    aeCommand cmd;
    cmd.op = AE_ASYNC_OP::CreateFileEvent;
    cmd.fd = fd;
    cmd.mask = mask;
    cmd.fproc = proc;
    cmd.clientData = clientData;
    cmd.pctl = nullptr;

    /* The control block only exists for synchronous requests, so the lock
     * must not be bound to it unconditionally. The mutex is taken BEFORE the
     * command is queued: the owning thread has to acquire it before it can
     * notify, which it can only do once we are waiting. */
    std::unique_lock<std::mutex> ulock;
    if (fSynchronous)
    {
        cmd.pctl = new aeCommandControl();
        ulock = std::unique_lock<std::mutex>(cmd.pctl->mutexcv);
    }

    auto size = write(eventLoop->fdCmdWrite, &cmd, sizeof(cmd));
    if (size != sizeof(cmd))
    {
        /* Nothing was queued, so nobody will ever notify us: waiting here
         * would block forever. */
        AE_ASSERT(errno == EAGAIN);
        ret = AE_ERR;
    }
    else if (fSynchronous)
    {
        /* NOTE(review): there is no predicate, so a spurious wakeup would
         * be taken for completion; aeCommandControl has no "done" flag to
         * test against. */
        cmd.pctl->cv.wait(ulock);
        ret = cmd.pctl->rval;
    }

    if (fSynchronous)
    {
        /* A mutex must not be destroyed while it is locked. */
        ulock.unlock();
        delete cmd.pctl;
    }
    return ret;
}
/* Run proc(arg) in the context of 'eventLoop'.
 *
 * From the thread that owns the loop the callback is invoked right away;
 * from any other thread a PostFunction command is written to the loop's
 * command pipe and the call returns without waiting for it to execute.
 * Always returns AE_OK; a failed pipe write trips AE_ASSERT. */
int aePostFunction(aeEventLoop *eventLoop, aePostFunctionProc *proc, void *arg)
{
    if (eventLoop != g_eventLoopThisThread)
    {
        aeCommand cmd;
        cmd.op = AE_ASYNC_OP::PostFunction;
        cmd.proc = proc;
        cmd.clientData = arg;

        auto cbWritten = write(eventLoop->fdCmdWrite, &cmd, sizeof(cmd));
        AE_ASSERT(cbWritten == sizeof(cmd));
        return AE_OK;
    }

    /* Already on the owning thread: no need to go through the pipe. */
    proc(arg);
    return AE_OK;
}
/* Run fn on the thread that owns eventLoop.
 *
 * On the owning thread fn is invoked immediately. From any other thread a
 * heap copy of fn is handed to the loop through its command pipe; the
 * receiving side deletes that copy after running it.
 *
 * fSynchronous == false: returns AE_OK as soon as the command is queued.
 * fSynchronous == true:  blocks until the owning thread has run fn, then
 *                        returns the control block's rval.
 *                        NOTE(review): the PostCppFunction handler in this
 *                        file does not store rval, so this is whatever
 *                        aeCommandControl initialises it to - confirm.
 *
 * A short or failed pipe write trips AE_ASSERT. */
int aePostFunction(aeEventLoop *eventLoop, std::function<void()> fn, bool fSynchronous)
{
    if (eventLoop == g_eventLoopThisThread)
    {
        fn();
        return AE_OK;
    }

    aeCommand cmd;
    cmd.op = AE_ASYNC_OP::PostCppFunction;
    cmd.pfn = new std::function<void()>(fn);
    cmd.pctl = nullptr;

    /* The control block only exists for synchronous calls, so the lock is
     * bound to its mutex only in that case. The mutex is taken *before* the
     * command is written: the handler locks the same mutex before notifying,
     * so it cannot signal until we are parked in wait(). */
    std::unique_lock<std::mutex> ulock;
    if (fSynchronous)
    {
        cmd.pctl = new aeCommandControl();
        ulock = std::unique_lock<std::mutex>(cmd.pctl->mutexcv);
    }

    auto size = write(eventLoop->fdCmdWrite, &cmd, sizeof(cmd));
    AE_ASSERT(size == sizeof(cmd));

    int ret = AE_OK;
    if (fSynchronous)
    {
        /* NOTE(review): wait() has no predicate because aeCommandControl
         * exposes no completion flag here; a spurious wakeup would return
         * before fn has run. Consider adding a "done" flag. */
        cmd.pctl->cv.wait(ulock);
        ret = cmd.pctl->rval;

        /* Release the mutex before destroying it together with the block. */
        ulock.unlock();
        delete cmd.pctl;
    }
    return ret;
}
aeEventLoop *aeCreateEventLoop(int setsize) {
aeEventLoop *eventLoop;
int i;
@ -83,6 +297,18 @@ aeEventLoop *aeCreateEventLoop(int setsize) {
* vector with it. */
for (i = 0; i < setsize; i++)
eventLoop->events[i].mask = AE_NONE;
fastlock_init(&eventLoop->flock);
int rgfd[2];
if (pipe(rgfd) < 0)
goto err;
eventLoop->fdCmdRead = rgfd[0];
eventLoop->fdCmdWrite = rgfd[1];
fcntl(eventLoop->fdCmdWrite, F_SETFL, O_NONBLOCK);
fcntl(eventLoop->fdCmdRead, F_SETFL, O_NONBLOCK);
eventLoop->cevents = 0;
aeCreateFileEvent(eventLoop, eventLoop->fdCmdRead, AE_READABLE|AE_READ_THREADSAFE, aeProcessCmd, NULL);
return eventLoop;
err:
@ -107,6 +333,7 @@ int aeGetSetSize(aeEventLoop *eventLoop) {
*
* Otherwise AE_OK is returned and the operation is successful. */
int aeResizeSetSize(aeEventLoop *eventLoop, int setsize) {
AE_ASSERT(g_eventLoopThisThread == NULL || g_eventLoopThisThread == eventLoop);
int i;
if (setsize == eventLoop->setsize) return AE_OK;
@ -129,19 +356,25 @@ extern "C" void aeDeleteEventLoop(aeEventLoop *eventLoop) {
zfree(eventLoop->events);
zfree(eventLoop->fired);
zfree(eventLoop);
fastlock_free(&eventLoop->flock);
close(eventLoop->fdCmdRead);
close(eventLoop->fdCmdWrite);
}
/* Request that the event loop stop by setting its stop flag, which aeMain()
 * tests at the top of every iteration.
 * Asserts that the caller is the thread bound to eventLoop, or that no loop
 * has been bound to the calling thread yet. */
extern "C" void aeStop(aeEventLoop *eventLoop) {
AE_ASSERT(g_eventLoopThisThread == NULL || g_eventLoopThisThread == eventLoop);
eventLoop->stop = 1;
}
extern "C" int aeCreateFileEvent(aeEventLoop *eventLoop, int fd, int mask,
aeFileProc *proc, void *clientData)
{
AE_ASSERT(g_eventLoopThisThread == NULL || g_eventLoopThisThread == eventLoop);
if (fd >= eventLoop->setsize) {
errno = ERANGE;
return AE_ERR;
}
aeFileEvent *fe = &eventLoop->events[fd];
if (aeApiAddEvent(eventLoop, fd, mask) == -1)
@ -155,8 +388,21 @@ extern "C" int aeCreateFileEvent(aeEventLoop *eventLoop, int fd, int mask,
return AE_OK;
}
/* Remove the given event mask for fd from eventLoop, from any thread.
 *
 * On the loop's own thread the event is deleted right away. Otherwise a
 * DeleteFileEvent command is queued on the loop's command pipe and this
 * function returns without waiting for it to be processed. A short or failed
 * pipe write trips AE_ASSERT. */
void aeDeleteFileEventAsync(aeEventLoop *eventLoop, int fd, int mask)
{
    if (eventLoop == g_eventLoopThisThread)
    {
        aeDeleteFileEvent(eventLoop, fd, mask);
        return;
    }

    aeCommand request;
    request.op = AE_ASYNC_OP::DeleteFileEvent;
    request.fd = fd;
    request.mask = mask;

    auto cbWritten = write(eventLoop->fdCmdWrite, &request, sizeof(request));
    AE_ASSERT(cbWritten == sizeof(request));
}
extern "C" void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask)
{
AE_ASSERT(g_eventLoopThisThread == NULL || g_eventLoopThisThread == eventLoop);
if (fd >= eventLoop->setsize) return;
aeFileEvent *fe = &eventLoop->events[fd];
if (fe->mask == AE_NONE) return;
@ -165,6 +411,9 @@ extern "C" void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask)
* is removed. */
if (mask & AE_WRITABLE) mask |= AE_BARRIER;
if (mask & AE_WRITABLE) mask |= AE_WRITE_THREADSAFE;
if (mask & AE_READABLE) mask |= AE_READ_THREADSAFE;
aeApiDelEvent(eventLoop, fd, mask);
fe->mask = fe->mask & (~mask);
if (fd == eventLoop->maxfd && fe->mask == AE_NONE) {
@ -211,6 +460,7 @@ extern "C" long long aeCreateTimeEvent(aeEventLoop *eventLoop, long long millise
aeTimeProc *proc, void *clientData,
aeEventFinalizerProc *finalizerProc)
{
AE_ASSERT(g_eventLoopThisThread == NULL || g_eventLoopThisThread == eventLoop);
long long id = eventLoop->timeEventNextId++;
aeTimeEvent *te;
@ -231,6 +481,7 @@ extern "C" long long aeCreateTimeEvent(aeEventLoop *eventLoop, long long millise
extern "C" int aeDeleteTimeEvent(aeEventLoop *eventLoop, long long id)
{
AE_ASSERT(g_eventLoopThisThread == NULL || g_eventLoopThisThread == eventLoop);
aeTimeEvent *te = eventLoop->timeEventHead;
while(te) {
if (te->id == id) {
@ -255,6 +506,7 @@ extern "C" int aeDeleteTimeEvent(aeEventLoop *eventLoop, long long id)
*/
static aeTimeEvent *aeSearchNearestTimer(aeEventLoop *eventLoop)
{
AE_ASSERT(g_eventLoopThisThread == NULL || g_eventLoopThisThread == eventLoop);
aeTimeEvent *te = eventLoop->timeEventHead;
aeTimeEvent *nearest = NULL;
@ -270,6 +522,7 @@ static aeTimeEvent *aeSearchNearestTimer(aeEventLoop *eventLoop)
/* Process time events */
static int processTimeEvents(aeEventLoop *eventLoop) {
std::unique_lock<decltype(g_lock)> ulock(g_lock);
int processed = 0;
aeTimeEvent *te;
long long maxId;
@ -343,6 +596,62 @@ static int processTimeEvents(aeEventLoop *eventLoop) {
return processed;
}
/* Dispatch the read/write handlers registered for one fired descriptor.
 *
 * fe   - the registration for fd; its mask and handlers are re-read before
 *        every call because an earlier handler may have changed them.
 * mask - the readiness bits reported for fd.
 * fd   - the descriptor that fired.
 *
 * Each handler runs with g_lock held unless the registration carries the
 * matching AE_READ_THREADSAFE / AE_WRITE_THREADSAFE bit. That bit, when set,
 * is also OR-ed into the mask argument passed to the handler. */
extern "C" void ProcessEventCore(aeEventLoop *eventLoop, aeFileEvent *fe, int mask, int fd)
{
/* Declares a scoped g_lock guard named ulock in the current block and locks
 * it only when the registration lacks tsmask. The guard is released when the
 * enclosing block ends, i.e. right after the handler call that follows. */
#define LOCK_IF_NECESSARY(fe, tsmask) \
std::unique_lock<decltype(g_lock)> ulock(g_lock, std::defer_lock); \
if (!(fe->mask & tsmask)) \
ulock.lock()
int fired = 0; /* Number of events fired for current fd. */
/* Normally we execute the readable event first, and the writable
 * event later. This is useful as sometimes we may be able
 * to serve the reply of a query immediately after processing the
 * query.
 *
 * However if AE_BARRIER is set in the mask, our application is
 * asking us to do the reverse: never fire the writable event
 * after the readable. In such a case, we invert the calls.
 * This is useful when, for instance, we want to do things
 * in the beforeSleep() hook, like fsyncing a file to disk,
 * before replying to a client. */
int invert = fe->mask & AE_BARRIER;
/* Note the "fe->mask & mask & ..." code: maybe an already
 * processed event removed an element that fired and that we have
 * not processed yet, so we check if the event is still valid.
 *
 * Fire the readable event if the call sequence is not
 * inverted. */
if (!invert && fe->mask & mask & AE_READABLE) {
LOCK_IF_NECESSARY(fe, AE_READ_THREADSAFE);
fe->rfileProc(eventLoop,fd,fe->clientData,mask | (fe->mask & AE_READ_THREADSAFE));
fired++;
}
/* Fire the writable event. Skipped when a handler already fired and the
 * same function is registered for both directions, so it is not invoked
 * twice for one fd. */
if (fe->mask & mask & AE_WRITABLE) {
if (!fired || fe->wfileProc != fe->rfileProc) {
LOCK_IF_NECESSARY(fe, AE_WRITE_THREADSAFE);
fe->wfileProc(eventLoop,fd,fe->clientData,mask | (fe->mask & AE_WRITE_THREADSAFE));
fired++;
}
}
/* If we have to invert the call, fire the readable event now
 * after the writable one. */
if (invert && fe->mask & mask & AE_READABLE) {
if (!fired || fe->wfileProc != fe->rfileProc) {
LOCK_IF_NECESSARY(fe, AE_READ_THREADSAFE);
fe->rfileProc(eventLoop,fd,fe->clientData,mask | (fe->mask & AE_READ_THREADSAFE));
fired++;
}
}
#undef LOCK_IF_NECESSARY
}
/* Process every pending time event, then every pending file event
* (that may be registered by time event callbacks just processed).
* Without special flags the function sleeps until some file event
@ -359,6 +668,7 @@ static int processTimeEvents(aeEventLoop *eventLoop) {
* The function returns the number of events processed. */
int aeProcessEvents(aeEventLoop *eventLoop, int flags)
{
AE_ASSERT(g_eventLoopThisThread == NULL || g_eventLoopThisThread == eventLoop);
int processed = 0, numevents;
/* Nothing to do? return ASAP */
@ -413,55 +723,19 @@ int aeProcessEvents(aeEventLoop *eventLoop, int flags)
numevents = aeApiPoll(eventLoop, tvp);
/* After sleep callback. */
if (eventLoop->aftersleep != NULL && flags & AE_CALL_AFTER_SLEEP)
if (eventLoop->aftersleep != NULL && flags & AE_CALL_AFTER_SLEEP) {
std::unique_lock<decltype(g_lock)> ulock(g_lock, std::defer_lock);
if (!(eventLoop->beforesleepFlags & AE_SLEEP_THREADSAFE))
ulock.lock();
eventLoop->aftersleep(eventLoop);
}
for (j = 0; j < numevents; j++) {
aeFileEvent *fe = &eventLoop->events[eventLoop->fired[j].fd];
int mask = eventLoop->fired[j].mask;
int fd = eventLoop->fired[j].fd;
int fired = 0; /* Number of events fired for current fd. */
/* Normally we execute the readable event first, and the writable
* event laster. This is useful as sometimes we may be able
* to serve the reply of a query immediately after processing the
* query.
*
* However if AE_BARRIER is set in the mask, our application is
* asking us to do the reverse: never fire the writable event
* after the readable. In such a case, we invert the calls.
* This is useful when, for instance, we want to do things
* in the beforeSleep() hook, like fsynching a file to disk,
* before replying to a client. */
int invert = fe->mask & AE_BARRIER;
/* Note the "fe->mask & mask & ..." code: maybe an already
* processed event removed an element that fired and we still
* didn't processed, so we check if the event is still valid.
*
* Fire the readable event if the call sequence is not
* inverted. */
if (!invert && fe->mask & mask & AE_READABLE) {
fe->rfileProc(eventLoop,fd,fe->clientData,mask);
fired++;
}
/* Fire the writable event. */
if (fe->mask & mask & AE_WRITABLE) {
if (!fired || fe->wfileProc != fe->rfileProc) {
fe->wfileProc(eventLoop,fd,fe->clientData,mask);
fired++;
}
}
/* If we have to invert the call, fire the readable event now
* after the writable one. */
if (invert && fe->mask & mask & AE_READABLE) {
if (!fired || fe->wfileProc != fe->rfileProc) {
fe->rfileProc(eventLoop,fd,fe->clientData,mask);
fired++;
}
}
ProcessEventCore(eventLoop, fe, mask, fd);
processed++;
}
@ -470,6 +744,7 @@ int aeProcessEvents(aeEventLoop *eventLoop, int flags)
if (flags & AE_TIME_EVENTS)
processed += processTimeEvents(eventLoop);
eventLoop->cevents += processed;
return processed; /* return the number of processed file/time events */
}
@ -497,10 +772,17 @@ int aeWait(int fd, int mask, long long milliseconds) {
void aeMain(aeEventLoop *eventLoop) {
eventLoop->stop = 0;
g_eventLoopThisThread = eventLoop;
while (!eventLoop->stop) {
if (eventLoop->beforesleep != NULL)
if (eventLoop->beforesleep != NULL) {
std::unique_lock<decltype(g_lock)> ulock(g_lock, std::defer_lock);
if (!(eventLoop->beforesleepFlags & AE_SLEEP_THREADSAFE))
ulock.lock();
eventLoop->beforesleep(eventLoop);
}
AE_ASSERT(!aeThreadOwnsLock()); // we should have relinquished it after processing
aeProcessEvents(eventLoop, AE_ALL_EVENTS|AE_CALL_AFTER_SLEEP);
AE_ASSERT(!aeThreadOwnsLock()); // we should have relinquished it after processing
}
}
@ -508,10 +790,32 @@ const char *aeGetApiName(void) {
return aeApiName();
}
void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep) {
void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep, int flags) {
eventLoop->beforesleep = beforesleep;
eventLoop->beforesleepFlags = flags;
}
void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *aftersleep) {
void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *aftersleep, int flags) {
eventLoop->aftersleep = aftersleep;
eventLoop->aftersleepFlags = flags;
}
/* Acquire g_lock, the lock the event loop holds around handlers that are not
 * flagged threadsafe. Pair every call with aeReleaseLock(). */
void aeAcquireLock()
{
g_lock.lock();
}
/* Attempt to take g_lock and return the result of g_lock.try_lock() as an
 * int. Presumably non-zero means the lock was acquired (usual try_lock
 * convention) - confirm against the g_lock type. */
int aeTryAcquireLock()
{
return g_lock.try_lock();
}
/* Release g_lock; the counterpart of aeAcquireLock() / a successful
 * aeTryAcquireLock(). */
void aeReleaseLock()
{
g_lock.unlock();
}
/* Report g_lock.fOwnLock() as an int. Used in assertions (e.g. in aeMain)
 * to check whether the calling thread currently holds g_lock. */
int aeThreadOwnsLock()
{
return g_lock.fOwnLock();
}

View File

@ -33,7 +33,11 @@
#ifndef __AE_H__
#define __AE_H__
#ifdef __cplusplus
#include <functional>
#endif
#include <time.h>
#include "fastlock.h"
#ifdef __cplusplus
extern "C" {
@ -50,7 +54,9 @@ extern "C" {
loop iteration. Useful when you want to persist
things to disk before sending replies, and want
to do that in a group fashion. */
#define AE_THREADSAFE 8 /* Ok to run concurrently */
#define AE_READ_THREADSAFE 8
#define AE_WRITE_THREADSAFE 16
#define AE_SLEEP_THREADSAFE 32
#define AE_FILE_EVENTS 1
#define AE_TIME_EVENTS 2
@ -71,6 +77,7 @@ typedef void aeFileProc(struct aeEventLoop *eventLoop, int fd, void *clientData,
typedef int aeTimeProc(struct aeEventLoop *eventLoop, long long id, void *clientData);
typedef void aeEventFinalizerProc(struct aeEventLoop *eventLoop, void *clientData);
typedef void aeBeforeSleepProc(struct aeEventLoop *eventLoop);
typedef void aePostFunctionProc(void *pvArgs);
/* File event structure */
typedef struct aeFileEvent {
@ -110,16 +117,33 @@ typedef struct aeEventLoop {
int stop;
void *apidata; /* This is used for polling API specific data */
aeBeforeSleepProc *beforesleep;
int beforesleepFlags;
aeBeforeSleepProc *aftersleep;
int aftersleepFlags;
struct fastlock flock;
int fdCmdWrite;
int fdCmdRead;
int cevents;
} aeEventLoop;
/* Prototypes */
aeEventLoop *aeCreateEventLoop(int setsize);
int aePostFunction(aeEventLoop *eventLoop, aePostFunctionProc *proc, void *arg);
#ifdef __cplusplus
} // EXTERN C
int aePostFunction(aeEventLoop *eventLoop, std::function<void()> fn, bool fSynchronous = false);
extern "C" {
#endif
void aeDeleteEventLoop(aeEventLoop *eventLoop);
void aeStop(aeEventLoop *eventLoop);
int aeCreateFileEvent(aeEventLoop *eventLoop, int fd, int mask,
aeFileProc *proc, void *clientData);
int aeCreateRemoteFileEvent(aeEventLoop *eventLoop, int fd, int mask,
aeFileProc *proc, void *clientData, int fSynchronous);
void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask);
void aeDeleteFileEventAsync(aeEventLoop *eventLoop, int fd, int mask);
int aeGetFileEvents(aeEventLoop *eventLoop, int fd);
long long aeCreateTimeEvent(aeEventLoop *eventLoop, long long milliseconds,
aeTimeProc *proc, void *clientData,
@ -129,11 +153,16 @@ int aeProcessEvents(aeEventLoop *eventLoop, int flags);
int aeWait(int fd, int mask, long long milliseconds);
void aeMain(aeEventLoop *eventLoop);
const char *aeGetApiName(void);
void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep);
void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *aftersleep);
void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep, int flags);
void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *aftersleep, int flags);
int aeGetSetSize(aeEventLoop *eventLoop);
int aeResizeSetSize(aeEventLoop *eventLoop, int setsize);
void aeAcquireLock();
int aeTryAcquireLock();
void aeReleaseLock();
int aeThreadOwnsLock();
#ifdef __cplusplus
}
#endif

View File

@ -83,7 +83,11 @@ static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
if (mask & AE_READABLE) ee.events |= EPOLLIN;
if (mask & AE_WRITABLE) ee.events |= EPOLLOUT;
ee.data.fd = fd;
if (epoll_ctl(state->epfd,op,fd,&ee) == -1) return -1;
if (epoll_ctl(state->epfd,op,fd,&ee) == -1)
{
perror("epoll_ctl failed");
return -1;
}
return 0;
}

View File

@ -246,6 +246,16 @@ static int anetSetReuseAddr(char *err, int fd) {
return ANET_OK;
}
/* Enable SO_REUSEPORT on fd so that several threads can each listen() on the
 * same address and have incoming connections balanced between them.
 * Returns ANET_OK on success; on failure stores a message in err and
 * returns ANET_ERR. */
static int anetSetReusePort(char *err, int fd) {
    const int enable = 1;
    int rc = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &enable, sizeof(enable));

    if (rc != -1)
        return ANET_OK;

    anetSetError(err, "setsockopt SO_REUSEPORT: %s", strerror(errno));
    return ANET_ERR;
}
static int anetCreateSocket(char *err, int domain) {
int s;
if ((s = socket(domain, SOCK_STREAM, 0)) == -1) {
@ -265,6 +275,7 @@ static int anetCreateSocket(char *err, int domain) {
#define ANET_CONNECT_NONE 0
#define ANET_CONNECT_NONBLOCK 1
#define ANET_CONNECT_BE_BINDING 2 /* Best effort binding. */
#define ANET_CONNECT_REUSEPORT 4
static int anetTcpGenericConnect(char *err, char *addr, int port,
char *source_addr, int flags)
{
@ -287,7 +298,10 @@ static int anetTcpGenericConnect(char *err, char *addr, int port,
* the next entry in servinfo. */
if ((s = socket(p->ai_family,p->ai_socktype,p->ai_protocol)) == -1)
continue;
if (anetSetReuseAddr(err,s) == ANET_ERR) goto error;
if (anetSetReuseAddr(err,s) == ANET_ERR)
goto error;
if (flags & ANET_CONNECT_REUSEPORT && anetSetReusePort(err, s) != ANET_OK)
goto error;
if (flags & ANET_CONNECT_NONBLOCK && anetNonBlock(err,s) != ANET_OK)
goto error;
if (source_addr) {
@ -462,7 +476,7 @@ static int anetV6Only(char *err, int s) {
return ANET_OK;
}
static int _anetTcpServer(char *err, int port, char *bindaddr, int af, int backlog)
static int _anetTcpServer(char *err, int port, char *bindaddr, int af, int backlog, int fReusePort)
{
int s = -1, rv;
char _port[6]; /* strlen("65535") */
@ -484,6 +498,7 @@ static int _anetTcpServer(char *err, int port, char *bindaddr, int af, int backl
if (af == AF_INET6 && anetV6Only(err,s) == ANET_ERR) goto error;
if (anetSetReuseAddr(err,s) == ANET_ERR) goto error;
if (fReusePort && anetSetReusePort(err,s) == ANET_ERR) goto error;
if (anetListen(err,s,p->ai_addr,p->ai_addrlen,backlog) == ANET_ERR) s = ANET_ERR;
goto end;
}
@ -500,14 +515,14 @@ end:
return s;
}
int anetTcpServer(char *err, int port, char *bindaddr, int backlog)
int anetTcpServer(char *err, int port, char *bindaddr, int backlog, int fReusePort)
{
return _anetTcpServer(err, port, bindaddr, AF_INET, backlog);
return _anetTcpServer(err, port, bindaddr, AF_INET, backlog, fReusePort);
}
int anetTcp6Server(char *err, int port, char *bindaddr, int backlog)
int anetTcp6Server(char *err, int port, char *bindaddr, int backlog, int fReusePort)
{
return _anetTcpServer(err, port, bindaddr, AF_INET6, backlog);
return _anetTcpServer(err, port, bindaddr, AF_INET6, backlog, fReusePort);
}
int anetUnixServer(char *err, char *path, mode_t perm, int backlog)

View File

@ -33,6 +33,10 @@
#include <sys/types.h>
#ifdef __cplusplus
extern "C" {
#endif
#define ANET_OK 0
#define ANET_ERR -1
#define ANET_ERR_LEN 256
@ -58,8 +62,8 @@ int anetUnixNonBlockConnect(char *err, char *path);
int anetRead(int fd, char *buf, int count);
int anetResolve(char *err, char *host, char *ipbuf, size_t ipbuf_len);
int anetResolveIP(char *err, char *host, char *ipbuf, size_t ipbuf_len);
int anetTcpServer(char *err, int port, char *bindaddr, int backlog);
int anetTcp6Server(char *err, int port, char *bindaddr, int backlog);
int anetTcpServer(char *err, int port, char *bindaddr, int backlog, int fReusePort);
int anetTcp6Server(char *err, int port, char *bindaddr, int backlog, int fReusePort);
int anetUnixServer(char *err, char *path, mode_t perm, int backlog);
int anetTcpAccept(char *err, int serversock, char *ip, size_t ip_len, int *port);
int anetUnixAccept(char *err, int serversock);
@ -77,4 +81,8 @@ int anetFormatAddr(char *fmt, size_t fmt_len, char *ip, int port);
int anetFormatPeer(int fd, char *fmt, size_t fmt_len);
int anetFormatSock(int fd, char *fmt, size_t fmt_len);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -96,6 +96,8 @@ void aofChildWriteDiffData(aeEventLoop *el, int fd, void *privdata, int mask) {
listNode *ln;
aofrwblock *block;
ssize_t nwritten;
serverAssert(aeThreadOwnsLock());
UNUSED(el);
UNUSED(fd);
UNUSED(privdata);
@ -105,7 +107,7 @@ void aofChildWriteDiffData(aeEventLoop *el, int fd, void *privdata, int mask) {
ln = listFirst(server.aof_rewrite_buf_blocks);
block = ln ? ln->value : NULL;
if (server.aof_stop_sending_diff || !block) {
aeDeleteFileEvent(server.el,server.aof_pipe_write_data_to_child,
aeDeleteFileEvent(el,server.aof_pipe_write_data_to_child,
AE_WRITABLE);
return;
}
@ -162,8 +164,8 @@ void aofRewriteBufferAppend(unsigned char *s, unsigned long len) {
/* Install a file event to send data to the rewrite child if there is
* not one already. */
if (aeGetFileEvents(server.el,server.aof_pipe_write_data_to_child) == 0) {
aeCreateFileEvent(server.el, server.aof_pipe_write_data_to_child,
if (aeGetFileEvents(serverTL->el,server.aof_pipe_write_data_to_child) == 0) {
aeCreateFileEvent(serverTL->el, server.aof_pipe_write_data_to_child,
AE_WRITABLE, aofChildWriteDiffData, NULL);
}
}
@ -631,6 +633,7 @@ struct client *createFakeClient(void) {
selectDb(c,0);
c->fd = -1;
c->iel = IDX_EVENT_LOOP_MAIN;
c->name = NULL;
c->querybuf = sdsempty();
c->querybuf_peak = 0;
@ -638,6 +641,7 @@ struct client *createFakeClient(void) {
c->argv = NULL;
c->bufpos = 0;
c->flags = 0;
c->fPendingAsyncWrite = FALSE;
c->btype = BLOCKED_NONE;
/* We set the fake client as a slave waiting for the synchronization
* so that Redis will not try to send replies to this client. */
@ -651,6 +655,8 @@ struct client *createFakeClient(void) {
c->puser = NULL;
listSetFreeMethod(c->reply,freeClientReplyValue);
listSetDupMethod(c->reply,dupClientReplyValue);
fastlock_init(&c->lock);
fastlock_lock(&c->lock);
initClientMultiState(c);
return c;
}
@ -668,6 +674,8 @@ void freeFakeClient(struct client *c) {
listRelease(c->reply);
listRelease(c->watched_keys);
freeClientMultiState(c);
fastlock_unlock(&c->lock);
fastlock_free(&c->lock);
zfree(c);
}
@ -682,6 +690,7 @@ int loadAppendOnlyFile(char *filename) {
long loops = 0;
off_t valid_up_to = 0; /* Offset of latest well-formed command loaded. */
off_t valid_before_multi = 0; /* Offset before MULTI command loaded. */
serverAssert(serverTL != NULL); // This happens early in boot, ensure serverTL was setup
if (fp == NULL) {
serverLog(LL_WARNING,"Fatal error: can't open the append log file for reading: %s",strerror(errno));
@ -738,7 +747,7 @@ int loadAppendOnlyFile(char *filename) {
/* Serve the clients from time to time */
if (!(loops++ % 1000)) {
loadingProgress(ftello(fp));
processEventsWhileBlocked();
processEventsWhileBlocked(serverTL - server.rgthreadvar);
}
if (fgets(buf,sizeof(buf),fp) == NULL) {
@ -1470,7 +1479,7 @@ void aofChildPipeReadable(aeEventLoop *el, int fd, void *privdata, int mask) {
}
/* Remove the handler since this can be called only one time during a
* rewrite. */
aeDeleteFileEvent(server.el,server.aof_pipe_read_ack_from_child,AE_READABLE);
aeDeleteFileEventAsync(server.rgthreadvar[IDX_EVENT_LOOP_MAIN].el,server.aof_pipe_read_ack_from_child,AE_READABLE);
}
/* Create the pipes used for parent - child process IPC during rewrite.
@ -1488,12 +1497,13 @@ int aofCreatePipes(void) {
/* Parent -> children data is non blocking. */
if (anetNonBlock(NULL,fds[0]) != ANET_OK) goto error;
if (anetNonBlock(NULL,fds[1]) != ANET_OK) goto error;
if (aeCreateFileEvent(server.el, fds[2], AE_READABLE, aofChildPipeReadable, NULL) == AE_ERR) goto error;
if (aeCreateFileEvent(serverTL->el, fds[2], AE_READABLE, aofChildPipeReadable, NULL) == AE_ERR) goto error;
server.aof_pipe_write_data_to_child = fds[1];
server.aof_pipe_read_data_from_parent = fds[0];
server.aof_pipe_write_ack_to_parent = fds[3];
server.aof_pipe_read_ack_from_child = fds[2];
server.el_alf_pip_read_ack_from_child = serverTL->el;
server.aof_pipe_write_ack_to_child = fds[5];
server.aof_pipe_read_ack_from_parent = fds[4];
server.aof_stop_sending_diff = 0;
@ -1507,8 +1517,8 @@ error:
}
void aofClosePipes(void) {
aeDeleteFileEvent(server.el,server.aof_pipe_read_ack_from_child,AE_READABLE);
aeDeleteFileEvent(server.el,server.aof_pipe_write_data_to_child,AE_WRITABLE);
aeDeleteFileEventAsync(server.el_alf_pip_read_ack_from_child,server.aof_pipe_read_ack_from_child,AE_READABLE);
aeDeleteFileEventAsync(server.rgthreadvar[IDX_EVENT_LOOP_MAIN].el,server.aof_pipe_write_data_to_child,AE_WRITABLE);
close(server.aof_pipe_write_data_to_child);
close(server.aof_pipe_read_data_from_parent);
close(server.aof_pipe_write_ack_to_parent);

View File

@ -100,6 +100,7 @@ int getTimeoutFromObjectOrReply(client *c, robj *object, mstime_t *timeout, int
* flag is set client query buffer is not longer processed, but accumulated,
* and will be processed when the client is unblocked. */
void blockClient(client *c, int btype) {
serverAssert(aeThreadOwnsLock());
c->flags |= CLIENT_BLOCKED;
c->btype = btype;
server.blocked_clients++;
@ -109,15 +110,22 @@ void blockClient(client *c, int btype) {
/* This function is called in the beforeSleep() function of the event loop
* in order to process the pending input buffer of clients that were
* unblocked after a blocking operation. */
void processUnblockedClients(void) {
void processUnblockedClients(int iel) {
serverAssert(aeThreadOwnsLock());
listNode *ln;
client *c;
list *unblocked_clients = server.rgthreadvar[iel].unblocked_clients;
serverAssert(iel == (serverTL - server.rgthreadvar));
while (listLength(server.unblocked_clients)) {
ln = listFirst(server.unblocked_clients);
while (listLength(unblocked_clients)) {
ln = listFirst(unblocked_clients);
serverAssert(ln != NULL);
c = ln->value;
listDelNode(server.unblocked_clients,ln);
listDelNode(unblocked_clients,ln);
AssertCorrectThread(c);
fastlock_lock(&c->lock);
c->flags &= ~CLIENT_UNBLOCKED;
/* Process remaining data in the input buffer, unless the client
@ -129,6 +137,7 @@ void processUnblockedClients(void) {
processInputBufferAndReplicate(c);
}
}
fastlock_unlock(&c->lock);
}
}
@ -151,15 +160,19 @@ void processUnblockedClients(void) {
void queueClientForReprocessing(client *c) {
/* The client may already be into the unblocked list because of a previous
* blocking operation, don't add back it into the list multiple times. */
serverAssert(aeThreadOwnsLock());
fastlock_lock(&c->lock);
if (!(c->flags & CLIENT_UNBLOCKED)) {
c->flags |= CLIENT_UNBLOCKED;
listAddNodeTail(server.unblocked_clients,c);
listAddNodeTail(server.rgthreadvar[c->iel].unblocked_clients,c);
}
fastlock_unlock(&c->lock);
}
/* Unblock a client calling the right function depending on the kind
* of operation the client is blocking for. */
void unblockClient(client *c) {
serverAssert(aeThreadOwnsLock());
if (c->btype == BLOCKED_LIST ||
c->btype == BLOCKED_ZSET ||
c->btype == BLOCKED_STREAM) {
@ -205,20 +218,23 @@ void replyToBlockedClientTimedOut(client *c) {
* The semantics is to send an -UNBLOCKED error to the client, disconnecting
* it at the same time. */
void disconnectAllBlockedClients(void) {
serverAssert(aeThreadOwnsLock());
listNode *ln;
listIter li;
listRewind(server.clients,&li);
while((ln = listNext(&li))) {
client *c = listNodeValue(ln);
fastlock_lock(&c->lock);
if (c->flags & CLIENT_BLOCKED) {
addReplySds(c,sdsnew(
addReplySdsAsync(c,sdsnew(
"-UNBLOCKED force unblock from blocking operation, "
"instance state changed (master -> replica?)\r\n"));
unblockClient(c);
c->flags |= CLIENT_CLOSE_AFTER_REPLY;
}
fastlock_unlock(&c->lock);
}
}
@ -244,6 +260,7 @@ void disconnectAllBlockedClients(void) {
* be used only for a single type, like virtually any Redis application will
* do, the function is already fair. */
void handleClientsBlockedOnKeys(void) {
serverAssert(aeThreadOwnsLock());
while(listLength(server.ready_keys) != 0) {
list *l;
@ -297,6 +314,7 @@ void handleClientsBlockedOnKeys(void) {
* freed by the next unblockClient()
* call. */
if (dstkey) incrRefCount(dstkey);
fastlock_lock(&receiver->lock);
unblockClient(receiver);
if (serveClientBlockedOnList(receiver,
@ -309,6 +327,7 @@ void handleClientsBlockedOnKeys(void) {
}
if (dstkey) decrRefCount(dstkey);
fastlock_unlock(&receiver->lock);
decrRefCount(value);
} else {
break;
@ -348,6 +367,7 @@ void handleClientsBlockedOnKeys(void) {
continue;
}
fastlock_lock(&receiver->lock);
int where = (receiver->lastcmd &&
receiver->lastcmd->proc == bzpopminCommand)
? ZSET_MIN : ZSET_MAX;
@ -365,6 +385,7 @@ void handleClientsBlockedOnKeys(void) {
incrRefCount(rl->key);
propagate(cmd,receiver->db->id,
argv,2,PROPAGATE_AOF|PROPAGATE_REPL);
fastlock_unlock(&receiver->lock);
decrRefCount(argv[0]);
decrRefCount(argv[1]);
}
@ -407,10 +428,12 @@ void handleClientsBlockedOnKeys(void) {
/* If the group was not found, send an error
* to the consumer. */
if (!group) {
addReplyError(receiver,
fastlock_lock(&receiver->lock);
addReplyErrorAsync(receiver,
"-NOGROUP the consumer group this client "
"was blocked on no longer exists");
unblockClient(receiver);
fastlock_unlock(&receiver->lock);
continue;
} else {
*gt = group->last_id;
@ -432,17 +455,19 @@ void handleClientsBlockedOnKeys(void) {
noack = receiver->bpop.xread_group_noack;
}
fastlock_lock(&receiver->lock);
/* Emit the two elements sub-array consisting of
* the name of the stream and the data we
* extracted from it. Wrapped in a single-item
* array, since we have just one key. */
if (receiver->resp == 2) {
addReplyArrayLen(receiver,1);
addReplyArrayLen(receiver,2);
addReplyArrayLenAsync(receiver,1);
addReplyArrayLenAsync(receiver,2);
} else {
addReplyMapLen(receiver,1);
addReplyMapLenAsync(receiver,1);
}
addReplyBulk(receiver,rl->key);
addReplyBulkAsync(receiver,rl->key);
streamPropInfo pi = {
rl->key,
@ -457,6 +482,7 @@ void handleClientsBlockedOnKeys(void) {
* valid, so we must do the setup above before
* this call. */
unblockClient(receiver);
fastlock_unlock(&receiver->lock);
}
}
}

View File

@ -486,14 +486,14 @@ void clusterInit(void) {
}
if (listenToPort(server.port+CLUSTER_PORT_INCR,
server.cfd,&server.cfd_count) == C_ERR)
server.cfd,&server.cfd_count, 0 /*fReusePort*/) == C_ERR)
{
exit(1);
} else {
int j;
for (j = 0; j < server.cfd_count; j++) {
if (aeCreateFileEvent(server.el, server.cfd[j], AE_READABLE,
if (aeCreateFileEvent(server.rgthreadvar[IDX_EVENT_LOOP_MAIN].el, server.cfd[j], AE_READABLE,
clusterAcceptHandler, NULL) == AE_ERR)
serverPanic("Unrecoverable error creating Redis Cluster "
"file event.");
@ -601,7 +601,7 @@ clusterLink *createClusterLink(clusterNode *node) {
* with this link will have the 'link' field set to NULL. */
void freeClusterLink(clusterLink *link) {
if (link->fd != -1) {
aeDeleteFileEvent(server.el, link->fd, AE_READABLE|AE_WRITABLE);
aeDeleteFileEvent(server.rgthreadvar[IDX_EVENT_LOOP_MAIN].el, link->fd, AE_READABLE|AE_WRITABLE);
}
sdsfree(link->sndbuf);
sdsfree(link->rcvbuf);
@ -645,7 +645,7 @@ void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
* node identity. */
link = createClusterLink(NULL);
link->fd = cfd;
aeCreateFileEvent(server.el,cfd,AE_READABLE,clusterReadHandler,link);
aeCreateFileEvent(server.rgthreadvar[IDX_EVENT_LOOP_MAIN].el,cfd,AE_READABLE,clusterReadHandler,link);
}
}
@ -2132,7 +2132,7 @@ void clusterWriteHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
}
sdsrange(link->sndbuf,nwritten,-1);
if (sdslen(link->sndbuf) == 0)
aeDeleteFileEvent(server.el, link->fd, AE_WRITABLE);
aeDeleteFileEvent(server.rgthreadvar[IDX_EVENT_LOOP_MAIN].el, link->fd, AE_WRITABLE);
}
/* Read data. Try to read the first field of the header first to check the
@ -2208,7 +2208,7 @@ void clusterReadHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
* from event handlers that will do stuff with the same link later. */
void clusterSendMessage(clusterLink *link, unsigned char *msg, size_t msglen) {
if (sdslen(link->sndbuf) == 0 && msglen != 0)
aeCreateFileEvent(server.el,link->fd,AE_WRITABLE|AE_BARRIER,
aeCreateFileEvent(server.rgthreadvar[IDX_EVENT_LOOP_MAIN].el,link->fd,AE_WRITABLE|AE_BARRIER,
clusterWriteHandler,link);
link->sndbuf = sdscatlen(link->sndbuf, msg, msglen);
@ -3402,7 +3402,7 @@ void clusterCron(void) {
link = createClusterLink(node);
link->fd = fd;
node->link = link;
aeCreateFileEvent(server.el,link->fd,AE_READABLE,
aeCreateFileEvent(server.rgthreadvar[IDX_EVENT_LOOP_MAIN].el,link->fd,AE_READABLE,
clusterReadHandler,link);
/* Queue a PING in the new connection ASAP: this is crucial
* to avoid false positives in failure detection.
@ -5390,6 +5390,7 @@ socket_err:
* the target instance. See the Redis Cluster specification for more
* information. */
void askingCommand(client *c) {
serverAssert(aeThreadOwnsLock());
if (server.cluster_enabled == 0) {
addReplyError(c,"This instance has cluster support disabled");
return;
@ -5402,6 +5403,7 @@ void askingCommand(client *c) {
* In this mode slaves will not redirect clients as long as clients access
* with read-only commands to keys that are served by the slave's master. */
void readonlyCommand(client *c) {
serverAssert(aeThreadOwnsLock());
if (server.cluster_enabled == 0) {
addReplyError(c,"This instance has cluster support disabled");
return;
@ -5412,6 +5414,7 @@ void readonlyCommand(client *c) {
/* The READWRITE command just clears the READONLY command state. */
void readwriteCommand(client *c) {
/* Asserts that the calling thread holds the global lock before client
 * state is modified. */
serverAssert(aeThreadOwnsLock());
/* Drop the READONLY flag and acknowledge with the shared OK reply. */
c->flags &= ~CLIENT_READONLY;
addReply(c,shared.ok);
}
@ -5455,6 +5458,11 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in
multiState *ms, _ms;
multiCmd mc;
int i, slot = 0, migrating_slot = 0, importing_slot = 0, missing_keys = 0;
serverAssert(aeThreadOwnsLock());
/* Allow any key to be set if a module disabled cluster redirections. */
if (server.cluster_module_flags & CLUSTER_MODULE_FLAG_NO_REDIRECTION)
return myself;
/* Allow any key to be set if a module disabled cluster redirections. */
if (server.cluster_module_flags & CLUSTER_MODULE_FLAG_NO_REDIRECTION)
@ -5663,6 +5671,7 @@ void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_co
* longer handles, the client is sent a redirection error, and the function
* returns 1. Otherwise 0 is returned and no operation is performed. */
int clusterRedirectBlockedClientIfNeeded(client *c) {
serverAssert(aeThreadOwnsLock());
if (c->flags & CLIENT_BLOCKED &&
(c->btype == BLOCKED_LIST ||
c->btype == BLOCKED_ZSET ||

View File

@ -1,6 +1,10 @@
#ifndef __CLUSTER_H
#define __CLUSTER_H
#ifdef __cplusplus
extern "C" {
#endif
/*-----------------------------------------------------------------------------
* Redis cluster data structures, defines, exported API.
*----------------------------------------------------------------------------*/
@ -287,4 +291,8 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in
int clusterRedirectBlockedClientIfNeeded(client *c);
void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_code);
#ifdef __cplusplus
}
#endif
#endif /* __CLUSTER_H */

View File

@ -395,6 +395,9 @@ void loadServerConfigFromString(char *config) {
err = "repl-backlog-ttl can't be negative ";
goto loaderr;
}
} else if (!strcasecmp(argv[0],"masteruser") && argc == 2) {
zfree(server.masteruser);
server.masteruser = argv[1][0] ? zstrdup(argv[1]) : NULL;
} else if (!strcasecmp(argv[0],"masterauth") && argc == 2) {
zfree(server.masterauth);
server.masterauth = argv[1][0] ? zstrdup(argv[1]) : NULL;
@ -821,7 +824,18 @@ void loadServerConfigFromString(char *config) {
if (err) goto loaderr;
}
} else if (!strcasecmp(argv[0],"scratch-file-path")) {
#ifdef USE_MEMKIND
storage_init(argv[1], server.maxmemory);
#else
err = "KeyDB not compliled with scratch-file support.";
goto loaderr;
#endif
} else if (!strcasecmp(argv[0],"server-threads") && argc == 2) {
server.cthreads = atoi(argv[1]);
if (server.cthreads <= 0 || server.cthreads > MAX_EVENT_LOOPS) {
err = "Invalid number of threads specified";
goto loaderr;
}
} else {
err = "Bad directive or wrong number of arguments"; goto loaderr;
}
@ -948,6 +962,9 @@ void configSetCommand(client *c) {
sds aclop = sdscatprintf(sdsempty(),">%s",(char*)ptrFromObj(o));
ACLSetUser(DefaultUser,aclop,sdslen(aclop));
sdsfree(aclop);
} config_set_special_field("masteruser") {
zfree(server.masteruser);
server.masteruser = ((char*)ptrFromObj(o))[0] ? zstrdup(ptrFromObj(o)) : NULL;
} config_set_special_field("masterauth") {
zfree(server.masterauth);
server.masterauth = ((char*)ptrFromObj(o))[0] ? zstrdup(ptrFromObj(o)) : NULL;
@ -961,6 +978,7 @@ void configSetCommand(client *c) {
/* Try to check if the OS is capable of supporting so many FDs. */
server.maxclients = ll;
serverAssert(FALSE);
if (ll > orig_value) {
adjustOpenFilesLimit();
if (server.maxclients != ll) {
@ -968,15 +986,18 @@ void configSetCommand(client *c) {
server.maxclients = orig_value;
return;
}
if ((unsigned int) aeGetSetSize(server.el) <
if ((unsigned int) aeGetSetSize(server.rgthreadvar[IDX_EVENT_LOOP_MAIN].el) <
server.maxclients + CONFIG_FDSET_INCR)
{
if (aeResizeSetSize(server.el,
server.maxclients + CONFIG_FDSET_INCR) == AE_ERR)
for (int iel = 0; iel < server.cthreads; ++iel)
{
addReplyError(c,"The event loop API used by Redis is not able to handle the specified number of clients");
server.maxclients = orig_value;
return;
if (aeResizeSetSize(server.rgthreadvar[iel].el,
server.maxclients + CONFIG_FDSET_INCR) == AE_ERR)
{
addReplyError(c,"The event loop API used by Redis is not able to handle the specified number of clients");
server.maxclients = orig_value;
return;
}
}
}
}
@ -1359,6 +1380,7 @@ void configGetCommand(client *c) {
/* String values */
config_get_string_field("dbfilename",server.rdb_filename);
config_get_string_field("masteruser",server.masteruser);
config_get_string_field("masterauth",server.masterauth);
config_get_string_field("cluster-announce-ip",server.cluster_announce_ip);
config_get_string_field("unixsocket",server.unixsocket);
@ -2019,7 +2041,7 @@ void rewriteConfigClientoutputbufferlimitOption(struct rewriteConfigState *state
rewriteConfigFormatMemory(soft,sizeof(soft),
server.client_obuf_limits[j].soft_limit_bytes);
char *typename = getClientTypeName(j);
const char *typename = getClientTypeName(j);
if (!strcmp(typename,"slave")) typename = "replica";
line = sdscatprintf(sdsempty(),"%s %s %s %s %ld",
option, typename, hard, soft,
@ -2237,6 +2259,7 @@ int rewriteConfig(char *path) {
rewriteConfigDirOption(state);
rewriteConfigSlaveofOption(state,"replicaof");
rewriteConfigStringOption(state,"replica-announce-ip",server.slave_announce_ip,CONFIG_DEFAULT_SLAVE_ANNOUNCE_IP);
rewriteConfigStringOption(state,"masteruser",server.masteruser,NULL);
rewriteConfigStringOption(state,"masterauth",server.masterauth,NULL);
rewriteConfigStringOption(state,"cluster-announce-ip",server.cluster_announce_ip,NULL);
rewriteConfigYesNoOption(state,"replica-serve-stale-data",server.repl_serve_stale_data,CONFIG_DEFAULT_SLAVE_SERVE_STALE_DATA);

View File

@ -3,10 +3,18 @@
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l);
#ifdef REDIS_TEST
int crc64Test(int argc, char *argv[]);
#endif
#ifdef __cplusplus
}
#endif
#endif

View File

@ -99,6 +99,7 @@ robj *lookupKey(redisDb *db, robj *key, int flags) {
* expiring our key via DELs in the replication link. */
robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) {
robj *val;
serverAssert(aeThreadOwnsLock());
if (expireIfNeeded(db,key) == 1) {
/* Key expired. If we are in the context of a master, expireIfNeeded()
@ -1072,6 +1073,7 @@ int removeExpire(redisDb *db, robj *key) {
* after which the key will no longer be considered valid. */
void setExpire(client *c, redisDb *db, robj *key, long long when) {
dictEntry *kde, *de;
serverAssert(aeThreadOwnsLock());
/* Reuse the sds from the main dict in the expire dict */
kde = dictFind(db->pdict,ptrFromObj(key));
@ -1108,6 +1110,7 @@ long long getExpire(redisDb *db, robj *key) {
* will be consistent even if we allow write operations against expiring
* keys. */
void propagateExpire(redisDb *db, robj *key, int lazy) {
serverAssert(aeThreadOwnsLock());
robj *argv[2];
argv[0] = lazy ? shared.unlink : shared.del;

View File

@ -803,7 +803,7 @@ static void *getMcontextEip(ucontext_t *uc) {
#endif
#elif defined(__linux__)
/* Linux */
#if defined(__i386__)
#if defined(__i386__) || defined(__ILP32__)
return (void*) uc->uc_mcontext.gregs[14]; /* Linux 32 */
#elif defined(__X86_64__) || defined(__x86_64__)
return (void*) uc->uc_mcontext.gregs[16]; /* Linux 64 */
@ -915,7 +915,7 @@ void logRegisters(ucontext_t *uc) {
/* Linux */
#elif defined(__linux__)
/* Linux x86 */
#if defined(__i386__)
#if defined(__i386__) || defined(__ILP32__)
serverLog(LL_WARNING,
"\n"
"EAX:%08lx EBX:%08lx ECX:%08lx EDX:%08lx\n"

View File

@ -116,17 +116,15 @@ robj *activeDefragStringOb(robj* ob, long *defragged) {
/* try to defrag string object */
if (ob->type == OBJ_STRING) {
if(ob->encoding==OBJ_ENCODING_RAW) {
sds newsds = activeDefragSds((sds)ob->ptr);
sds newsds = activeDefragSds((sds)ptrFromObj(ob));
if (newsds) {
ob->ptr = newsds;
ob->m_ptr = newsds;
(*defragged)++;
}
} else if (ob->encoding==OBJ_ENCODING_EMBSTR) {
/* The sds is embedded in the object allocation, calculate the
* offset and update the pointer in the new allocation. */
long ofs = (intptr_t)ob->ptr - (intptr_t)ob;
if ((ret = activeDefragAlloc(ob))) {
ret->ptr = (void*)((intptr_t)ret + ofs);
(*defragged)++;
}
} else if (ob->encoding!=OBJ_ENCODING_INT) {
@ -441,7 +439,7 @@ void defragLater(redisDb *db, dictEntry *kde) {
}
long scanLaterList(robj *ob) {
quicklist *ql = ob->ptr;
quicklist *ql = ptrFromObj(ob);
if (ob->type != OBJ_LIST || ob->encoding != OBJ_ENCODING_QUICKLIST)
return 0;
server.stat_active_defrag_scanned+=ql->len;
@ -463,7 +461,7 @@ void scanLaterZsetCallback(void *privdata, const dictEntry *_de) {
long scanLaterZset(robj *ob, unsigned long *cursor) {
if (ob->type != OBJ_ZSET || ob->encoding != OBJ_ENCODING_SKIPLIST)
return 0;
zset *zs = (zset*)ob->ptr;
zset *zs = (zset*)ptrFromObj(ob);
dict *d = zs->pdict;
scanLaterZsetData data = {zs, 0};
*cursor = dictScan(d, *cursor, scanLaterZsetCallback, defragDictBucketCallback, &data);
@ -483,7 +481,7 @@ long scanLaterSet(robj *ob, unsigned long *cursor) {
long defragged = 0;
if (ob->type != OBJ_SET || ob->encoding != OBJ_ENCODING_HT)
return 0;
dict *d = ob->ptr;
dict *d = ptrFromObj(ob);
*cursor = dictScan(d, *cursor, scanLaterSetCallback, defragDictBucketCallback, &defragged);
return defragged;
}
@ -504,7 +502,7 @@ long scanLaterHash(robj *ob, unsigned long *cursor) {
long defragged = 0;
if (ob->type != OBJ_HASH || ob->encoding != OBJ_ENCODING_HT)
return 0;
dict *d = ob->ptr;
dict *d = ptrFromObj(ob);
*cursor = dictScan(d, *cursor, scanLaterHashCallback, defragDictBucketCallback, &defragged);
return defragged;
}
@ -512,10 +510,10 @@ long scanLaterHash(robj *ob, unsigned long *cursor) {
long defragQuicklist(redisDb *db, dictEntry *kde) {
robj *ob = dictGetVal(kde);
long defragged = 0;
quicklist *ql = ob->ptr, *newql;
quicklist *ql = ptrFromObj(ob), *newql;
serverAssert(ob->type == OBJ_LIST && ob->encoding == OBJ_ENCODING_QUICKLIST);
if ((newql = activeDefragAlloc(ql)))
defragged++, ob->ptr = ql = newql;
defragged++, ob->m_ptr = ql = newql;
if (ql->len > server.active_defrag_max_scan_fields)
defragLater(db, kde);
else
@ -526,7 +524,7 @@ long defragQuicklist(redisDb *db, dictEntry *kde) {
long defragZsetSkiplist(redisDb *db, dictEntry *kde) {
robj *ob = dictGetVal(kde);
long defragged = 0;
zset *zs = (zset*)ob->ptr;
zset *zs = (zset*)ptrFromObj(ob);
zset *newzs;
zskiplist *newzsl;
dict *newdict;
@ -534,7 +532,7 @@ long defragZsetSkiplist(redisDb *db, dictEntry *kde) {
struct zskiplistNode *newheader;
serverAssert(ob->type == OBJ_ZSET && ob->encoding == OBJ_ENCODING_SKIPLIST);
if ((newzs = activeDefragAlloc(zs)))
defragged++, ob->ptr = zs = newzs;
defragged++, ob->m_ptr = zs = newzs;
if ((newzsl = activeDefragAlloc(zs->zsl)))
defragged++, zs->zsl = newzsl;
if ((newheader = activeDefragAlloc(zs->zsl->header)))
@ -561,16 +559,16 @@ long defragHash(redisDb *db, dictEntry *kde) {
robj *ob = dictGetVal(kde);
dict *d, *newd;
serverAssert(ob->type == OBJ_HASH && ob->encoding == OBJ_ENCODING_HT);
d = ob->ptr;
d = ptrFromObj(ob);
if (dictSize(d) > server.active_defrag_max_scan_fields)
defragLater(db, kde);
else
defragged += activeDefragSdsDict(d, DEFRAG_SDS_DICT_VAL_IS_SDS);
/* handle the dict struct */
if ((newd = activeDefragAlloc(ob->ptr)))
defragged++, ob->ptr = newd;
if ((newd = activeDefragAlloc(ptrFromObj(ob))))
defragged++, ob->m_ptr = newd;
/* defrag the dict tables */
defragged += dictDefragTables(ob->ptr);
defragged += dictDefragTables(ptrFromObj(ob));
return defragged;
}
@ -579,16 +577,16 @@ long defragSet(redisDb *db, dictEntry *kde) {
robj *ob = dictGetVal(kde);
dict *d, *newd;
serverAssert(ob->type == OBJ_SET && ob->encoding == OBJ_ENCODING_HT);
d = ob->ptr;
d = ptrFromObj(ob);
if (dictSize(d) > server.active_defrag_max_scan_fields)
defragLater(db, kde);
else
defragged += activeDefragSdsDict(d, DEFRAG_SDS_DICT_NO_VAL);
/* handle the dict struct */
if ((newd = activeDefragAlloc(ob->ptr)))
defragged++, ob->ptr = newd;
if ((newd = activeDefragAlloc(ptrFromObj(ob))))
defragged++, ob->m_ptr = newd;
/* defrag the dict tables */
defragged += dictDefragTables(ob->ptr);
defragged += dictDefragTables(ptrFromObj(ob));
return defragged;
}
@ -613,11 +611,11 @@ int scanLaterStraemListpacks(robj *ob, unsigned long *cursor, long long endtime,
return 0;
}
stream *s = ob->ptr;
raxStart(&ri,s->rax);
stream *s = ptrFromObj(ob);
raxStart(&ri,s->prax);
if (*cursor == 0) {
/* if cursor is 0, we start new iteration */
defragRaxNode(&s->rax->head);
defragRaxNode(&s->prax->head);
/* assign the iterator node callback before the seek, so that the
* initial nodes that are processed till the first item are covered */
ri.node_cb = defragRaxNode;
@ -738,19 +736,19 @@ long defragStream(redisDb *db, dictEntry *kde) {
long defragged = 0;
robj *ob = dictGetVal(kde);
serverAssert(ob->type == OBJ_STREAM && ob->encoding == OBJ_ENCODING_STREAM);
stream *s = ob->ptr, *news;
stream *s = ptrFromObj(ob), *news;
/* handle the main struct */
if ((news = activeDefragAlloc(s)))
defragged++, ob->ptr = s = news;
defragged++, ob->m_ptr = s = news;
if (raxSize(s->rax) > server.active_defrag_max_scan_fields) {
rax *newrax = activeDefragAlloc(s->rax);
if (raxSize(s->prax) > server.active_defrag_max_scan_fields) {
rax *newrax = activeDefragAlloc(s->prax);
if (newrax)
defragged++, s->rax = newrax;
defragged++, s->prax = newrax;
defragLater(db, kde);
} else
defragged += defragRadixTree(&s->rax, 1, NULL, NULL);
defragged += defragRadixTree(&s->prax, 1, NULL, NULL);
if (s->cgroups)
defragged += defragRadixTree(&s->cgroups, 1, defragStreamConsumerGroup, NULL);
@ -792,8 +790,8 @@ long defragKey(redisDb *db, dictEntry *de) {
if (ob->encoding == OBJ_ENCODING_QUICKLIST) {
defragged += defragQuicklist(db, de);
} else if (ob->encoding == OBJ_ENCODING_ZIPLIST) {
if ((newzl = activeDefragAlloc(ob->ptr)))
defragged++, ob->ptr = newzl;
if ((newzl = activeDefragAlloc(ptrFromObj(ob))))
defragged++, ob->m_ptr = newzl;
} else {
serverPanic("Unknown list encoding");
}
@ -801,16 +799,16 @@ long defragKey(redisDb *db, dictEntry *de) {
if (ob->encoding == OBJ_ENCODING_HT) {
defragged += defragSet(db, de);
} else if (ob->encoding == OBJ_ENCODING_INTSET) {
intset *newis, *is = ob->ptr;
intset *newis, *is = ptrFromObj(ob);
if ((newis = activeDefragAlloc(is)))
defragged++, ob->ptr = newis;
defragged++, ob->m_ptr = newis;
} else {
serverPanic("Unknown set encoding");
}
} else if (ob->type == OBJ_ZSET) {
if (ob->encoding == OBJ_ENCODING_ZIPLIST) {
if ((newzl = activeDefragAlloc(ob->ptr)))
defragged++, ob->ptr = newzl;
if ((newzl = activeDefragAlloc(ptrFromObj(ob))))
defragged++, ob->m_ptr = newzl;
} else if (ob->encoding == OBJ_ENCODING_SKIPLIST) {
defragged += defragZsetSkiplist(db, de);
} else {
@ -818,8 +816,8 @@ long defragKey(redisDb *db, dictEntry *de) {
}
} else if (ob->type == OBJ_HASH) {
if (ob->encoding == OBJ_ENCODING_ZIPLIST) {
if ((newzl = activeDefragAlloc(ob->ptr)))
defragged++, ob->ptr = newzl;
if ((newzl = activeDefragAlloc(ptrFromObj(ob))))
defragged++, ob->m_ptr = newzl;
} else if (ob->encoding == OBJ_ENCODING_HT) {
defragged += defragHash(db, de);
} else {

View File

@ -739,6 +739,30 @@ unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count) {
return stored;
}
/* Same API as dictGetRandomKey(), but does extra work so that the returned
 * element is drawn from a fairer distribution.
 *
 * dictGetRandomKey() first picks a random bucket and then a random element
 * of that bucket's chain, so elements living in chains of different lengths
 * are reported with different probabilities. Here we instead sample a
 * "linear" range of the table (up to GETFAIR_NUM_ENTRIES entries, possibly
 * spanning several buckets with chains of different lengths laid one after
 * the other) and return a random element of that range, which smooths away
 * the chain-length bias. */
#define GETFAIR_NUM_ENTRIES 15
dictEntry *dictGetFairRandomKey(dict *d) {
    dictEntry *sample[GETFAIR_NUM_ENTRIES];
    unsigned int nsampled = dictGetSomeKeys(d,sample,GETFAIR_NUM_ENTRIES);

    /* Normal case: pick uniformly among the sampled entries. */
    if (nsampled != 0)
        return sample[rand() % nsampled];

    /* dictGetSomeKeys() may return zero elements on an unlucky run even
     * when the table is not empty, so fall back to the real
     * dictGetRandomKey(), which always yields an element if the hash table
     * has at least one. */
    return dictGetRandomKey(d);
}
/* Function to reverse bits. Algorithm from:
* http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel */
static unsigned long rev(unsigned long v) {

View File

@ -35,6 +35,10 @@
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
#ifndef __DICT_H
#define __DICT_H
@ -166,6 +170,7 @@ dictIterator *dictGetSafeIterator(dict *d);
dictEntry *dictNext(dictIterator *iter);
void dictReleaseIterator(dictIterator *iter);
dictEntry *dictGetRandomKey(dict *d);
dictEntry *dictGetFairRandomKey(dict *d);
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count);
void dictGetStats(char *buf, size_t bufsize, dict *d);
uint64_t dictGenHashFunction(const void *key, int len);
@ -186,4 +191,8 @@ extern dictType dictTypeHeapStringCopyKey;
extern dictType dictTypeHeapStrings;
extern dictType dictTypeHeapStringCopyKeyValue;
#ifdef __cplusplus
}
#endif
#endif /* __DICT_H */

View File

@ -36,6 +36,10 @@
#include "config.h"
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
void memrev16(void *p);
void memrev32(void *p);
void memrev64(void *p);
@ -75,4 +79,8 @@ uint64_t intrev64(uint64_t v);
int endianconvTest(int argc, char *argv[]);
#endif
#ifdef __cplusplus
}
#endif
#endif

View File

@ -350,6 +350,7 @@ unsigned long LFUDecrAndReturn(robj *o) {
* used memory: the eviction should use mostly data size. This function
* returns the sum of AOF and slaves buffer. */
size_t freeMemoryGetNotCountedMemory(void) {
serverAssert(aeThreadOwnsLock());
size_t overhead = 0;
int slaves = listLength(server.slaves);
@ -444,6 +445,7 @@ int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *lev
* Otherwise if we are over the memory limit, but not enough memory
* was freed to return back under the limit, the function returns C_ERR. */
int freeMemoryIfNeeded(void) {
serverAssert(aeThreadOwnsLock());
/* By default replicas should ignore maxmemory
* and just be masters exact copies. */
if (server.masterhost && server.repl_slave_ignore_maxmemory) return C_OK;

View File

@ -1,25 +1,135 @@
/*
* Copyright (c) 2019, John Sully <john at eqalpha dot com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Redis nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "fastlock.h"
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sched.h>
#include <atomic>
#include <assert.h>
/****************************************************
*
* Implementation of a fair spinlock. To promote fairness we
* use a ticket lock instead of a raw spinlock
*
****************************************************/
static_assert(sizeof(pid_t) <= sizeof(fastlock::m_pidOwner), "fastlock::m_pidOwner not large enough");
// Return the id of the calling thread as reported by the SYS_gettid syscall.
// The value is cached in a thread_local variable, so the syscall is issued
// at most once per thread (-1 marks "not fetched yet").
static pid_t gettid()
{
    static thread_local int tidCached = -1;
    if (tidCached != -1)
        return tidCached;
    tidCached = syscall(SYS_gettid);
    return tidCached;
}
// Put a fastlock into its initial, unlocked state: the m_lock flag and both
// ticket counters are zeroed, the recursion depth is 0 and the owner is -1
// (the value fastlock_unlock() also stores when no thread owns the lock).
extern "C" void fastlock_init(struct fastlock *lock)
{
lock->m_lock = 0;
lock->m_ticket.m_active = 0;
lock->m_ticket.m_avail = 0;
lock->m_depth = 0;
lock->m_pidOwner = -1;
}
// Acquire the lock, spinning until it is our turn.
//
// The lock is recursive: if the calling thread is already recorded as the
// owner, only the depth counter is incremented and the call returns at once.
// Otherwise the caller draws a ticket from m_ticket.m_avail and spins until
// m_ticket.m_active reaches that ticket, which serves waiters in FIFO order.
// On success the depth is set to 1 and the caller's thread id is published
// in m_pidOwner.
extern "C" void fastlock_lock(struct fastlock *lock)
{
    // Spin on the m_lock flag; while it is held, a thread that already owns
    // the lock re-enters by bumping the recursion depth.
    while (!__sync_bool_compare_and_swap(&lock->m_lock, 0, 1))
    {
        if ((int)__atomic_load_4(&lock->m_pidOwner, __ATOMIC_ACQUIRE) == gettid())
        {
            ++lock->m_depth;
            return;
        }
    }

    // Take the next ticket and wait until it is being served.
    unsigned myticket = __atomic_fetch_add(&lock->m_ticket.m_avail, 1, __ATOMIC_RELEASE);

    // Unsigned so that a very long wait wraps around instead of overflowing
    // a signed int (undefined behavior).
    unsigned cloops = 0;
    while (__atomic_load_2(&lock->m_ticket.m_active, __ATOMIC_ACQUIRE) != myticket)
    {
        // Yield the CPU once every 1024*1024 spins. The parentheses are
        // required: '%' and '*' share precedence and associate left to
        // right, so "x % 1024*1024" would mean "(x % 1024) * 1024" and
        // yield every 1024 iterations instead.
        if ((++cloops % (1024*1024)) == 0)
            sched_yield();
    }

    lock->m_depth = 1;
    __atomic_store_4(&lock->m_pidOwner, gettid(), __ATOMIC_RELEASE);
    std::atomic_thread_fence(std::memory_order_acquire);
}
// Try to acquire the lock without waiting.
// Returns non-zero (true) if the lock was taken or was already owned by the
// calling thread (in which case the recursion depth is incremented), and
// zero (false) if it could not be taken immediately.
extern "C" int fastlock_trylock(struct fastlock *lock)
{
// Recursive acquisition: we already own the lock.
if ((int)__atomic_load_4(&lock->m_pidOwner, __ATOMIC_ACQUIRE) == gettid())
{
++lock->m_depth;
return true;
}
// cheap test: active != avail means a ticket is outstanding (lock held or contended)
if (lock->m_ticket.m_active != lock->m_ticket.m_avail)
return false;
uint16_t active = __atomic_load_2(&lock->m_ticket.m_active, __ATOMIC_RELAXED);
uint16_t next = active + 1;
// Expected state: no outstanding tickets (m_active == m_avail == active).
// Desired state: m_avail advanced by one, i.e. we hold the ticket now being served.
struct ticket ticket_expect { active, active };
struct ticket ticket_setiflocked { active, next };
// Both counters are swapped as a single unit so the check and the ticket
// grab are atomic.
// NOTE(review): the 4th argument of __atomic_compare_exchange is 'weak';
// passing true requests a weak CAS, which is allowed to fail spuriously.
// Here that only makes try-lock report failure.
if (__atomic_compare_exchange(&lock->m_ticket, &ticket_expect, &ticket_setiflocked, true /*weak*/, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE))
{
lock->m_depth = 1;
__atomic_store_4(&lock->m_pidOwner, gettid(), __ATOMIC_RELEASE);
return true;
}
return false;
}
// Release one level of ownership. The lock is only handed to the next
// waiter when the recursion depth drops to zero.
extern "C" void fastlock_unlock(struct fastlock *lock)
{
// Clear the m_lock flag if it is set (the result of the CAS is ignored).
__sync_bool_compare_and_swap(&lock->m_lock, 1, 0);
--lock->m_depth;
if (lock->m_depth == 0)
{
// A negative owner here means the lock is unowned (-1) or was freed (-2).
assert((int)__atomic_load_4(&lock->m_pidOwner, __ATOMIC_RELAXED) >= 0); // unlock after free
lock->m_pidOwner = -1;
std::atomic_thread_fence(std::memory_order_acquire);
// Advance the ticket being served; the thread spinning in
// fastlock_lock() on that ticket number proceeds.
__atomic_fetch_add(&lock->m_ticket.m_active, 1, __ATOMIC_ACQ_REL);
}
}
// Retire a lock. No resources are released; the function only checks (in
// builds with assertions enabled) that the lock is safe to discard and then
// marks it as freed so that a later fastlock_unlock() trips its assert.
extern "C" void fastlock_free(struct fastlock *lock)
{
    (void)lock;
    // The ticket counters are 16-bit and wrap around. Compute "avail - 1"
    // in uint16_t so the check still holds when m_avail has wrapped to 0;
    // without the cast the subtraction is done in int and yields -1.
    assert((lock->m_ticket.m_active == lock->m_ticket.m_avail)                      // Assert the lock is unlocked
        || (lock->m_pidOwner == gettid()
            && (lock->m_ticket.m_active == (uint16_t)(lock->m_ticket.m_avail - 1)))); // OR we own the lock and nobody else is waiting
    lock->m_pidOwner = -2;  // sentinel value indicating free
}
// Debugging aid: reports whether the calling thread's id matches the owner
// currently recorded in m_pidOwner.
bool fastlock::fOwnLock()
{
    const pid_t tidSelf = gettid();
    return tidSelf == m_pidOwner;
}

View File

@ -1,4 +1,5 @@
#pragma once
#include <inttypes.h>
#ifdef __cplusplus
extern "C" {
@ -8,6 +9,7 @@ extern "C" {
struct fastlock;
void fastlock_init(struct fastlock *lock);
void fastlock_lock(struct fastlock *lock);
int fastlock_trylock(struct fastlock *lock);
void fastlock_unlock(struct fastlock *lock);
void fastlock_free(struct fastlock *lock);
@ -16,19 +18,39 @@ void fastlock_free(struct fastlock *lock);
}
#endif
/* Pair of 16-bit ticket counters used by fastlock. fastlock_trylock()
 * compares and swaps both fields together as one unit, so the layout must
 * not change. */
struct ticket
{
uint16_t m_active; /* ticket currently being served; advanced by fastlock_unlock() */
uint16_t m_avail;  /* next ticket to hand out; advanced when a thread starts acquiring */
};
/* Recursive ticket spinlock. The C API (fastlock_init/lock/trylock/unlock/
 * free) operates on this struct; under C++ the member functions below
 * forward to that API. */
struct fastlock
{
int m_lock;                      /* flag toggled with compare-and-swap by fastlock_lock()/fastlock_unlock() */
volatile struct ticket m_ticket; /* ticket counters: active == avail means no ticket is outstanding */
volatile int m_pidOwner;         /* thread id of the owner; -1 when unowned, -2 after fastlock_free() */
volatile int m_depth;            /* recursion depth of the owning thread */
#ifdef __cplusplus
/* Constructor: initializes the lock to the unlocked state. */
fastlock()
{
fastlock_init(this);
}
/* Blocking acquire; forwards to fastlock_lock(). */
void lock()
{
fastlock_lock(this);
}
/* Non-blocking acquire; true if the lock was obtained. */
bool try_lock()
{
return !!fastlock_trylock(this);
}
/* Release one level of ownership; forwards to fastlock_unlock(). */
void unlock()
{
fastlock_unlock(this);
}
bool fOwnLock(); // true if this thread owns the lock, NOTE: not 100% reliable, use for debugging only
#endif
};

View File

@ -30,13 +30,11 @@
#ifndef _REDIS_FMACRO_H
#define _REDIS_FMACRO_H
#define _BSD_SOURCE
#define _DEFAULT_SOURCE 1
#if defined(__linux__)
#ifndef __cplusplus
#define _GNU_SOURCE
#define _DEFAULT_SOURCE
#endif
#define _GNU_SOURCE 1
#define _DEFAULT_SOURCE 1
#endif
#if defined(_AIX)

View File

@ -41,7 +41,9 @@
typedef struct intset {
uint32_t encoding;
uint32_t length;
int8_t contents[ZERO_LENGTH_ARRAY_LENGTH];
#ifndef __cplusplus
int8_t contents[];
#endif
} intset;
intset *intsetNew(void);

View File

@ -484,6 +484,7 @@ void moduleFreeContext(RedisModuleCtx *ctx) {
* details needed to correctly replicate commands. */
void moduleHandlePropagationAfterCommandCallback(RedisModuleCtx *ctx) {
client *c = ctx->client;
serverAssert(aeThreadOwnsLock());
if (c->flags & CLIENT_LUA) return;
@ -2696,7 +2697,7 @@ RedisModuleCallReply *RM_Call(RedisModuleCtx *ctx, const char *cmdname, const ch
/* Create the client and dispatch the command. */
va_start(ap, fmt);
c = createClient(-1);
c = createClient(-1, IDX_EVENT_LOOP_MAIN);
c->puser = NULL; /* Root user. */
argv = moduleCreateArgvFromUserFormat(cmdname,fmt,&argc,&flags,ap);
replicate = flags & REDISMODULE_ARGV_REPLICATE;
@ -3546,7 +3547,7 @@ RedisModuleBlockedClient *RM_BlockClient(RedisModuleCtx *ctx, RedisModuleCmdFunc
bc->disconnect_callback = NULL; /* Set by RM_SetDisconnectCallback() */
bc->free_privdata = free_privdata;
bc->privdata = NULL;
bc->reply_client = createClient(-1);
bc->reply_client = createClient(-1, IDX_EVENT_LOOP_MAIN);
bc->reply_client->flags |= CLIENT_MODULE;
bc->dbid = c->db->id;
c->bpop.timeout = timeout_ms ? (mstime()+timeout_ms) : 0;
@ -3623,6 +3624,7 @@ void RM_SetDisconnectCallback(RedisModuleBlockedClient *bc, RedisModuleDisconnec
void moduleHandleBlockedClients(void) {
listNode *ln;
RedisModuleBlockedClient *bc;
serverAssert(aeThreadOwnsLock());
pthread_mutex_lock(&moduleUnblockedClientsMutex);
/* Here we unblock all the pending clients blocked in modules operations
@ -3633,9 +3635,16 @@ void moduleHandleBlockedClients(void) {
ln = listFirst(moduleUnblockedClients);
bc = ln->value;
client *c = bc->client;
serverAssert(c->iel == IDX_EVENT_LOOP_MAIN);
listDelNode(moduleUnblockedClients,ln);
pthread_mutex_unlock(&moduleUnblockedClientsMutex);
if (c)
{
AssertCorrectThread(c);
fastlock_lock(&c->lock);
}
/* Release the lock during the loop, as long as we don't
* touch the shared list. */
@ -3692,13 +3701,15 @@ void moduleHandleBlockedClients(void) {
!(c->flags & CLIENT_PENDING_WRITE))
{
c->flags |= CLIENT_PENDING_WRITE;
listAddNodeHead(server.clients_pending_write,c);
AssertCorrectThread(c);
listAddNodeHead(server.rgthreadvar[c->iel].clients_pending_write,c);
}
}
/* Free 'bc' only after unblocking the client, since it is
* referenced in the client blocking context, and must be valid
* when calling unblockClient(). */
fastlock_unlock(&c->lock);
zfree(bc);
/* Lock again before to iterate the loop. */
@ -3794,7 +3805,7 @@ RedisModuleCtx *RM_GetThreadSafeContext(RedisModuleBlockedClient *bc) {
* access it safely from another thread, so we create a fake client here
* in order to keep things like the currently selected database and similar
* things. */
ctx->client = createClient(-1);
ctx->client = createClient(-1, IDX_EVENT_LOOP_MAIN);
if (bc) selectDb(ctx->client,bc->dbid);
return ctx;
}
@ -4300,7 +4311,7 @@ RedisModuleTimerID RM_CreateTimer(RedisModuleCtx *ctx, mstime_t period, RedisMod
if (memcmp(ri.key,&key,sizeof(key)) == 0) {
/* This is the first key, we need to re-install the timer according
* to the just added event. */
aeDeleteTimeEvent(server.el,aeTimer);
aeDeleteTimeEvent(server.rgthreadvar[IDX_EVENT_LOOP_MAIN].el,aeTimer);
aeTimer = -1;
}
raxStop(&ri);
@ -4309,7 +4320,7 @@ RedisModuleTimerID RM_CreateTimer(RedisModuleCtx *ctx, mstime_t period, RedisMod
/* If we have no main timer (the old one was invalidated, or this is the
* first module timer we have), install one. */
if (aeTimer == -1)
aeTimer = aeCreateTimeEvent(server.el,period,moduleTimerHandler,NULL,NULL);
aeTimer = aeCreateTimeEvent(server.rgthreadvar[IDX_EVENT_LOOP_MAIN].el,period,moduleTimerHandler,NULL,NULL);
return key;
}
@ -4659,7 +4670,7 @@ void moduleInitModulesSystem(void) {
/* Set up the keyspace notification subscriber list and static client */
moduleKeyspaceSubscribers = listCreate();
moduleFreeContextReusedClient = createClient(-1);
moduleFreeContextReusedClient = createClient(-1, IDX_EVENT_LOOP_MAIN);
moduleFreeContextReusedClient->flags |= CLIENT_MODULE;
moduleFreeContextReusedClient->puser = NULL; /* root user. */

View File

@ -72,6 +72,7 @@ void queueMultiCommand(client *c) {
}
void discardTransaction(client *c) {
serverAssert(aeThreadOwnsLock());
freeClientMultiState(c);
initClientMultiState(c);
c->flags &= ~(CLIENT_MULTI|CLIENT_DIRTY_CAS|CLIENT_DIRTY_EXEC);
@ -81,11 +82,13 @@ void discardTransaction(client *c) {
/* Mark the client's transaction as DIRTY_EXEC so that EXEC will fail.
 * Meant to be called every time an error occurs while queueing a command.
 * Clients that are not inside MULTI are left untouched. */
void flagTransaction(client *c) {
    serverAssert(aeThreadOwnsLock());
    if (!(c->flags & CLIENT_MULTI)) return;
    c->flags |= CLIENT_DIRTY_EXEC;
}
void multiCommand(client *c) {
serverAssert(aeThreadOwnsLock());
if (c->flags & CLIENT_MULTI) {
addReplyError(c,"MULTI calls can not be nested");
return;
@ -291,6 +294,7 @@ void unwatchAllKeys(client *c) {
/* "Touch" a key, so that if this key is being WATCHed by some client the
* next EXEC will fail. */
void touchWatchedKey(redisDb *db, robj *key) {
serverAssert(aeThreadOwnsLock());
list *clients;
listIter li;
listNode *ln;
@ -316,6 +320,7 @@ void touchWatchedKey(redisDb *db, robj *key) {
void touchWatchedKeysOnFlush(int dbid) {
listIter li1, li2;
listNode *ln;
serverAssert(aeThreadOwnsLock());
/* For every client, check all the waited keys */
listRewind(server.clients,&li1);
@ -350,6 +355,7 @@ void watchCommand(client *c) {
/* UNWATCH: forget every key watched by the client, clear its
 * CLIENT_DIRTY_CAS flag and reply with shared.ok. */
void unwatchCommand(client *c) {
    unwatchAllKeys(c);
    serverAssert(aeThreadOwnsLock());
    c->flags = c->flags & ~CLIENT_DIRTY_CAS;
    addReply(c,shared.ok);
}

File diff suppressed because it is too large Load Diff

View File

@ -82,7 +82,10 @@ robj *createRawStringObject(const char *ptr, size_t len) {
* an object where the sds string is actually an unmodifiable string
* allocated in the same chunk as the object itself. */
robj *createEmbeddedStringObject(const char *ptr, size_t len) {
robj *o = zmalloc(sizeof(robj)+sizeof(struct sdshdr8)+len+1-sizeof(o->m_ptr), MALLOC_SHARED);
size_t allocsize = sizeof(struct sdshdr8)+len+1;
if (allocsize < sizeof(void*))
allocsize = sizeof(void*);
robj *o = zmalloc(sizeof(robj)+allocsize-sizeof(o->m_ptr), MALLOC_SHARED);
struct sdshdr8 *sh = (void*)(&o->m_ptr);
o->type = OBJ_STRING;
@ -394,7 +397,7 @@ robj *resetRefCount(robj *obj) {
int checkType(client *c, robj *o, int type) {
if (o->type != type) {
addReply(c,shared.wrongtypeerr);
addReplyAsync(c,shared.wrongtypeerr);
return 1;
}
return 0;
@ -940,6 +943,7 @@ void freeMemoryOverheadData(struct redisMemOverhead *mh) {
* information used for the MEMORY OVERHEAD and INFO command. The returned
* structure pointer should be freed calling freeMemoryOverheadData(). */
struct redisMemOverhead *getMemoryOverheadData(void) {
serverAssert(aeThreadOwnsLock());
int j;
size_t mem_total = 0;
size_t mem = 0;
@ -982,6 +986,8 @@ struct redisMemOverhead *getMemoryOverheadData(void) {
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
client *c = listNodeValue(ln);
if (c->flags & CLIENT_CLOSE_ASAP)
continue;
mem += getClientOutputBufferMemoryUsage(c);
mem += sdsAllocSize(c->querybuf);
mem += sizeof(client);
@ -1077,6 +1083,7 @@ void inputCatSds(void *result, const char *str) {
/* This implements MEMORY DOCTOR. An human readable analysis of the Redis
* memory condition. */
sds getMemoryDoctorReport(void) {
serverAssert(aeThreadOwnsLock());
int empty = 0; /* Instance is empty or almost empty. */
int big_peak = 0; /* Memory peak is much larger than used mem. */
int high_frag = 0; /* High fragmentation. */

View File

@ -38,12 +38,12 @@ int clientSubscriptionsCount(client *c);
/* Send a pubsub message of type "message" to the client. */
void addReplyPubsubMessage(client *c, robj *channel, robj *msg) {
if (c->resp == 2)
addReply(c,shared.mbulkhdr[3]);
addReplyAsync(c,shared.mbulkhdr[3]);
else
addReplyPushLen(c,3);
addReply(c,shared.messagebulk);
addReplyBulk(c,channel);
addReplyBulk(c,msg);
addReplyPushLenAsync(c,3);
addReplyAsync(c,shared.messagebulk);
addReplyBulkAsync(c,channel);
addReplyBulkAsync(c,msg);
}
/* Send a pubsub message of type "pmessage" to the client. The difference
@ -51,13 +51,13 @@ void addReplyPubsubMessage(client *c, robj *channel, robj *msg) {
* this message format also includes the pattern that matched the message. */
void addReplyPubsubPatMessage(client *c, robj *pat, robj *channel, robj *msg) {
if (c->resp == 2)
addReply(c,shared.mbulkhdr[4]);
addReplyAsync(c,shared.mbulkhdr[4]);
else
addReplyPushLen(c,4);
addReply(c,shared.pmessagebulk);
addReplyBulk(c,pat);
addReplyBulk(c,channel);
addReplyBulk(c,msg);
addReplyPushLenAsync(c,4);
addReplyAsync(c,shared.pmessagebulk);
addReplyBulkAsync(c,pat);
addReplyBulkAsync(c,channel);
addReplyBulkAsync(c,msg);
}
/* Send the pubsub subscription notification to the client. */
@ -293,7 +293,9 @@ int pubsubPublishMessage(robj *channel, robj *message) {
listRewind(list,&li);
while ((ln = listNext(&li)) != NULL) {
client *c = ln->value;
fastlock_lock(&c->lock);
addReplyPubsubMessage(c,channel,message);
fastlock_unlock(&c->lock);
receivers++;
}
}
@ -309,8 +311,10 @@ int pubsubPublishMessage(robj *channel, robj *message) {
(char*)ptrFromObj(channel),
sdslen(ptrFromObj(channel)),0))
{
fastlock_lock(&pat->pclient->lock);
addReplyPubsubPatMessage(pat->pclient,
pat->pattern,channel,message);
fastlock_unlock(&pat->pclient->lock);
receivers++;
}
}
@ -325,6 +329,7 @@ int pubsubPublishMessage(robj *channel, robj *message) {
void subscribeCommand(client *c) {
int j;
serverAssert(aeThreadOwnsLock());
for (j = 1; j < c->argc; j++)
pubsubSubscribeChannel(c,c->argv[j]);
@ -345,6 +350,7 @@ void unsubscribeCommand(client *c) {
void psubscribeCommand(client *c) {
int j;
serverAssert(aeThreadOwnsLock());
for (j = 1; j < c->argc; j++)
pubsubSubscribePattern(c,c->argv[j]);

View File

@ -67,7 +67,9 @@ typedef struct quicklistNode {
* When quicklistNode->zl is compressed, node->zl points to a quicklistLZF */
typedef struct quicklistLZF {
unsigned int sz; /* LZF size in bytes*/
char compressed[ZERO_LENGTH_ARRAY_LENGTH];
#ifndef __cplusplus
char compressed[];
#endif
} quicklistLZF;
/* quicklist is a 40 byte struct (on 64-bit systems) describing a quicklist.

View File

@ -30,9 +30,17 @@
#ifndef REDIS_RANDOM_H
#define REDIS_RANDOM_H

/* int32_t and INT32_MAX are used below; include their header here so this
 * file does not depend on what the includer happened to pull in first. */
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Entry points of the lrand48-style generator (defined outside this header).
 * The explicit (void) makes this a real prototype in C: with empty
 * parentheses a C compiler would accept calls passing any arguments. */
int32_t redisLrand48(void);
void redisSrand48(int32_t seedval);

#ifdef __cplusplus
}
#endif

/* Presumably the largest value redisLrand48() can return -- confirm against
 * the implementation. */
#define REDIS_LRAND48_MAX INT32_MAX

#endif

View File

@ -39,6 +39,10 @@
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Representation of a radix tree as implemented in this file, that contains
* the strings "foo", "foobar" and "footer" after the insertion of each
* word. When the node represents a key inside the radix tree, we write it
@ -133,7 +137,9 @@ typedef struct raxNode {
* children, an additional value pointer is present (as you can see
* in the representation above as "value-ptr" field).
*/
unsigned char data[ZERO_LENGTH_ARRAY_LENGTH];
#ifndef __cplusplus
unsigned char data[];
#endif
} raxNode;
typedef struct rax {
@ -219,4 +225,8 @@ void raxSetDebugMsg(int onoff);
* in a low level way, so this function is exported as well. */
void raxSetData(raxNode *n, void *data);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,7 +1,7 @@
extern "C" {
#include "rio.h"
#include "server.h"
}
#include "server.h"
#include <unistd.h>
#include <sys/wait.h>

View File

@ -1862,7 +1862,7 @@ void rdbLoadProgressCallback(rio *r, const void *buf, size_t len) {
if (server.masterhost && server.repl_state == REPL_STATE_TRANSFER)
replicationSendNewlineToMaster();
loadingProgress(r->processed_bytes);
processEventsWhileBlocked();
processEventsWhileBlocked(serverTL - server.rgthreadvar);
}
}
@ -2140,6 +2140,7 @@ void backgroundSaveDoneHandlerDisk(int exitcode, int bysignal) {
* This function covers the case of RDB -> Slaves socket transfers for
* diskless replication. */
void backgroundSaveDoneHandlerSocket(int exitcode, int bysignal) {
serverAssert(aeThreadOwnsLock());
uint64_t *ok_slaves;
if (!bysignal && exitcode == 0) {
@ -2259,6 +2260,7 @@ void killRDBChild(void) {
/* Spawn an RDB child that writes the RDB to the sockets of the slaves
* that are currently in SLAVE_STATE_WAIT_BGSAVE_START state. */
int rdbSaveToSlavesSockets(rdbSaveInfo *rsi) {
serverAssert(aeThreadOwnsLock());
int *fds;
uint64_t *clientids;
int numfds;

View File

@ -39,6 +39,7 @@
#include <sys/time.h>
#include <signal.h>
#include <assert.h>
#include <math.h>
#include <sds.h> /* Use hiredis sds. */
#include "ae.h"
@ -49,6 +50,7 @@
#define UNUSED(V) ((void) V)
#define RANDPTR_INITIAL_SIZE 8
#define MAX_LATENCY_PRECISION 3
static struct config {
aeEventLoop *el;
@ -80,6 +82,7 @@ static struct config {
sds dbnumstr;
char *tests;
char *auth;
int precision;
} config;
typedef struct _client {
@ -429,8 +432,19 @@ static int compareLatency(const void *a, const void *b) {
return (*(long long*)a)-(*(long long*)b);
}
/* Integer exponentiation by squaring.
 *
 * Returns 'base' raised to the power 'exp'. 'exp' must be non-negative and
 * the mathematical result must fit in an int; ipow(x, 0) is 1 for every x.
 *
 * The base is squared only while exponent bits remain to be consumed:
 * squaring it once more after the last bit would compute a value that is
 * never used and could overflow a signed int (undefined behavior) even
 * though the returned result fits, e.g. ipow(10, 8). */
static int ipow(int base, int exp) {
    int result = 1;
    while (exp) {
        if (exp & 1) result *= base;
        exp /= 2;
        if (exp) base *= base; /* Skip the useless final squaring. */
    }
    return result;
}
static void showLatencyReport(void) {
int i, curlat = 0;
int usbetweenlat = ipow(10, MAX_LATENCY_PRECISION-config.precision);
float perc, reqpersec;
reqpersec = (float)config.requests_finished/((float)config.totlatency/1000);
@ -445,10 +459,21 @@ static void showLatencyReport(void) {
qsort(config.latency,config.requests,sizeof(long long),compareLatency);
for (i = 0; i < config.requests; i++) {
if (config.latency[i]/1000 != curlat || i == (config.requests-1)) {
curlat = config.latency[i]/1000;
if (config.latency[i]/usbetweenlat != curlat ||
i == (config.requests-1))
{
curlat = config.latency[i]/usbetweenlat;
perc = ((float)(i+1)*100)/config.requests;
printf("%.2f%% <= %d milliseconds\n", perc, curlat);
printf("%.2f%% <= %.*f milliseconds\n", perc, config.precision,
curlat/pow(10.0, config.precision));
/* Above 2 milliseconds of latency, splitting the percentages by
* decimals would just add a lot of noise to the output. */
if (config.latency[i] > 2000) {
config.precision = 0;
usbetweenlat = ipow(10,
MAX_LATENCY_PRECISION-config.precision);
}
}
}
printf("%.2f requests per second\n\n", reqpersec);
@ -547,6 +572,11 @@ int parseOptions(int argc, const char **argv) {
if (lastarg) goto invalid;
config.dbnum = atoi(argv[++i]);
config.dbnumstr = sdsfromlonglong(config.dbnum);
} else if (!strcmp(argv[i],"--precision")) {
if (lastarg) goto invalid;
config.precision = atoi(argv[++i]);
if (config.precision < 0) config.precision = 0;
if (config.precision > MAX_LATENCY_PRECISION) config.precision = MAX_LATENCY_PRECISION;
} else if (!strcmp(argv[i],"--help")) {
exit_status = 0;
goto usage;
@ -586,6 +616,7 @@ usage:
" -e If server replies with errors, show them on stdout.\n"
" (no more than 1 error per second is displayed)\n"
" -q Quiet. Just show query/sec values\n"
" --precision Number of decimal places to display in latency output (default 0)\n"
" --csv Output in CSV format\n"
" -l Loop. Run the tests forever\n"
" -t <tests> Only run the comma separated list of tests. The test\n"
@ -682,6 +713,7 @@ int main(int argc, const char **argv) {
config.tests = NULL;
config.dbnum = 0;
config.auth = NULL;
config.precision = 1;
i = parseOptions(argc,argv);
argc -= i;

View File

@ -211,6 +211,8 @@ static struct config {
char *pattern;
char *rdb_filename;
int bigkeys;
int memkeys;
unsigned memkeys_samples;
int hotkeys;
int stdinarg; /* get last arg from stdin. (-x option) */
char *auth;
@ -1337,6 +1339,12 @@ static int parseOptions(int argc, char **argv) {
config.pipe_timeout = atoi(argv[++i]);
} else if (!strcmp(argv[i],"--bigkeys")) {
config.bigkeys = 1;
} else if (!strcmp(argv[i],"--memkeys")) {
config.memkeys = 1;
config.memkeys_samples = 0; /* use redis default */
} else if (!strcmp(argv[i],"--memkeys-samples")) {
config.memkeys = 1;
config.memkeys_samples = atoi(argv[++i]);
} else if (!strcmp(argv[i],"--hotkeys")) {
config.hotkeys = 1;
} else if (!strcmp(argv[i],"--eval") && !lastarg) {
@ -1535,7 +1543,10 @@ static void usage(void) {
" --pipe-timeout <n> In --pipe mode, abort with error if after sending all data.\n"
" no reply is received within <n> seconds.\n"
" Default timeout: %d. Use 0 to wait forever.\n"
" --bigkeys Sample Redis keys looking for big keys.\n"
" --bigkeys Sample Redis keys looking for keys with many elements (complexity).\n"
" --memkeys Sample Redis keys looking for keys consuming a lot of memory.\n"
" --memkeys-samples <n> Sample Redis keys looking for keys consuming a lot of memory.\n"
" And define number of key elements to sample\n"
" --hotkeys Sample Redis keys looking for hot keys.\n"
" only works when maxmemory-policy is *lfu.\n"
" --scan List all keys using the SCAN command.\n"
@ -4919,6 +4930,12 @@ static int clusterManagerCommandCreate(int argc, char **argv) {
cursor += slots_per_node;
}
/* Rotating the list sometimes helps to get better initial
* anti-affinity before the optimizer runs. */
clusterManagerNode *first_node = interleaved[0];
for (i = 0; i < (interleaved_len - 1); i++)
interleaved[i] = interleaved[i + 1];
interleaved[interleaved_len - 1] = first_node;
int assign_unused = 0, available_count = interleaved_len;
assign_replicas:
for (i = 0; i < masters_count; i++) {
@ -6142,9 +6159,31 @@ static void latencyDistMode(void) {
* Slave mode
*--------------------------------------------------------------------------- */
#define RDB_EOF_MARK_SIZE 40
/* Send "REPLCONF <arg1> <arg2>" over the global hiredis context and consume
 * the reply.
 *
 * A NULL reply is treated as an I/O error and terminates the process with
 * exit status 1. An error reply is only reported on stderr and otherwise
 * ignored, because older servers may not support the option being set. */
void sendReplconf(const char* arg1, const char* arg2) {
    /* Diagnostics go to stderr, like every other message in this function.
     * NOTE(review): getRDB() special-cases the file name "-", presumably to
     * write the RDB payload to stdout; a progress line printed on stdout
     * would then end up inside the dump. */
    fprintf(stderr, "sending REPLCONF %s %s\n", arg1, arg2);
    redisReply *reply = redisCommand(context, "REPLCONF %s %s", arg1, arg2);

    /* Handle any error conditions */
    if (reply == NULL) {
        fprintf(stderr, "\nI/O error\n");
        exit(1);
    }
    if (reply->type == REDIS_REPLY_ERROR) {
        /* non fatal, old versions may not support it */
        fprintf(stderr, "REPLCONF %s error: %s\n", arg1, reply->str);
    }
    freeReplyObject(reply);
}
/* Advertise the "eof" capability to the server by sending
 * "REPLCONF capa eof". Declared with (void) so that it is a proper
 * prototype in C rather than an unprototyped declarator. */
void sendCapa(void) {
    sendReplconf("capa", "eof");
}
/* Sends SYNC and reads the number of bytes in the payload. Used both by
* slaveMode() and getRDB(). */
unsigned long long sendSync(int fd) {
* slaveMode() and getRDB().
* returns 0 in case an EOF marker is used. */
unsigned long long sendSync(int fd, char *out_eof) {
/* To start we need to send the SYNC command and return the payload.
* The hiredis client lib does not understand this part of the protocol
* and we don't want to mess with its buffers, so everything is performed
@ -6174,17 +6213,33 @@ unsigned long long sendSync(int fd) {
printf("SYNC with master failed: %s\n", buf);
exit(1);
}
if (strncmp(buf+1,"EOF:",4) == 0 && strlen(buf+5) >= RDB_EOF_MARK_SIZE) {
memcpy(out_eof, buf+5, RDB_EOF_MARK_SIZE);
return 0;
}
return strtoull(buf+1,NULL,10);
}
static void slaveMode(void) {
int fd = context->fd;
unsigned long long payload = sendSync(fd);
static char eofmark[RDB_EOF_MARK_SIZE];
static char lastbytes[RDB_EOF_MARK_SIZE];
static int usemark = 0;
unsigned long long payload = sendSync(fd, eofmark);
char buf[1024];
int original_output = config.output;
fprintf(stderr,"SYNC with master, discarding %llu "
"bytes of bulk transfer...\n", payload);
if (payload == 0) {
payload = ULLONG_MAX;
memset(lastbytes,0,RDB_EOF_MARK_SIZE);
usemark = 1;
fprintf(stderr,"SYNC with master, discarding "
"bytes of bulk transfer until EOF marker...\n");
} else {
fprintf(stderr,"SYNC with master, discarding %llu "
"bytes of bulk transfer...\n", payload);
}
/* Discard the payload. */
while(payload) {
@ -6196,8 +6251,29 @@ static void slaveMode(void) {
exit(1);
}
payload -= nread;
if (usemark) {
/* Update the last bytes array, and check if it matches our delimiter.*/
if (nread >= RDB_EOF_MARK_SIZE) {
memcpy(lastbytes,buf+nread-RDB_EOF_MARK_SIZE,RDB_EOF_MARK_SIZE);
} else {
int rem = RDB_EOF_MARK_SIZE-nread;
memmove(lastbytes,lastbytes+nread,rem);
memcpy(lastbytes+rem,buf,nread);
}
if (memcmp(lastbytes,eofmark,RDB_EOF_MARK_SIZE) == 0)
break;
}
}
fprintf(stderr,"SYNC done. Logging commands from master.\n");
if (usemark) {
unsigned long long offset = ULLONG_MAX - payload;
fprintf(stderr,"SYNC done after %llu bytes. Logging commands from master.\n", offset);
/* put the slave online */
sleep(1);
sendReplconf("ACK", "0");
} else
fprintf(stderr,"SYNC done. Logging commands from master.\n");
/* Now we can use hiredis to read the incoming protocol. */
config.output = OUTPUT_CSV;
@ -6214,11 +6290,22 @@ static void slaveMode(void) {
static void getRDB(void) {
int s = context->fd;
int fd;
unsigned long long payload = sendSync(s);
static char eofmark[RDB_EOF_MARK_SIZE];
static char lastbytes[RDB_EOF_MARK_SIZE];
static int usemark = 0;
unsigned long long payload = sendSync(s, eofmark);
char buf[4096];
fprintf(stderr,"SYNC sent to master, writing %llu bytes to '%s'\n",
payload, config.rdb_filename);
if (payload == 0) {
payload = ULLONG_MAX;
memset(lastbytes,0,RDB_EOF_MARK_SIZE);
usemark = 1;
fprintf(stderr,"SYNC sent to master, writing bytes of bulk transfer until EOF marker to '%s'\n",
config.rdb_filename);
} else {
fprintf(stderr,"SYNC sent to master, writing %llu bytes to '%s'\n",
payload, config.rdb_filename);
}
/* Write to file. */
if (!strcmp(config.rdb_filename,"-")) {
@ -6247,11 +6334,31 @@ static void getRDB(void) {
exit(1);
}
payload -= nread;
if (usemark) {
/* Update the last bytes array, and check if it matches our delimiter.*/
if (nread >= RDB_EOF_MARK_SIZE) {
memcpy(lastbytes,buf+nread-RDB_EOF_MARK_SIZE,RDB_EOF_MARK_SIZE);
} else {
int rem = RDB_EOF_MARK_SIZE-nread;
memmove(lastbytes,lastbytes+nread,rem);
memcpy(lastbytes+rem,buf,nread);
}
if (memcmp(lastbytes,eofmark,RDB_EOF_MARK_SIZE) == 0)
break;
}
}
if (usemark) {
payload = ULLONG_MAX - payload - RDB_EOF_MARK_SIZE;
if (ftruncate(fd, payload) == -1)
fprintf(stderr,"ftruncate failed: %s.\n", strerror(errno));
fprintf(stderr,"Transfer finished with success after %llu bytes\n", payload);
} else {
fprintf(stderr,"Transfer finished with success.\n");
}
close(s); /* Close the file descriptor ASAP as fsync() may take time. */
fsync(fd);
close(fd);
fprintf(stderr,"Transfer finished with success.\n");
exit(0);
}
@ -6419,15 +6526,6 @@ static void pipeMode(void) {
* Find big keys
*--------------------------------------------------------------------------- */
#define TYPE_STRING 0
#define TYPE_LIST 1
#define TYPE_SET 2
#define TYPE_HASH 3
#define TYPE_ZSET 4
#define TYPE_STREAM 5
#define TYPE_NONE 6
#define TYPE_COUNT 7
static redisReply *sendScan(unsigned long long *it) {
redisReply *reply = redisCommand(context, "SCAN %llu", *it);
@ -6474,28 +6572,51 @@ static int getDbSize(void) {
return size;
}
static int toIntType(char *key, char *type) {
if(!strcmp(type, "string")) {
return TYPE_STRING;
} else if(!strcmp(type, "list")) {
return TYPE_LIST;
} else if(!strcmp(type, "set")) {
return TYPE_SET;
} else if(!strcmp(type, "hash")) {
return TYPE_HASH;
} else if(!strcmp(type, "zset")) {
return TYPE_ZSET;
} else if(!strcmp(type, "stream")) {
return TYPE_STREAM;
} else if(!strcmp(type, "none")) {
return TYPE_NONE;
} else {
fprintf(stderr, "Unknown type '%s' for key '%s'\n", type, key);
exit(1);
}
/* Per-type statistics gathered while scanning the keyspace, plus the
 * information needed to query and print the size of a key of that type.
 *
 * NOTE: the type templates in this file initialize this struct
 * positionally (name, sizecmd, sizeunit), so the order of the first three
 * fields must not change. */
typedef struct {
/* Type name; typeinfo_add() replaces it with an sds copy that is also used
 * as the dictionary key. */
char *name;
/* Command used to obtain the size of a key of this type, or NULL when no
 * such command is known for the type. */
char *sizecmd;
/* Unit printed next to sizes obtained through sizecmd. */
char *sizeunit;
/* Largest size seen so far for a key of this type. */
unsigned long long biggest;
/* Number of sampled keys of this type. */
unsigned long long count;
/* Sum of the sizes of all the sampled keys of this type. */
unsigned long long totalsize;
/* Name of the key that produced 'biggest', NULL until one is recorded. */
sds biggest_key;
} typeinfo;
typeinfo type_string = { "string", "STRLEN", "bytes" };
typeinfo type_list = { "list", "LLEN", "items" };
typeinfo type_set = { "set", "SCARD", "members" };
typeinfo type_hash = { "hash", "HLEN", "fields" };
typeinfo type_zset = { "zset", "ZCARD", "members" };
typeinfo type_stream = { "stream", "XLEN", "entries" };
typeinfo type_other = { "other", NULL, "?" };
/* Allocate a new typeinfo initialized as a copy of 'type_template', give it
 * its own sds copy of 'name', and register it in the 'types' dictionary
 * under that name. The dictionary key and entry->name are the same sds
 * string. Returns the newly allocated entry. */
static typeinfo* typeinfo_add(dict *types, char* name, typeinfo* type_template) {
    typeinfo *entry = zmalloc(sizeof(typeinfo), MALLOC_LOCAL);
    sds key = sdsnew(name);

    *entry = *type_template;
    entry->name = key;
    dictAdd(types, key, entry);
    return entry;
}
static void getKeyTypes(redisReply *keys, int *types) {
/* Value destructor for the typeinfo dictionary: releases the recorded
 * biggest key name (when there is one), the type name, and finally the
 * typeinfo itself. 'priv_data' is not used. */
void type_free(void* priv_data, void* val) {
    UNUSED(priv_data);
    typeinfo *entry = val;

    if (entry->biggest_key != NULL) {
        sdsfree(entry->biggest_key);
    }
    sdsfree(entry->name);
    zfree(entry);
}
/* Dictionary type mapping a type name (sds) to its typeinfo.
 * No key destructor is installed because the key is the same sds string
 * stored in the value's 'name' field, which the value destructor
 * type_free() releases together with the rest of the entry. */
static dictType typeinfoDictType = {
dictSdsHash, /* hash function */
NULL, /* key dup */
NULL, /* val dup */
dictSdsKeyCompare, /* key compare */
NULL, /* key destructor (owned by the value)*/
type_free /* val destructor */
};
static void getKeyTypes(dict *types_dict, redisReply *keys, typeinfo **types) {
redisReply *reply;
unsigned int i;
@ -6521,32 +6642,47 @@ static void getKeyTypes(redisReply *keys, int *types) {
exit(1);
}
types[i] = toIntType(keys->element[i]->str, reply->str);
sds typereply = sdsnew(reply->str);
dictEntry *de = dictFind(types_dict, typereply);
sdsfree(typereply);
typeinfo *type = NULL;
if (de)
type = dictGetVal(de);
else if (strcmp(reply->str, "none")) /* create new types for modules, (but not for deleted keys) */
type = typeinfo_add(types_dict, reply->str, &type_other);
types[i] = type;
freeReplyObject(reply);
}
}
static void getKeySizes(redisReply *keys, int *types,
unsigned long long *sizes)
static void getKeySizes(redisReply *keys, typeinfo **types,
unsigned long long *sizes, int memkeys,
unsigned memkeys_samples)
{
redisReply *reply;
char *sizecmds[] = {"STRLEN","LLEN","SCARD","HLEN","ZCARD"};
unsigned int i;
/* Pipeline size commands */
for(i=0;i<keys->elements;i++) {
/* Skip keys that were deleted */
if(types[i]==TYPE_NONE)
/* Skip keys that disappeared between SCAN and TYPE (or unknown types when not in memkeys mode) */
if(!types[i] || (!types[i]->sizecmd && !memkeys))
continue;
redisAppendCommand(context, "%s %s", sizecmds[types[i]],
keys->element[i]->str);
if (!memkeys)
redisAppendCommand(context, "%s %s",
types[i]->sizecmd, keys->element[i]->str);
else if (memkeys_samples==0)
redisAppendCommand(context, "%s %s %s",
"MEMORY", "USAGE", keys->element[i]->str);
else
redisAppendCommand(context, "%s %s %s SAMPLES %u",
"MEMORY", "USAGE", keys->element[i]->str, memkeys_samples);
}
/* Retrieve sizes */
for(i=0;i<keys->elements;i++) {
/* Skip keys that disappeared between SCAN and TYPE */
if(types[i] == TYPE_NONE) {
/* Skip keys that disappeared between SCAN and TYPE (or unknown types when not in memkeys mode) */
if(!types[i] || (!types[i]->sizecmd && !memkeys)) {
sizes[i] = 0;
continue;
}
@ -6561,7 +6697,8 @@ static void getKeySizes(redisReply *keys, int *types,
* added as a different type between TYPE and SIZE */
fprintf(stderr,
"Warning: %s on '%s' failed (may have changed type)\n",
sizecmds[types[i]], keys->element[i]->str);
!memkeys? types[i]->sizecmd: "MEMORY USAGE",
keys->element[i]->str);
sizes[i] = 0;
} else {
sizes[i] = reply->integer;
@ -6571,17 +6708,23 @@ static void getKeySizes(redisReply *keys, int *types,
}
}
static void findBigKeys(void) {
unsigned long long biggest[TYPE_COUNT] = {0}, counts[TYPE_COUNT] = {0}, totalsize[TYPE_COUNT] = {0};
static void findBigKeys(int memkeys, unsigned memkeys_samples) {
unsigned long long sampled = 0, total_keys, totlen=0, *sizes=NULL, it=0;
sds maxkeys[TYPE_COUNT] = {0};
char *typename[] = {"string","list","set","hash","zset","stream","none"};
char *typeunit[] = {"bytes","items","members","fields","members","entries",""};
redisReply *reply, *keys;
unsigned int arrsize=0, i;
int type, *types=NULL;
dictIterator *di;
dictEntry *de;
typeinfo **types = NULL;
double pct;
dict *types_dict = dictCreate(&typeinfoDictType, NULL);
typeinfo_add(types_dict, "string", &type_string);
typeinfo_add(types_dict, "list", &type_list);
typeinfo_add(types_dict, "set", &type_set);
typeinfo_add(types_dict, "hash", &type_hash);
typeinfo_add(types_dict, "zset", &type_zset);
typeinfo_add(types_dict, "stream", &type_stream);
/* Total keys pre scanning */
total_keys = getDbSize();
@ -6590,15 +6733,6 @@ static void findBigKeys(void) {
printf("# average sizes per key type. You can use -i 0.1 to sleep 0.1 sec\n");
printf("# per 100 SCAN commands (not usually needed).\n\n");
/* New up sds strings to keep track of overall biggest per type */
for(i=0;i<TYPE_NONE; i++) {
maxkeys[i] = sdsempty();
if(!maxkeys[i]) {
fprintf(stderr, "Failed to allocate memory for largest key names!\n");
exit(1);
}
}
/* SCAN loop */
do {
/* Calculate approximate percentage completion */
@ -6622,34 +6756,38 @@ static void findBigKeys(void) {
}
/* Retrieve types and then sizes */
getKeyTypes(keys, types);
getKeySizes(keys, types, sizes);
getKeyTypes(types_dict, keys, types);
getKeySizes(keys, types, sizes, memkeys, memkeys_samples);
/* Now update our stats */
for(i=0;i<keys->elements;i++) {
if((type = types[i]) == TYPE_NONE)
typeinfo *type = types[i];
/* Skip keys that disappeared between SCAN and TYPE */
if(!type)
continue;
totalsize[type] += sizes[i];
counts[type]++;
type->totalsize += sizes[i];
type->count++;
totlen += keys->element[i]->len;
sampled++;
if(biggest[type]<sizes[i]) {
if(type->biggest<sizes[i]) {
printf(
"[%05.2f%%] Biggest %-6s found so far '%s' with %llu %s\n",
pct, typename[type], keys->element[i]->str, sizes[i],
typeunit[type]);
pct, type->name, keys->element[i]->str, sizes[i],
!memkeys? type->sizeunit: "bytes");
/* Keep track of biggest key name for this type */
maxkeys[type] = sdscpy(maxkeys[type], keys->element[i]->str);
if(!maxkeys[type]) {
if (type->biggest_key)
sdsfree(type->biggest_key);
type->biggest_key = sdsnew(keys->element[i]->str);
if(!type->biggest_key) {
fprintf(stderr, "Failed to allocate memory for key!\n");
exit(1);
}
/* Keep track of the biggest size for this type */
biggest[type] = sizes[i];
type->biggest = sizes[i];
}
/* Update overall progress */
@ -6677,26 +6815,29 @@ static void findBigKeys(void) {
totlen, totlen ? (double)totlen/sampled : 0);
/* Output the biggest keys we found, for types we did find */
for(i=0;i<TYPE_NONE;i++) {
if(sdslen(maxkeys[i])>0) {
printf("Biggest %6s found '%s' has %llu %s\n", typename[i], maxkeys[i],
biggest[i], typeunit[i]);
di = dictGetIterator(types_dict);
while ((de = dictNext(di))) {
typeinfo *type = dictGetVal(de);
if(type->biggest_key) {
printf("Biggest %6s found '%s' has %llu %s\n", type->name, type->biggest_key,
type->biggest, !memkeys? type->sizeunit: "bytes");
}
}
dictReleaseIterator(di);
printf("\n");
for(i=0;i<TYPE_NONE;i++) {
di = dictGetIterator(types_dict);
while ((de = dictNext(di))) {
typeinfo *type = dictGetVal(de);
printf("%llu %ss with %llu %s (%05.2f%% of keys, avg size %.2f)\n",
counts[i], typename[i], totalsize[i], typeunit[i],
sampled ? 100 * (double)counts[i]/sampled : 0,
counts[i] ? (double)totalsize[i]/counts[i] : 0);
type->count, type->name, type->totalsize, !memkeys? type->sizeunit: "bytes",
sampled ? 100 * (double)type->count/sampled : 0,
type->count ? (double)type->totalsize/type->count : 0);
}
dictReleaseIterator(di);
/* Free sds strings containing max keys */
for(i=0;i<TYPE_NONE;i++) {
sdsfree(maxkeys[i]);
}
dictRelease(types_dict);
/* Success! */
exit(0);
@ -7271,12 +7412,14 @@ int main(int argc, char **argv) {
/* Slave mode */
if (config.slave_mode) {
if (cliConnect(0) == REDIS_ERR) exit(1);
sendCapa();
slaveMode();
}
/* Get RDB mode. */
if (config.getrdb_mode) {
if (cliConnect(0) == REDIS_ERR) exit(1);
sendCapa();
getRDB();
}
@ -7289,7 +7432,19 @@ int main(int argc, char **argv) {
/* Find big keys */
if (config.bigkeys) {
if (cliConnect(0) == REDIS_ERR) exit(1);
findBigKeys();
findBigKeys(0, 0);
}
/* Find large keys */
if (config.memkeys) {
if (cliConnect(0) == REDIS_ERR) exit(1);
findBigKeys(1, config.memkeys_samples);
}
/* Find hot keys */
if (config.hotkeys) {
if (cliConnect(0) == REDIS_ERR) exit(1);
findHotKeys();
}
/* Find hot keys */

View File

@ -1,6 +1,7 @@
/* Asynchronous replication implementation.
*
* Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
* Copyright (c) 2019 John Sully <john at eqalpha dot com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -36,6 +37,7 @@
#include <fcntl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <mutex>
void replicationDiscardCachedMaster(void);
void replicationResurrectCachedMaster(int newfd);
@ -76,7 +78,7 @@ char *replicationGetSlaveName(client *c) {
void createReplicationBacklog(void) {
serverAssert(server.repl_backlog == NULL);
server.repl_backlog = zmalloc(server.repl_backlog_size, MALLOC_LOCAL);
server.repl_backlog = (char*)zmalloc(server.repl_backlog_size, MALLOC_LOCAL);
server.repl_backlog_histlen = 0;
server.repl_backlog_idx = 0;
@ -105,7 +107,7 @@ void resizeReplicationBacklog(long long newsize) {
* worse often we need to alloc additional space before freeing the
* old buffer. */
zfree(server.repl_backlog);
server.repl_backlog = zmalloc(server.repl_backlog_size, MALLOC_LOCAL);
server.repl_backlog = (char*)zmalloc(server.repl_backlog_size, MALLOC_LOCAL);
server.repl_backlog_histlen = 0;
server.repl_backlog_idx = 0;
/* Next byte we have is... the next since the buffer is empty. */
@ -114,6 +116,7 @@ void resizeReplicationBacklog(long long newsize) {
}
void freeReplicationBacklog(void) {
serverAssert(aeThreadOwnsLock());
serverAssert(listLength(server.slaves) == 0);
zfree(server.repl_backlog);
server.repl_backlog = NULL;
@ -124,7 +127,8 @@ void freeReplicationBacklog(void) {
* server.master_repl_offset, because there is no case where we want to feed
* the backlog without incrementing the offset. */
void feedReplicationBacklog(void *ptr, size_t len) {
unsigned char *p = ptr;
serverAssert(aeThreadOwnsLock());
unsigned char *p = (unsigned char*)ptr;
server.master_repl_offset += len;
@ -159,7 +163,7 @@ void feedReplicationBacklogWithObject(robj *o) {
len = ll2string(llstr,sizeof(llstr),(long)ptrFromObj(o));
p = llstr;
} else {
len = sdslen(ptrFromObj(o));
len = sdslen((sds)ptrFromObj(o));
p = ptrFromObj(o);
}
feedReplicationBacklog(p,len);
@ -175,6 +179,7 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
listIter li;
int j, len;
char llstr[LONG_STR_SIZE];
serverAssert(aeThreadOwnsLock());
/* If the instance is not a top level master, return ASAP: we'll just proxy
* the stream of data we receive from our master instead, in order to
@ -190,6 +195,12 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
/* We can't have slaves attached and no backlog. */
serverAssert(!(listLength(slaves) != 0 && server.repl_backlog == NULL));
/* Get the lock on all slaves */
listRewind(slaves,&li);
while((ln = listNext(&li))) {
((client*)ln->value)->lock.lock();
}
/* Send SELECT command to every slave if needed. */
if (server.slaveseldb != dictid) {
robj *selectcmd;
@ -213,9 +224,9 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
/* Send it to slaves. */
listRewind(slaves,&li);
while((ln = listNext(&li))) {
client *slave = ln->value;
client *slave = (client*)ln->value;
if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) continue;
addReply(slave,selectcmd);
addReplyAsync(slave,selectcmd);
}
if (dictid < 0 || dictid >= PROTO_SHARED_SELECT_CMDS)
@ -253,7 +264,7 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
/* Write the command to every slave. */
listRewind(slaves,&li);
while((ln = listNext(&li))) {
client *slave = ln->value;
client *slave = (client*)ln->value;
/* Don't feed slaves that are still waiting for BGSAVE to start */
if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) continue;
@ -263,12 +274,18 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
* or are already in sync with the master. */
/* Add the multi bulk length. */
addReplyArrayLen(slave,argc);
addReplyArrayLenAsync(slave,argc);
/* Finally any additional argument that was not stored inside the
* static buffer if any (from j to argc). */
for (j = 0; j < argc; j++)
addReplyBulk(slave,argv[j]);
addReplyBulkAsync(slave,argv[j]);
}
/* Release the lock on all slaves */
listRewind(slaves,&li);
while((ln = listNext(&li))) {
((client*)ln->value)->lock.unlock();
}
}
@ -292,12 +309,16 @@ void replicationFeedSlavesFromMasterStream(list *slaves, char *buf, size_t bufle
if (server.repl_backlog) feedReplicationBacklog(buf,buflen);
listRewind(slaves,&li);
while((ln = listNext(&li))) {
client *slave = ln->value;
client *slave = (client*)ln->value;
std::lock_guard<decltype(slave->lock)> ulock(slave->lock);
/* Don't feed slaves that are still waiting for BGSAVE to start */
if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) continue;
addReplyProto(slave,buf,buflen);
addReplyProtoAsync(slave,buf,buflen);
}
if (listLength(slaves))
ProcessPendingAsyncWrites(); // flush them to their respective threads
}
void replicationFeedMonitors(client *c, list *monitors, int dictid, robj **argv, int argc) {
@ -307,6 +328,7 @@ void replicationFeedMonitors(client *c, list *monitors, int dictid, robj **argv,
sds cmdrepr = sdsnew("+");
robj *cmdobj;
struct timeval tv;
serverAssert(aeThreadOwnsLock());
gettimeofday(&tv,NULL);
cmdrepr = sdscatprintf(cmdrepr,"%ld.%06ld ",(long)tv.tv_sec,(long)tv.tv_usec);
@ -323,7 +345,7 @@ void replicationFeedMonitors(client *c, list *monitors, int dictid, robj **argv,
cmdrepr = sdscatprintf(cmdrepr, "\"%ld\"", (long)ptrFromObj(argv[j]));
} else {
cmdrepr = sdscatrepr(cmdrepr,(char*)ptrFromObj(argv[j]),
sdslen(ptrFromObj(argv[j])));
sdslen((sds)ptrFromObj(argv[j])));
}
if (j != argc-1)
cmdrepr = sdscatlen(cmdrepr," ",1);
@ -333,8 +355,9 @@ void replicationFeedMonitors(client *c, list *monitors, int dictid, robj **argv,
listRewind(monitors,&li);
while((ln = listNext(&li))) {
client *monitor = ln->value;
addReply(monitor,cmdobj);
client *monitor = (client*)ln->value;
std::lock_guard<decltype(monitor->lock)> lock(monitor->lock);
addReplyAsync(monitor,cmdobj);
}
decrRefCount(cmdobj);
}
@ -445,8 +468,9 @@ int replicationSetupSlaveForFullResync(client *slave, long long offset) {
* On success return C_OK, otherwise C_ERR is returned and we proceed
* with the usual full resync. */
int masterTryPartialResynchronization(client *c) {
serverAssert(aeThreadOwnsLock());
long long psync_offset, psync_len;
char *master_replid = ptrFromObj(c->argv[1]);
char *master_replid = (char*)ptrFromObj(c->argv[1]);
char buf[128];
int buflen;
@ -519,7 +543,10 @@ int masterTryPartialResynchronization(client *c) {
buflen = snprintf(buf,sizeof(buf),"+CONTINUE\r\n");
}
if (write(c->fd,buf,buflen) != buflen) {
freeClientAsync(c);
if (FCorrectThread(c))
freeClient(c);
else
freeClientAsync(c);
return C_OK;
}
psync_len = addReplyReplicationBacklog(c,psync_offset);
@ -561,6 +588,7 @@ need_full_resync:
*
* Returns C_OK on success or C_ERR otherwise. */
int startBgsaveForReplication(int mincapa) {
serverAssert(aeThreadOwnsLock());
int retval;
int socket_target = server.repl_diskless_sync && (mincapa & SLAVE_CAPA_EOF);
listIter li;
@ -590,7 +618,7 @@ int startBgsaveForReplication(int mincapa) {
serverLog(LL_WARNING,"BGSAVE for replication failed");
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
client *slave = ln->value;
client *slave = (client*)ln->value;
if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) {
slave->flags &= ~CLIENT_SLAVE;
@ -608,7 +636,7 @@ int startBgsaveForReplication(int mincapa) {
if (!socket_target) {
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
client *slave = ln->value;
client *slave = (client*)ln->value;
if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) {
replicationSetupSlaveForFullResync(slave,
@ -656,12 +684,12 @@ void syncCommand(client *c) {
*
* So the slave knows the new replid and offset to try a PSYNC later
* if the connection with the master is lost. */
if (!strcasecmp(ptrFromObj(c->argv[0]),"psync")) {
if (!strcasecmp((const char*)ptrFromObj(c->argv[0]),"psync")) {
if (masterTryPartialResynchronization(c) == C_OK) {
server.stat_sync_partial_ok++;
return; /* No full resync needed, return. */
} else {
char *master_replid = ptrFromObj(c->argv[1]);
char *master_replid = (char*)ptrFromObj(c->argv[1]);
/* Increment stats for failed PSYNCs, but only if the
* replid is not "?", as this is used by slaves to force a full
@ -711,9 +739,10 @@ void syncCommand(client *c) {
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
slave = ln->value;
slave = (client*)ln->value;
if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END) break;
}
/* To attach this slave, we check that it has at least all the
* capabilities of the slave that triggered the current BGSAVE. */
if (ln && ((c->slave_capa & slave->slave_capa) == slave->slave_capa)) {
@ -785,15 +814,15 @@ void replconfCommand(client *c) {
/* Process every option-value pair. */
for (j = 1; j < c->argc; j+=2) {
if (!strcasecmp(ptrFromObj(c->argv[j]),"listening-port")) {
if (!strcasecmp((const char*)ptrFromObj(c->argv[j]),"listening-port")) {
long port;
if ((getLongFromObjectOrReply(c,c->argv[j+1],
&port,NULL) != C_OK))
return;
c->slave_listening_port = port;
} else if (!strcasecmp(ptrFromObj(c->argv[j]),"ip-address")) {
sds ip = ptrFromObj(c->argv[j+1]);
} else if (!strcasecmp((const char*)ptrFromObj(c->argv[j]),"ip-address")) {
sds ip = (sds)ptrFromObj(c->argv[j+1]);
if (sdslen(ip) < sizeof(c->slave_ip)) {
memcpy(c->slave_ip,ip,sdslen(ip)+1);
} else {
@ -801,13 +830,13 @@ void replconfCommand(client *c) {
"replica instance is too long: %zd bytes", sdslen(ip));
return;
}
} else if (!strcasecmp(ptrFromObj(c->argv[j]),"capa")) {
} else if (!strcasecmp((const char*)ptrFromObj(c->argv[j]),"capa")) {
/* Ignore capabilities not understood by this master. */
if (!strcasecmp(ptrFromObj(c->argv[j+1]),"eof"))
if (!strcasecmp((const char*)ptrFromObj(c->argv[j+1]),"eof"))
c->slave_capa |= SLAVE_CAPA_EOF;
else if (!strcasecmp(ptrFromObj(c->argv[j+1]),"psync2"))
else if (!strcasecmp((const char*)ptrFromObj(c->argv[j+1]),"psync2"))
c->slave_capa |= SLAVE_CAPA_PSYNC2;
} else if (!strcasecmp(ptrFromObj(c->argv[j]),"ack")) {
} else if (!strcasecmp((const char*)ptrFromObj(c->argv[j]),"ack")) {
/* REPLCONF ACK is used by a slave to inform the master of the
* amount of replication stream it has processed so far. It is an
* internal-only command that normal clients should never use. */
@ -826,7 +855,7 @@ void replconfCommand(client *c) {
putSlaveOnline(c);
/* Note: this command does not reply anything! */
return;
} else if (!strcasecmp(ptrFromObj(c->argv[j]),"getack")) {
} else if (!strcasecmp((const char*)ptrFromObj(c->argv[j]),"getack")) {
/* REPLCONF GETACK is used in order to request an ACK ASAP
* to the slave. */
if (server.masterhost && server.master) replicationSendAck();
@ -856,7 +885,8 @@ void putSlaveOnline(client *slave) {
slave->replstate = SLAVE_STATE_ONLINE;
slave->repl_put_online_on_ack = 0;
slave->repl_ack_time = server.unixtime; /* Prevent false timeout. */
if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE,
AssertCorrectThread(slave);
if (aeCreateFileEvent(server.rgthreadvar[slave->iel].el, slave->fd, AE_WRITABLE|AE_WRITE_THREADSAFE,
sendReplyToClient, slave) == AE_ERR) {
serverLog(LL_WARNING,"Unable to register writable event for replica bulk transfer: %s", strerror(errno));
freeClient(slave);
@ -868,9 +898,10 @@ void putSlaveOnline(client *slave) {
}
void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
client *slave = privdata;
client *slave = (client*)privdata;
UNUSED(el);
UNUSED(mask);
serverAssert(ielFromEventLoop(el) == slave->iel);
char buf[PROTO_IOBUF_LEN];
ssize_t nwritten, buflen;
@ -878,6 +909,7 @@ void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
* replication process. Currently the preamble is just the bulk count of
* the file in the form "$<length>\r\n". */
if (slave->replpreamble) {
serverAssert(slave->replpreamble[0] == '$');
nwritten = write(fd,slave->replpreamble,sdslen(slave->replpreamble));
if (nwritten == -1) {
serverLog(LL_VERBOSE,"Write error sending RDB preamble to replica: %s",
@ -918,7 +950,7 @@ void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
if (slave->repldboff == slave->repldbsize) {
close(slave->repldbfd);
slave->repldbfd = -1;
aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
aeDeleteFileEvent(el,slave->fd,AE_WRITABLE);
putSlaveOnline(slave);
}
}
@ -937,20 +969,22 @@ void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) {
* otherwise C_ERR is passed to the function.
* The 'type' argument is the type of the child that terminated
* (if it had a disk or socket target). */
void updateSlavesWaitingBgsave(int bgsaveerr, int type) {
void updateSlavesWaitingBgsave(int bgsaveerr, int type)
{
listNode *ln;
listIter li;
int startbgsave = 0;
int mincapa = -1;
listIter li;
serverAssert(aeThreadOwnsLock());
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
client *slave = ln->value;
client *slave = (client*)ln->value;
if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) {
startbgsave = 1;
mincapa = (mincapa == -1) ? slave->slave_capa :
(mincapa & slave->slave_capa);
(mincapa & slave->slave_capa);
} else if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END) {
struct redis_stat buf;
@ -973,13 +1007,19 @@ void updateSlavesWaitingBgsave(int bgsaveerr, int type) {
slave->repl_ack_time = server.unixtime; /* Timeout otherwise. */
} else {
if (bgsaveerr != C_OK) {
freeClient(slave);
if (FCorrectThread(slave))
freeClient(slave);
else
freeClientAsync(slave);
serverLog(LL_WARNING,"SYNC failed. BGSAVE child returned an error");
continue;
}
if ((slave->repldbfd = open(server.rdb_filename,O_RDONLY)) == -1 ||
redis_fstat(slave->repldbfd,&buf) == -1) {
freeClient(slave);
if (FCorrectThread(slave))
freeClient(slave);
else
freeClientAsync(slave);
serverLog(LL_WARNING,"SYNC failed. Can't open/stat DB after BGSAVE: %s", strerror(errno));
continue;
}
@ -989,15 +1029,28 @@ void updateSlavesWaitingBgsave(int bgsaveerr, int type) {
slave->replpreamble = sdscatprintf(sdsempty(),"$%lld\r\n",
(unsigned long long) slave->repldbsize);
aeDeleteFileEvent(server.el,slave->fd,AE_WRITABLE);
if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave) == AE_ERR) {
freeClient(slave);
continue;
if (FCorrectThread(slave))
{
aeDeleteFileEvent(server.rgthreadvar[slave->iel].el,slave->fd,AE_WRITABLE);
if (aeCreateFileEvent(server.rgthreadvar[slave->iel].el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave) == AE_ERR) {
freeClient(slave);
}
}
else
{
aePostFunction(server.rgthreadvar[slave->iel].el, [slave]{
aeDeleteFileEvent(server.rgthreadvar[slave->iel].el,slave->fd,AE_WRITABLE);
if (aeCreateFileEvent(server.rgthreadvar[slave->iel].el, slave->fd, AE_WRITABLE, sendBulkToSlave, slave) == AE_ERR) {
freeClient(slave);
}
});
}
}
}
}
if (startbgsave) startBgsaveForReplication(mincapa);
if (startbgsave)
startBgsaveForReplication(mincapa);
}
/* Change the current instance replication ID with a new, random one.
@ -1075,7 +1128,7 @@ void replicationEmptyDbCallback(void *privdata) {
* performed, this function materializes the master client we store
* at server.master, starting from the specified file descriptor. */
void replicationCreateMasterClient(int fd, int dbid) {
server.master = createClient(fd);
server.master = createClient(fd, serverTL - server.rgthreadvar);
server.master->flags |= CLIENT_MASTER;
server.master->authenticated = 1;
server.master->reploff = server.master_initial_offset;
@ -1112,12 +1165,18 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
UNUSED(privdata);
UNUSED(mask);
serverAssert(aeThreadOwnsLock());
/* Static vars used to hold the EOF mark, and the last bytes received
* from the server: when they match, we reached the end of the transfer. */
static char eofmark[CONFIG_RUN_ID_SIZE];
static char lastbytes[CONFIG_RUN_ID_SIZE];
static int usemark = 0;
/* When a mark is used, we want to detect EOF asap in order to avoid
* writing the EOF mark into the file... */
int eof_reached = 0;
/* If repl_transfer_size == -1 we still have to read the bulk length
* from the master reply. */
if (server.repl_transfer_size == -1) {
@ -1190,10 +1249,6 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
}
server.stat_net_input_bytes += nread;
/* When a mark is used, we want to detect EOF asap in order to avoid
* writing the EOF mark into the file... */
int eof_reached = 0;
if (usemark) {
/* Update the last bytes array, and check if it matches our delimiter.*/
if (nread >= CONFIG_RUN_ID_SIZE) {
@ -1276,7 +1331,7 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) {
* handler, otherwise it will get called recursively since
* rdbLoad() will call the event loop to process events from time to
* time for non blocking loading. */
aeDeleteFileEvent(server.el,server.repl_transfer_s,AE_READABLE);
aeDeleteFileEvent(el,server.repl_transfer_s,AE_READABLE);
serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Loading DB in memory");
rdbSaveInfo rsi = RDB_SAVE_INFO_INIT;
if (rdbLoad(server.rdb_filename,&rsi) != C_OK) {
@ -1435,8 +1490,8 @@ char *sendSynchronousCommand(int flags, int fd, ...) {
#define PSYNC_FULLRESYNC 3
#define PSYNC_NOT_SUPPORTED 4
#define PSYNC_TRY_LATER 5
int slaveTryPartialResynchronization(int fd, int read_reply) {
char *psync_replid;
int slaveTryPartialResynchronization(aeEventLoop *el, int fd, int read_reply) {
const char *psync_replid;
char psync_offset[32];
sds reply;
@ -1464,7 +1519,7 @@ int slaveTryPartialResynchronization(int fd, int read_reply) {
if (reply != NULL) {
serverLog(LL_WARNING,"Unable to send PSYNC to master: %s",reply);
sdsfree(reply);
aeDeleteFileEvent(server.el,fd,AE_READABLE);
aeDeleteFileEvent(el,fd,AE_READABLE);
return PSYNC_WRITE_ERROR;
}
return PSYNC_WAIT_REPLY;
@ -1479,7 +1534,7 @@ int slaveTryPartialResynchronization(int fd, int read_reply) {
return PSYNC_WAIT_REPLY;
}
aeDeleteFileEvent(server.el,fd,AE_READABLE);
aeDeleteFileEvent(el,fd,AE_READABLE);
if (!strncmp(reply,"+FULLRESYNC",11)) {
char *replid = NULL, *offset = NULL;
@ -1528,13 +1583,13 @@ int slaveTryPartialResynchronization(int fd, int read_reply) {
char *end = reply+9;
while(end[0] != '\r' && end[0] != '\n' && end[0] != '\0') end++;
if (end-start == CONFIG_RUN_ID_SIZE) {
char new[CONFIG_RUN_ID_SIZE+1];
memcpy(new,start,CONFIG_RUN_ID_SIZE);
new[CONFIG_RUN_ID_SIZE] = '\0';
char sznew[CONFIG_RUN_ID_SIZE+1];
memcpy(sznew,start,CONFIG_RUN_ID_SIZE);
sznew[CONFIG_RUN_ID_SIZE] = '\0';
if (strcmp(new,server.cached_master->replid)) {
if (strcmp(sznew,server.cached_master->replid)) {
/* Master ID changed. */
serverLog(LL_WARNING,"Master replication ID changed to %s",new);
serverLog(LL_WARNING,"Master replication ID changed to %s",sznew);
/* Set the old ID as our ID2, up to the current offset+1. */
memcpy(server.replid2,server.cached_master->replid,
@ -1543,8 +1598,8 @@ int slaveTryPartialResynchronization(int fd, int read_reply) {
/* Update the cached master ID and our own primary ID to the
* new one. */
memcpy(server.replid,new,sizeof(server.replid));
memcpy(server.cached_master->replid,new,sizeof(server.replid));
memcpy(server.replid,sznew,sizeof(server.replid));
memcpy(server.cached_master->replid,sznew,sizeof(server.replid));
/* Disconnect all the sub-slaves: they need to be notified. */
disconnectSlaves();
@ -1596,6 +1651,7 @@ int slaveTryPartialResynchronization(int fd, int read_reply) {
/* This handler fires when the non blocking connect was able to
* establish a connection with the master. */
void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
serverAssert(aeThreadOwnsLock());
char tmpfile[256], *err = NULL;
int dfd = -1, maxtries = 5;
int sockerr = 0, psync_result;
@ -1626,7 +1682,7 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
serverLog(LL_NOTICE,"Non blocking connect for SYNC fired the event.");
/* Delete the writable event so that the readable event remains
* registered and we can wait for the PONG reply. */
aeDeleteFileEvent(server.el,fd,AE_WRITABLE);
aeDeleteFileEvent(el,fd,AE_WRITABLE);
server.repl_state = REPL_STATE_RECEIVE_PONG;
/* Send the PING; don't check for errors at all, since the timeout
* will take care of this. */
@ -1661,7 +1717,13 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
/* AUTH with the master if required. */
if (server.repl_state == REPL_STATE_SEND_AUTH) {
if (server.masterauth) {
if (server.masteruser && server.masterauth) {
err = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"AUTH",
server.masteruser,server.masterauth,NULL);
if (err) goto write_error;
server.repl_state = REPL_STATE_RECEIVE_AUTH;
return;
} else if (server.masterauth) {
err = sendSynchronousCommand(SYNC_CMD_WRITE,fd,"AUTH",server.masterauth,NULL);
if (err) goto write_error;
server.repl_state = REPL_STATE_RECEIVE_AUTH;
@ -1775,7 +1837,7 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
* and the global offset, to try a partial resync at the next
* reconnection attempt. */
if (server.repl_state == REPL_STATE_SEND_PSYNC) {
if (slaveTryPartialResynchronization(fd,0) == PSYNC_WRITE_ERROR) {
if (slaveTryPartialResynchronization(el,fd,0) == PSYNC_WRITE_ERROR) {
err = sdsnew("Write error sending the PSYNC command.");
goto write_error;
}
@ -1791,7 +1853,7 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
goto error;
}
psync_result = slaveTryPartialResynchronization(fd,1);
psync_result = slaveTryPartialResynchronization(el,fd,1);
if (psync_result == PSYNC_WAIT_REPLY) return; /* Try again later... */
/* If the master is in a transient error, we should try to PSYNC
@ -1841,7 +1903,7 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
}
/* Setup the non blocking download of the bulk file. */
if (aeCreateFileEvent(server.el,fd, AE_READABLE,readSyncBulkPayload,NULL)
if (aeCreateFileEvent(el,fd, AE_READABLE,readSyncBulkPayload,NULL)
== AE_ERR)
{
serverLog(LL_WARNING,
@ -1860,7 +1922,7 @@ void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
return;
error:
aeDeleteFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE);
aeDeleteFileEvent(el,fd,AE_READABLE|AE_WRITABLE);
if (dfd != -1) close(dfd);
close(fd);
server.repl_transfer_s = -1;
@ -1884,7 +1946,7 @@ int connectWithMaster(void) {
return C_ERR;
}
if (aeCreateFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE,syncWithMaster,NULL) ==
if (aeCreateFileEvent(server.rgthreadvar[IDX_EVENT_LOOP_MAIN].el,fd,AE_READABLE|AE_WRITABLE,syncWithMaster,NULL) ==
AE_ERR)
{
close(fd);
@ -1905,7 +1967,7 @@ int connectWithMaster(void) {
void undoConnectWithMaster(void) {
int fd = server.repl_transfer_s;
aeDeleteFileEvent(server.el,fd,AE_READABLE|AE_WRITABLE);
aeDeleteFileEvent(server.rgthreadvar[IDX_EVENT_LOOP_MAIN].el,fd,AE_READABLE|AE_WRITABLE);
close(fd);
server.repl_transfer_s = -1;
}
@ -1952,7 +2014,10 @@ void replicationSetMaster(char *ip, int port) {
server.masterhost = sdsnew(ip);
server.masterport = port;
if (server.master) {
freeClient(server.master);
if (FCorrectThread(server.master))
freeClient(server.master);
else
freeClientAsync(server.master);
}
disconnectAllBlockedClients(); /* Clients blocked in master, now slave. */
@ -1976,7 +2041,12 @@ void replicationUnsetMaster(void) {
* used as secondary ID up to the current offset, and a new replication
* ID is created to continue with a new replication history. */
shiftReplicationId();
if (server.master) freeClient(server.master);
if (server.master) {
if (FCorrectThread(server.master))
freeClient(server.master);
else
freeClientAsync(server.master);
}
replicationDiscardCachedMaster();
cancelReplicationHandshake();
/* Disconnecting all the slaves is required: we need to inform slaves
@ -2020,8 +2090,8 @@ void replicaofCommand(client *c) {
/* The special host/port combination "NO" "ONE" turns the instance
* into a master. Otherwise the new master address is set. */
if (!strcasecmp(ptrFromObj(c->argv[1]),"no") &&
!strcasecmp(ptrFromObj(c->argv[2]),"one")) {
if (!strcasecmp((const char*)ptrFromObj(c->argv[1]),"no") &&
!strcasecmp((const char*)ptrFromObj(c->argv[2]),"one")) {
if (server.masterhost) {
replicationUnsetMaster();
sds client = catClientInfoString(sdsempty(),c);
@ -2036,21 +2106,21 @@ void replicaofCommand(client *c) {
return;
/* Check if we are already attached to the specified master */
if (server.masterhost && !strcasecmp(server.masterhost,ptrFromObj(c->argv[1]))
if (server.masterhost && !strcasecmp(server.masterhost,(const char*)ptrFromObj(c->argv[1]))
&& server.masterport == port) {
serverLog(LL_NOTICE,"REPLICAOF would result into synchronization with the master we are already connected with. No operation performed.");
addReplySds(c,sdsnew("+OK Already connected to specified master\r\n"));
addReplySdsAsync(c,sdsnew("+OK Already connected to specified master\r\n"));
return;
}
/* There was no previous master or the user specified a different one,
* we can continue. */
replicationSetMaster(ptrFromObj(c->argv[1]), port);
replicationSetMaster((char*)ptrFromObj(c->argv[1]), port);
sds client = catClientInfoString(sdsempty(),c);
serverLog(LL_NOTICE,"REPLICAOF %s:%d enabled (user request from '%s')",
server.masterhost, server.masterport, client);
sdsfree(client);
}
addReply(c,shared.ok);
addReplyAsync(c,shared.ok);
}
/* ROLE command: provide information about the role of the instance
@ -2069,7 +2139,7 @@ void roleCommand(client *c) {
mbcount = addReplyDeferredLen(c);
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
client *slave = ln->value;
client *slave = (client*)ln->value;
char ip[NET_IP_STR_LEN], *slaveip = slave->slave_ip;
if (slaveip[0] == '\0') {
@ -2086,7 +2156,7 @@ void roleCommand(client *c) {
}
setDeferredArrayLen(c,mbcount,slaves);
} else {
char *slavestate = NULL;
const char *slavestate = NULL;
addReplyArrayLen(c,5);
addReplyBulkCBuffer(c,"slave",5);
@ -2112,7 +2182,8 @@ void roleCommand(client *c) {
/* Send a REPLCONF ACK command to the master to inform it about the current
* processed offset. If we are not connected with a master, the command has
* no effects. */
void replicationSendAck(void) {
void replicationSendAck(void)
{
client *c = server.master;
if (c != NULL) {
@ -2148,6 +2219,8 @@ void replicationSendAck(void) {
void replicationCacheMaster(client *c) {
serverAssert(server.master != NULL && server.cached_master == NULL);
serverLog(LL_NOTICE,"Caching the disconnected master state.");
AssertCorrectThread(c);
std::lock_guard<decltype(c->lock)> clientlock(c->lock);
/* Unlink the client from the server structures. */
unlinkClient(c);
@ -2162,6 +2235,7 @@ void replicationCacheMaster(client *c) {
if (c->flags & CLIENT_MULTI) discardTransaction(c);
listEmpty(c->reply);
c->sentlen = 0;
c->sentlenAsync = 0;
c->reply_bytes = 0;
c->bufpos = 0;
resetClient(c);
@ -2196,6 +2270,7 @@ void replicationCacheMasterUsingMyself(void) {
* the new master will start its replication stream with SELECT. */
server.master_initial_offset = server.master_repl_offset;
replicationCreateMasterClient(-1,-1);
std::lock_guard<decltype(server.master->lock)> lock(server.master->lock);
/* Use our own ID / offset. */
memcpy(server.master->replid, server.replid, sizeof(server.replid));
@ -2214,7 +2289,10 @@ void replicationDiscardCachedMaster(void) {
serverLog(LL_NOTICE,"Discarding previously cached master state.");
server.cached_master->flags &= ~CLIENT_MASTER;
freeClient(server.cached_master);
if (FCorrectThread(server.cached_master))
freeClient(server.cached_master);
else
freeClientAsync(server.cached_master);
server.cached_master = NULL;
}
@ -2234,9 +2312,13 @@ void replicationResurrectCachedMaster(int newfd) {
server.repl_state = REPL_STATE_CONNECTED;
server.repl_down_since = 0;
/* Normally changing the thread of a client is a BIG no-no,
but this client was unlinked, so it's OK here. */
server.master->iel = serverTL - server.rgthreadvar; // marshal to this thread
/* Re-add to the list of clients. */
linkClient(server.master);
if (aeCreateFileEvent(server.el, newfd, AE_READABLE,
if (aeCreateFileEvent(server.rgthreadvar[server.master->iel].el, newfd, AE_READABLE|AE_READ_THREADSAFE,
readQueryFromClient, server.master)) {
serverLog(LL_WARNING,"Error resurrecting the cached master, impossible to add the readable handler: %s", strerror(errno));
freeClientAsync(server.master); /* Close ASAP. */
@ -2245,7 +2327,7 @@ void replicationResurrectCachedMaster(int newfd) {
/* We may also need to install the write handler if there is
* pending data in the write buffers. */
if (clientHasPendingReplies(server.master)) {
if (aeCreateFileEvent(server.el, newfd, AE_WRITABLE,
if (aeCreateFileEvent(server.rgthreadvar[server.master->iel].el, newfd, AE_WRITABLE|AE_WRITE_THREADSAFE,
sendReplyToClient, server.master)) {
serverLog(LL_WARNING,"Error resurrecting the cached master, impossible to add the writable handler: %s", strerror(errno));
freeClientAsync(server.master); /* Close ASAP. */
@ -2268,7 +2350,7 @@ void refreshGoodSlavesCount(void) {
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
client *slave = ln->value;
client *slave = (client*)ln->value;
time_t lag = server.unixtime - slave->repl_ack_time;
if (slave->replstate == SLAVE_STATE_ONLINE &&
@ -2342,7 +2424,7 @@ void replicationScriptCacheAdd(sds sha1) {
if (listLength(server.repl_scriptcache_fifo) == server.repl_scriptcache_size)
{
listNode *ln = listLast(server.repl_scriptcache_fifo);
sds oldest = listNodeValue(ln);
sds oldest = (sds)listNodeValue(ln);
retval = dictDelete(server.repl_scriptcache_dict,oldest);
serverAssert(retval == DICT_OK);
@ -2404,7 +2486,7 @@ int replicationCountAcksByOffset(long long offset) {
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
client *slave = ln->value;
client *slave = (client*)ln->value;
if (slave->replstate != SLAVE_STATE_ONLINE) continue;
if (slave->repl_ack_off >= offset) count++;
@ -2471,7 +2553,8 @@ void processClientsWaitingReplicas(void) {
listRewind(server.clients_waiting_acks,&li);
while((ln = listNext(&li))) {
client *c = ln->value;
client *c = (client*)ln->value;
fastlock_lock(&c->lock);
/* Every time we find a client that is satisfied for a given
* offset and number of replicas, we remember it so the next client
@ -2481,7 +2564,7 @@ void processClientsWaitingReplicas(void) {
last_numreplicas > c->bpop.numreplicas)
{
unblockClient(c);
addReplyLongLong(c,last_numreplicas);
addReplyLongLongAsync(c,last_numreplicas);
} else {
int numreplicas = replicationCountAcksByOffset(c->bpop.reploffset);
@ -2489,9 +2572,10 @@ void processClientsWaitingReplicas(void) {
last_offset = c->bpop.reploffset;
last_numreplicas = numreplicas;
unblockClient(c);
addReplyLongLong(c,numreplicas);
addReplyLongLongAsync(c,numreplicas);
}
}
fastlock_unlock(&c->lock);
}
}
@ -2519,7 +2603,11 @@ long long replicationGetSlaveOffset(void) {
/* Replication cron function, called 1 time per second. */
void replicationCron(void) {
serverAssert(aeThreadOwnsLock());
static long long replication_cron_loops = 0;
std::unique_lock<decltype(server.master->lock)> ulock;
if (server.master != nullptr)
ulock = decltype(ulock)(server.master->lock);
/* Non blocking connection timeout? */
if (server.masterhost &&
@ -2544,7 +2632,10 @@ void replicationCron(void) {
(time(NULL)-server.master->lastinteraction) > server.repl_timeout)
{
serverLog(LL_WARNING,"MASTER timeout: no data nor PING received...");
freeClient(server.master);
if (FCorrectThread(server.master))
freeClient(server.master);
else
freeClientAsync(server.master);
}
/* Check if we should connect to a MASTER */
@ -2597,7 +2688,7 @@ void replicationCron(void) {
* timeouts are set at a few seconds (example: PSYNC response). */
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
client *slave = ln->value;
client *slave = (client*)ln->value;
int is_presync =
(slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START ||
@ -2618,7 +2709,7 @@ void replicationCron(void) {
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
client *slave = ln->value;
client *slave = (client*)ln->value;
if (slave->replstate != SLAVE_STATE_ONLINE) continue;
if (slave->flags & CLIENT_PRE_PSYNC) continue;
@ -2626,7 +2717,10 @@ void replicationCron(void) {
{
serverLog(LL_WARNING, "Disconnecting timedout replica: %s",
replicationGetSlaveName(slave));
freeClient(slave);
if (FCorrectThread(slave))
freeClient(slave);
else
freeClientAsync(slave);
}
}
}
@ -2693,7 +2787,7 @@ void replicationCron(void) {
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
client *slave = ln->value;
client *slave = (client*)ln->value;
if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START) {
idle = server.unixtime - slave->lastinteraction;
if (idle > max_idle) max_idle = idle;

View File

@ -32,11 +32,14 @@
#include "rand.h"
#include "cluster.h"
extern "C" {
#include <lua.h>
#include <lauxlib.h>
#include <lualib.h>
}
#include <ctype.h>
#include <math.h>
#include <mutex>
char *redisProtocolToLuaType_Int(lua_State *lua, char *reply);
char *redisProtocolToLuaType_Bulk(lua_State *lua, char *reply);
@ -89,7 +92,7 @@ struct ldbState {
void sha1hex(char *digest, char *script, size_t len) {
SHA1_CTX ctx;
unsigned char hash[20];
char *cset = "0123456789abcdef";
const char *cset = "0123456789abcdef";
int j;
SHA1Init(&ctx);
@ -223,7 +226,7 @@ char *redisProtocolToLuaType_MultiBulk(lua_State *lua, char *reply, int atype) {
* with a single "err" field set to the error string. Note that this
* table is never a valid reply by proper commands, since the returned
* tables are otherwise always indexed by integers, never by strings. */
void luaPushError(lua_State *lua, char *error) {
void luaPushError(lua_State *lua, const char *error) {
lua_Debug dbg;
/* If debugging is active and in step mode, log errors resulting from
@ -365,9 +368,17 @@ void luaReplyToRedisReply(client *c, lua_State *lua) {
#define LUA_CMD_OBJCACHE_MAX_LEN 64
int luaRedisGenericCommand(lua_State *lua, int raise_error) {
int j, argc = lua_gettop(lua);
int acl_retval = 0;
int call_flags = CMD_CALL_SLOWLOG | CMD_CALL_STATS;
struct redisCommand *cmd;
client *c = server.lua_client;
sds reply;
// Ensure our client is on the right thread
serverAssert(!(c->flags & CLIENT_PENDING_WRITE));
serverAssert(!(c->flags & CLIENT_UNBLOCKED));
serverAssert(aeThreadOwnsLock());
c->iel = serverTL - server.rgthreadvar;
/* Cached across calls. */
static robj **argv = NULL;
@ -388,7 +399,7 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
* To make this function reentrant is futile and makes it slower, but
* we should at least detect such a misuse, and abort. */
if (inuse) {
char *recursion_warning =
const char *recursion_warning =
"luaRedisGenericCommand() recursive call detected. "
"Are you doing funny stuff with Lua debug hooks?";
serverLog(LL_WARNING,"%s",recursion_warning);
@ -396,6 +407,7 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
return 1;
}
inuse++;
std::unique_lock<decltype(c->lock)> ulock(c->lock);
/* Require at least one argument */
if (argc == 0) {
@ -407,7 +419,7 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
/* Build the arguments vector */
if (argv_size < argc) {
argv = zrealloc(argv,sizeof(robj*)*argc, MALLOC_LOCAL);
argv = (robj**)zrealloc(argv,sizeof(robj*)*argc, MALLOC_LOCAL);
argv_size = argc;
}
@ -432,7 +444,7 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
if (j < LUA_CMD_OBJCACHE_SIZE && cached_objects[j] &&
cached_objects_len[j] >= obj_len)
{
sds s = ptrFromObj(cached_objects[j]);
sds s = (sds)ptrFromObj(cached_objects[j]);
argv[j] = cached_objects[j];
cached_objects[j] = NULL;
memcpy(s,obj_s,obj_len+1);
@ -472,14 +484,14 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
break;
} else {
cmdlog = sdscatlen(cmdlog," ",1);
cmdlog = sdscatsds(cmdlog,ptrFromObj(c->argv[j]));
cmdlog = sdscatsds(cmdlog,(sds)ptrFromObj(c->argv[j]));
}
}
ldbLog(cmdlog);
}
/* Command lookup */
cmd = lookupCommand(ptrFromObj(argv[0]));
cmd = lookupCommand((sds)ptrFromObj(argv[0]));
if (!cmd || ((cmd->arity > 0 && cmd->arity != argc) ||
(argc < -cmd->arity)))
{
@ -499,7 +511,7 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
}
/* Check the ACLs. */
int acl_retval = ACLCheckCommandPerm(c);
acl_retval = ACLCheckCommandPerm(c);
if (acl_retval != ACL_OK) {
if (acl_retval == ACL_DENIED_CMD)
luaPushError(lua, "The user executing the script can't run this "
@ -524,11 +536,11 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
!server.loading &&
!(server.lua_caller->flags & CLIENT_MASTER))
{
luaPushError(lua, ptrFromObj(shared.roslaveerr));
luaPushError(lua, (char*)ptrFromObj(shared.roslaveerr));
goto cleanup;
} else if (deny_write_type != DISK_ERROR_TYPE_NONE) {
if (deny_write_type == DISK_ERROR_TYPE_RDB) {
luaPushError(lua, ptrFromObj(shared.bgsaveerr));
luaPushError(lua, (char*)ptrFromObj(shared.bgsaveerr));
} else {
sds aof_write_err = sdscatfmt(sdsempty(),
"-MISCONF Errors writing to the AOF file: %s\r\n",
@ -551,7 +563,7 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
(cmd->flags & CMD_DENYOOM))
{
if (getMaxmemoryState(NULL,NULL,NULL,NULL) != C_OK) {
luaPushError(lua, ptrFromObj(shared.oomerr));
luaPushError(lua, (char*)ptrFromObj(shared.oomerr));
goto cleanup;
}
}
@ -592,7 +604,6 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
}
/* Run the command */
int call_flags = CMD_CALL_SLOWLOG | CMD_CALL_STATS;
if (server.lua_replicate_commands) {
/* Set flags according to redis.set_repl() settings. */
if (server.lua_repl & PROPAGATE_AOF)
@ -616,9 +627,9 @@ int luaRedisGenericCommand(lua_State *lua, int raise_error) {
reply = sdsnewlen(c->buf,c->bufpos);
c->bufpos = 0;
while(listLength(c->reply)) {
clientReplyBlock *o = listNodeValue(listFirst(c->reply));
clientReplyBlock *o = (clientReplyBlock*)listNodeValue(listFirst(c->reply));
reply = sdscatlen(reply,o->buf,o->used);
reply = sdscatlen(reply,o->buf(),o->used);
listDelNode(c->reply,listFirst(c->reply));
}
}
@ -652,9 +663,9 @@ cleanup:
o->refcount == 1 &&
(o->encoding == OBJ_ENCODING_RAW ||
o->encoding == OBJ_ENCODING_EMBSTR) &&
sdslen(ptrFromObj(o)) <= LUA_CMD_OBJCACHE_MAX_LEN)
sdslen((sds)ptrFromObj(o)) <= LUA_CMD_OBJCACHE_MAX_LEN)
{
sds s = ptrFromObj(o);
sds s = (sds)ptrFromObj(o);
if (cached_objects[j]) decrRefCount(cached_objects[j]);
cached_objects[j] = o;
cached_objects_len[j] = sdsalloc(s);
@ -718,7 +729,7 @@ int luaRedisSha1hexCommand(lua_State *lua) {
* return redis.error_reply("ERR Some Error")
* return redis.status_reply("ERR Some Error")
*/
int luaRedisReturnSingleFieldTable(lua_State *lua, char *field) {
int luaRedisReturnSingleFieldTable(lua_State *lua, const char *field) {
if (lua_gettop(lua) != 1 || lua_type(lua,-1) != LUA_TSTRING) {
luaPushError(lua, "wrong number or type of arguments");
return 1;
@ -864,10 +875,12 @@ void luaLoadLib(lua_State *lua, const char *libname, lua_CFunction luafunc) {
lua_call(lua, 1, 0);
}
extern "C" {
LUALIB_API int (luaopen_cjson) (lua_State *L);
LUALIB_API int (luaopen_struct) (lua_State *L);
LUALIB_API int (luaopen_cmsgpack) (lua_State *L);
LUALIB_API int (luaopen_bit) (lua_State *L);
}
void luaLoadLibraries(lua_State *lua) {
luaLoadLib(lua, "", luaopen_base);
@ -901,7 +914,7 @@ void luaRemoveUnsupportedFunctions(lua_State *lua) {
* It should be the last to be called in the scripting engine initialization
* sequence, because it may interact with creation of globals. */
void scriptingEnableGlobalsProtection(lua_State *lua) {
char *s[32];
const char *s[32];
sds code = sdsempty();
int j = 0;
@ -1069,7 +1082,7 @@ void scriptingInit(int setup) {
/* Add a helper function that we use to sort the multi bulk output of non
* deterministic commands, when containing 'false' elements. */
{
char *compare_func = "function __redis__compare_helper(a,b)\n"
const char *compare_func = "function __redis__compare_helper(a,b)\n"
" if a == false then a = '' end\n"
" if b == false then b = '' end\n"
" return a<b\n"
@ -1083,7 +1096,7 @@ void scriptingInit(int setup) {
* information about the caller, that's what makes sense from the point
* of view of the user debugging a script. */
{
char *errh_func = "local dbg = debug\n"
const char *errh_func = "local dbg = debug\n"
"function __redis__err__handler(err)\n"
" local i = dbg.getinfo(2,'nSl')\n"
" if i and i.what == 'C' then\n"
@ -1104,7 +1117,7 @@ void scriptingInit(int setup) {
* Note: there is no need to create it again when this function is called
* by scriptingReset(). */
if (server.lua_client == NULL) {
server.lua_client = createClient(-1);
server.lua_client = createClient(-1, IDX_EVENT_LOOP_MAIN);
server.lua_client->flags |= CLIENT_LUA;
}
@ -1131,12 +1144,12 @@ void scriptingReset(void) {
/* Set an array of Redis String Objects as a Lua array (table) stored into a
* global variable. */
void luaSetGlobalArray(lua_State *lua, char *var, robj **elev, int elec) {
void luaSetGlobalArray(lua_State *lua, const char *var, robj **elev, int elec) {
int j;
lua_newtable(lua);
for (j = 0; j < elec; j++) {
lua_pushlstring(lua,(char*)ptrFromObj(elev[j]),sdslen(ptrFromObj(elev[j])));
lua_pushlstring(lua,(char*)ptrFromObj(elev[j]),sdslen((sds)ptrFromObj(elev[j])));
lua_rawseti(lua,-2,j+1);
}
lua_setglobal(lua,var);
@ -1212,19 +1225,19 @@ sds luaCreateFunction(client *c, lua_State *lua, robj *body) {
funcname[0] = 'f';
funcname[1] = '_';
sha1hex(funcname+2,ptrFromObj(body),sdslen(ptrFromObj(body)));
sha1hex(funcname+2,(char*)ptrFromObj(body),sdslen((sds)ptrFromObj(body)));
sds sha = sdsnewlen(funcname+2,40);
if ((de = dictFind(server.lua_scripts,sha)) != NULL) {
sdsfree(sha);
return dictGetKey(de);
return (sds)dictGetKey(de);
}
sds funcdef = sdsempty();
funcdef = sdscat(funcdef,"function ");
funcdef = sdscatlen(funcdef,funcname,42);
funcdef = sdscatlen(funcdef,"() ",3);
funcdef = sdscatlen(funcdef,ptrFromObj(body),sdslen(ptrFromObj(body)));
funcdef = sdscatlen(funcdef,ptrFromObj(body),sdslen((sds)ptrFromObj(body)));
funcdef = sdscatlen(funcdef,"\nend",4);
if (luaL_loadbuffer(lua,funcdef,sdslen(funcdef),"@user_script")) {
@ -1278,7 +1291,7 @@ void luaMaskCountHook(lua_State *lua, lua_Debug *ar) {
* here when the EVAL command will return. */
protectClient(server.lua_caller);
}
if (server.lua_timedout) processEventsWhileBlocked();
if (server.lua_timedout) processEventsWhileBlocked(serverTL - server.rgthreadvar);
if (server.lua_kill) {
serverLog(LL_WARNING,"Lua script killed by user with SCRIPT KILL.");
lua_pushstring(lua,"Script killed by user with SCRIPT KILL...");
@ -1328,11 +1341,11 @@ void evalGenericCommand(client *c, int evalsha) {
funcname[1] = '_';
if (!evalsha) {
/* Hash the code if this is an EVAL call */
sha1hex(funcname+2,ptrFromObj(c->argv[1]),sdslen(ptrFromObj(c->argv[1])));
sha1hex(funcname+2,(char*)ptrFromObj(c->argv[1]),sdslen((sds)ptrFromObj(c->argv[1])));
} else {
/* We already have the SHA if it is a EVALSHA */
int j;
char *sha = ptrFromObj(c->argv[1]);
char *sha = (char*)ptrFromObj(c->argv[1]);
/* Convert to lowercase. We don't use tolower since the function
* managed to always show up in the profiler output consuming
@ -1464,13 +1477,13 @@ void evalGenericCommand(client *c, int evalsha) {
* flush our cache of scripts that can be replicated as EVALSHA, while
* for AOF we need to do so every time we rewrite the AOF file. */
if (evalsha && !server.lua_replicate_commands) {
if (!replicationScriptCacheExists(ptrFromObj(c->argv[1]))) {
if (!replicationScriptCacheExists((sds)ptrFromObj(c->argv[1]))) {
/* This script is not in our script cache, replicate it as
* EVAL, then add it into the script cache, as from now on
* slaves and AOF know about it. */
robj *script = dictFetchValue(server.lua_scripts,ptrFromObj(c->argv[1]));
robj *script = (robj*)dictFetchValue(server.lua_scripts,ptrFromObj(c->argv[1]));
replicationScriptCacheAdd(ptrFromObj(c->argv[1]));
replicationScriptCacheAdd((sds)ptrFromObj(c->argv[1]));
serverAssertWithInfo(c,NULL,script != NULL);
/* If the script did not produce any changes in the dataset we want
@ -1500,7 +1513,7 @@ void evalCommand(client *c) {
}
void evalShaCommand(client *c) {
if (sdslen(ptrFromObj(c->argv[1])) != 40) {
if (sdslen((sds)ptrFromObj(c->argv[1])) != 40) {
/* We know that a match is not possible if the provided SHA is
* not the right length. So we return an error ASAP, this way
* evalGenericCommand() can be implemented without string length
@ -1517,7 +1530,7 @@ void evalShaCommand(client *c) {
}
void scriptCommand(client *c) {
if (c->argc == 2 && !strcasecmp(ptrFromObj(c->argv[1]),"help")) {
if (c->argc == 2 && !strcasecmp((const char*)ptrFromObj(c->argv[1]),"help")) {
const char *help[] = {
"DEBUG (yes|sync|no) -- Set the debug mode for subsequent scripts executed.",
"EXISTS <sha1> [<sha1> ...] -- Return information about the existence of the scripts in the script cache.",
@ -1527,12 +1540,12 @@ void scriptCommand(client *c) {
NULL
};
addReplyHelp(c, help);
} else if (c->argc == 2 && !strcasecmp(ptrFromObj(c->argv[1]),"flush")) {
} else if (c->argc == 2 && !strcasecmp((const char*)ptrFromObj(c->argv[1]),"flush")) {
scriptingReset();
addReply(c,shared.ok);
replicationScriptCacheFlush();
server.dirty++; /* Propagating this command is a good idea. */
} else if (c->argc >= 2 && !strcasecmp(ptrFromObj(c->argv[1]),"exists")) {
} else if (c->argc >= 2 && !strcasecmp((const char*)ptrFromObj(c->argv[1]),"exists")) {
int j;
addReplyArrayLen(c, c->argc-2);
@ -1542,12 +1555,12 @@ NULL
else
addReply(c,shared.czero);
}
} else if (c->argc == 3 && !strcasecmp(ptrFromObj(c->argv[1]),"load")) {
} else if (c->argc == 3 && !strcasecmp((const char*)ptrFromObj(c->argv[1]),"load")) {
sds sha = luaCreateFunction(c,server.lua,c->argv[2]);
if (sha == NULL) return; /* The error was sent by luaCreateFunction(). */
addReplyBulkCBuffer(c,sha,40);
forceCommandPropagation(c,PROPAGATE_REPL|PROPAGATE_AOF);
} else if (c->argc == 2 && !strcasecmp(ptrFromObj(c->argv[1]),"kill")) {
} else if (c->argc == 2 && !strcasecmp((const char*)ptrFromObj(c->argv[1]),"kill")) {
if (server.lua_caller == NULL) {
addReplySds(c,sdsnew("-NOTBUSY No scripts in execution right now.\r\n"));
} else if (server.lua_caller->flags & CLIENT_MASTER) {
@ -1558,18 +1571,18 @@ NULL
server.lua_kill = 1;
addReply(c,shared.ok);
}
} else if (c->argc == 3 && !strcasecmp(ptrFromObj(c->argv[1]),"debug")) {
} else if (c->argc == 3 && !strcasecmp((const char*)ptrFromObj(c->argv[1]),"debug")) {
if (clientHasPendingReplies(c)) {
addReplyError(c,"SCRIPT DEBUG must be called outside a pipeline");
return;
}
if (!strcasecmp(ptrFromObj(c->argv[2]),"no")) {
if (!strcasecmp((const char*)ptrFromObj(c->argv[2]),"no")) {
ldbDisable(c);
addReply(c,shared.ok);
} else if (!strcasecmp(ptrFromObj(c->argv[2]),"yes")) {
} else if (!strcasecmp((const char*)ptrFromObj(c->argv[2]),"yes")) {
ldbEnable(c);
addReply(c,shared.ok);
} else if (!strcasecmp(ptrFromObj(c->argv[2]),"sync")) {
} else if (!strcasecmp((const char*)ptrFromObj(c->argv[2]),"sync")) {
ldbEnable(c);
addReply(c,shared.ok);
c->flags |= CLIENT_LUA_DEBUG_SYNC;
@ -1660,8 +1673,8 @@ void ldbSendLogs(void) {
while(listLength(ldb.logs)) {
listNode *ln = listFirst(ldb.logs);
proto = sdscatlen(proto,"+",1);
sdsmapchars(ln->value,"\r\n"," ",2);
proto = sdscatsds(proto,ln->value);
sdsmapchars((sds)ln->value,"\r\n"," ",2);
proto = sdscatsds(proto,(sds)ln->value);
proto = sdscatlen(proto,"\r\n",2);
listDelNode(ldb.logs,ln);
}
@ -1724,7 +1737,7 @@ int ldbStartSession(client *c) {
/* First argument of EVAL is the script itself. We split it into different
* lines since this is the way the debugger accesses the source code. */
sds srcstring = sdsdup(ptrFromObj(c->argv[1]));
sds srcstring = sdsdup((sds)ptrFromObj(c->argv[1]));
size_t srclen = sdslen(srcstring);
while(srclen && (srcstring[srclen-1] == '\n' ||
srcstring[srclen-1] == '\r'))
@ -1814,7 +1827,7 @@ void evalGenericCommandWithDebugging(client *c, int evalsha) {
/* Return a pointer to ldb.src source code line, considering line to be
* one-based, and returning a special string for out of range lines. */
char *ldbGetSourceLine(int line) {
const char *ldbGetSourceLine(int line) {
int idx = line-1;
if (idx < 0 || idx >= ldb.lines) return "<out of range source code line>";
return ldb.src[idx];
@ -1862,6 +1875,7 @@ int ldbDelBreakpoint(int line) {
sds *ldbReplParseCommand(int *argcp) {
sds *argv = NULL;
int argc = 0;
char *plen = NULL;
if (sdslen(ldb.cbuf) == 0) return NULL;
/* Working on a copy is simpler in this case. We can modify it freely
@ -1875,14 +1889,14 @@ sds *ldbReplParseCommand(int *argcp) {
/* Seek and parse *<count>\r\n. */
p = strchr(p,'*'); if (!p) goto protoerr;
char *plen = p+1; /* Multi bulk len pointer. */
plen = p+1; /* Multi bulk len pointer. */
p = strstr(p,"\r\n"); if (!p) goto protoerr;
*p = '\0'; p += 2;
*argcp = atoi(plen);
if (*argcp <= 0 || *argcp > 1024) goto protoerr;
/* Parse each argument. */
argv = zmalloc(sizeof(sds)*(*argcp), MALLOC_LOCAL);
argv = (sds*)zmalloc(sizeof(sds)*(*argcp), MALLOC_LOCAL);
argc = 0;
while(argc < *argcp) {
if (*p != '$') goto protoerr;
@ -1907,8 +1921,8 @@ protoerr:
/* Log the specified line in the Lua debugger output. */
void ldbLogSourceLine(int lnum) {
char *line = ldbGetSourceLine(lnum);
char *prefix;
const char *line = ldbGetSourceLine(lnum);
const char *prefix;
int bp = ldbIsBreakpoint(lnum);
int current = ldb.currentline == lnum;
@ -2014,12 +2028,12 @@ sds ldbCatStackValueRec(sds s, lua_State *lua, int idx, int level) {
case LUA_TLIGHTUSERDATA:
{
const void *p = lua_topointer(lua,idx);
char *typename = "unknown";
if (t == LUA_TFUNCTION) typename = "function";
else if (t == LUA_TUSERDATA) typename = "userdata";
else if (t == LUA_TTHREAD) typename = "thread";
else if (t == LUA_TLIGHTUSERDATA) typename = "light-userdata";
s = sdscatprintf(s,"\"%s@%p\"",typename,p);
const char *tname = "unknown";
if (t == LUA_TFUNCTION) tname = "function";
else if (t == LUA_TUSERDATA) tname = "userdata";
else if (t == LUA_TTHREAD) tname = "thread";
else if (t == LUA_TLIGHTUSERDATA) tname = "light-userdata";
s = sdscatprintf(s,"\"%s@%p\"",tname,p);
}
break;
default:
@ -2038,7 +2052,7 @@ sds ldbCatStackValue(sds s, lua_State *lua, int idx) {
/* Produce a debugger log entry representing the value of the Lua object
* currently on the top of the stack. The element is not popped nor modified.
* Check ldbCatStackValue() for the actual implementation. */
void ldbLogStackValue(lua_State *lua, char *prefix) {
void ldbLogStackValue(lua_State *lua, const char *prefix) {
sds s = sdsnew(prefix);
s = ldbCatStackValue(s,lua,-1);
ldbLogWithMaxLen(s);
@ -2460,7 +2474,7 @@ void luaLdbLineHook(lua_State *lua, lua_Debug *ar) {
}
if (ldb.step || bp) {
char *reason = "step over";
const char *reason = "step over";
if (bp) reason = ldb.luabp ? "redis.breakpoint() called" :
"break point";
else if (timeout) reason = "timeout reached, infinite loop?";

View File

@ -41,9 +41,7 @@ extern const char *SDS_NOINIT;
#include <stdint.h>
#ifdef __cplusplus
#define ZERO_LENGTH_ARRAY_LENGTH 1
#else
#define ZERO_LENGTH_ARRAY_LENGTH
extern "C" {
#endif
typedef char *sds;
@ -52,31 +50,41 @@ typedef char *sds;
* However is here to document the layout of type 5 SDS strings. */
struct __attribute__ ((__packed__)) sdshdr5 {
unsigned char flags; /* 3 lsb of type, and 5 msb of string length */
char buf[ZERO_LENGTH_ARRAY_LENGTH];
#ifndef __cplusplus
char buf[];
#endif
};
struct __attribute__ ((__packed__)) sdshdr8 {
uint8_t len; /* used */
uint8_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[ZERO_LENGTH_ARRAY_LENGTH];
#ifndef __cplusplus
char buf[];
#endif
};
struct __attribute__ ((__packed__)) sdshdr16 {
uint16_t len; /* used */
uint16_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[ZERO_LENGTH_ARRAY_LENGTH];
#ifndef __cplusplus
char buf[];
#endif
};
struct __attribute__ ((__packed__)) sdshdr32 {
uint32_t len; /* used */
uint32_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[ZERO_LENGTH_ARRAY_LENGTH];
#ifndef __cplusplus
char buf[];
#endif
};
struct __attribute__ ((__packed__)) sdshdr64 {
uint64_t len; /* used */
uint64_t alloc; /* excluding the header and null terminator */
unsigned char flags; /* 3 lsb of type, 5 unused bits */
char buf[ZERO_LENGTH_ARRAY_LENGTH];
#ifndef __cplusplus
char buf[];
#endif
};
#define SDS_TYPE_5 0
@ -284,4 +292,8 @@ void sds_free(void *ptr);
int sdsTest(int argc, char *argv[]);
#endif
#ifdef __cplusplus
}
#endif
#endif

View File

@ -2013,7 +2013,7 @@ void sentinelReconnectInstance(sentinelRedisInstance *ri) {
link->pending_commands = 0;
link->cc_conn_time = mstime();
link->cc->data = link;
redisAeAttach(server.el,link->cc);
redisAeAttach(server.rgthreadvar[IDX_EVENT_LOOP_MAIN].el,link->cc);
redisAsyncSetConnectCallback(link->cc,
sentinelLinkEstablishedCallback);
redisAsyncSetDisconnectCallback(link->cc,
@ -2037,7 +2037,7 @@ void sentinelReconnectInstance(sentinelRedisInstance *ri) {
link->pc_conn_time = mstime();
link->pc->data = link;
redisAeAttach(server.el,link->pc);
redisAeAttach(server.rgthreadvar[IDX_EVENT_LOOP_MAIN].el,link->pc);
redisAsyncSetConnectCallback(link->pc,
sentinelLinkEstablishedCallback);
redisAsyncSetDisconnectCallback(link->pc,
@ -3976,6 +3976,7 @@ int sentinelSendSlaveOf(sentinelRedisInstance *ri, char *host, int port) {
/* Setup the master state to start a failover. */
void sentinelStartFailover(sentinelRedisInstance *master) {
serverAssert(aeThreadOwnsLock());
serverAssert(master->flags & SRI_MASTER);
master->failover_state = SENTINEL_FAILOVER_STATE_WAIT_START;
@ -4168,6 +4169,7 @@ void sentinelFailoverWaitStart(sentinelRedisInstance *ri) {
}
void sentinelFailoverSelectSlave(sentinelRedisInstance *ri) {
serverAssert(aeThreadOwnsLock());
sentinelRedisInstance *slave = sentinelSelectSlave(ri);
/* We don't handle the timeout in this state as the function aborts
@ -4292,6 +4294,7 @@ void sentinelFailoverReconfNextSlave(sentinelRedisInstance *master) {
dictIterator *di;
dictEntry *de;
int in_progress = 0;
serverAssert(aeThreadOwnsLock());
di = dictGetIterator(master->slaves);
while((de = dictNext(di)) != NULL) {

View File

@ -1,5 +1,6 @@
/*
* Copyright (c) 2009-2016, Salvatore Sanfilippo <antirez at gmail dot com>
* Copyright (c) 2019 John Sully <john at eqalpha dot com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -71,6 +72,7 @@ double R_Zero, R_PosInf, R_NegInf, R_Nan;
/* Global vars */
struct redisServer server; /* Server global state */
__thread struct redisServerThreadVars *serverTL = NULL; // thread local server vars
volatile unsigned long lru_clock; /* Server global current LRU time. */
/* Our command table.
@ -659,7 +661,7 @@ struct redisCommand redisCommandTable[] = {
0,NULL,0,0,0,0,0,0},
{"lastsave",lastsaveCommand,1,
"read-only random fast @admin",
"read-only random fast @admin @dangerous",
0,NULL,0,0,0,0,0,0},
{"type",typeCommand,2,
@ -1525,6 +1527,7 @@ int clientsCronHandleTimeout(client *c, mstime_t now_ms) {
*
* The function always returns 0 as it never terminates the client. */
int clientsCronResizeQueryBuffer(client *c) {
AssertCorrectThread(c);
size_t querybuf_size = sdsAllocSize(c->querybuf);
time_t idletime = server.unixtime - c->lastinteraction;
@ -1635,7 +1638,7 @@ void getExpansiveClientsInfo(size_t *in_usage, size_t *out_usage) {
* of clients per second, turning this function into a source of latency.
*/
#define CLIENTS_CRON_MIN_ITERATIONS 5
void clientsCron(void) {
void clientsCron(int iel) {
/* Try to process at least numclients/server.hz of clients
* per call. Since normally (if there are no big latency events) this
* function is called server.hz times per second, in the average case we
@ -1661,12 +1664,18 @@ void clientsCron(void) {
listRotate(server.clients);
head = listFirst(server.clients);
c = listNodeValue(head);
/* The following functions do different service checks on the client.
* The protocol is that they return non-zero if the client was
* terminated. */
if (clientsCronHandleTimeout(c,now)) continue;
if (clientsCronResizeQueryBuffer(c)) continue;
if (clientsCronTrackExpansiveClients(c)) continue;
if (c->iel == iel)
{
fastlock_lock(&c->lock);
/* The following functions do different service checks on the client.
* The protocol is that they return non-zero if the client was
* terminated. */
if (clientsCronHandleTimeout(c,now)) goto LContinue;
if (clientsCronResizeQueryBuffer(c)) goto LContinue;
if (clientsCronTrackExpansiveClients(c)) goto LContinue;
LContinue:
fastlock_unlock(&c->lock);
}
}
}
@ -1768,6 +1777,8 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
UNUSED(id);
UNUSED(clientData);
ProcessPendingAsyncWrites(); // This is really a bug, but for now catch any laggards that didn't clean up
/* Software watchdog: deliver the SIGALRM that will reach the signal
* handler if we don't return here fast enough. */
if (server.watchdog_period) watchdogScheduleSignal(server.watchdog_period);
@ -1879,7 +1890,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
}
/* We need to do a few operations on clients asynchronously. */
clientsCron();
clientsCron(IDX_EVENT_LOOP_MAIN);
/* Handle background operations on Redis databases. */
databasesCron();
@ -1984,7 +1995,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
}
/* Close clients that need to be closed asynchronous */
freeClientsInAsyncFreeQueue();
freeClientsInAsyncFreeQueue(IDX_EVENT_LOOP_MAIN);
/* Clear the paused clients flag if needed. */
clientsArePaused(); /* Don't check return value, just use the side effect.*/
@ -2028,6 +2039,25 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
return 1000/server.hz;
}
/* serverCron counterpart for worker threads other than the main thread.
 *
 * Resolves the index of the event loop that fired the timer, asserts that it
 * is not the main loop, and then, between aeAcquireLock() and
 * aeReleaseLock(): flushes pending async writes, runs clientsCron() for this
 * loop index and frees the clients queued for asynchronous close on this
 * loop index.
 *
 * 'id' and 'clientData' are unused. Returns 1000/server.hz (presumably the
 * delay in milliseconds before the timer fires again -- confirm against the
 * ae timer API). */
int serverCronLite(struct aeEventLoop *eventLoop, long long id, void *clientData)
{
UNUSED(id);
UNUSED(clientData);
int iel = ielFromEventLoop(eventLoop);
/* This callback must never be run on the main event loop. */
serverAssert(iel != IDX_EVENT_LOOP_MAIN);
aeAcquireLock();
ProcessPendingAsyncWrites(); // A bug but leave for now, events should clean up after themselves
clientsCron(iel);
freeClientsInAsyncFreeQueue(iel);
aeReleaseLock();
return 1000/server.hz;
}
/* This function gets called every time Redis is entering the
* main loop of the event driven library, that is, before to sleep
* for ready file descriptors. */
@ -2070,14 +2100,16 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
moduleHandleBlockedClients();
/* Try to process pending commands for clients that were just unblocked. */
if (listLength(server.unblocked_clients))
processUnblockedClients();
if (listLength(server.rgthreadvar[IDX_EVENT_LOOP_MAIN].unblocked_clients))
{
processUnblockedClients(IDX_EVENT_LOOP_MAIN);
}
/* Write the AOF buffer on disk */
flushAppendOnlyFile(0);
/* Handle writes with pending output buffers. */
handleClientsWithPendingWrites();
handleClientsWithPendingWrites(IDX_EVENT_LOOP_MAIN);
/* Before we are going to sleep, let the threads access the dataset by
* releasing the GIL. Redis main thread will not touch anything at this
@ -2085,6 +2117,21 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
if (moduleCount()) moduleReleaseGIL();
}
/* Reduced variant of beforeSleep() that works on the event loop identified by
 * 'eventLoop' instead of a fixed loop index.
 * NOTE(review): presumably installed as the before-sleep hook of the worker
 * thread event loops; the registration is not visible here -- confirm.
 *
 * Between aeAcquireLock() and aeReleaseLock() it processes the clients in
 * this loop's unblocked_clients list (only when the list is not empty) and
 * then handles the clients of this loop that have pending writes. */
void beforeSleepLite(struct aeEventLoop *eventLoop)
{
int iel = ielFromEventLoop(eventLoop);
/* Try to process pending commands for clients that were just unblocked. */
aeAcquireLock();
if (listLength(server.rgthreadvar[iel].unblocked_clients)) {
processUnblockedClients(iel);
}
/* Handle writes with pending output buffers. */
handleClientsWithPendingWrites(iel);
aeReleaseLock();
}
/* This function is called immediately after the event loop multiplexing
* API returned, and the control is going to soon return to Redis by invoking
* the different events callbacks. */
@ -2221,7 +2268,6 @@ void initServerConfig(void) {
server.bindaddr_count = 0;
server.unixsocket = NULL;
server.unixsocketperm = CONFIG_DEFAULT_UNIX_SOCKET_PERM;
server.ipfd_count = 0;
server.sofd = -1;
server.protected_mode = CONFIG_DEFAULT_PROTECTED_MODE;
server.dbnum = CONFIG_DEFAULT_DBNUM;
@ -2409,6 +2455,9 @@ void initServerConfig(void) {
* script to the slave / AOF. This is the new way starting from
* Redis 5. However it is possible to revert it via redis.conf. */
server.lua_always_replicate_commands = 1;
/* Multithreading */
server.cthreads = CONFIG_DEFAULT_THREADS;
}
extern char **environ;
@ -2595,7 +2644,7 @@ void checkTcpBacklogSettings(void) {
* impossible to bind, or no bind addresses were specified in the server
* configuration but the function is not able to bind * for at least
* one of the IPv4 or IPv6 protocols. */
int listenToPort(int port, int *fds, int *count) {
int listenToPort(int port, int *fds, int *count, int fReusePort) {
int j;
/* Force binding of 0.0.0.0 if no bind address is specified, always
@ -2607,7 +2656,7 @@ int listenToPort(int port, int *fds, int *count) {
/* Bind * for both IPv6 and IPv4, we enter here only if
* server.bindaddr_count == 0. */
fds[*count] = anetTcp6Server(server.neterr,port,NULL,
server.tcp_backlog);
server.tcp_backlog, fReusePort);
if (fds[*count] != ANET_ERR) {
anetNonBlock(NULL,fds[*count]);
(*count)++;
@ -2619,7 +2668,7 @@ int listenToPort(int port, int *fds, int *count) {
if (*count == 1 || unsupported) {
/* Bind the IPv4 address as well. */
fds[*count] = anetTcpServer(server.neterr,port,NULL,
server.tcp_backlog);
server.tcp_backlog, fReusePort);
if (fds[*count] != ANET_ERR) {
anetNonBlock(NULL,fds[*count]);
(*count)++;
@ -2635,11 +2684,11 @@ int listenToPort(int port, int *fds, int *count) {
} else if (strchr(server.bindaddr[j],':')) {
/* Bind IPv6 address. */
fds[*count] = anetTcp6Server(server.neterr,port,server.bindaddr[j],
server.tcp_backlog);
server.tcp_backlog, fReusePort);
} else {
/* Bind IPv4 address. */
fds[*count] = anetTcpServer(server.neterr,port,server.bindaddr[j],
server.tcp_backlog);
server.tcp_backlog, fReusePort);
}
if (fds[*count] == ANET_ERR) {
serverLog(LL_WARNING,
@ -2695,50 +2744,38 @@ void resetServerStats(void) {
server.aof_delayed_fsync = 0;
}
void initServer(void) {
int j;
signal(SIGHUP, SIG_IGN);
signal(SIGPIPE, SIG_IGN);
setupSignalHandlers();
if (server.syslog_enabled) {
openlog(server.syslog_ident, LOG_PID | LOG_NDELAY | LOG_NOWAIT,
server.syslog_facility);
}
server.hz = server.config_hz;
server.pid = getpid();
server.current_client = NULL;
server.clients = listCreate();
server.clients_index = raxNew();
server.clients_to_close = listCreate();
server.slaves = listCreate();
server.monitors = listCreate();
server.clients_pending_write = listCreate();
server.slaveseldb = -1; /* Force to emit the first SELECT command. */
server.unblocked_clients = listCreate();
server.ready_keys = listCreate();
server.clients_waiting_acks = listCreate();
server.get_ack_from_slaves = 0;
server.clients_paused = 0;
server.system_memory_size = zmalloc_get_memory_size();
createSharedObjects();
adjustOpenFilesLimit();
server.el = aeCreateEventLoop(server.maxclients+CONFIG_FDSET_INCR);
if (server.el == NULL) {
serverLog(LL_WARNING,
"Failed creating the event loop. Error message: '%s'",
strerror(errno));
exit(1);
}
server.db = zmalloc(sizeof(redisDb)*server.dbnum, MALLOC_LOCAL);
static void initNetworkingThread(int iel, int fReusePort)
{
/* Open the TCP listening socket for the user commands. */
if (server.port != 0 &&
listenToPort(server.port,server.ipfd,&server.ipfd_count) == C_ERR)
exit(1);
if (fReusePort || (iel == IDX_EVENT_LOOP_MAIN))
{
if (server.port != 0 &&
listenToPort(server.port,server.rgthreadvar[iel].ipfd,&server.rgthreadvar[iel].ipfd_count, fReusePort) == C_ERR)
exit(1);
}
else
{
// We use the main threads file descriptors
memcpy(server.rgthreadvar[iel].ipfd, server.rgthreadvar[IDX_EVENT_LOOP_MAIN].ipfd, sizeof(int)*CONFIG_BINDADDR_MAX);
server.rgthreadvar[iel].ipfd_count = server.rgthreadvar[IDX_EVENT_LOOP_MAIN].ipfd_count;
}
/* Create an event handler for accepting new connections in TCP */
for (int j = 0; j < server.rgthreadvar[iel].ipfd_count; j++) {
if (aeCreateFileEvent(server.rgthreadvar[iel].el, server.rgthreadvar[iel].ipfd[j], AE_READABLE|AE_READ_THREADSAFE,
acceptTcpHandler,NULL) == AE_ERR)
{
serverPanic(
"Unrecoverable error creating server.ipfd file event.");
}
}
}
static void initNetworking(int fReusePort)
{
int celListen = (fReusePort) ? server.cthreads : 1;
for (int iel = 0; iel < celListen; ++iel)
initNetworkingThread(iel, fReusePort);
/* Open the listening Unix domain socket. */
if (server.unixsocket != NULL) {
@ -2753,13 +2790,72 @@ void initServer(void) {
}
/* Abort if there are no listening sockets at all. */
if (server.ipfd_count == 0 && server.sofd < 0) {
if (server.rgthreadvar[IDX_EVENT_LOOP_MAIN].ipfd_count == 0 && server.sofd < 0) {
serverLog(LL_WARNING, "Configured to not listen anywhere, exiting.");
exit(1);
}
if (server.sofd > 0 && aeCreateFileEvent(server.rgthreadvar[IDX_EVENT_LOOP_MAIN].el,server.sofd,AE_READABLE|AE_READ_THREADSAFE,
acceptUnixHandler,NULL) == AE_ERR) serverPanic("Unrecoverable error creating server.sofd file event.");
}
/* Initialize the per-thread state held in 'pvar'.
 *
 * Creates the three per-thread client lists (pending writes, unblocked
 * clients, pending async writes), resets the listening socket count and
 * creates the thread's event loop. The process exits if the event loop
 * cannot be created.
 *
 * When 'fMain' is zero, a serverCronLite timer is also registered on the
 * new event loop; when it is non-zero no timer is created here. */
static void initServerThread(struct redisServerThreadVars *pvar, int fMain)
{
    pvar->ipfd_count = 0;

    pvar->clients_pending_write = listCreate();
    pvar->unblocked_clients = listCreate();
    pvar->clients_pending_asyncwrite = listCreate();

    pvar->el = aeCreateEventLoop(server.maxclients+CONFIG_FDSET_INCR);
    if (!pvar->el) {
        serverLog(LL_WARNING,
            "Failed creating the event loop. Error message: '%s'",
            strerror(errno));
        exit(1);
    }

    /* Nothing more to set up here when this is the main thread. */
    if (fMain)
        return;

    if (aeCreateTimeEvent(pvar->el, 1, serverCronLite, NULL, NULL) != AE_ERR)
        return;

    serverPanic("Can't create event loop timers.");
    exit(1);
}
void initServer(void) {
signal(SIGHUP, SIG_IGN);
signal(SIGPIPE, SIG_IGN);
setupSignalHandlers();
fastlock_init(&server.flock);
if (server.syslog_enabled) {
openlog(server.syslog_ident, LOG_PID | LOG_NDELAY | LOG_NOWAIT,
server.syslog_facility);
}
server.hz = server.config_hz;
server.pid = getpid();
server.current_client = NULL;
server.clients = listCreate();
server.clients_index = raxNew();
server.clients_to_close = listCreate();
server.slaves = listCreate();
server.monitors = listCreate();
server.slaveseldb = -1; /* Force to emit the first SELECT command. */
server.ready_keys = listCreate();
server.clients_waiting_acks = listCreate();
server.get_ack_from_slaves = 0;
server.clients_paused = 0;
server.system_memory_size = zmalloc_get_memory_size();
createSharedObjects();
adjustOpenFilesLimit();
server.db = zmalloc(sizeof(redisDb)*server.dbnum, MALLOC_LOCAL);
/* Create the Redis databases, and initialize other internal state. */
for (j = 0; j < server.dbnum; j++) {
for (int j = 0; j < server.dbnum; j++) {
server.db[j].pdict = dictCreate(&dbDictType,NULL);
server.db[j].expires = dictCreate(&keyptrDictType,NULL);
server.db[j].blocking_keys = dictCreate(&keylistDictType,NULL);
@ -2808,28 +2904,24 @@ void initServer(void) {
/* Create the timer callback, this is our way to process many background
* operations incrementally, like clients timeout, eviction of unaccessed
* expired keys and so forth. */
if (aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL) == AE_ERR) {
if (aeCreateTimeEvent(server.rgthreadvar[IDX_EVENT_LOOP_MAIN].el, 1, serverCron, NULL, NULL) == AE_ERR) {
serverPanic("Can't create event loop timers.");
exit(1);
}
/* Create an event handler for accepting new connections in TCP and Unix
* domain sockets. */
for (j = 0; j < server.ipfd_count; j++) {
if (aeCreateFileEvent(server.el, server.ipfd[j], AE_READABLE,
acceptTcpHandler,NULL) == AE_ERR)
{
serverPanic(
"Unrecoverable error creating server.ipfd file event.");
}
/* Register a readable event for the pipe used to awake the event loop
* when a blocked client in a module needs attention. */
if (aeCreateFileEvent(server.rgthreadvar[IDX_EVENT_LOOP_MAIN].el, server.module_blocked_pipe[0], AE_READABLE,
moduleBlockedClientPipeReadable,NULL) == AE_ERR) {
serverPanic(
"Error registering the readable event for the module "
"blocked clients subsystem.");
}
if (server.sofd > 0 && aeCreateFileEvent(server.el,server.sofd,AE_READABLE,
acceptUnixHandler,NULL) == AE_ERR) serverPanic("Unrecoverable error creating server.sofd file event.");
/* Register a readable event for the pipe used to awake the event loop
* when a blocked client in a module needs attention. */
if (aeCreateFileEvent(server.el, server.module_blocked_pipe[0], AE_READABLE,
if (aeCreateFileEvent(server.rgthreadvar[IDX_EVENT_LOOP_MAIN].el, server.module_blocked_pipe[0], AE_READABLE,
moduleBlockedClientPipeReadable,NULL) == AE_ERR) {
serverPanic(
"Error registering the readable event for the module "
@ -2917,10 +3009,10 @@ int populateCommandTableParseFlags(struct redisCommand *c, char *strflags) {
return C_ERR;
}
}
/* If it's not @fast is @slow in this binary world. */
if (!(c->flags & CMD_CATEGORY_FAST)) c->flags |= CMD_CATEGORY_SLOW;
}
/* If it's not @fast is @slow in this binary world. */
if (!(c->flags & CMD_CATEGORY_FAST)) c->flags |= CMD_CATEGORY_SLOW;
sdsfreesplitres(argv,argc);
return C_OK;
}
@ -3044,6 +3136,7 @@ struct redisCommand *lookupCommandOrOriginal(sds name) {
void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
int flags)
{
serverAssert(aeThreadOwnsLock());
if (server.aof_state != AOF_OFF && flags & PROPAGATE_AOF)
feedAppendOnlyFile(cmd,dbid,argv,argc);
if (flags & PROPAGATE_REPL)
@ -3144,6 +3237,7 @@ void call(client *c, int flags) {
long long dirty, start, duration;
int client_old_flags = c->flags;
struct redisCommand *real_cmd = c->cmd;
serverAssert(aeThreadOwnsLock());
/* Sent the command to clients in MONITOR mode, only if the commands are
* not generated from reading an AOF. */
@ -3257,6 +3351,9 @@ void call(client *c, int flags) {
}
redisOpArrayFree(&server.also_propagate);
}
ProcessPendingAsyncWrites();
server.also_propagate = prev_also_propagate;
server.stat_numcommands++;
}
@ -3270,6 +3367,7 @@ void call(client *c, int flags) {
* other operations can be performed by the caller. Otherwise
* if C_ERR is returned the client was destroyed (i.e. after QUIT). */
int processCommand(client *c) {
serverAssert(aeThreadOwnsLock());
/* The QUIT command is handled separately. Normal command procs will
* go through checking for replication and QUIT will cause trouble
* when FORCE_REPLICATION is enabled and would be implemented in
@ -3280,6 +3378,9 @@ int processCommand(client *c) {
return C_ERR;
}
AssertCorrectThread(c);
serverAssert(aeThreadOwnsLock());
/* Now lookup the command and check ASAP about trivial error conditions
* such as wrong arity, bad command name and so forth. */
c->cmd = c->lastcmd = lookupCommand(ptrFromObj(c->argv[0]));
@ -3301,14 +3402,17 @@ int processCommand(client *c) {
return C_OK;
}
/* Check if the user is authenticated */
if (!(DefaultUser->flags & USER_FLAG_NOPASS) &&
!c->authenticated &&
(c->cmd->proc != authCommand || c->cmd->proc == helloCommand))
{
flagTransaction(c);
addReply(c,shared.noautherr);
return C_OK;
/* Check if the user is authenticated. This check is skipped in case
 * the default user is flagged as "nopass" and is active. */
int auth_required = !(DefaultUser->flags & USER_FLAG_NOPASS) &&
                    !c->authenticated;
if (auth_required || DefaultUser->flags & USER_FLAG_DISABLED) {
    /* AUTH and HELLO are valid even in non authenticated state, so only
     * commands that are neither of the two get rejected here. The previous
     * test (!= authCommand || == helloCommand) was true for HELLO as well,
     * which made HELLO fail with NOAUTH. */
    if (c->cmd->proc != authCommand && c->cmd->proc != helloCommand) {
        flagTransaction(c);
        addReply(c,shared.noautherr);
        return C_OK;
    }
}
/* Check if the user can run this command according to the current
@ -3490,7 +3594,11 @@ int processCommand(client *c) {
void closeListeningSockets(int unlink_unix_socket) {
int j;
for (j = 0; j < server.ipfd_count; j++) close(server.ipfd[j]);
for (int iel = 0; iel < server.cthreads; ++iel)
{
for (j = 0; j < server.rgthreadvar[iel].ipfd_count; j++)
close(server.rgthreadvar[iel].ipfd[j]);
}
if (server.sofd != -1) close(server.sofd);
if (server.cluster_enabled)
for (j = 0; j < server.cfd_count; j++) close(server.cfd[j]);
@ -3567,7 +3675,7 @@ int prepareForShutdown(int flags) {
/* Close the listening sockets. Apparently this allows faster restarts. */
closeListeningSockets(1);
serverLog(LL_WARNING,"%s is now ready to exit, bye bye...",
server.sentinel_mode ? "Sentinel" : "Redis");
server.sentinel_mode ? "Sentinel" : "KeyDB");
return C_OK;
}
@ -3698,8 +3806,8 @@ void addReplyCommand(client *c, struct redisCommand *cmd) {
if (!cmd) {
addReplyNull(c);
} else {
/* We are adding: command name, arg count, flags, first, last, offset */
addReplyArrayLen(c, 6);
/* We are adding: command name, arg count, flags, first, last, offset, categories */
addReplyArrayLen(c, 7);
addReplyBulkCString(c, cmd->name);
addReplyLongLong(c, cmd->arity);
@ -3729,6 +3837,8 @@ void addReplyCommand(client *c, struct redisCommand *cmd) {
addReplyLongLong(c, cmd->firstkey);
addReplyLongLong(c, cmd->lastkey);
addReplyLongLong(c, cmd->keystep);
addReplyCommandCategories(c,cmd);
}
}
@ -3953,6 +4063,7 @@ sds genRedisInfoString(char *section) {
bytesToHuman(maxmemory_hmem,server.maxmemory);
if (sections++) info = sdscat(info,"\r\n");
serverLog(LL_WARNING, "OOM max sent used_memory: %zu", zmalloc_used);
info = sdscatprintf(info,
"# Memory\r\n"
"used_memory:%zu\r\n"
@ -4387,10 +4498,12 @@ void infoCommand(client *c) {
return;
}
addReplyBulkSds(c, genRedisInfoString(section));
serverLog(LL_WARNING, "OOM max info command %zu", zmalloc_used_memory());
}
void monitorCommand(client *c) {
/* ignore MONITOR if already slave or in monitor mode */
serverAssert(aeThreadOwnsLock());
if (c->flags & CLIENT_SLAVE) return;
c->flags |= (CLIENT_SLAVE|CLIENT_MONITOR);
@ -4420,7 +4533,7 @@ void linuxMemoryWarnings(void) {
serverLog(LL_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
}
if (THPIsEnabled()) {
serverLog(LL_WARNING,"WARNING you have Transparent Huge Pages (THP) support enabled in your kernel. This will create latency and memory usage issues with Redis. To fix this issue run the command 'echo never > /sys/kernel/mm/transparent_hugepage/enabled' as root, and add it to your /etc/rc.local in order to retain the setting after a reboot. Redis must be restarted after THP is disabled.");
serverLog(LL_WARNING,"WARNING you have Transparent Huge Pages (THP) support enabled in your kernel. This will create latency and memory usage issues with KeyDB. To fix this issue run the command 'echo never > /sys/kernel/mm/transparent_hugepage/enabled' as root, and add it to your /etc/rc.local in order to retain the setting after a reboot. KeyDB must be restarted after THP is disabled.");
}
}
#endif /* __linux__ */
@ -4738,12 +4851,28 @@ int redisIsSupervised(int mode) {
return 0;
}
void *workerThreadMain(void *parg)
{
int iel = (int)((int64_t)parg);
serverLog(LOG_INFO, "Thread %d alive.", iel);
serverTL = server.rgthreadvar+iel; // set the TLS threadsafe global
int isMainThread = (iel == IDX_EVENT_LOOP_MAIN);
aeEventLoop *el = server.rgthreadvar[iel].el;
aeSetBeforeSleepProc(el, isMainThread ? beforeSleep : beforeSleepLite, isMainThread ? 0 : AE_SLEEP_THREADSAFE);
aeSetAfterSleepProc(el, isMainThread ? afterSleep : NULL, 0);
aeMain(el);
aeDeleteEventLoop(el);
return NULL;
}
int main(int argc, char **argv) {
struct timeval tv;
int j;
#ifdef USE_MEMKIND
storage_init(NULL, 0);
#endif
#ifdef REDIS_TEST
if (argc == 3 && !strcasecmp(argv[1], "test")) {
@ -4788,6 +4917,13 @@ int main(int argc, char **argv) {
dictSetHashFunctionSeed((uint8_t*)hashseed);
server.sentinel_mode = checkForSentinelMode(argc,argv);
initServerConfig();
for (int iel = 0; iel < MAX_EVENT_LOOPS; ++iel)
{
initServerThread(server.rgthreadvar+iel, iel == IDX_EVENT_LOOP_MAIN);
}
serverTL = &server.rgthreadvar[IDX_EVENT_LOOP_MAIN];
aeAcquireLock(); // We own the lock on boot
ACLInit(); /* The ACL subsystem must be initialized ASAP because the
basic networking code and client creation depends on it. */
moduleInitModulesSystem();
@ -4881,9 +5017,9 @@ int main(int argc, char **argv) {
sdsfree(options);
}
serverLog(LL_WARNING, "oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo");
serverLog(LL_WARNING, "oO0OoO0OoO0Oo KeyDB is starting oO0OoO0OoO0Oo");
serverLog(LL_WARNING,
"Redis version=%s, bits=%d, commit=%s, modified=%d, pid=%d, just started",
"KeyDB version=%s, bits=%d, commit=%s, modified=%d, pid=%d, just started",
REDIS_VERSION,
(sizeof(long) == 8) ? 64 : 32,
redisGitSHA1(),
@ -4901,6 +5037,8 @@ int main(int argc, char **argv) {
if (background) daemonize();
initServer();
initNetworking(server.cthreads > 1 /* fReusePort */);
if (background || server.pidfile) createPidFile();
redisSetProcTitle(argv[0]);
redisAsciiArt();
@ -4913,11 +5051,7 @@ int main(int argc, char **argv) {
linuxMemoryWarnings();
#endif
moduleLoadFromQueue();
if (ACLLoadConfiguredUsers() == C_ERR) {
serverLog(LL_WARNING,
"Critical error while loading ACLs. Exiting.");
exit(1);
}
ACLLoadUsersAtStartup();
loadDataFromDisk();
if (server.cluster_enabled) {
if (verifyClusterConfigWithData() == C_ERR) {
@ -4927,7 +5061,7 @@ int main(int argc, char **argv) {
exit(1);
}
}
if (server.ipfd_count > 0)
if (server.rgthreadvar[IDX_EVENT_LOOP_MAIN].ipfd_count > 0)
serverLog(LL_NOTICE,"Ready to accept connections");
if (server.sofd > 0)
serverLog(LL_NOTICE,"The server is now ready to accept connections at %s", server.unixsocket);
@ -4935,15 +5069,24 @@ int main(int argc, char **argv) {
sentinelIsRunning();
}
if (server.cthreads > 4) {
serverLog(LL_WARNING, "Warning: server-threads is set to %d. This is above the maximum recommend value of 4, please ensure you've verified this is actually faster on your machine.", server.cthreads);
}
/* Warning the user about suspicious maxmemory setting. */
if (server.maxmemory > 0 && server.maxmemory < 1024*1024) {
serverLog(LL_WARNING,"WARNING: You specified a maxmemory value that is less than 1MB (current value is %llu bytes). Are you sure this is what you really want?", server.maxmemory);
}
aeSetBeforeSleepProc(server.el,beforeSleep);
aeSetAfterSleepProc(server.el,afterSleep);
aeMain(server.el);
aeDeleteEventLoop(server.el);
aeReleaseLock(); //Finally we can dump the lock
serverAssert(server.cthreads > 0 && server.cthreads <= MAX_EVENT_LOOPS);
pthread_t rgthread[MAX_EVENT_LOOPS];
for (int iel = 1; iel < server.cthreads; ++iel)
{
pthread_create(rgthread + iel, NULL, workerThreadMain, (void*)((int64_t)iel));
}
workerThreadMain((void*)((int64_t)IDX_EVENT_LOOP_MAIN));
return 0;
}

View File

@ -30,6 +30,9 @@
#ifndef __REDIS_H
#define __REDIS_H
#define TRUE 1
#define FALSE 0
#include "fmacros.h"
#include "config.h"
#include "solarisfixes.h"
@ -46,11 +49,18 @@
#include <pthread.h>
#include <syslog.h>
#include <netinet/in.h>
#ifdef __cplusplus
extern "C" {
#include <lua.h>
}
#else
#include <lua.h>
#endif
#include <signal.h>
typedef long long mstime_t; /* millisecond time type. */
#include "fastlock.h"
#include "ae.h" /* Event driven programming library */
#include "sds.h" /* Dynamic safe strings */
#include "dict.h" /* Hash tables */
@ -73,6 +83,10 @@ typedef long long mstime_t; /* millisecond time type. */
#include "endianconv.h"
#include "crc64.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Error codes */
#define C_OK 0
#define C_ERR -1
@ -168,6 +182,8 @@ typedef long long mstime_t; /* millisecond time type. */
#define CONFIG_DEFAULT_DEFRAG_MAX_SCAN_FIELDS 1000 /* keys with more than 1000 fields will be processed separately */
#define CONFIG_DEFAULT_PROTO_MAX_BULK_LEN (512ll*1024*1024) /* Bulk request max size */
#define CONFIG_DEFAULT_THREADS 1
#define ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP 20 /* Loopkups per loop. */
#define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds */
#define ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC 25 /* CPU max % for keys collection */
@ -660,7 +676,14 @@ struct evictionPoolEntry; /* Defined in evict.c */
* which is actually a linked list of blocks like that, that is: client->reply. */
typedef struct clientReplyBlock {
size_t size, used;
char buf[ZERO_LENGTH_ARRAY_LENGTH];
#ifndef __cplusplus
char buf[];
#else
__attribute__((always_inline)) char *buf()
{
return reinterpret_cast<char*>(this+1);
}
#endif
} clientReplyBlock;
/* Redis database representation. There are multiple databases identified
@ -810,10 +833,12 @@ typedef struct client {
unsigned long long reply_bytes; /* Tot bytes of objects in reply list. */
size_t sentlen; /* Amount of bytes already sent in the current
buffer or object being sent. */
size_t sentlenAsync; /* same as sentlen buf for async buffers (which are a different stream) */
time_t ctime; /* Client creation time. */
time_t lastinteraction; /* Time of the last interaction, used for timeout */
time_t obuf_soft_limit_reached_time;
int flags; /* Client flags: CLIENT_* macros. */
int fPendingAsyncWrite; /* NOTE: Not a flag because it is written to outside of the client lock (locked by the global lock instead) */
int authenticated; /* Needed when the default user requires auth. */
int replstate; /* Replication state if this is a slave. */
int repl_put_online_on_ack; /* Install slave write handler on ACK. */
@ -845,6 +870,14 @@ typedef struct client {
/* Response buffer */
int bufpos;
char buf[PROTO_REPLY_CHUNK_BYTES];
/* Async Response Buffer - other threads write here */
int bufposAsync;
int buflenAsync;
char *bufAsync;
int iel; /* the event loop index we're registered with */
struct fastlock lock;
} client;
struct saveparam {
@ -879,10 +912,12 @@ typedef struct zskiplistNode {
sds ele;
double score;
struct zskiplistNode *backward;
#ifndef __cplusplus
struct zskiplistLevel {
struct zskiplistNode *forward;
unsigned long span;
} level[ZERO_LENGTH_ARRAY_LENGTH];
} level[];
#endif
} zskiplistNode;
typedef struct zskiplist {
@ -1005,6 +1040,19 @@ struct clusterState;
#define CHILD_INFO_TYPE_RDB 0
#define CHILD_INFO_TYPE_AOF 1
#define MAX_EVENT_LOOPS 16
#define IDX_EVENT_LOOP_MAIN 0
// Per-thread variables that may be accessed without a lock.
// One instance exists per event loop in server.rgthreadvar[], and each thread
// reaches its own instance through the thread-local 'serverTL' pointer.
struct redisServerThreadVars {
aeEventLoop *el; /* Event loop run by this thread */
int ipfd[CONFIG_BINDADDR_MAX]; /* TCP socket file descriptors */
int ipfd_count; /* Used slots in ipfd[] */
list *clients_pending_write; /* There is to write or install handler. */
list *unblocked_clients; /* list of clients to unblock before next loop NOT THREADSAFE */
list *clients_pending_asyncwrite; /* NOTE(review): presumably clients with data queued through the *Async reply functions - confirm in networking code */
};
struct redisServer {
/* General */
pid_t pid; /* Main process pid. */
@ -1019,7 +1067,10 @@ struct redisServer {
redisDb *db;
dict *commands; /* Command table */
dict *orig_commands; /* Command table before command renaming. */
aeEventLoop *el;
int cthreads; /* Number of main worker threads */
struct redisServerThreadVars rgthreadvar[MAX_EVENT_LOOPS];
unsigned int lruclock; /* Clock for LRU eviction */
int shutdown_asap; /* SHUTDOWN needed ASAP */
int activerehashing; /* Incremental rehash in serverCron() */
@ -1044,14 +1095,11 @@ struct redisServer {
int bindaddr_count; /* Number of addresses in server.bindaddr[] */
char *unixsocket; /* UNIX socket path */
mode_t unixsocketperm; /* UNIX socket permission */
int ipfd[CONFIG_BINDADDR_MAX]; /* TCP socket file descriptors */
int ipfd_count; /* Used slots in ipfd[] */
int sofd; /* Unix socket file descriptor */
int cfd[CONFIG_BINDADDR_MAX];/* Cluster bus listening socket */
int cfd_count; /* Used slots in cfd[] */
list *clients; /* List of active clients */
list *clients_to_close; /* Clients to close asynchronously */
list *clients_pending_write; /* There is to write or install handler. */
list *slaves, *monitors; /* List of slaves and MONITORs */
client *current_client; /* Current client, only used on crash report */
rax *clients_index; /* Active clients dictionary by client ID. */
@ -1162,6 +1210,7 @@ struct redisServer {
int aof_pipe_read_data_from_parent;
int aof_pipe_write_ack_to_parent;
int aof_pipe_read_ack_from_child;
aeEventLoop *el_alf_pip_read_ack_from_child;
int aof_pipe_write_ack_to_child;
int aof_pipe_read_ack_from_parent;
int aof_stop_sending_diff; /* If true stop sending accumulated diffs
@ -1225,6 +1274,7 @@ struct redisServer {
int repl_diskless_sync; /* Send RDB to slaves sockets directly. */
int repl_diskless_sync_delay; /* Delay to start a diskless repl BGSAVE. */
/* Replication (slave) */
char *masteruser; /* AUTH with this user and masterauth with master */
char *masterauth; /* AUTH with this password with master */
char *masterhost; /* Hostname of master */
int masterport; /* Port of master */
@ -1272,7 +1322,6 @@ struct redisServer {
/* Blocked clients */
unsigned int blocked_clients; /* # of clients executing a blocking cmd.*/
unsigned int blocked_clients_by_type[BLOCKED_NUM];
list *unblocked_clients; /* list of clients to unblock before next loop */
list *ready_keys; /* List of readyList structures for BLPOP & co */
/* Sort parameters - qsort_r() is only available under BSD so we
* have to take this state global, in order to pass it to sortCompare() */
@ -1362,6 +1411,8 @@ struct redisServer {
pthread_mutex_t lruclock_mutex;
pthread_mutex_t next_client_id_mutex;
pthread_mutex_t unixtime_mutex;
struct fastlock flock;
};
typedef struct pubsubPattern {
@ -1456,6 +1507,7 @@ typedef struct {
*----------------------------------------------------------------------------*/
extern struct redisServer server;
extern __thread struct redisServerThreadVars *serverTL; // thread local server vars
extern struct sharedObjectsStruct shared;
extern dictType objectKeyPointerValueDictType;
extern dictType objectKeyHeapPointerValueDictType;
@ -1504,7 +1556,7 @@ size_t redisPopcount(void *s, long count);
void redisSetProcTitle(char *title);
/* networking.c -- Networking and Client related operations */
client *createClient(int fd);
client *createClient(int fd, int iel);
void closeTimedoutClients(void);
void freeClient(client *c);
void freeClientAsync(client *c);
@ -1561,18 +1613,18 @@ void rewriteClientCommandVector(client *c, int argc, ...);
void rewriteClientCommandArgument(client *c, int i, robj *newval);
void replaceClientCommandVector(client *c, int argc, robj **argv);
unsigned long getClientOutputBufferMemoryUsage(client *c);
void freeClientsInAsyncFreeQueue(void);
void freeClientsInAsyncFreeQueue(int iel);
void asyncCloseClientOnOutputBufferLimitReached(client *c);
int getClientType(client *c);
int getClientTypeByName(char *name);
char *getClientTypeName(int cclass);
int getClientTypeByName(const char *name);
const char *getClientTypeName(int cclass);
void flushSlavesOutputBuffers(void);
void disconnectSlaves(void);
int listenToPort(int port, int *fds, int *count);
int listenToPort(int port, int *fds, int *count, int fReusePort);
void pauseClients(mstime_t duration);
int clientsArePaused(void);
int processEventsWhileBlocked(void);
int handleClientsWithPendingWrites(void);
int processEventsWhileBlocked(int iel);
int handleClientsWithPendingWrites(int iel);
int clientHasPendingReplies(client *c);
void unlinkClient(client *c);
int writeToClient(int fd, client *c, int handler_installed);
@ -1580,6 +1632,25 @@ void linkClient(client *c);
void protectClient(client *c);
void unprotectClient(client *c);
// Special Thread-safe addReply() commands for posting messages to clients from a different thread
void addReplyAsync(client *c, robj *obj);
void addReplyArrayLenAsync(client *c, long length);
void addReplyProtoAsync(client *c, const char *s, size_t len);
void addReplyBulkAsync(client *c, robj *obj);
void addReplyBulkCBufferAsync(client *c, const void *p, size_t len);
void addReplyErrorAsync(client *c, const char *err);
void addReplyMapLenAsync(client *c, long length);
void addReplyNullAsync(client *c);
void addReplyDoubleAsync(client *c, double d);
void *addReplyDeferredLenAsync(client *c);
void setDeferredArrayLenAsync(client *c, void *node, long length);
void addReplySdsAsync(client *c, sds s);
void addReplyBulkSdsAsync(client *c, sds s);
void addReplyPushLenAsync(client *c, long length);
void addReplyLongLongAsync(client *c, long long ll);
void ProcessPendingAsyncWrites(void);
#ifdef __GNUC__
void addReplyErrorFormat(client *c, const char *fmt, ...)
__attribute__((format(printf, 2, 3)));
@ -1666,7 +1737,7 @@ unsigned long long estimateObjectIdleTime(robj *o);
#define sdsEncodedObject(objptr) (objptr->encoding == OBJ_ENCODING_RAW || objptr->encoding == OBJ_ENCODING_EMBSTR)
/* Synchronous I/O with timeout */
ssize_t syncWrite(int fd, char *ptr, ssize_t size, long long timeout);
ssize_t syncWrite(int fd, const char *ptr, ssize_t size, long long timeout);
ssize_t syncRead(int fd, char *ptr, ssize_t size, long long timeout);
ssize_t syncReadLine(int fd, char *ptr, ssize_t size, long long timeout);
@ -1754,6 +1825,8 @@ int ACLAppendUserForLoading(sds *argv, int argc, int *argc_err);
char *ACLSetUserStringError(void);
int ACLLoadConfiguredUsers(void);
sds ACLDescribeUser(user *u);
void ACLLoadUsersAtStartup(void);
void addReplyCommandCategories(client *c, struct redisCommand *cmd);
/* Sorted sets data type */
@ -2014,7 +2087,7 @@ int ldbPendingChildren(void);
sds luaCreateFunction(client *c, lua_State *lua, robj *body);
/* Blocked clients */
void processUnblockedClients(void);
void processUnblockedClients(int iel);
void blockClient(client *c, int btype);
void unblockClient(client *c);
void queueClientForReprocessing(client *c);
@ -2270,9 +2343,31 @@ int memtest_preserving_test(unsigned long *m, size_t bytes, int passes);
void mixDigest(unsigned char *digest, void *ptr, size_t len);
void xorDigest(unsigned char *digest, void *ptr, size_t len);
/* Map an event loop pointer back to its index in server.rgthreadvar[].
 *
 * Scans the first server.cthreads slots for the one whose 'el' equals
 * 'eventLoop' and returns that index. Asserts if no slot matches.
 *
 * Declared 'static inline' because this definition lives in a header: in C
 * a plain 'inline' definition does not provide an external definition, so
 * any call the compiler chooses not to inline (e.g. unoptimized builds)
 * would fail to link. */
static inline int ielFromEventLoop(const aeEventLoop *eventLoop)
{
    int iel = 0;
    for (; iel < server.cthreads; ++iel)
    {
        if (server.rgthreadvar[iel].el == eventLoop)
            break;
    }
    serverAssert(iel < server.cthreads);
    return iel;
}
/* Return non-zero when the calling thread is the one that owns client 'c',
 * i.e. the event loop recorded in c->iel is the same event loop as the
 * calling thread's serverTL->el.
 *
 * 'static inline' because the definition lives in a header: a plain C
 * 'inline' definition emits no external symbol, so a non-inlined call would
 * fail to link. */
static inline int FCorrectThread(client *c)
{
    return server.rgthreadvar[c->iel].el == serverTL->el;
}
#define AssertCorrectThread(c) serverAssert(FCorrectThread(c))
#define redisDebug(fmt, ...) \
printf("DEBUG %s:%d > " fmt "\n", __FILE__, __LINE__, __VA_ARGS__)
#define redisDebugMark() \
printf("-- MARK %s:%d --\n", __FILE__, __LINE__)
#ifdef __cplusplus
}
#endif
#endif

View File

@ -7,6 +7,10 @@ By Steve Reid <steve@edmweb.com>
100% Public Domain
*/
#ifdef __cplusplus
extern "C" {
#endif
typedef struct {
uint32_t state[5];
uint32_t count[2];
@ -21,4 +25,9 @@ void SHA1Final(unsigned char digest[20], SHA1_CTX* context);
#ifdef REDIS_TEST
int sha1Test(int argc, char **argv);
#endif
#ifdef __cplusplus
}
#endif
#endif

View File

@ -1,5 +1,7 @@
#include "server.h"
#ifdef USE_MEMKIND
#include <stdlib.h>
#include <stdio.h>
#include <memkind.h>
@ -264,4 +266,6 @@ void handle_postfork_child()
int fdOriginal = memkind_fd(mkdisk);
memkind_pmem_remapfd(mkdisk, fdNew);
close(fdOriginal);
}
}
#endif // USE_MEMKIND

View File

@ -46,7 +46,7 @@
* done within 'timeout' milliseconds the operation succeeds and 'size' is
* returned. Otherwise the operation fails, -1 is returned, and an unspecified
* partial write could be performed against the file descriptor. */
ssize_t syncWrite(int fd, char *ptr, ssize_t size, long long timeout) {
ssize_t syncWrite(int fd, const char *ptr, ssize_t size, long long timeout) {
ssize_t nwritten, ret = size;
long long start = mstime();
long long remaining = timeout;

View File

@ -547,7 +547,7 @@ void lremCommand(client *c) {
* as well. This command was originally proposed by Ezra Zygmuntowicz.
*/
void rpoplpushHandlePush(client *c, robj *dstkey, robj *dstobj, robj *value) {
static void rpoplpushHandlePush(client *c, robj *dstkey, robj *dstobj, robj *value) {
/* Create the list if the key does not exist */
if (!dstobj) {
dstobj = createQuicklistObject();
@ -559,7 +559,7 @@ void rpoplpushHandlePush(client *c, robj *dstkey, robj *dstobj, robj *value) {
listTypePush(dstobj,value,LIST_HEAD);
notifyKeyspaceEvent(NOTIFY_LIST,"lpush",dstkey,c->db->id);
/* Always send the pushed value to the client. */
addReplyBulk(c,value);
addReplyBulkAsync(c,value);
}
void rpoplpushCommand(client *c) {
@ -630,6 +630,7 @@ int serveClientBlockedOnList(client *receiver, robj *key, robj *dstkey, redisDb
robj *argv[3];
if (dstkey == NULL) {
fastlock_lock(&receiver->lock);
/* Propagate the [LR]POP operation. */
argv[0] = (where == LIST_HEAD) ? shared.lpop :
shared.rpop;
@ -637,16 +638,18 @@ int serveClientBlockedOnList(client *receiver, robj *key, robj *dstkey, redisDb
propagate((where == LIST_HEAD) ?
server.lpopCommand : server.rpopCommand,
db->id,argv,2,PROPAGATE_AOF|PROPAGATE_REPL);
/* BRPOP/BLPOP */
addReplyArrayLen(receiver,2);
addReplyBulk(receiver,key);
addReplyBulk(receiver,value);
addReplyArrayLenAsync(receiver,2);
addReplyBulkAsync(receiver,key);
addReplyBulkAsync(receiver,value);
/* Notify event. */
char *event = (where == LIST_HEAD) ? "lpop" : "rpop";
notifyKeyspaceEvent(NOTIFY_LIST,event,key,receiver->db->id);
fastlock_unlock(&receiver->lock);
} else {
fastlock_lock(&receiver->lock);
/* BRPOPLPUSH */
robj *dstobj =
lookupKeyWrite(receiver->db,dstkey);
@ -673,9 +676,11 @@ int serveClientBlockedOnList(client *receiver, robj *key, robj *dstkey, redisDb
/* Notify event ("lpush" was notified by rpoplpushHandlePush). */
notifyKeyspaceEvent(NOTIFY_LIST,"rpop",key,receiver->db->id);
fastlock_unlock(&receiver->lock);
} else {
/* BRPOPLPUSH failed because of wrong
* destination type. */
fastlock_unlock(&receiver->lock);
return C_ERR;
}
}

View File

@ -207,7 +207,7 @@ sds setTypeNextObject(setTypeIterator *si) {
* used field with values which are easy to trap if misused. */
int setTypeRandomElement(robj *setobj, sds *sdsele, int64_t *llele) {
if (setobj->encoding == OBJ_ENCODING_HT) {
dictEntry *de = dictGetRandomKey(setobj->m_ptr);
dictEntry *de = dictGetFairRandomKey(setobj->m_ptr);
*sdsele = dictGetKey(de);
*llele = -123456789; /* Not needed. Defensive. */
} else if (setobj->encoding == OBJ_ENCODING_INTSET) {

View File

@ -776,11 +776,16 @@ int streamDeleteItem(stream *s, streamID *id) {
/* Emit a reply in the client output buffer by formatting a Stream ID
 * in the standard <ms>-<seq> format and sending it as a bulk string
 * (via addReplyBulkSds). */
void addReplyStreamID(client *c, streamID *id) {
static void addReplyStreamID(client *c, streamID *id) {
sds replyid = sdscatfmt(sdsempty(),"%U-%U",id->ms,id->seq);
addReplyBulkSds(c,replyid);
}
/* Async counterpart of addReplyStreamID(): renders 'id' as "<ms>-<seq>"
 * into a fresh sds string and passes it to addReplyBulkSdsAsync(). */
static void addReplyStreamIDAsync(client *c, streamID *id) {
    sds formatted = sdsempty();
    formatted = sdscatfmt(formatted,"%U-%U",id->ms,id->seq);
    addReplyBulkSdsAsync(c,formatted);
}
/* Similar to the above function, but just creates an object, usually useful
* for replication purposes to create arguments. */
robj *createObjectFromStreamID(streamID *id) {
@ -914,7 +919,7 @@ size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end
}
if (!(flags & STREAM_RWR_RAWENTRIES))
arraylen_ptr = addReplyDeferredLen(c);
arraylen_ptr = addReplyDeferredLenAsync(c);
streamIteratorStart(&si,s,start,end,rev);
while(streamIteratorGetID(&si,&id,&numfields)) {
/* Update the group last_id if needed. */
@ -925,18 +930,18 @@ size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end
/* Emit a two elements array for each item. The first is
* the ID, the second is an array of field-value pairs. */
addReplyArrayLen(c,2);
addReplyStreamID(c,&id);
addReplyArrayLenAsync(c,2);
addReplyStreamIDAsync(c,&id);
addReplyMapLen(c,numfields);
addReplyMapLenAsync(c,numfields);
/* Emit the field-value pairs. */
while(numfields--) {
unsigned char *key, *value;
int64_t key_len, value_len;
streamIteratorGetField(&si,&key,&value,&key_len,&value_len);
addReplyBulkCBuffer(c,key,key_len);
addReplyBulkCBuffer(c,value,value_len);
addReplyBulkCBufferAsync(c,key,key_len);
addReplyBulkCBufferAsync(c,value,value_len);
}
/* If a group is passed, we need to create an entry in the
@ -994,7 +999,7 @@ size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end
if (count && count == arraylen) break;
}
streamIteratorStop(&si);
if (arraylen_ptr) setDeferredArrayLen(c,arraylen_ptr,arraylen);
if (arraylen_ptr) setDeferredArrayLenAsync(c,arraylen_ptr,arraylen);
return arraylen;
}

View File

@ -3155,15 +3155,15 @@ void genericZpopCommand(client *c, robj **keyv, int keyc, int where, int emitkey
/* No candidate for zpopping, return empty. */
if (!zobj) {
addReplyNull(c);
addReplyNullAsync(c);
return;
}
void *arraylen_ptr = addReplyDeferredLen(c);
void *arraylen_ptr = addReplyDeferredLenAsync(c);
long arraylen = 0;
/* We emit the key only for the blocking variant. */
if (emitkey) addReplyBulk(c,key);
if (emitkey) addReplyBulkAsync(c,key);
/* Remove the element. */
do {
@ -3213,8 +3213,8 @@ void genericZpopCommand(client *c, robj **keyv, int keyc, int where, int emitkey
signalModifiedKey(c->db,key);
}
addReplyBulkCBuffer(c,ele,sdslen(ele));
addReplyDouble(c,score);
addReplyBulkCBufferAsync(c,ele,sdslen(ele));
addReplyDoubleAsync(c,score);
sdsfree(ele);
arraylen += 2;
@ -3226,7 +3226,7 @@ void genericZpopCommand(client *c, robj **keyv, int keyc, int where, int emitkey
}
} while(--count);
setDeferredArrayLen(c,arraylen_ptr,arraylen + (emitkey != 0));
setDeferredArrayLenAsync(c,arraylen_ptr,arraylen + (emitkey != 0));
}
/* ZPOPMIN key [<count>] */

View File

@ -33,6 +33,10 @@
#include <stdint.h>
#include "sds.h"
#ifdef __cplusplus
extern "C" {
#endif
/* The maximum number of characters needed to represent a long double
* as a string (long double has a huge range).
* This should be the size of the buffer given to ld2string */
@ -58,4 +62,8 @@ int pathIsBaseName(char *path);
int utilTest(int argc, char **argv);
#endif
#ifdef __cplusplus
}
#endif
#endif

View File

@ -63,17 +63,21 @@ void zlibc_free(void *ptr) {
#define realloc(ptr, size, type) srealloc(ptr, size, type)
#define free(ptr) sfree(ptr)
#elif defined(USE_TCMALLOC)
#define malloc(size) tc_malloc(size)
#define calloc(count,size) tc_calloc(count,size)
#define realloc(ptr,size) tc_realloc(ptr,size)
#define malloc(size, type) tc_malloc(size)
#define calloc(count,size, type) tc_calloc(count,size)
#define realloc(ptr,size, type) tc_realloc(ptr,size)
#define free(ptr) tc_free(ptr)
#elif defined(USE_JEMALLOC)
#define malloc(size) je_malloc(size)
#define calloc(count,size) je_calloc(count,size)
#define realloc(ptr,size) je_realloc(ptr,size)
#define malloc(size, type) je_malloc(size)
#define calloc(count,size,type) je_calloc(count,size)
#define realloc(ptr,size,type) je_realloc(ptr,size)
#define free(ptr) je_free(ptr)
#define mallocx(size,flags) je_mallocx(size,flags)
#define dallocx(ptr,flags) je_dallocx(ptr,flags)
#else
#define malloc(size, type) malloc(size)
#define calloc(count,size,type) calloc(count,size)
#define realloc(ptr,size,type) realloc(ptr,size)
#endif
#define update_zmalloc_stat_alloc(__n) do { \
@ -101,12 +105,8 @@ static void zmalloc_default_oom(size_t size) {
static void (*zmalloc_oom_handler)(size_t) = zmalloc_default_oom;
void *zmalloc(size_t size, enum MALLOC_CLASS class) {
#ifdef USE_MEMKIND
void *ptr = malloc(size+PREFIX_SIZE, class);
#else
(void)class;
void *ptr = malloc(size+PREFIX_SIZE);
#endif
void *ptr = malloc(size+PREFIX_SIZE, class);
if (!ptr) zmalloc_oom_handler(size);
#ifdef HAVE_MALLOC_SIZE
@ -138,12 +138,8 @@ void zfree_no_tcache(void *ptr) {
#endif
void *zcalloc(size_t size, enum MALLOC_CLASS class) {
#ifdef USE_MEMKIND
(void)(class);
void *ptr = calloc(1, size+PREFIX_SIZE, class);
#else
(void)class;
void *ptr = calloc(1, size+PREFIX_SIZE);
#endif
if (!ptr) zmalloc_oom_handler(size);
#ifdef HAVE_MALLOC_SIZE

View File

@ -36,14 +36,10 @@
#define __str(s) #s
#include "storage.h"
#define USE_MEMKIND 1
#if defined(USE_MEMKIND)
#define ZMALLOC_LIB ("memkind")
#undef USE_JEMALLOC
#define USE_MALLOC_CLASS 1
// Even though memkind supports malloc_usable_size we don't use it for performance reasons
//#define HAVE_MALLOC_SIZE 0
//#define zmalloc_size(p) salloc_usable_size(p)
#elif defined(USE_TCMALLOC)
#define ZMALLOC_LIB ("tcmalloc-" __xstr(TC_VERSION_MAJOR) "." __xstr(TC_VERSION_MINOR))
#include <google/tcmalloc.h>
@ -86,6 +82,10 @@
#define HAVE_DEFRAG
#endif
#ifdef __cplusplus
extern "C" {
#endif
void *zmalloc(size_t size, enum MALLOC_CLASS mclass);
void *zcalloc(size_t size, enum MALLOC_CLASS mclass);
void *zrealloc(void *ptr, size_t size, enum MALLOC_CLASS mclass);
@ -116,4 +116,8 @@ size_t zmalloc_usable(void *ptr);
int zmalloc_test(int argc, char **argv);
#endif
#ifdef __cplusplus
}
#endif
#endif /* __ZMALLOC_H */

View File

@ -19,6 +19,7 @@ start_server {tags {"lazyfree"}} {
}
test "FLUSHDB ASYNC can reclaim memory in background" {
after 500 # Sometimes Redis is busy with a prior operation
set orig_mem [s used_memory]
set args {}
for {set i 0} {$i < 100000} {incr i} {

View File

@ -0,0 +1,14 @@
The utilities in this directory plot the distribution of SRANDMEMBER to
evaluate how fair it is.
See http://theshfl.com/redis_sets for more information on the topic that led
to this investigation and fix.
showdist.rb -- shows the distribution of how often elements are returned.
               The x axis is the number of times elements were returned, and
               the y axis is how many elements were returned with such
               frequency.
showfreq.rb -- shows the frequency each element was returned.
The x axis is the element number.
The y axis is the times it was returned.

View File

@ -0,0 +1,33 @@
# Prints an ASCII histogram of SRANDMEMBER fairness: for each observed return
# count, how many set elements were returned exactly that many times.
# Uses DB 9 and overwrites the key "myset".
require 'redis'

r = Redis.new
r.select(9)
r.del("myset")
r.sadd("myset",(0..999).to_a)

# freq[element] = number of times SRANDMEMBER returned that element.
freq = Hash.new(0)
100.times {
    # Queue the commands on the pipeline object yielded to the block.
    # Calling them on the outer client inside the block is deprecated in
    # newer redis-rb releases and no longer pipelines.
    res = r.pipelined { |pipe|
        1000.times {
            pipe.srandmember("myset")
        }
    }
    res.each{|ele|
        freq[ele] += 1
    }
}

# Convert into frequency distribution:
# dist[count] = number of elements that were returned 'count' times.
dist = Hash.new(0)
freq.each_value{|count|
    dist[count] += 1
}

# Print one row per count between the smallest and largest observed value;
# counts nobody hit show up as an empty bar.
min, max = dist.keys.minmax
(min..max).each{|x|
    puts "#{x} -> #{"*"*dist[x]}"
}

View File

@ -0,0 +1,23 @@
# Prints "<element> <count>" lines giving how many times SRANDMEMBER returned
# each element of a 1000-element set, suitable for plotting with gnuplot.
# Uses DB 9 and overwrites the key "myset".
require 'redis'

r = Redis.new
r.select(9)
r.del("myset")
r.sadd("myset",(0..999).to_a)

# freq[element] = number of times SRANDMEMBER returned that element.
freq = Hash.new(0)
500.times {
    # Queue the commands on the pipeline object yielded to the block.
    # Calling them on the outer client inside the block is deprecated in
    # newer redis-rb releases and no longer pipelines.
    res = r.pipelined { |pipe|
        1000.times {
            pipe.srandmember("myset")
        }
    }
    res.each{|ele|
        freq[ele] += 1
    }
}

# Print how often each element was returned, to process it with gnuplot
freq.each{|item,count|
    puts "#{item} #{count}"
}