diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml
index 3b8b1263f..6e4f88ef3 100644
--- a/.github/workflows/daily.yml
+++ b/.github/workflows/daily.yml
@@ -100,6 +100,7 @@ jobs:
         make valgrind
     - name: test
       run: |
+        sudo apt-get update
         sudo apt-get install tcl8.5 valgrind -y
         ./runtest --valgrind --verbose --clients 1
     - name: module api test
@@ -169,7 +170,7 @@ jobs:
       run: make
     - name: test
       run: |
-        ./runtest --accurate --verbose
+        ./runtest --accurate --verbose --no-latency
     - name: module api test
       run: ./runtest-moduleapi --verbose
     - name: sentinel tests
diff --git a/00-RELEASENOTES b/00-RELEASENOTES
index c9c09f76e..bff270e77 100644
--- a/00-RELEASENOTES
+++ b/00-RELEASENOTES
@@ -11,6 +11,422 @@ CRITICAL: There is a critical bug affecting MOST USERS. Upgrade ASAP.
 SECURITY: There are security fixes in the release.
 --------------------------------------------------------------------------------
 
+================================================================================
+Redis 6.0.10 Released Tue Jan 12 16:20:20 IST 2021
+================================================================================
+
+Upgrade urgency MODERATE: several bugs with moderate impact are fixed.
+Here is a comprehensive list of changes in this release compared to 6.0.9.
+
+Command behavior changes:
+* SWAPDB invalidates WATCHed keys (#8239)
+* SORT command behaves differently when used on a writable replica (#8283)
+* EXISTS should not alter LRU (#8016)
+  In Redis 5.0 and 6.0 it would have touched the LRU/LFU of the key.
+* OBJECT should not reveal logically expired keys (#8016)
+  Will now behave the same as TYPE or any other non-DEBUG command.
+* GEORADIUS[BYMEMBER] can fail with -OOM if Redis is over the memory limit (#8107)
+
+Other behavior changes:
+* Sentinel: Fix missing updates to the config file after SENTINEL SET command (#8229)
+* CONFIG REWRITE is atomic and safer, but requires write access to the config file's folder (#7824, #8051)
+  This change was already present in 6.0.9, but was missing from the release notes.
+
+Bug fixes with compatibility implications (bugs introduced in Redis 6.0):
+* Fix RDB CRC64 checksum on big-endian systems (#8270)
+  If you're using big-endian please consider the compatibility implications with
+  RESTORE, replication and persistence.
+* Fix wrong order of key/value in Lua's map response (#8266)
+  If your scripts use redis.setresp() or return a map (new in Redis 6.0), please
+  consider the implications.
+
+Bug fixes:
+* Fix an issue where a forked process deletes the parent's pidfile (#8231)
+* Fix crashes when enabling io-threads-do-reads (#8230)
+* Fix a crash in redis-cli after executing cluster backup (#8267)
+* Handle output buffer limits for module blocked clients (#8141)
+  A module sending a reply to a blocked client could otherwise exceed the limit.
+* Fix setproctitle related crashes. (#8150, #8088)
+  Caused various crashes on startup, mainly on Apple M1 chips or under instrumentation.
+* Backup/restore cluster mode keys to slots map for repl-diskless-load=swapdb (#8108)
+  In cluster mode with repl-diskless-load, when loading failed, the slot map wouldn't
+  have been restored.
+* Fix oom-score-adj-values range, and bug when used in config file (#8046)
+  Setting this in the config file on a line after enabling it would have been
+  buggy.
+* Reset average TTL when emptying databases (#8106)
+  It was only causing a misleading metric in INFO.
+* Disable rehash when Redis has a child process (#8007)
+  This could have caused excessive CoW during BGSAVE, replication or AOFRW.
+* Further improved ACL algorithm for picking categories (#7966)
+  Output of ACL GETUSER is now more similar to the one provided by ACL SETUSER.
+* Fix bug with module GIL being released prematurely (#8061)
+  Could in theory (and rarely) cause multi-threaded modules to corrupt memory.
+* Reduce effect of client tracking causing feedback loop in key eviction (#8100)
+* Fix cluster access to unaligned memory (SIGBUS on old ARM) (#7958)
+* Fix saving of strings larger than 2GB into RDB files (#8306)
+
+Additional improvements:
+* Avoid wasteful transient memory allocation in certain cases (#8286, #5954)
+
+Platform / toolchain support related improvements:
+* Fix crash log registers output on ARM. (#8020)
+* Add a check for an ARM64 Linux kernel bug (#8224)
+  Due to the potential severity of this issue, Redis will print a log warning on startup.
+* Raspberry Pi build fix. (#8095)
+
+New configuration options:
+* oom-score-adj-values config can now take absolute values (besides relative ones) (#8046)
+
+Module related fixes:
+* Moved RMAPI_FUNC_SUPPORTED so that it's usable (#8037)
+* Improve timer accuracy (#7987)
+* Allow '\0' inside of result of RM_CreateStringPrintf (#6260)
+
+================================================================================
+Redis 6.0.9 Released Mon Oct 26 10:37:47 IST 2020
+================================================================================
+
+Upgrade urgency: SECURITY if you use an affected platform (see below).
+                 Otherwise the upgrade urgency is MODERATE.
+
+This release fixes a potential heap overflow when using a heap allocator other
+than jemalloc or glibc's malloc. See:
+https://github.com/redis/redis/pull/7963
+
+Other fixes in this release:
+
+New:
+* Memory reporting of clients argv (#7874)
+* Add redis-cli control on raw format line delimiter (#7841)
+* Add redis-cli support for rediss:// -u prefix (#7900)
+* Get RSS size support for NetBSD and DragonFlyBSD
+
+Behavior changes:
+* WATCH no longer ignores keys which have expired for MULTI/EXEC (#7920)
+* Correct OBJECT ENCODING response for stream type (#7797)
+* Allow blocked XREAD on a cluster replica (#7881)
+* TLS: Do not require CA config if not used (#7862)
+
+Bug fixes:
+* INFO report real peak memory (before eviction) (#7894)
+* Allow requirepass config to clear the password (#7899)
+* Fix config rewrite file handling to make it really atomic (#7824)
+* Fix excessive categories being displayed from ACLs (#7889)
+* Add fsync in replica when full RDB payload was received (#7839)
+* Don't write replies to socket when output buffer limit reached (#7202)
+* Fix redis-check-rdb support for modules aux data (#7826)
+* Other smaller bug fixes
+
+Modules API:
+* Add APIs for version and compatibility checks (#7865)
+* Add RM_GetClientCertificate (#7866)
+* Add RM_GetDetachedThreadSafeContext (#7886)
+* Add RM_GetCommandKeys (#7884)
+* Add Swapdb Module Event (#7804)
+* RM_GetContextFlags provides indication of being in a fork child (#7783)
+* RM_GetContextFlags document missing flags: MULTI_DIRTY, IS_CHILD (#7821)
+* Expose real client on connection events (#7867)
+* Minor improvements to module blocked on keys (#7903)
+
+Full list of commits:
+
+Yossi Gottlieb in commit ce0d74d8f:
+    Fix wrong zmalloc_size() assumption.
(#7963) + 1 file changed, 3 deletions(-) + +Oran Agra in commit d3ef26822: + Attempt to fix sporadic test failures due to wait_for_log_messages (#7955) + 1 file changed, 2 insertions(+) + +David CARLIER in commit 76993a0d4: + cpu affinity: DragonFlyBSD support (#7956) + 2 files changed, 9 insertions(+), 2 deletions(-) + +Zach Fewtrell in commit b23cdc14a: + fix invalid 'failover' identifier in cluster slave selection test (#7942) + 1 file changed, 1 insertion(+), 1 deletion(-) + +WuYunlong in commit 99a4cb401: + Update rdb_last_bgsave_time_sec in INFO on diskless replication (#7917) + 1 file changed, 11 insertions(+), 14 deletions(-) + +Wen Hui in commit 258287c35: + do not add save parameter during config rewrite in sentinel mode (#7945) + 1 file changed, 6 insertions(+) + +Qu Chen in commit 6134279e2: + WATCH no longer ignores keys which have expired for MULTI/EXEC. (#7920) + 2 files changed, 3 insertions(+), 3 deletions(-) + +Oran Agra in commit d15ec67c6: + improve verbose logging on failed test. print log file lines (#7938) + 1 file changed, 4 insertions(+) + +Yossi Gottlieb in commit 8a2e6d24f: + Add a --no-latency tests flag. (#7939) + 5 files changed, 23 insertions(+), 9 deletions(-) + +filipe oliveira in commit 0a1737dc5: + Fixed bug concerning redis-benchmark non clustered benchmark forcing always the same hash tag {tag} (#7931) + 1 file changed, 31 insertions(+), 24 deletions(-) + +Oran Agra in commit 6d9b3df71: + fix 32bit build warnings (#7926) + 2 files changed, 3 insertions(+), 3 deletions(-) + +Wen Hui in commit ed6f7a55e: + fix double fclose in aofrewrite (#7919) + 1 file changed, 6 insertions(+), 5 deletions(-) + +Oran Agra in commit 331d73c92: + INFO report peak memory before eviction (#7894) + 1 file changed, 11 insertions(+), 1 deletion(-) + +Yossi Gottlieb in commit e88e13528: + Fix tests failure on busybox systems. (#7916) + 2 files changed, 2 insertions(+), 2 deletions(-) + +Oran Agra in commit b7f53738e: + Allow requirepass config to clear the password (#7899) + 1 file changed, 18 insertions(+), 8 deletions(-) + +Wang Yuan in commit 2ecb28b68: + Remove temporary aof and rdb files in a background thread (#7905) + 2 files changed, 3 insertions(+), 3 deletions(-) + +guybe7 in commit 7bc605e6b: + Minor improvements to module blocked on keys (#7903) + 3 files changed, 15 insertions(+), 9 deletions(-) + +Andreas Lind in commit 1b484608d: + Support redis-cli -u rediss://... (#7900) + 1 file changed, 9 insertions(+), 1 deletion(-) + +Yossi Gottlieb in commit 95095d680: + Modules: fix RM_GetCommandKeys API. (#7901) + 3 files changed, 4 insertions(+), 7 deletions(-) + +Meir Shpilraien (Spielrein) in commit cd3ae2f2c: + Add Module API for version and compatibility checks (#7865) + 9 files changed, 180 insertions(+), 3 deletions(-) + +Yossi Gottlieb in commit 1d723f734: + Module API: Add RM_GetClientCertificate(). (#7866) + 6 files changed, 88 insertions(+) + +Yossi Gottlieb in commit d72172752: + Modules: Add RM_GetDetachedThreadSafeContext(). (#7886) + 4 files changed, 52 insertions(+), 2 deletions(-) + +Yossi Gottlieb in commit e4f9aff19: + Modules: add RM_GetCommandKeys(). + 6 files changed, 238 insertions(+), 1 deletion(-) + +Yossi Gottlieb in commit 6682b913e: + Introduce getKeysResult for getKeysFromCommand. 
+ 7 files changed, 170 insertions(+), 121 deletions(-) + +Madelyn Olson in commit 9db65919c: + Fixed excessive categories being displayed from acls (#7889) + 2 files changed, 29 insertions(+), 2 deletions(-) + +Oran Agra in commit f34c50cf6: + Add some additional signal info to the crash log (#7891) + 1 file changed, 4 insertions(+), 1 deletion(-) + +Oran Agra in commit 300bb4701: + Allow blocked XREAD on a cluster replica (#7881) + 3 files changed, 43 insertions(+) + +Oran Agra in commit bc5cf0f1a: + memory reporting of clients argv (#7874) + 5 files changed, 55 insertions(+), 5 deletions(-) + +DvirDukhan in commit 13d2e6a57: + redis-cli add control on raw format line delimiter (#7841) + 1 file changed, 8 insertions(+), 6 deletions(-) + +Oran Agra in commit d54e25620: + Include internal sds fragmentation in MEMORY reporting (#7864) + 2 files changed, 7 insertions(+), 7 deletions(-) + +Oran Agra in commit ac2c2b74e: + Fix crash in script timeout during AOF loading (#7870) + 2 files changed, 47 insertions(+), 4 deletions(-) + +Rafi Einstein in commit 00d2082e7: + Makefile: enable program suffixes via PROG_SUFFIX (#7868) + 2 files changed, 10 insertions(+), 6 deletions(-) + +nitaicaro in commit d2c2c26e7: + Fixed Tracking test “The other connection is able to get invalidations” (#7871) + 1 file changed, 3 insertions(+), 2 deletions(-) + +Yossi Gottlieb in commit 2c172556f: + Modules: expose real client on conn events. + 1 file changed, 11 insertions(+), 2 deletions(-) + +Yossi Gottlieb in commit 2972d0c1f: + Module API: Fail ineffective auth calls. + 1 file changed, 5 insertions(+) + +Yossi Gottlieb in commit aeb2a3b6a: + TLS: Do not require CA config if not used. (#7862) + 1 file changed, 5 insertions(+), 3 deletions(-) + +Oran Agra in commit d8e64aeb8: + warning: comparison between signed and unsigned integer in 32bit build (#7838) + 1 file changed, 2 insertions(+), 2 deletions(-) + +David CARLIER in commit 151209982: + Add support for Haiku OS (#7435) + 3 files changed, 16 insertions(+) + +Gavrie Philipson in commit b1d3e169f: + Fix typo in module API docs (#7861) + 1 file changed, 2 insertions(+), 2 deletions(-) + +David CARLIER in commit 08e3b8d13: + getting rss size implementation for netbsd (#7293) + 1 file changed, 20 insertions(+) + +Oran Agra in commit 0377a889b: + Fix new obuf-limits tests to work with TLS (#7848) + 2 files changed, 29 insertions(+), 13 deletions(-) + +caozb in commit a057ad9b1: + ignore slaveof no one in redis.conf (#7842) + 1 file changed, 10 insertions(+), 1 deletion(-) + +Wang Yuan in commit 87ecee645: + Don't support Gopher if enable io threads to read queries (#7851) + 2 files changed, 8 insertions(+), 5 deletions(-) + +Wang Yuan in commit b92902236: + Set 'loading' and 'shutdown_asap' to volatile sig_atomic_t type (#7845) + 1 file changed, 2 insertions(+), 2 deletions(-) + +Uri Shachar in commit ee0875a02: + Fix config rewrite file handling to make it really atomic (#7824) + 1 file changed, 49 insertions(+), 47 deletions(-) + +WuYunlong in commit d577519e1: + Add fsync to readSyncBulkPayload(). 
(#7839) + 1 file changed, 11 insertions(+) + +Wen Hui in commit 104e0ea3e: + rdb.c: handle fclose error case differently to avoid double fclose (#7307) + 1 file changed, 7 insertions(+), 6 deletions(-) + +Wang Yuan in commit 0eb015ac6: + Don't write replies if close the client ASAP (#7202) + 7 files changed, 144 insertions(+), 2 deletions(-) + +Guy Korland in commit 08a03e32c: + Fix RedisModule_HashGet examples (#6697) + 1 file changed, 4 insertions(+), 4 deletions(-) + +Oran Agra in commit 09551645d: + fix recently broken TLS build error, and add coverage for CI (#7833) + 2 files changed, 4 insertions(+), 3 deletions(-) + +David CARLIER in commit c545ba5d0: + Further NetBSD update and build fixes. (#7831) + 3 files changed, 72 insertions(+), 3 deletions(-) + +WuYunlong in commit ec9050053: + Fix redundancy use of semicolon in do-while macros in ziplist.c. (#7832) + 1 file changed, 3 insertions(+), 3 deletions(-) + +yixiang in commit 27a4d1314: + Fix connGetSocketError usage (#7811) + 2 files changed, 6 insertions(+), 4 deletions(-) + +Oran Agra in commit 30795dcae: + RM_GetContextFlags - document missing flags (#7821) + 1 file changed, 6 insertions(+) + +Yossi Gottlieb in commit 14a12849f: + Fix occasional hangs on replication reconnection. (#7830) + 2 files changed, 14 insertions(+), 3 deletions(-) + +Ariel Shtul in commit d5a1b06dc: + Fix redis-check-rdb support for modules aux data (#7826) + 3 files changed, 21 insertions(+), 1 deletion(-) + +Wen Hui in commit 39f793693: + refactor rewriteStreamObject code for adding missing streamIteratorStop call (#7829) + 1 file changed, 36 insertions(+), 18 deletions(-) + +WuYunlong in commit faad29bfb: + Make IO threads killable so that they can be canceled at any time. + 1 file changed, 1 insertion(+) + +WuYunlong in commit b3f1b5830: + Make main thread killable so that it can be canceled at any time. Refine comment of makeThreadKillable(). + 3 files changed, 11 insertions(+), 4 deletions(-) + +Oran Agra in commit 0f43d1f55: + RM_GetContextFlags provides indication that we're in a fork child (#7783) + 8 files changed, 28 insertions(+), 18 deletions(-) + +Wen Hui in commit a55ea9cdf: + Add Swapdb Module Event (#7804) + 5 files changed, 52 insertions(+) + +Daniel Dai in commit 1d8f72bef: + fix make warnings in debug.c MacOS (#7805) + 2 files changed, 3 insertions(+), 2 deletions(-) + +David CARLIER in commit 556953d93: + debug.c: NetBSD build warning fix. (#7810) + 1 file changed, 4 insertions(+), 3 deletions(-) + +Wang Yuan in commit d02435b66: + Remove tmp rdb file in background thread (#7762) + 6 files changed, 82 insertions(+), 8 deletions(-) + +Oran Agra in commit 1bd7bfdc0: + Add printf attribute and fix warnings and a minor bug (#7803) + 2 files changed, 12 insertions(+), 4 deletions(-) + +WuYunlong in commit d25147b4c: + bio: doFastMemoryTest should try to kill io threads as well. + 3 files changed, 19 insertions(+) + +WuYunlong in commit 4489ba081: + bio: fix doFastMemoryTest. + 4 files changed, 25 insertions(+), 3 deletions(-) + +Wen Hui in commit cf85def67: + correct OBJECT ENCODING response for stream type (#7797) + 1 file changed, 1 insertion(+) + +WuYunlong in commit cf5bcf892: + Clarify help text of tcl scripts. 
(#7798) + 1 file changed, 1 insertion(+) + +Mykhailo Pylyp in commit f72665c65: + Recalculate hardcoded variables from $::instances_count in sentinel tests (#7561) + 3 files changed, 15 insertions(+), 13 deletions(-) + +Oran Agra in commit c67b19e7a: + Fix failing valgrind installation in github actions (#7792) + 1 file changed, 1 insertion(+) + +Oran Agra in commit 92763fd2a: + fix broken PEXPIREAT test (#7791) + 1 file changed, 10 insertions(+), 6 deletions(-) + +Wang Yuan in commit f5b4c0ccb: + Remove dead global variable 'lru_clock' (#7782) + 1 file changed, 1 deletion(-) + +Oran Agra in commit 82d431fd6: + Squash merging 125 typo/grammar/comment/doc PRs (#7773) + 80 files changed, 436 insertions(+), 416 deletions(-) + ================================================================================ Redis 6.0.8 Released Wed Sep 09 23:34:17 IDT 2020 ================================================================================ diff --git a/README.md b/README.md index de80070ce..1decad4b0 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,9 @@ as libsystemd-dev on Debian/Ubuntu or systemd-devel on CentOS) and run: % make USE_SYSTEMD=yes +To append a suffix to KeyDB program names, use: + + % make PROG_SUFFIX="-alt" ***Note that the following dependencies may be needed: % sudo apt-get install autoconf autotools-dev libnuma-dev libtool @@ -120,7 +123,7 @@ installed): Fixing build problems with dependencies or cached build options --------- -KeyDB has some dependencies which are included into the `deps` directory. +KeyDB has some dependencies which are included in the `deps` directory. `make` does not automatically rebuild dependencies even if something in the source code of dependencies changes. @@ -147,7 +150,7 @@ with a 64 bit target, or the other way around, you need to perform a In case of build errors when trying to build a 32 bit binary of KeyDB, try the following steps: -* Install the packages libc6-dev-i386 (also try g++-multilib). +* Install the package libc6-dev-i386 (also try g++-multilib). * Try using the following command line instead of `make 32bit`: `make CFLAGS="-m32 -march=native" LDFLAGS="-m32"` @@ -172,14 +175,14 @@ Verbose build ------------- KeyDB will build with a user friendly colorized output by default. -If you want to see a more verbose output use the following: +If you want to see a more verbose output, use the following: % make V=1 Running KeyDB ------------- -To run KeyDB with the default configuration just type: +To run KeyDB with the default configuration, just type: % cd src % ./keydb-server @@ -232,7 +235,7 @@ You can find the list of all the available commands at https://docs.keydb.dev/do Installing KeyDB ----------------- -In order to install KeyDB binaries into /usr/local/bin just use: +In order to install KeyDB binaries into /usr/local/bin, just use: % make install @@ -241,8 +244,8 @@ different destination. Make install will just install binaries in your system, but will not configure init scripts and configuration files in the appropriate place. 
This is not -needed if you want just to play a bit with KeyDB, but if you are installing -it the proper way for a production system, we have a script doing this +needed if you just want to play a bit with KeyDB, but if you are installing +it the proper way for a production system, we have a script that does this for Ubuntu and Debian systems: % cd utils diff --git a/deps/README.md b/deps/README.md index f923c06ad..02c99052f 100644 --- a/deps/README.md +++ b/deps/README.md @@ -21,7 +21,7 @@ just following tose steps: 1. Remove the jemalloc directory. 2. Substitute it with the new jemalloc source tree. -3. Edit the Makefile localted in the same directory as the README you are +3. Edit the Makefile located in the same directory as the README you are reading, and change the --with-version in the Jemalloc configure script options with the version you are using. This is required because otherwise Jemalloc configuration script is broken and will not work nested in another @@ -33,7 +33,7 @@ If you want to upgrade Jemalloc while also providing support for active defragmentation, in addition to the above steps you need to perform the following additional steps: -5. In Jemalloc three, file `include/jemalloc/jemalloc_macros.h.in`, make sure +5. In Jemalloc tree, file `include/jemalloc/jemalloc_macros.h.in`, make sure to add `#define JEMALLOC_FRAG_HINT`. 6. Implement the function `je_get_defrag_hint()` inside `src/jemalloc.c`. You can see how it is implemented in the current Jemalloc source tree shipped @@ -49,7 +49,7 @@ Hiredis uses the SDS string library, that must be the same version used inside R 1. Check with diff if hiredis API changed and what impact it could have in Redis. 2. Make sure that the SDS library inside Hiredis and inside Redis are compatible. 3. After the upgrade, run the Redis Sentinel test. -4. Check manually that redis-cli and redis-benchmark behave as expecteed, since we have no tests for CLI utilities currently. +4. Check manually that redis-cli and redis-benchmark behave as expected, since we have no tests for CLI utilities currently. Linenoise --- @@ -77,6 +77,6 @@ and our version: 1. Makefile is modified to allow a different compiler than GCC. 2. We have the implementation source code, and directly link to the following external libraries: `lua_cjson.o`, `lua_struct.o`, `lua_cmsgpack.o` and `lua_bit.o`. -3. There is a security fix in `ldo.c`, line 498: The check for `LUA_SIGNATURE[0]` is removed in order toa void direct bytecode execution. +3. There is a security fix in `ldo.c`, line 498: The check for `LUA_SIGNATURE[0]` is removed in order to avoid direct bytecode execution. diff --git a/deps/linenoise/linenoise.c b/deps/linenoise/linenoise.c index cfe51e768..ccf5c5548 100644 --- a/deps/linenoise/linenoise.c +++ b/deps/linenoise/linenoise.c @@ -625,7 +625,7 @@ static void refreshMultiLine(struct linenoiseState *l) { rpos2 = (plen+l->pos+l->cols)/l->cols; /* current cursor relative row. */ lndebug("rpos2 %d", rpos2); - /* Go up till we reach the expected positon. */ + /* Go up till we reach the expected position. */ if (rows-rpos2 > 0) { lndebug("go-up %d", rows-rpos2); snprintf(seq,64,"\x1b[%dA", rows-rpos2); @@ -767,7 +767,7 @@ void linenoiseEditBackspace(struct linenoiseState *l) { } } -/* Delete the previosu word, maintaining the cursor at the start of the +/* Delete the previous word, maintaining the cursor at the start of the * current word. 
*/ void linenoiseEditDeletePrevWord(struct linenoiseState *l) { size_t old_pos = l->pos; diff --git a/keydb.conf b/keydb.conf index 73907e6ce..3874c5acd 100644 --- a/keydb.conf +++ b/keydb.conf @@ -24,7 +24,7 @@ # to customize a few per-server settings. Include files can include # other files, so use this wisely. # -# Notice option "include" won't be rewritten by command "CONFIG REWRITE" +# Note that option "include" won't be rewritten by command "CONFIG REWRITE" # from admin or KeyDB Sentinel. Since KeyDB always uses the last processed # line as value of a configuration directive, you'd better put includes # at the beginning of this file to avoid overwriting config change at runtime. @@ -46,7 +46,7 @@ ################################## NETWORK ##################################### # By default, if no "bind" configuration directive is specified, KeyDB listens -# for connections from all the network interfaces available on the server. +# for connections from all available network interfaces on the host machine. # It is possible to listen to just one or multiple selected interfaces using # the "bind" configuration directive, followed by one or more IP addresses. # @@ -58,13 +58,12 @@ # ~~~ WARNING ~~~ If the computer running KeyDB is directly exposed to the # internet, binding to all the interfaces is dangerous and will expose the # instance to everybody on the internet. So by default we uncomment the -# following bind directive, that will force KeyDB to listen only into -# the IPv4 loopback interface address (this means KeyDB will be able to -# accept connections only from clients running into the same computer it -# is running). +# following bind directive, that will force KeyDB to listen only on the +# IPv4 loopback interface address (this means KeyDB will only be able to +# accept client connections from the same host that it is running on). # # IF YOU ARE SURE YOU WANT YOUR INSTANCE TO LISTEN TO ALL THE INTERFACES -# JUST COMMENT THE FOLLOWING LINE. +# JUST COMMENT OUT THE FOLLOWING LINE. # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bind 127.0.0.1 @@ -93,8 +92,8 @@ port 6379 # TCP listen() backlog. # -# In high requests-per-second environments you need an high backlog in order -# to avoid slow clients connections issues. Note that the Linux kernel +# In high requests-per-second environments you need a high backlog in order +# to avoid slow clients connection issues. Note that the Linux kernel # will silently truncate it to the value of /proc/sys/net/core/somaxconn so # make sure to raise both the value of somaxconn and tcp_max_syn_backlog # in order to get the desired effect. @@ -118,8 +117,8 @@ timeout 0 # of communication. This is useful for two reasons: # # 1) Detect dead peers. -# 2) Take the connection alive from the point of view of network -# equipment in the middle. +# 2) Force network equipment in the middle to consider the connection to be +# alive. # # On Linux, the specified value (in seconds) is the period used to send ACKs. # Note that to close the connection the double of the time is needed. @@ -166,7 +165,7 @@ tcp-keepalive 300 # tls-auth-clients no # tls-auth-clients optional -# By default, a Redis replica does not attempt to establish a TLS connection +# By default, a KeyDB replica does not attempt to establish a TLS connection # with its master. # # Use the following directive to enable TLS on replication links. @@ -228,11 +227,12 @@ daemonize no # supervision tree. 
Options:
 #   supervised no      - no supervision interaction
 #   supervised upstart - signal upstart by putting KeyDB into SIGSTOP mode
+#                        requires "expect stop" in your upstart job config
 #   supervised systemd - signal systemd by writing READY=1 to $NOTIFY_SOCKET
 #   supervised auto    - detect upstart or systemd method based on
 #                        UPSTART_JOB or NOTIFY_SOCKET environment variables
 #       Note: these supervision methods only signal "process is ready."
-#       They do not enable continuous liveness pings back to your supervisor.
+#       They do not enable continuous pings back to your supervisor.
 supervised no
 
 # If a pid file is specified, KeyDB writes it where specified at startup
@@ -294,7 +294,7 @@ always-show-logo yes
 #   Will save the DB if both the given number of seconds and the given
 #   number of write operations against the DB occurred.
 #
-#   In the example below the behaviour will be to save:
+#   In the example below the behavior will be to save:
 #   after 900 sec (15 min) if at least 1 key changed
 #   after 300 sec (5 min) if at least 10 keys changed
 #   after 60 sec if at least 10000 keys changed
@@ -327,7 +327,7 @@ save 60 10000
 stop-writes-on-bgsave-error yes
 
 # Compress string objects using LZF when dump .rdb databases?
-# For default that's set to 'yes' as it's almost always a win.
+# By default compression is enabled as it's almost always a win.
 # If you want to save some CPU in the saving child set it to 'no' but
 # the dataset will likely be bigger if you have compressible values or keys.
 rdbcompression yes
@@ -415,11 +415,11 @@ dir ./
 #    still reply to client requests, possibly with out of date data, or the
 #    data set may just be empty if this is the first synchronization.
 #
-# 2) if replica-serve-stale-data is set to 'no' the replica will reply with
-#    an error "SYNC with master in progress" to all the kind of commands
-#    but to INFO, replicaOF, AUTH, PING, SHUTDOWN, REPLCONF, ROLE, CONFIG,
-#    SUBSCRIBE, UNSUBSCRIBE, PSUBSCRIBE, PUNSUBSCRIBE, PUBLISH, PUBSUB,
-#    COMMAND, POST, HOST: and LATENCY.
+# 2) If replica-serve-stale-data is set to 'no' the replica will reply with
+#    an error "SYNC with master in progress" to all commands except:
+#    INFO, REPLICAOF, AUTH, PING, SHUTDOWN, REPLCONF, ROLE, CONFIG, SUBSCRIBE,
+#    UNSUBSCRIBE, PSUBSCRIBE, PUNSUBSCRIBE, PUBLISH, PUBSUB, COMMAND, POST,
+#    HOST and LATENCY.
 #
 replica-serve-stale-data yes
@@ -497,14 +497,14 @@ repl-diskless-sync-delay 5
 # -----------------------------------------------------------------------------
 # WARNING: RDB diskless load is experimental. Since in this setup the replica
 # does not immediately store an RDB on disk, it may cause data loss during
-# failovers. RDB diskless load + Redis modules not handling I/O reads may also
-# cause Redis to abort in case of I/O errors during the initial synchronization
+# failovers. RDB diskless load + KeyDB modules not handling I/O reads may also
+# cause KeyDB to abort in case of I/O errors during the initial synchronization
 # stage with the master. Use only if you know what you are doing.
 # -----------------------------------------------------------------------------
 #
 # Replica can load the RDB it reads from the replication link directly from the
 # socket, or store the RDB to a file and read that file after it was completely
-# recived from the master.
+# received from the master.
# # In many cases the disk is slower than the network, and storing and loading # the RDB file may increase replication time (and even increase the master's @@ -534,7 +534,8 @@ repl-diskless-load disabled # # It is important to make sure that this value is greater than the value # specified for repl-ping-replica-period otherwise a timeout will be detected -# every time there is low traffic between the master and the replica. +# every time there is low traffic between the master and the replica. The default +# value is 60 seconds. # # repl-timeout 60 @@ -559,28 +560,28 @@ repl-disable-tcp-nodelay no # partial resync is enough, just passing the portion of data the replica # missed while disconnected. # -# The bigger the replication backlog, the longer the time the replica can be -# disconnected and later be able to perform a partial resynchronization. +# The bigger the replication backlog, the longer the replica can endure the +# disconnect and later be able to perform a partial resynchronization. # -# The backlog is only allocated once there is at least a replica connected. +# The backlog is only allocated if there is at least one replica connected. # # repl-backlog-size 1mb -# After a master has no longer connected replicas for some time, the backlog -# will be freed. The following option configures the amount of seconds that -# need to elapse, starting from the time the last replica disconnected, for -# the backlog buffer to be freed. +# After a master has no connected replicas for some time, the backlog will be +# freed. The following option configures the amount of seconds that need to +# elapse, starting from the time the last replica disconnected, for the backlog +# buffer to be freed. # # Note that replicas never free the backlog for timeout, since they may be # promoted to masters later, and should be able to correctly "partially -# resynchronize" with the replicas: hence they should always accumulate backlog. +# resynchronize" with other replicas: hence they should always accumulate backlog. # # A value of 0 means to never release the backlog. # # repl-backlog-ttl 3600 # The replica priority is an integer number published by KeyDB in the INFO -# output. It is used by Redis Sentinel in order to select a replica to promote +# output. It is used by KeyDB Sentinel in order to select a replica to promote # into a master if the master is no longer working correctly. # # A replica with a low priority number is considered better for promotion, so @@ -623,8 +624,8 @@ replica-priority 100 # Another place where this info is available is in the output of the # "ROLE" command of a master. # -# The listed IP and address normally reported by a replica is obtained -# in the following way: +# The listed IP address and port normally reported by a replica is +# obtained in the following way: # # IP: The address is auto detected by checking the peer address # of the socket used by the replica to connect with the master. @@ -634,7 +635,7 @@ replica-priority 100 # listen for connections. # # However when port forwarding or Network Address Translation (NAT) is -# used, the replica may be actually reachable via different IP and port +# used, the replica may actually be reachable via different IP and port # pairs. The following two options can be used by a replica in order to # report to its master a specific set of IP and port, so that both INFO # and ROLE will report those values. 
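As a rough illustration of the replication backlog trade-off described above (for a partial resync to stay possible, the backlog must hold every write a replica misses while disconnected), here is a back-of-the-envelope sketch. The write rate and disconnect window are hypothetical inputs, not KeyDB defaults:

    /* Hypothetical sizing helper for repl-backlog-size; not KeyDB source. */
    #include <stdio.h>

    int main(void) {
        long write_rate = 512 * 1024;  /* assumed replication stream rate, bytes/sec */
        long max_disconnect_sec = 60;  /* disconnect window we want to survive */

        /* Smallest backlog that still allows a partial resynchronization
         * after max_disconnect_sec of disconnection. */
        long backlog = write_rate * max_disconnect_sec;
        printf("repl-backlog-size >= %ld bytes (~%ldmb)\n",
               backlog, backlog / (1024 * 1024));
        return 0;
    }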
@@ -647,31 +648,31 @@ replica-priority 100 ############################### KEYS TRACKING ################################# -# Redis implements server assisted support for client side caching of values. +# KeyDB implements server assisted support for client side caching of values. # This is implemented using an invalidation table that remembers, using # 16 millions of slots, what clients may have certain subsets of keys. In turn # this is used in order to send invalidation messages to clients. Please -# to understand more about the feature check this page: +# check this page to understand more about the feature: # # https://redis.io/topics/client-side-caching # # When tracking is enabled for a client, all the read only queries are assumed -# to be cached: this will force Redis to store information in the invalidation +# to be cached: this will force KeyDB to store information in the invalidation # table. When keys are modified, such information is flushed away, and # invalidation messages are sent to the clients. However if the workload is -# heavily dominated by reads, Redis could use more and more memory in order +# heavily dominated by reads, KeyDB could use more and more memory in order # to track the keys fetched by many clients. # # For this reason it is possible to configure a maximum fill value for the # invalidation table. By default it is set to 1M of keys, and once this limit -# is reached, Redis will start to evict keys in the invalidation table +# is reached, KeyDB will start to evict keys in the invalidation table # even if they were not modified, just to reclaim memory: this will in turn # force the clients to invalidate the cached values. Basically the table # maximum size is a trade off between the memory you want to spend server # side to track information about who cached what, and the ability of clients # to retain cached objects in memory. # -# If you set the value to 0, it means there are no limits, and Redis will +# If you set the value to 0, it means there are no limits, and KeyDB will # retain as many keys as needed in the invalidation table. # In the "stats" INFO section, you can find information about the number of # keys in the invalidation table at every given moment. @@ -683,7 +684,7 @@ replica-priority 100 ################################## SECURITY ################################### -# Warning: since KeyDB is pretty fast an outside user can try up to +# Warning: since KeyDB is pretty fast, an outside user can try up to # 1 million passwords per second against a modern box. This means that you # should use very strong passwords, otherwise they will be very easy to break. # Note that because the password is really a shared secret between the client @@ -707,7 +708,7 @@ replica-priority 100 # AUTH (or the HELLO command AUTH option) in order to be authenticated and # start to work. # -# The ACL rules that describe what an user can do are the following: +# The ACL rules that describe what a user can do are the following: # # on Enable the user: it is possible to authenticate as this user. # off Disable the user: it's no longer possible to authenticate @@ -717,7 +718,7 @@ replica-priority 100 # - Disallow the execution of that command # +@ Allow the execution of all the commands in such category # with valid categories are like @admin, @set, @sortedset, ... -# and so forth, see the full list in the server.c file where +# and so forth, see the full list in the server.cpp file where # the KeyDB command table is described and defined. 
# The special category @all means all the commands, but currently # present in the server, and that will be loaded in the future @@ -735,7 +736,7 @@ replica-priority 100 # It is possible to specify multiple patterns. # allkeys Alias for ~* # resetkeys Flush the list of allowed keys patterns. -# > Add this passowrd to the list of valid password for the user. +# > Add this password to the list of valid password for the user. # For example >mypass will add "mypass" to the list. # This directive clears the "nopass" flag (see later). # < Remove this password from the list of valid passwords. @@ -789,7 +790,7 @@ acllog-max-len 128 # # Instead of configuring users here in this file, it is possible to use # a stand-alone file just listing users. The two methods cannot be mixed: -# if you configure users here and at the same time you activate the exteranl +# if you configure users here and at the same time you activate the external # ACL file, the server will refuse to start. # # The format of the external ACL user file is exactly the same as the @@ -797,7 +798,7 @@ acllog-max-len 128 # # aclfile /etc/keydb/users.acl -# IMPORTANT NOTE: starting with Redis 6 "requirepass" is just a compatiblity +# IMPORTANT NOTE: starting with Redis 6 "requirepass" is just a compatibility # layer on top of the new ACL system. The option effect will be just setting # the password for the default user. Clients will still authenticate using # AUTH as usually, or more explicitly with AUTH default @@ -908,8 +909,8 @@ acllog-max-len 128 # LRU, LFU and minimal TTL algorithms are not precise algorithms but approximated # algorithms (in order to save memory), so you can tune it for speed or -# accuracy. For default KeyDB will check five keys and pick the one that was -# used less recently, you can change the sample size using the following +# accuracy. By default KeyDB will check five keys and pick the one that was +# used least recently, you can change the sample size using the following # configuration directive. # # The default of 5 produces good enough results. 10 Approximates very closely @@ -937,7 +938,7 @@ acllog-max-len 128 # # replica-ignore-maxmemory yes -# Redis reclaims expired keys in two ways: upon access when those keys are +# KeyDB reclaims expired keys in two ways: upon access when those keys are # found to be expired, and also in background, in what is called the # "active expire key". The key space is slowly and interactively scanned # looking for expired keys to reclaim, so that it is possible to free memory @@ -949,8 +950,8 @@ acllog-max-len 128 # it is possible to increase the expire "effort" that is normally set to # "1", to a greater value, up to the value "10". At its maximum value the # system will use more CPU, longer cycles (and technically may introduce -# more latency), and will tollerate less already expired keys still present -# in the system. It's a tradeoff betweeen memory, CPU and latecy. +# more latency), and will tolerate less already expired keys still present +# in the system. It's a tradeoff between memory, CPU and latency. # # active-expire-effort 1 @@ -1010,76 +1011,35 @@ replica-lazy-flush no lazyfree-lazy-user-del no -################################ THREADED I/O ################################# - -# Redis is mostly single threaded, however there are certain threaded -# operations such as UNLINK, slow I/O accesses and other things that are -# performed on side threads. -# -# Now it is also possible to handle Redis clients socket reads and writes -# in different I/O threads. 
Since especially writing is so slow, normally -# Redis users use pipelining in order to speedup the Redis performances per -# core, and spawn multiple instances in order to scale more. Using I/O -# threads it is possible to easily speedup two times Redis without resorting -# to pipelining nor sharding of the instance. -# -# By default threading is disabled, we suggest enabling it only in machines -# that have at least 4 or more cores, leaving at least one spare core. -# Using more than 8 threads is unlikely to help much. We also recommend using -# threaded I/O only if you actually have performance problems, with Redis -# instances being able to use a quite big percentage of CPU time, otherwise -# there is no point in using this feature. -# -# So for instance if you have a four cores boxes, try to use 2 or 3 I/O -# threads, if you have a 8 cores, try to use 6 threads. In order to -# enable I/O threads use the following configuration directive: -# -# io-threads 4 -# -# Setting io-threads to 1 will just use the main thread as usually. -# When I/O threads are enabled, we only use threads for writes, that is -# to thread the write(2) syscall and transfer the client buffers to the -# socket. However it is also possible to enable threading of reads and -# protocol parsing using the following configuration directive, by setting -# it to yes: -# -# io-threads-do-reads no -# -# Usually threading reads doesn't help much. -# -# NOTE 1: This configuration directive cannot be changed at runtime via -# CONFIG SET. Aso this feature currently does not work when SSL is -# enabled. -# -# NOTE 2: If you want to test the Redis speedup using redis-benchmark, make -# sure you also run the benchmark itself in threaded mode, using the -# --threads option to match the number of Redis theads, otherwise you'll not -# be able to notice the improvements. - ############################ KERNEL OOM CONTROL ############################## # On Linux, it is possible to hint the kernel OOM killer on what processes # should be killed first when out of memory. # -# Enabling this feature makes Redis actively control the oom_score_adj value +# Enabling this feature makes KeyDB actively control the oom_score_adj value # for all its processes, depending on their role. The default scores will # attempt to have background child processes killed before all others, and # replicas killed before masters. - +# +# KeyDB supports three options: +# +# no: Don't make changes to oom-score-adj (default). +# yes: Alias to "relative" see below. +# absolute: Values in oom-score-adj-values are written as is to the kernel. +# relative: Values are used relative to the initial value of oom_score_adj when +# the server starts and are then clamped to a range of -1000 to 1000. +# Because typically the initial value is 0, they will often match the +# absolute values. oom-score-adj no # When oom-score-adj is used, this directive controls the specific values used -# for master, replica and background child processes. Values range -1000 to -# 1000 (higher means more likely to be killed). +# for master, replica and background child processes. Values range -2000 to +# 2000 (higher means more likely to be killed). # # Unprivileged processes (not root, and without CAP_SYS_RESOURCE capabilities) # can freely increase their value, but not decrease it below its initial -# settings. -# -# Values are used relative to the initial value of oom_score_adj when the server -# starts. Because typically the initial value is 0, they will often match the -# absolute values. 
- +# settings. This means that setting oom-score-adj to "relative" and setting the +# oom-score-adj-values to positive values will always succeed. oom-score-adj-values 0 200 800 ############################## APPEND ONLY MODE ############################### @@ -1206,8 +1166,8 @@ aof-load-truncated yes # # [RDB file][AOF tail] # -# When loading KeyDB recognizes that the AOF file starts with the "REDIS" -# string and loads the prefixed RDB file, and continues loading the AOF +# When loading, KeyDB recognizes that the AOF file starts with the "REDIS" +# string and loads the prefixed RDB file, then continues loading the AOF # tail. aof-use-rdb-preamble yes @@ -1221,7 +1181,7 @@ aof-use-rdb-preamble yes # # When a long running script exceeds the maximum execution time only the # SCRIPT KILL and SHUTDOWN NOSAVE commands are available. The first can be -# used to stop a script that did not yet called write commands. The second +# used to stop a script that did not yet call any write commands. The second # is the only way to shut down the server in the case a write command was # already issued by the script but the user doesn't want to wait for the natural # termination of the script. @@ -1247,7 +1207,7 @@ lua-time-limit 5000 # Cluster node timeout is the amount of milliseconds a node must be unreachable # for it to be considered in failure state. -# Most other internal time limits are multiple of the node timeout. +# Most other internal time limits are a multiple of the node timeout. # # cluster-node-timeout 15000 @@ -1274,18 +1234,18 @@ lua-time-limit 5000 # the failover if, since the last interaction with the master, the time # elapsed is greater than: # -# (node-timeout * replica-validity-factor) + repl-ping-replica-period +# (node-timeout * cluster-replica-validity-factor) + repl-ping-replica-period # -# So for example if node-timeout is 30 seconds, and the replica-validity-factor +# So for example if node-timeout is 30 seconds, and the cluster-replica-validity-factor # is 10, and assuming a default repl-ping-replica-period of 10 seconds, the # replica will not try to failover if it was not able to talk with the master # for longer than 310 seconds. # -# A large replica-validity-factor may allow replicas with too old data to failover +# A large cluster-replica-validity-factor may allow replicas with too old data to failover # a master, while a too small value may prevent the cluster from being able to # elect a replica at all. # -# For maximum availability, it is possible to set the replica-validity-factor +# For maximum availability, it is possible to set the cluster-replica-validity-factor # to a value of 0, which means, that replicas will always try to failover the # master regardless of the last time they interacted with the master. # (However they'll always try to apply a delay proportional to their @@ -1316,7 +1276,7 @@ lua-time-limit 5000 # cluster-migration-barrier 1 # By default KeyDB Cluster nodes stop accepting queries if they detect there -# is at least an hash slot uncovered (no available node is serving it). +# is at least a hash slot uncovered (no available node is serving it). # This way if the cluster is partially down (for example a range of hash slots # are no longer covered) all the cluster becomes, eventually, unavailable. # It automatically returns available as soon as all the slots are covered again. 
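The cluster-replica-validity-factor bound a few hunks above can be made concrete with a tiny calculation. The values mirror the 30s / 10 / 10s example in the text; this is an illustration, not KeyDB source:

    /* Max staleness after which a replica refuses to fail over:
     * (node-timeout * cluster-replica-validity-factor) + repl-ping-replica-period */
    #include <stdio.h>

    int main(void) {
        int node_timeout_sec = 30;
        int validity_factor  = 10;
        int ping_period_sec  = 10;

        int max_stale_sec = node_timeout_sec * validity_factor + ping_period_sec;
        printf("no failover after %d seconds without master contact\n",
               max_stale_sec);  /* prints 310, matching the example above */
        return 0;
    }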
@@ -1371,7 +1331,7 @@ lua-time-limit 5000 # * cluster-announce-port # * cluster-announce-bus-port # -# Each instruct the node about its address, client port, and cluster message +# Each instructs the node about its address, client port, and cluster message # bus port. The information is then published in the header of the bus packets # so that other nodes will be able to correctly map the address of the node # publishing the information. @@ -1382,7 +1342,7 @@ lua-time-limit 5000 # Note that when remapped, the bus port may not be at the fixed offset of # clients port + 10000, so you can specify any port and bus-port depending # on how they get remapped. If the bus-port is not set, a fixed offset of -# 10000 will be used as usually. +# 10000 will be used as usual. # # Example: # @@ -1485,61 +1445,6 @@ latency-monitor-threshold 0 # specify at least one of K or E, no events will be delivered. notify-keyspace-events "" -############################### GOPHER SERVER ################################# - -# KeyDB contains an implementation of the Gopher protocol, as specified in -# the RFC 1436 (https://www.ietf.org/rfc/rfc1436.txt). -# -# The Gopher protocol was very popular in the late '90s. It is an alternative -# to the web, and the implementation both server and client side is so simple -# that the KeyDB server has just 100 lines of code in order to implement this -# support. -# -# What do you do with Gopher nowadays? Well Gopher never *really* died, and -# lately there is a movement in order for the Gopher more hierarchical content -# composed of just plain text documents to be resurrected. Some want a simpler -# internet, others believe that the mainstream internet became too much -# controlled, and it's cool to create an alternative space for people that -# want a bit of fresh air. -# -# Anyway for the 10nth birthday of the KeyDB, we gave it the Gopher protocol -# as a gift. -# -# --- HOW IT WORKS? --- -# -# The KeyDB Gopher support uses the inline protocol of KeyDB, and specifically -# two kind of inline requests that were anyway illegal: an empty request -# or any request that starts with "/" (there are no KeyDB commands starting -# with such a slash). Normal RESP2/RESP3 requests are completely out of the -# path of the Gopher protocol implementation and are served as usually as well. -# -# If you open a connection to KeyDB when Gopher is enabled and send it -# a string like "/foo", if there is a key named "/foo" it is served via the -# Gopher protocol. -# -# In order to create a real Gopher "hole" (the name of a Gopher site in Gopher -# talking), you likely need a script like the following: -# -# https://github.com/antirez/gopher2redis -# -# --- SECURITY WARNING --- -# -# If you plan to put KeyDB on the internet in a publicly accessible address -# to server Gopher pages MAKE SURE TO SET A PASSWORD to the instance. -# Once a password is set: -# -# 1. The Gopher server (when enabled, not by default) will still serve -# content via Gopher. -# 2. However other commands cannot be called before the client will -# authenticate. -# -# So use the 'requirepass' option to protect your instance. -# -# To enable Gopher support uncomment the following line and set -# the option from no (the default) to yes. 
-#
-# gopher-enabled no
-
 ############################### ADVANCED CONFIG ###############################
 
 # Hashes are encoded using a memory efficient data structure when they have a
@@ -1683,7 +1588,7 @@ client-output-buffer-limit pubsub 32mb 8mb 60
 # client-query-buffer-limit 1gb
 
 # In the KeyDB protocol, bulk requests, that are, elements representing single
-# strings, are normally limited ot 512 mb. However you can change this limit
+# strings, are normally limited to 512 mb. However you can change this limit
 # here, but must be 1mb or greater
 #
 # proto-max-bulk-len 512mb
@@ -1712,7 +1617,7 @@ hz 10
 #
 # Since the default HZ value by default is conservatively set to 10, KeyDB
 # offers, and enables by default, the ability to use an adaptive HZ value
-# which will temporary raise when there are many connected clients.
+# which will temporarily raise when there are many connected clients.
 #
 # When dynamic HZ is enabled, the actual configured HZ will be used
 # as a baseline, but multiples of the configured HZ value will be actually
@@ -1779,7 +1684,7 @@ rdb-save-incremental-fsync yes
 # for the key counter to be divided by two (or decremented if it has a value
 # less <= 10).
 #
-# The default value for the lfu-decay-time is 1. A Special value of 0 means to
+# The default value for the lfu-decay-time is 1. A special value of 0 means to
 # decay the counter every time it happens to be scanned.
 #
 # lfu-log-factor 10
@@ -1799,7 +1704,7 @@ rdb-save-incremental-fsync yes
 # restart is needed in order to lower the fragmentation, or at least to flush
 # away all the data and create it again. However thanks to this feature
 # implemented by Oran Agra for Redis 4.0 this process can happen at runtime
-# in an "hot" way, while the server is running.
+# in a "hot" way, while the server is running.
 #
 # Basically when the fragmentation is over a certain level (see the
 # configuration options below) KeyDB will start to create new copies of the
@@ -1852,14 +1757,14 @@ rdb-save-incremental-fsync yes
 # Jemalloc background thread for purging will be enabled by default
 jemalloc-bg-thread yes
 
-# It is possible to pin different threads and processes of Redis to specific
+# It is possible to pin different threads and processes of KeyDB to specific
 # CPUs in your system, in order to maximize the performances of the server.
-# This is useful both in order to pin different Redis threads in different
-# CPUs, but also in order to make sure that multiple Redis instances running
+# This is useful both in order to pin different KeyDB threads in different
+# CPUs, but also in order to make sure that multiple KeyDB instances running
 # in the same host will be pinned to different CPUs.
 #
 # Normally you can do this using the "taskset" command, however it is also
-# possible to this via Redis configuration directly, both in Linux and FreeBSD.
+# possible to do this via KeyDB configuration directly, both in Linux and FreeBSD.
 #
 # You can pin the server/IO threads, bio threads, aof rewrite child process, and
 # the bgsave child process. The syntax to specify the cpu list is the same as
@@ -1877,6 +1782,13 @@ jemalloc-bg-thread yes
 # Set bgsave child process to cpu affinity 1,10,11
 # bgsave_cpulist 1,10-11
 
+# In some cases KeyDB will emit warnings and even refuse to start if it detects
+# that the system is in a bad state. It is possible to suppress these warnings
+# by setting the following config, which takes a space-delimited list of
+# warnings to suppress.
+#
+# ignore-warnings ARM64-COW-BUG
+
 # The minimum number of clients on a thread before KeyDB assigns new connections to a different thread
 # Tuning this parameter is a tradeoff between locking overhead and distributing the workload over multiple cores
 # min-clients-per-thread 50
@@ -1903,6 +1815,8 @@ jemalloc-bg-thread yes
 # Number of worker threads serving requests. This number should be related to the performance
 # of your network hardware, not the number of cores on your machine. We don't recommend going
 # above 4 at this time. By default this is set to 1.
+#
+# Note: KeyDB does not use io-threads, but io-threads is a config alias for server-threads
 server-threads 2
 
 # Should KeyDB pin threads to CPUs? By default this is disabled, and KeyDB will not bind threads.
diff --git a/runtest-moduleapi b/runtest-moduleapi
index 8b2a12806..268506160 100755
--- a/runtest-moduleapi
+++ b/runtest-moduleapi
@@ -28,4 +28,5 @@ $TCLSH tests/test_helper.tcl \
 --single unit/moduleapi/keyspace_events \
 --single unit/moduleapi/blockedclient \
 --single unit/moduleapi/moduleloadsave \
+--single unit/moduleapi/getkeys \
 "${@}"
diff --git a/sentinel.conf b/sentinel.conf
index 7cec3c356..2ec6717d5 100644
--- a/sentinel.conf
+++ b/sentinel.conf
@@ -259,6 +259,6 @@ sentinel deny-scripts-reconfig yes
 # SENTINEL SET can also be used in order to perform this configuration at runtime.
# # In order to set a command back to its original name (undo the renaming), it -# is possible to just rename a command to itsef: +# is possible to just rename a command to itself: # # SENTINEL rename-command mymaster CONFIG CONFIG diff --git a/src/Makefile b/src/Makefile index 27dc5e4c5..3af99b3b4 100644 --- a/src/Makefile +++ b/src/Makefile @@ -152,12 +152,21 @@ ifeq ($(uname_S),OpenBSD) endif else +ifeq ($(uname_S),NetBSD) + # NetBSD + FINAL_LIBS+= -lpthread + ifeq ($(USE_BACKTRACE),yes) + FINAL_CFLAGS+= -DUSE_BACKTRACE -I/usr/pkg/include + FINAL_LDFLAGS+= -L/usr/pkg/lib + FINAL_LIBS+= -lexecinfo + endif +else ifeq ($(uname_S),FreeBSD) # FreeBSD FINAL_LIBS+= -lpthread -lexecinfo else ifeq ($(uname_S),DragonFly) - # FreeBSD + # DragonFly FINAL_LIBS+= -lpthread -lexecinfo else ifeq ($(uname_S),OpenBSD) @@ -167,6 +176,12 @@ else ifeq ($(uname_S),NetBSD) # NetBSD FINAL_LIBS+= -lpthread -lexecinfo +else +ifeq ($(uname_S),Haiku) + # Haiku + FINAL_CFLAGS+= -DBSD_SOURCE + FINAL_LDFLAGS+= -lbsd -lnetwork + FINAL_LIBS+= -lpthread else # All the other OSes (notably Linux) FINAL_LDFLAGS+= -rdynamic @@ -184,6 +199,8 @@ endif endif endif endif +endif +endif # Include paths to dependencies FINAL_CFLAGS+= -I../deps/hiredis -I../deps/linenoise -I../deps/lua/src FINAL_CXXFLAGS+= -I../deps/hiredis -I../deps/linenoise -I../deps/lua/src @@ -277,15 +294,15 @@ QUIET_LINK = @printf ' %b %b\n' $(LINKCOLOR)LINK$(ENDCOLOR) $(BINCOLOR)$@$(EN QUIET_INSTALL = @printf ' %b %b\n' $(LINKCOLOR)INSTALL$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR); endif -REDIS_SERVER_NAME=keydb-server -REDIS_SENTINEL_NAME=keydb-sentinel +REDIS_SERVER_NAME=keydb-server$(PROG_SUFFIX) +REDIS_SENTINEL_NAME=keydb-sentinel$(PROG_SUFFIX) REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o t_nhash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crcspeed.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o acl.o storage.o rdb-s3.o fastlock.o new.o tracking.o cron.o connection.o tls.o sha256.o motd.o timeout.o setcpuaffinity.o $(ASM_OBJ) -REDIS_CLI_NAME=keydb-cli +REDIS_CLI_NAME=keydb-cli$(PROG_SUFFIX) REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o redis-cli-cpphelper.o zmalloc.o release.o anet.o ae.o crcspeed.o crc64.o siphash.o crc16.o storage-lite.o fastlock.o new.o motd.o $(ASM_OBJ) -REDIS_BENCHMARK_NAME=keydb-benchmark +REDIS_BENCHMARK_NAME=keydb-benchmark$(PROG_SUFFIX) REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o dict.o zmalloc.o siphash.o redis-benchmark.o storage-lite.o fastlock.o new.o $(ASM_OBJ) -REDIS_CHECK_RDB_NAME=keydb-check-rdb -REDIS_CHECK_AOF_NAME=keydb-check-aof +REDIS_CHECK_RDB_NAME=keydb-check-rdb$(PROG_SUFFIX) +REDIS_CHECK_AOF_NAME=keydb-check-aof$(PROG_SUFFIX) all: $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_RDB_NAME) $(REDIS_CHECK_AOF_NAME) @echo "" diff --git a/src/acl.cpp b/src/acl.cpp index a363b97eb..7f8ab74bd 100644 --- a/src/acl.cpp +++ b/src/acl.cpp @@ -300,7 +300,13 @@ void ACLFreeUserAndKillClients(user *u) { * it in non authenticated mode. 
*/ c->puser = DefaultUser; c->authenticated = 0; - freeClientAsync(c); + /* We will write replies to this client later, so we can't + * close it directly even if async. */ + if (c == serverTL->current_client) { + c->flags |= CLIENT_CLOSE_AFTER_COMMAND; + } else { + freeClientAsync(c); + } } } ACLFreeUser(u); @@ -377,7 +383,7 @@ int ACLUserCanExecuteFutureCommands(user *u) { * zero, the user flag ALLCOMMANDS is cleared since it is no longer possible * to skip the command bit explicit test. */ void ACLSetUserCommandBit(user *u, unsigned long id, int value) { - uint64_t word, bit; + uint64_t word=0, bit=0; if (ACLGetCommandBitCoordinates(id,&word,&bit) == C_ERR) return; if (value) { u->allowed_commands[word] |= bit; @@ -472,21 +478,68 @@ sds ACLDescribeUserCommandRules(user *u) { ACLSetUser(fakeuser,"-@all",-1); } - /* Try to add or subtract each category one after the other. Often a - * single category will not perfectly match the set of commands into - * it, so at the end we do a final pass adding/removing the single commands - * needed to make the bitmap exactly match. */ - for (int j = 0; ACLCommandCategories[j].flag != 0; j++) { - unsigned long on, off; - ACLCountCategoryBitsForUser(u,&on,&off,ACLCommandCategories[j].name); - if ((additive && on > off) || (!additive && off > on)) { - sds op = sdsnewlen(additive ? "+@" : "-@", 2); - op = sdscat(op,ACLCommandCategories[j].name); - ACLSetUser(fakeuser,op,-1); - rules = sdscatsds(rules,op); - rules = sdscatlen(rules," ",1); - sdsfree(op); + /* Attempt to find a good approximation for categories and commands + * based on the current bits used, by looping over the category list + * and applying the best fit each time. Often a set of categories will not + * perfectly match the set of commands into it, so at the end we do a + * final pass adding/removing the single commands needed to make the bitmap + * exactly match. A temp user is maintained to keep track of categories + * already applied. */ + user tu = {0}; + user *tempuser = &tu; + + /* Keep track of the categories that have been applied, to prevent + * applying them twice. */ + char applied[sizeof(ACLCommandCategories)/sizeof(ACLCommandCategories[0])]; + memset(applied, 0, sizeof(applied)); + + memcpy(tempuser->allowed_commands, + u->allowed_commands, + sizeof(u->allowed_commands)); + while (1) { + int best = -1; + unsigned long mindiff = INT_MAX, maxsame = 0; + for (int j = 0; ACLCommandCategories[j].flag != 0; j++) { + if (applied[j]) continue; + + unsigned long on, off, diff, same; + ACLCountCategoryBitsForUser(tempuser,&on,&off,ACLCommandCategories[j].name); + /* Check if the current category is the best fit in this loop: + * * It has more commands in common with the user than commands + * that are different. + * AND EITHER + * * It has fewer differences than the best match we have + * found so far. + * * OR it ties the fewest differences seen so far but has + * more in common. */ + diff = additive ? off : on; + same = additive ? on : off; + if (same > diff && + ((diff < mindiff) || (diff == mindiff && same > maxsame))) + { + best = j; + mindiff = diff; + maxsame = same; + } } + + /* We didn't find a match */ + if (best == -1) break; + + sds op = sdsnewlen(additive ? "+@" : "-@", 2); + op = sdscat(op,ACLCommandCategories[best].name); + ACLSetUser(fakeuser,op,-1); + + sds invop = sdsnewlen(additive ? "-@" : "+@", 2); + invop = sdscat(invop,ACLCommandCategories[best].name); + ACLSetUser(tempuser,invop,-1); + + rules = sdscatsds(rules,op); + rules = sdscatlen(rules," ",1); + sdsfree(op); + sdsfree(invop); + + applied[best] = 1; } /* Fix the final ACLs with single commands differences. */ @@ -670,8 +723,8 @@ void ACLAddAllowedSubcommand(user *u, unsigned long id, const char *sub) { * - Disallow the execution of that command * +@ Allow the execution of all the commands in such category * with valid categories are like @admin, @set, @sortedset, ... - * and so forth, see the full list in the server.c file where - * the Redis command table is described and defined. + * and so forth; see the full list in the server.cpp file where + * the KeyDB command table is described and defined. * The special category @all means all the commands, but currently * present in the server, and that will be loaded in the future * via modules. @@ -1099,8 +1152,9 @@ int ACLCheckCommandPerm(client *c, int *keyidxptr) { if (!(c->puser->flags & USER_FLAG_ALLKEYS) && (c->cmd->getkeys_proc || c->cmd->firstkey)) { - int numkeys; - int *keyidx = getKeysFromCommand(c->cmd,c->argv,c->argc,&numkeys); + getKeysResult result = GETKEYS_RESULT_INIT; + int numkeys = getKeysFromCommand(c->cmd,c->argv,c->argc,&result); + int *keyidx = result.keys; for (int j = 0; j < numkeys; j++) { listIter li; listNode *ln; @@ -1121,11 +1175,11 @@ int ACLCheckCommandPerm(client *c, int *keyidxptr) { } if (!match) { if (keyidxptr) *keyidxptr = keyidx[j]; - getKeysFreeResult(keyidx); + getKeysFreeResult(&result); return ACL_DENIED_KEY; } } - getKeysFreeResult(keyidx); + getKeysFreeResult(&result); } /* If we survived all the above checks, the user can execute the diff --git a/src/adlist.c b/src/adlist.c index 7b7b012ce..6d5d77fb3 100644 --- a/src/adlist.c +++ b/src/adlist.c @@ -34,8 +34,9 @@ #include "zmalloc.h" /* Create a new list. The created list can be freed with - * AlFreeList(), but private value of every node need to be freed - * by the user before to call AlFreeList(). + * listRelease(), but the private value of every node needs to be freed + * by the user before calling listRelease(), or by setting a free method + * using listSetFreeMethod(). * * On error, NULL is returned. Otherwise the pointer to the new list. */ list *listCreate(void) @@ -217,8 +218,8 @@ void listRewindTail(list *list, listIter *li) { * listDelNode(), but not to remove other elements. * * The function returns a pointer to the next element of the list, - * or NULL if there are no more elements, so the classical usage patter - * is: + * or NULL if there are no more elements, so the classical usage + * pattern is: * * iter = listGetIterator(list,<direction>); * while ((node = listNext(iter)) != NULL) { diff --git a/src/ae_evport.c b/src/ae_evport.c index 7b7fbe28e..744e1a6bb 100644 --- a/src/ae_evport.c +++ b/src/ae_evport.c @@ -232,7 +232,7 @@ static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int mask) { /* * ENOMEM is a potentially transient condition, but the kernel won't * generally return it unless things are really bad. EAGAIN indicates - * we've reached an resource limit, for which it doesn't make sense to + * we've reached a resource limit, for which it doesn't make sense to * retry (counter-intuitively). All other errors indicate a bug. In any * of these cases, the best we can do is to abort.
*/ diff --git a/src/aof.cpp b/src/aof.cpp index 34feea237..48ac1ac4d 100644 --- a/src/aof.cpp +++ b/src/aof.cpp @@ -566,7 +566,7 @@ sds catAppendOnlyGenericCommand(sds dst, int argc, robj **argv) { return dst; } -/* Create the sds representation of an PEXPIREAT command, using +/* Create the sds representation of a PEXPIREAT command, using * 'seconds' as time to live and 'cmd' to understand what command * we are translating into a PEXPIREAT. * @@ -752,6 +752,7 @@ struct client *createAOFClient(void) { c->querybuf_peak = 0; c->argc = 0; c->argv = NULL; + c->argv_len_sum = 0; c->bufpos = 0; c->flags = 0; c->fPendingAsyncWrite = FALSE; @@ -781,6 +782,7 @@ void freeFakeClientArgv(struct client *c) { for (j = 0; j < c->argc; j++) decrRefCount(c->argv[j]); zfree(c->argv); + c->argv_len_sum = 0; } void freeFakeClient(struct client *c) { @@ -1159,7 +1161,7 @@ int rewriteSortedSetObject(rio *r, robj *key, robj *o) { } } else if (o->encoding == OBJ_ENCODING_SKIPLIST) { zset *zs = (zset*)ptrFromObj(o); - dictIterator *di = dictGetIterator(zs->pdict); + dictIterator *di = dictGetIterator(zs->dict); dictEntry *de; while((de = dictNext(di)) != NULL) { @@ -1292,16 +1294,24 @@ int rewriteStreamObject(rio *r, robj *key, robj *o) { * the ID, the second is an array of field-value pairs. */ /* Emit the XADD ...fields... command. */ - if (rioWriteBulkCount(r,'*',3+numfields*2) == 0) return 0; - if (rioWriteBulkString(r,"XADD",4) == 0) return 0; - if (rioWriteBulkObject(r,key) == 0) return 0; - if (rioWriteBulkStreamID(r,&id) == 0) return 0; + if (!rioWriteBulkCount(r,'*',3+numfields*2) || + !rioWriteBulkString(r,"XADD",4) || + !rioWriteBulkObject(r,key) || + !rioWriteBulkStreamID(r,&id)) + { + streamIteratorStop(&si); + return 0; + } while(numfields--) { unsigned char *field, *value; int64_t field_len, value_len; streamIteratorGetField(&si,&field,&value,&field_len,&value_len); - if (rioWriteBulkString(r,(char*)field,field_len) == 0) return 0; - if (rioWriteBulkString(r,(char*)value,value_len) == 0) return 0; + if (!rioWriteBulkString(r,(char*)field,field_len) || + !rioWriteBulkString(r,(char*)value,value_len)) + { + streamIteratorStop(&si); + return 0; + } } } } else { @@ -1309,22 +1319,30 @@ int rewriteStreamObject(rio *r, robj *key, robj *o) { * the key we are serializing is an empty string, which is possible * for the Stream type. */ id.ms = 0; id.seq = 1; - if (rioWriteBulkCount(r,'*',7) == 0) return 0; - if (rioWriteBulkString(r,"XADD",4) == 0) return 0; - if (rioWriteBulkObject(r,key) == 0) return 0; - if (rioWriteBulkString(r,"MAXLEN",6) == 0) return 0; - if (rioWriteBulkString(r,"0",1) == 0) return 0; - if (rioWriteBulkStreamID(r,&id) == 0) return 0; - if (rioWriteBulkString(r,"x",1) == 0) return 0; - if (rioWriteBulkString(r,"y",1) == 0) return 0; + if (!rioWriteBulkCount(r,'*',7) || + !rioWriteBulkString(r,"XADD",4) || + !rioWriteBulkObject(r,key) || + !rioWriteBulkString(r,"MAXLEN",6) || + !rioWriteBulkString(r,"0",1) || + !rioWriteBulkStreamID(r,&id) || + !rioWriteBulkString(r,"x",1) || + !rioWriteBulkString(r,"y",1)) + { + streamIteratorStop(&si); + return 0; + } } /* Append XSETID after XADD, make sure lastid is correct, * in case of XDEL lastid. 
*/ - if (rioWriteBulkCount(r,'*',3) == 0) return 0; - if (rioWriteBulkString(r,"XSETID",6) == 0) return 0; - if (rioWriteBulkObject(r,key) == 0) return 0; - if (rioWriteBulkStreamID(r,&s->last_id) == 0) return 0; + if (!rioWriteBulkCount(r,'*',3) || + !rioWriteBulkString(r,"XSETID",6) || + !rioWriteBulkObject(r,key) || + !rioWriteBulkStreamID(r,&s->last_id)) + { + streamIteratorStop(&si); + return 0; + } /* Create all the stream consumer groups. */ @@ -1343,6 +1361,7 @@ int rewriteStreamObject(rio *r, robj *key, robj *o) { !rioWriteBulkStreamID(r,&group->last_id)) { raxStop(&ri); + streamIteratorStop(&si); return 0; } @@ -1368,6 +1387,7 @@ int rewriteStreamObject(rio *r, robj *key, robj *o) { raxStop(&ri_pel); raxStop(&ri_cons); raxStop(&ri); + streamIteratorStop(&si); return 0; } } @@ -1422,7 +1442,7 @@ int rewriteAppendOnlyFileRio(rio *aof) { for (j = 0; j < cserver.dbnum; j++) { char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n"; redisDb *db = g_pserver->db+j; - dict *d = db->pdict; + dict *d = db->dict; if (dictSize(d) == 0) continue; di = dictGetSafeIterator(d); @@ -1509,7 +1529,7 @@ werr: * are inserted using a single command. */ int rewriteAppendOnlyFile(char *filename) { rio aof; - FILE *fp; + FILE *fp = NULL; char tmpfile[256]; char byte; int nodata = 0; @@ -1587,9 +1607,10 @@ int rewriteAppendOnlyFile(char *filename) { goto werr; /* Make sure data will not remain on the OS's output buffers */ - if (fflush(fp) == EOF) goto werr; - if (fsync(fileno(fp)) == -1) goto werr; - if (fclose(fp) == EOF) goto werr; + if (fflush(fp)) goto werr; + if (fsync(fileno(fp))) goto werr; + if (fclose(fp)) { fp = NULL; goto werr; } + fp = NULL; /* Use RENAME to make sure the DB file is changed atomically only * if the generate DB file is ok. */ @@ -1605,7 +1626,7 @@ int rewriteAppendOnlyFile(char *filename) { werr: serverLog(LL_WARNING,"Write error writing append only file on disk: %s", strerror(errno)); - fclose(fp); + if (fp) fclose(fp); unlink(tmpfile); stopSaving(0); return C_ERR; @@ -1719,7 +1740,7 @@ int rewriteAppendOnlyFileBackground(void) { if (hasActiveChildProcess()) return C_ERR; if (aofCreatePipes() != C_OK) return C_ERR; openChildInfoPipe(); - if ((childpid = redisFork()) == 0) { + if ((childpid = redisFork(CHILD_TYPE_AOF)) == 0) { char tmpfile[256]; /* Child */ @@ -1727,7 +1748,7 @@ int rewriteAppendOnlyFileBackground(void) { redisSetCpuAffinity(g_pserver->aof_rewrite_cpulist); snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) getpid()); if (rewriteAppendOnlyFile(tmpfile) == C_OK) { - sendChildCOWInfo(CHILD_INFO_TYPE_AOF, "AOF rewrite"); + sendChildCOWInfo(CHILD_TYPE_AOF, "AOF rewrite"); exitFromChild(0); } else { exitFromChild(1); @@ -1747,6 +1768,7 @@ int rewriteAppendOnlyFileBackground(void) { g_pserver->aof_rewrite_scheduled = 0; g_pserver->aof_rewrite_time_start = time(NULL); g_pserver->aof_child_pid = childpid; + updateDictResizePolicy(); /* We set appendseldb to -1 in order to force the next call to the * feedAppendOnlyFile() to issue a SELECT command, so the differences * accumulated by the parent into g_pserver->aof_rewrite_buf will start @@ -1776,10 +1798,10 @@ void aofRemoveTempFile(pid_t childpid) { char tmpfile[256]; snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) childpid); - unlink(tmpfile); + bg_unlink(tmpfile); snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) childpid); - unlink(tmpfile); + bg_unlink(tmpfile); } /* Update the g_pserver->aof_current_size field explicitly using stat(2) @@ -1934,7 +1956,7 @@ void backgroundRewriteDoneHandler(int exitcode, int 
bysignal) { "Background AOF rewrite terminated with error"); } else { /* SIGUSR1 is whitelisted, so we have a way to kill a child without - * tirggering an error condition. */ + * triggering an error condition. */ if (bysignal != SIGUSR1) g_pserver->aof_lastbgrewrite_status = C_ERR; diff --git a/src/atomicvar.h b/src/atomicvar.h index 160056cd7..ecd26ad70 100644 --- a/src/atomicvar.h +++ b/src/atomicvar.h @@ -21,7 +21,7 @@ * * Never use return value from the macros, instead use the AtomicGetIncr() * if you need to get the current value and increment it atomically, like - * in the followign example: + * in the following example: * * long oldvalue; * atomicGetIncr(myvar,oldvalue,1); diff --git a/src/bio.cpp b/src/bio.cpp index b3e9d0927..2c28e57a0 100644 --- a/src/bio.cpp +++ b/src/bio.cpp @@ -168,10 +168,7 @@ void *bioProcessBackgroundJobs(void *arg) { redisSetCpuAffinity(g_pserver->bio_cpulist); - /* Make the thread killable at any time, so that bioKillThreads() - * can work reliably. */ - pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); - pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); + makeThreadKillable(); pthread_mutex_lock(&bio_mutex[type]); /* Block SIGALRM so we are sure that only the main thread will @@ -206,7 +203,7 @@ void *bioProcessBackgroundJobs(void *arg) { /* What we free changes depending on what arguments are set: * arg1 -> free the object at pointer. * arg2 & arg3 -> free two dictionaries (a Redis DB). - * only arg3 -> free the skiplist. */ + * only arg3 -> free the radix tree. */ if (job->arg1) lazyfreeFreeObjectFromBioThread((robj*)job->arg1); else if (job->arg2 && job->arg3) @@ -268,10 +265,11 @@ void bioKillThreads(void) { int err, j; for (j = 0; j < BIO_NUM_OPS; j++) { + if (bio_threads[j] == pthread_self()) continue; if (bio_threads[j] && pthread_cancel(bio_threads[j]) == 0) { if ((err = pthread_join(bio_threads[j],NULL)) != 0) { serverLog(LL_WARNING, - "Bio thread for job type #%d can be joined: %s", + "Bio thread for job type #%d can not be joined: %s", j, strerror(err)); } else { serverLog(LL_WARNING, diff --git a/src/bitops.cpp b/src/bitops.cpp index 44094497b..8b26114db 100644 --- a/src/bitops.cpp +++ b/src/bitops.cpp @@ -36,7 +36,7 @@ /* Count number of bits set in the binary array pointed by 's' and long * 'count' bytes. The implementation of this function is required to - * work with a input string length up to 512 MB. */ + * work with an input string length up to 512 MB. */ size_t redisPopcount(const void *s, long count) { size_t bits = 0; unsigned char *p = (unsigned char*)s; @@ -107,7 +107,7 @@ long redisBitpos(const void *s, unsigned long count, int bit) { int found; /* Process whole words first, seeking for first word that is not - * all ones or all zeros respectively if we are lookig for zeros + * all ones or all zeros respectively if we are looking for zeros * or ones. This is much faster with large strings having contiguous * blocks of 1 or 0 bits compared to the vanilla bit per bit processing. * @@ -498,7 +498,7 @@ robj *lookupStringForBitCommand(client *c, size_t maxbit) { * in 'len'. The user is required to pass (likely stack allocated) buffer * 'llbuf' of at least LONG_STR_SIZE bytes. Such a buffer is used in the case * the object is integer encoded in order to provide the representation - * without usign heap allocation. + * without using heap allocation. 
* * The function returns the pointer to the object array of bytes representing * the string it contains, that may be a pointer to 'llbuf' or to the diff --git a/src/blocked.cpp b/src/blocked.cpp index 34c1d60ef..ecde109dc 100644 --- a/src/blocked.cpp +++ b/src/blocked.cpp @@ -53,7 +53,7 @@ * to 0, no timeout is processed). * It usually just needs to send a reply to the client. * - * When implementing a new type of blocking opeation, the implementation + * When implementing a new type of blocking operation, the implementation * should modify unblockClient() and replyToBlockedClientTimedOut() in order * to handle the btype-specific behavior of this two functions. * If the blocking operation waits for certain keys to change state, the @@ -128,7 +128,7 @@ void processUnblockedClients(int iel) { /* This function will schedule the client for reprocessing at a safe time. * - * This is useful when a client was blocked for some reason (blocking opeation, + * This is useful when a client was blocked for some reason (blocking operation, * CLIENT PAUSE, or whatever), because it may end with some accumulated query * buffer that needs to be processed ASAP: * @@ -522,7 +522,7 @@ void handleClientsBlockedOnKeys(void) { serverTL->fixed_time_expire++; updateCachedTime(0); - /* Serve clients blocked on list key. */ + /* Serve clients blocked on the key. */ robj *o = lookupKeyWrite(rl->db,rl->key); if (o != NULL) { diff --git a/src/childinfo.cpp b/src/childinfo.cpp index 66ad8b8fd..77900ac11 100644 --- a/src/childinfo.cpp +++ b/src/childinfo.cpp @@ -76,11 +76,11 @@ void receiveChildInfo(void) { if (read(g_pserver->child_info_pipe[0],&g_pserver->child_info_data,wlen) == wlen && g_pserver->child_info_data.magic == CHILD_INFO_MAGIC) { - if (g_pserver->child_info_data.process_type == CHILD_INFO_TYPE_RDB) { + if (g_pserver->child_info_data.process_type == CHILD_TYPE_RDB) { g_pserver->stat_rdb_cow_bytes = g_pserver->child_info_data.cow_size; - } else if (g_pserver->child_info_data.process_type == CHILD_INFO_TYPE_AOF) { + } else if (g_pserver->child_info_data.process_type == CHILD_TYPE_AOF) { g_pserver->stat_aof_cow_bytes = g_pserver->child_info_data.cow_size; - } else if (g_pserver->child_info_data.process_type == CHILD_INFO_TYPE_MODULE) { + } else if (g_pserver->child_info_data.process_type == CHILD_TYPE_MODULE) { g_pserver->stat_module_cow_bytes = g_pserver->child_info_data.cow_size; } } diff --git a/src/cluster.cpp b/src/cluster.cpp index b146b1011..e60807180 100644 --- a/src/cluster.cpp +++ b/src/cluster.cpp @@ -77,6 +77,9 @@ uint64_t clusterGetMaxEpoch(void); int clusterBumpConfigEpochWithoutConsensus(void); void moduleCallClusterReceivers(const char *sender_id, uint64_t module_id, uint8_t type, const unsigned char *payload, uint32_t len); +#define RCVBUF_INIT_LEN 1024 +#define RCVBUF_MAX_PREALLOC (1<<20) /* 1MB */ + struct redisMaster *getFirstMaster() { serverAssert(listLength(g_pserver->masters) <= 1); @@ -394,7 +397,7 @@ void clusterSaveConfigOrDie(int do_fsync) { } } -/* Lock the cluster config using flock(), and leaks the file descritor used to +/* Lock the cluster config using flock(), and leaks the file descriptor used to * acquire the lock so that the file will be locked forever. * * This works because we always update nodes.conf with a new version @@ -566,13 +569,13 @@ void clusterInit(void) { /* Reset a node performing a soft or hard reset: * - * 1) All other nodes are forget. + * 1) All other nodes are forgotten. * 2) All the assigned / open slots are released. 
* 3) If the node is a slave, it turns into a master. - * 5) Only for hard reset: a new Node ID is generated. - * 6) Only for hard reset: currentEpoch and configEpoch are set to 0. - * 7) The new configuration is saved and the cluster state updated. - * 8) If the node was a slave, the whole data set is flushed away. */ + * 4) Only for hard reset: a new Node ID is generated. + * 5) Only for hard reset: currentEpoch and configEpoch are set to 0. + * 6) The new configuration is saved and the cluster state updated. + * 7) If the node was a slave, the whole data set is flushed away. */ void clusterReset(int hard) { dictIterator *di; dictEntry *de; @@ -639,7 +642,8 @@ clusterLink *createClusterLink(clusterNode *node) { clusterLink *link = (clusterLink*)zmalloc(sizeof(*link), MALLOC_LOCAL); link->ctime = mstime(); link->sndbuf = sdsempty(); - link->rcvbuf = sdsempty(); + link->rcvbuf = (char*)zmalloc(link->rcvbuf_alloc = RCVBUF_INIT_LEN); + link->rcvbuf_len = 0; link->node = node; link->conn = NULL; return link; @@ -666,7 +670,7 @@ void freeClusterLink(clusterLink *link) { link->conn = NULL; } sdsfree(link->sndbuf); - sdsfree(link->rcvbuf); + zfree(link->rcvbuf); if (link->node) link->node->link = NULL; zfree(link); @@ -684,7 +688,7 @@ static void clusterConnAcceptHandler(connection *conn) { /* Create a link object we use to handle the connection. * It gets passed to the readable handler when data is available. - * Initiallly the link->node pointer is set to NULL as we don't know + * Initially the link->node pointer is set to NULL as we don't know * which node is, but the right node is references once we know the * node identity. */ link = createClusterLink(NULL); @@ -1098,7 +1102,7 @@ uint64_t clusterGetMaxEpoch(void) { * 3) Persist the configuration on disk before sending packets with the * new configuration. * - * If the new config epoch is generated and assigend, C_OK is returned, + * If the new config epoch is generated and assigned, C_OK is returned, * otherwise C_ERR is returned (since the node has already the greatest * configuration around) and no operation is performed. * @@ -1171,7 +1175,7 @@ int clusterBumpConfigEpochWithoutConsensus(void) { * * In general we want a system that eventually always ends with different * masters having different configuration epochs whatever happened, since - * nothign is worse than a split-brain condition in a distributed system. + * nothing is worse than a split-brain condition in a distributed system. * * BEHAVIOR * @@ -1230,7 +1234,7 @@ void clusterHandleConfigEpochCollision(clusterNode *sender) { * entries from the black list. This is an O(N) operation but it is not a * problem since add / exists operations are called very infrequently and * the hash table is supposed to contain very little elements at max. - * However without the cleanup during long uptimes and with some automated + * However without the cleanup during long uptime and with some automated * node add/removal procedures, entries could accumulate. */ void clusterBlacklistCleanup(void) { dictIterator *di; @@ -1384,12 +1388,12 @@ int clusterHandshakeInProgress(char *ip, int port, int cport) { return de != NULL; } -/* Start an handshake with the specified address if there is not one +/* Start a handshake with the specified address if there is not one * already in progress. Returns non-zero if the handshake was actually * started. On error zero is returned and errno is set to one of the * following values: * - * EAGAIN - There is already an handshake in progress for this address. 
+ * EAGAIN - There is already a handshake in progress for this address. * EINVAL - IP or port are not valid. */ int clusterStartHandshake(char *ip, int port, int cport) { clusterNode *n; @@ -1770,7 +1774,7 @@ int clusterProcessPacket(clusterLink *link) { /* Perform sanity checks */ if (totlen < 16) return 1; /* At least signature, version, totlen, count. */ - if (totlen > sdslen(link->rcvbuf)) return 1; + if (totlen > link->rcvbuf_len) return 1; if (ntohs(hdr->ver) != CLUSTER_PROTO_VER) { /* Can't handle messages of different versions. */ @@ -1835,7 +1839,7 @@ int clusterProcessPacket(clusterLink *link) { if (sender) sender->data_received = now; if (sender && !nodeInHandshake(sender)) { - /* Update our curretEpoch if we see a newer epoch in the cluster. */ + /* Update our currentEpoch if we see a newer epoch in the cluster. */ senderCurrentEpoch = ntohu64(hdr->currentEpoch); senderConfigEpoch = ntohu64(hdr->configEpoch); if (senderCurrentEpoch > g_pserver->cluster->currentEpoch) @@ -2327,7 +2331,7 @@ void clusterReadHandler(connection *conn) { unsigned int readlen, rcvbuflen; while(1) { /* Read as long as there is data to read. */ - rcvbuflen = sdslen(link->rcvbuf); + rcvbuflen = link->rcvbuf_len; if (rcvbuflen < 8) { /* First, obtain the first 8 bytes to get the full message * length. */ @@ -2363,7 +2367,15 @@ void clusterReadHandler(connection *conn) { return; } else { /* Read data and recast the pointer to the new buffer. */ - link->rcvbuf = sdscatlen(link->rcvbuf,buf,nread); + size_t unused = link->rcvbuf_alloc - link->rcvbuf_len; + if ((size_t)nread > unused) { + size_t required = link->rcvbuf_len + nread; + /* If less than 1mb, grow to twice the needed size, if larger grow by 1mb. */ + link->rcvbuf_alloc = required < RCVBUF_MAX_PREALLOC ? required * 2: required + RCVBUF_MAX_PREALLOC; + link->rcvbuf = (char*)zrealloc(link->rcvbuf, link->rcvbuf_alloc); + } + memcpy(link->rcvbuf + link->rcvbuf_len, buf, nread); + link->rcvbuf_len += nread; hdr = (clusterMsg*) link->rcvbuf; rcvbuflen += nread; } @@ -2371,8 +2383,11 @@ void clusterReadHandler(connection *conn) { /* Total length obtained? Process this packet. */ if (rcvbuflen >= 8 && rcvbuflen == ntohl(hdr->totlen)) { if (clusterProcessPacket(link)) { - sdsfree(link->rcvbuf); - link->rcvbuf = sdsempty(); + if (link->rcvbuf_alloc > RCVBUF_INIT_LEN) { + zfree(link->rcvbuf); + link->rcvbuf = (char*)zmalloc(link->rcvbuf_alloc = RCVBUF_INIT_LEN); + } + link->rcvbuf_len = 0; } else { return; /* Link no longer valid. */ } @@ -2453,7 +2468,7 @@ void clusterBuildMessageHdr(clusterMsg *hdr, int type) { * first byte is zero, they'll do auto discovery. */ memset(hdr->myip,0,NET_IP_STR_LEN); if (g_pserver->cluster_announce_ip) { - strncpy(hdr->myip,g_pserver->cluster_announce_ip,NET_IP_STR_LEN); + strncpy(hdr->myip,g_pserver->cluster_announce_ip,NET_IP_STR_LEN-1); hdr->myip[NET_IP_STR_LEN-1] = '\0'; } @@ -2530,7 +2545,7 @@ void clusterSetGossipEntry(clusterMsg *hdr, int i, clusterNode *n) { } /* Send a PING or PONG packet to the specified node, making sure to add enough - * gossip informations. */ + * gossip information. */ void clusterSendPing(clusterLink *link, int type) { unsigned char *buf; clusterMsg *hdr; @@ -2550,7 +2565,7 @@ void clusterSendPing(clusterLink *link, int type) { * node_timeout we exchange with each other node at least 4 packets * (we ping in the worst case in node_timeout/2 time, and we also * receive two pings from the host), we have a total of 8 packets - * in the node_timeout*2 falure reports validity time. 
So we have + * that, for a single PFAIL node, we can expect to receive the following * number of failure reports (in the specified window of time): * @@ -2577,7 +2592,7 @@ void clusterSendPing(clusterLink *link, int type) { * faster to propagate to go from PFAIL to FAIL state. */ int pfail_wanted = g_pserver->cluster->stats_pfail_nodes; - /* Compute the maxium totlen to allocate our buffer. We'll fix the totlen + /* Compute the maximum totlen to allocate our buffer. We'll fix the totlen * later according to the number of gossip sections we really were able * to put inside the packet. */ totlen = sizeof(clusterMsg)-sizeof(union clusterMsgData); @@ -2614,7 +2629,7 @@ void clusterSendPing(clusterLink *link, int type) { if (thisNode->flags & (CLUSTER_NODE_HANDSHAKE|CLUSTER_NODE_NOADDR) || (thisNode->link == NULL && thisNode->numslots == 0)) { - freshnodes--; /* Tecnically not correct, but saves CPU. */ + freshnodes--; /* Technically not correct, but saves CPU. */ continue; } @@ -3199,7 +3214,7 @@ void clusterHandleSlaveFailover(void) { } } - /* If the previous failover attempt timedout and the retry time has + /* If the previous failover attempt timed out and the retry time has * elapsed, we can setup a new one. */ if (auth_age > auth_retry_time) { g_pserver->cluster->failover_auth_time = mstime() + @@ -3305,7 +3320,7 @@ void clusterHandleSlaveFailover(void) { * * Slave migration is the process that allows a slave of a master that is * already covered by at least another slave, to "migrate" to a master that - * is orpaned, that is, left with no working slaves. + * is orphaned, that is, left with no working slaves. * ------------------------------------------------------------------------- */ /* This function is responsible to decide if this replica should be migrated * @@ -3322,7 +3337,7 @@ void clusterHandleSlaveFailover(void) { * the nodes anyway, so we spend time into clusterHandleSlaveMigration() * if definitely needed. * - * The fuction is called with a pre-computed max_slaves, that is the max + * The function is called with a pre-computed max_slaves, that is the max * number of working (not in FAIL state) slaves for a single master. * * Additional conditions for migration are examined inside the function. @@ -3441,7 +3456,7 @@ void clusterHandleSlaveMigration(int max_slaves) { * data loss due to the asynchronous master-slave replication. * -------------------------------------------------------------------------- */ -/* Reset the manual failover state. This works for both masters and slavesa +/* Reset the manual failover state. This works for both masters and slaves * as all the state about manual failover is cleared. * * The function can be used both to initialize the manual failover state at @@ -3527,7 +3542,7 @@ void clusterCron(void) { * duplicating the string. This way later we can check if * the address really changed. */ prev_ip = zstrdup(prev_ip); - strncpy(myself->ip,g_pserver->cluster_announce_ip,NET_IP_STR_LEN); + strncpy(myself->ip,g_pserver->cluster_announce_ip,NET_IP_STR_LEN-1); myself->ip[NET_IP_STR_LEN-1] = '\0'; } else { myself->ip[0] = '\0'; /* Force autodetection. */ @@ -3733,7 +3748,7 @@ void clusterCron(void) { replicationAddMaster(myself->slaveof->ip, myself->slaveof->port); } - /* Abourt a manual failover if the timeout is reached. + /* Abort a manual failover if the timeout is reached.
*/ manualFailoverCheckTimeout(); if (nodeIsSlave(myself)) { @@ -3838,12 +3853,12 @@ int clusterNodeSetSlotBit(clusterNode *n, int slot) { * target for replicas migration, if and only if at least one of * the other masters has slaves right now. * - * Normally masters are valid targerts of replica migration if: + * Normally masters are valid targets of replica migration if: * 1. The used to have slaves (but no longer have). * 2. They are slaves failing over a master that used to have slaves. * * However new masters with slots assigned are considered valid - * migration tagets if the rest of the cluster is not a slave-less. + * migration targets if the rest of the cluster is not a slave-less. * * See https://github.com/antirez/redis/issues/3043 for more info. */ if (n->numslots == 1 && clusterMastersHaveSlaves()) @@ -4027,7 +4042,7 @@ void clusterUpdateState(void) { * A) If no other node is in charge according to the current cluster * configuration, we add these slots to our node. * B) If according to our config other nodes are already in charge for - * this lots, we set the slots as IMPORTING from our point of view + * this slots, we set the slots as IMPORTING from our point of view * in order to justify we have those slots, and in order to make * keydb-trib aware of the issue, so that it can try to fix it. * 2) If we find data in a DB different than DB0 we return C_ERR to @@ -4056,7 +4071,7 @@ int verifyClusterConfigWithData(void) { /* Make sure we only have keys in DB0. */ for (j = 1; j < cserver.dbnum; j++) { - if (dictSize(g_pserver->db[j].pdict)) return C_ERR; + if (dictSize(g_pserver->db[j].dict)) return C_ERR; } /* Check that all the slots we see populated memory have a corresponding @@ -4437,7 +4452,7 @@ NULL clusterReplyMultiBulkSlots(c); } else if (!strcasecmp(szFromObj(c->argv[1]),"flushslots") && c->argc == 2) { /* CLUSTER FLUSHSLOTS */ - if (dictSize(g_pserver->db[0].pdict) != 0) { + if (dictSize(g_pserver->db[0].dict) != 0) { addReplyError(c,"DB must be empty to perform CLUSTER FLUSHSLOTS."); return; } @@ -4557,7 +4572,7 @@ NULL } /* If this slot is in migrating status but we have no keys * for it assigning the slot to another node will clear - * the migratig status. */ + * the migrating status. */ if (countKeysInSlot(slot) == 0 && g_pserver->cluster->migrating_slots_to[slot]) g_pserver->cluster->migrating_slots_to[slot] = NULL; @@ -4770,7 +4785,7 @@ NULL * slots nor keys to accept to replicate some other node. * Slaves can switch to another master without issues. */ if (nodeIsMaster(myself) && - (myself->numslots != 0 || dictSize(g_pserver->db[0].pdict) != 0)) { + (myself->numslots != 0 || dictSize(g_pserver->db[0].dict) != 0)) { addReplyError(c, "To set a master the node must be empty and " "without assigned slots."); @@ -4902,7 +4917,7 @@ NULL g_pserver->cluster->currentEpoch = epoch; /* No need to fsync the config here since in the unlucky event * of a failure to persist the config, the conflict resolution code - * will assign an unique config to this node. */ + * will assign a unique config to this node. */ clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE| CLUSTER_TODO_SAVE_CONFIG); addReply(c,shared.ok); @@ -4927,7 +4942,7 @@ NULL /* Slaves can be reset while containing data, but not master nodes * that must be empty. 
*/ - if (nodeIsMaster(myself) && dictSize(c->db->pdict) != 0) { + if (nodeIsMaster(myself) && dictSize(c->db->dict) != 0) { addReplyError(c,"CLUSTER RESET can't be called with " "master nodes containing keys"); return; @@ -4950,7 +4965,7 @@ void createDumpPayload(rio *payload, robj_roptr o, robj *key) { unsigned char buf[2]; uint64_t crc; - /* Serialize the object in a RDB-like format. It consist of an object type + /* Serialize the object in an RDB-like format. It consist of an object type * byte followed by the serialized object. This is understood by RESTORE. */ rioInitWithBuffer(payload,sdsempty()); serverAssert(rdbSaveObjectType(payload,o)); @@ -5665,7 +5680,7 @@ void readwriteCommand(client *c) { * resharding in progress). * * On success the function returns the node that is able to serve the request. - * If the node is not 'myself' a redirection must be perfomed. The kind of + * If the node is not 'myself' a redirection must be performed. The kind of * redirection is specified setting the integer passed by reference * 'error_code', which will be set to CLUSTER_REDIR_ASK or * CLUSTER_REDIR_MOVED. @@ -5743,7 +5758,10 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in margc = ms->commands[i].argc; margv = ms->commands[i].argv; - keyindex = getKeysFromCommand(mcmd,margv,margc,&numkeys); + getKeysResult result = GETKEYS_RESULT_INIT; + numkeys = getKeysFromCommand(mcmd,margv,margc,&result); + keyindex = result.keys; + for (j = 0; j < numkeys; j++) { robj *thiskey = margv[keyindex[j]]; int thisslot = keyHashSlot((char*)ptrFromObj(thiskey), @@ -5761,7 +5779,7 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in * not trapped earlier in processCommand(). Report the same * error to the client. */ if (n == NULL) { - getKeysFreeResult(keyindex); + getKeysFreeResult(&result); if (error_code) *error_code = CLUSTER_REDIR_DOWN_UNBOUND; return NULL; @@ -5785,7 +5803,7 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in if (!equalStringObjects(firstkey,thiskey)) { if (slot != thisslot) { /* Error: multiple keys from different slots. */ - getKeysFreeResult(keyindex); + getKeysFreeResult(&result); if (error_code) *error_code = CLUSTER_REDIR_CROSS_SLOT; return NULL; @@ -5797,14 +5815,14 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in } } - /* Migarting / Improrting slot? Count keys we don't have. */ + /* Migrating / Importing slot? Count keys we don't have. */ if ((migrating_slot || importing_slot) && lookupKeyRead(&g_pserver->db[0],thiskey) == nullptr) { missing_keys++; } } - getKeysFreeResult(keyindex); + getKeysFreeResult(&result); } /* No key at all in command? then we can serve the request @@ -5866,7 +5884,7 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in } /* Handle the read-only client case reading from a slave: if this - * node is a slave and the request is about an hash slot our master + * node is a slave and the request is about a hash slot our master * is serving, we can reply without redirection. */ int is_readonly_command = (c->cmd->flags & CMD_READONLY) || (c->cmd->proc == execCommand && !(c->mstate.cmd_inv_flags & CMD_READONLY)); @@ -5880,7 +5898,7 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in } /* Base case: just return the right node. However if this node is not - * myself, set error_code to MOVED since we need to issue a rediretion. 
*/ + * myself, set error_code to MOVED since we need to issue a redirection. */ if (n != myself && error_code) *error_code = CLUSTER_REDIR_MOVED; return n; } @@ -5926,7 +5944,7 @@ void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_co * 3) The client may remain blocked forever (or up to the max timeout time) * waiting for a key change that will never happen. * - * If the client is found to be blocked into an hash slot this node no + * If the client is found to be blocked into a hash slot this node no * longer handles, the client is sent a redirection error, and the function * returns 1. Otherwise 0 is returned and no operation is performed. */ int clusterRedirectBlockedClientIfNeeded(client *c) { @@ -5955,6 +5973,15 @@ int clusterRedirectBlockedClientIfNeeded(client *c) { int slot = keyHashSlot((char*)ptrFromObj(key), sdslen(szFromObj(key))); clusterNode *node = g_pserver->cluster->slots[slot]; + /* If the client is read-only and attempting to access a key that our + * replica can handle, allow it. */ + if ((c->flags & CLIENT_READONLY) && + (c->lastcmd->flags & CMD_READONLY) && + nodeIsSlave(myself) && myself->slaveof == node) + { + node = myself; + } + /* We send an error and unblock the client if: * 1) The slot is unassigned, emitting a cluster down error. * 2) The slot is not handled by this node, nor being imported. */ diff --git a/src/cluster.h b/src/cluster.h index dad5694cd..6dfe318e2 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -42,7 +42,9 @@ typedef struct clusterLink { mstime_t ctime; /* Link creation time */ connection *conn; /* Connection to remote node */ sds sndbuf; /* Packet send buffer */ - sds rcvbuf; /* Packet reception buffer */ + char *rcvbuf; /* Packet reception buffer */ + size_t rcvbuf_len; /* Used size of rcvbuf */ + size_t rcvbuf_alloc; /* Allocated size of rcvbuf */ struct clusterNode *node; /* Node related to this link if any, or NULL */ } clusterLink; @@ -55,8 +57,8 @@ typedef struct clusterLink { #define CLUSTER_NODE_HANDSHAKE 32 /* We have still to exchange the first ping */ #define CLUSTER_NODE_NOADDR 64 /* We don't know the address of this node */ #define CLUSTER_NODE_MEET 128 /* Send a MEET message to this node */ -#define CLUSTER_NODE_MIGRATE_TO 256 /* Master elegible for replica migration. */ -#define CLUSTER_NODE_NOFAILOVER 512 /* Slave will not try to failver. */ +#define CLUSTER_NODE_MIGRATE_TO 256 /* Master eligible for replica migration. */ +#define CLUSTER_NODE_NOFAILOVER 512 /* Slave will not try to failover. */ #define CLUSTER_NODE_NULL_NAME "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" #define nodeIsMaster(n) ((n)->flags & CLUSTER_NODE_MASTER) @@ -168,10 +170,10 @@ typedef struct clusterState { clusterNode *mf_slave; /* Slave performing the manual failover. */ /* Manual failover state of slave. */ long long mf_master_offset; /* Master offset the slave needs to start MF - or zero if stil not received. */ + or zero if still not received. */ int mf_can_start; /* If non-zero signal that the manual failover can start requesting masters vote. */ - /* The followign fields are used by masters to take state on elections. */ + /* The following fields are used by masters to take state on elections. */ uint64_t lastVoteEpoch; /* Epoch of the last vote granted. */ int todo_before_sleep; /* Things to do in clusterBeforeSleep(). */ /* Messages received and sent by type.
*/ diff --git a/src/config.cpp b/src/config.cpp index 66fc8afeb..1237c1d43 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -106,6 +106,15 @@ configEnum tls_auth_clients_enum[] = { {"optional", TLS_CLIENT_AUTH_OPTIONAL}, {NULL, 0} }; + +configEnum oom_score_adj_enum[] = { + {"no", OOM_SCORE_ADJ_NO}, + {"yes", OOM_SCORE_RELATIVE}, + {"relative", OOM_SCORE_RELATIVE}, + {"absolute", OOM_SCORE_ADJ_ABSOLUTE}, + {NULL, 0} +}; + /* Output buffer limits presets. */ clientBufferLimitsConfig clientBufferLimitsDefaults[CLIENT_TYPE_OBUF_COUNT] = { {0, 0, 0}, /* normal */ @@ -302,7 +311,7 @@ void queueLoadModule(sds path, sds *argv, int argc) { * g_pserver->oom_score_adj_values if valid. */ -static int updateOOMScoreAdjValues(sds *args, const char **err) { +static int updateOOMScoreAdjValues(sds *args, const char **err, int apply) { int i; int values[CONFIG_OOM_COUNT]; @@ -310,8 +319,8 @@ static int updateOOMScoreAdjValues(sds *args, const char **err) { char *eptr; long long val = strtoll(args[i], &eptr, 10); - if (*eptr != '\0' || val < -1000 || val > 1000) { - if (err) *err = "Invalid oom-score-adj-values, elements must be between -1000 and 1000."; + if (*eptr != '\0' || val < -2000 || val > 2000) { + if (err) *err = "Invalid oom-score-adj-values, elements must be between -2000 and 2000."; return C_ERR; } @@ -335,6 +344,10 @@ static int updateOOMScoreAdjValues(sds *args, const char **err) { old_values[i] = g_pserver->oom_score_adj_values[i]; g_pserver->oom_score_adj_values[i] = values[i]; } + + /* When parsing the config file, we want to apply only when all is done. */ + if (!apply) + return C_OK; /* Update */ if (setOOMScoreAdj(-1) == C_ERR) { @@ -473,7 +486,30 @@ void loadServerConfigFromString(char *config) { } else if ((!strcasecmp(argv[0],"slaveof") || !strcasecmp(argv[0],"replicaof")) && argc == 3) { slaveof_linenum = linenum; - replicationAddMaster(argv[1], atoi(argv[2])); + if (!strcasecmp(argv[1], "no") && !strcasecmp(argv[2], "one")) { + if (listLength(g_pserver->masters)) { + listIter li; + listNode *ln; + listRewind(g_pserver->masters, &li); + while ((ln = listNext(&li))) + { + struct redisMaster *mi = (struct redisMaster*)listNodeValue(ln); + zfree(mi->masterauth); + zfree(mi->masteruser); + zfree(mi->repl_transfer_tmpfile); + delete mi->staleKeyMap; + zfree(mi); + listDelNode(g_pserver->masters, ln); + } + } + continue; + } + char *ptr; + int port = strtol(argv[2], &ptr, 10); + if (port < 0 || port > 65535 || *ptr != '\0') { + err= "Invalid master port"; goto loaderr; + } + replicationAddMaster(argv[1], port); } else if (!strcasecmp(argv[0],"requirepass") && argc == 2) { if (strlen(argv[1]) > CONFIG_AUTHPASS_MAX_LEN) { err = "Password is longer than CONFIG_AUTHPASS_MAX_LEN"; @@ -484,11 +520,16 @@ void loadServerConfigFromString(char *config) { * additionally is to remember the cleartext password in this * case, for backward compatibility with Redis <= 5. 
*/ ACLSetUser(DefaultUser,"resetpass",-1); - sds aclop = sdscatprintf(sdsempty(),">%s",argv[1]); - ACLSetUser(DefaultUser,aclop,sdslen(aclop)); - sdsfree(aclop); sdsfree(g_pserver->requirepass); - g_pserver->requirepass = sdsnew(argv[1]); + g_pserver->requirepass = NULL; + if (sdslen(argv[1])) { + sds aclop = sdscatprintf(sdsempty(),">%s",argv[1]); + ACLSetUser(DefaultUser,aclop,sdslen(aclop)); + sdsfree(aclop); + g_pserver->requirepass = sdsnew(argv[1]); + } else { + ACLSetUser(DefaultUser,"nopass",-1); + } } else if (!strcasecmp(argv[0],"list-max-ziplist-entries") && argc == 2){ /* DEAD OPTION */ } else if (!strcasecmp(argv[0],"list-max-ziplist-value") && argc == 2) { @@ -543,7 +584,7 @@ void loadServerConfigFromString(char *config) { cserver.client_obuf_limits[type].soft_limit_bytes = soft; cserver.client_obuf_limits[type].soft_limit_seconds = soft_seconds; } else if (!strcasecmp(argv[0],"oom-score-adj-values") && argc == 1 + CONFIG_OOM_COUNT) { - if (updateOOMScoreAdjValues(&argv[1], &err) == C_ERR) goto loaderr; + if (updateOOMScoreAdjValues(&argv[1], &err, 0) == C_ERR) goto loaderr; } else if (!strcasecmp(argv[0],"notify-keyspace-events") && argc == 2) { int flags = keyspaceEventsStringToFlags(argv[1]); @@ -751,11 +792,16 @@ void configSetCommand(client *c) { * additionally is to remember the cleartext password in this * case, for backward compatibility with Redis <= 5. */ ACLSetUser(DefaultUser,"resetpass",-1); - sds aclop = sdscatprintf(sdsempty(),">%s",(char*)ptrFromObj(o)); - ACLSetUser(DefaultUser,aclop,sdslen(aclop)); - sdsfree(aclop); sdsfree(g_pserver->requirepass); - g_pserver->requirepass = sdsnew(szFromObj(o)); + g_pserver->requirepass = NULL; + if (sdslen(szFromObj(o))) { + sds aclop = sdscatprintf(sdsempty(),">%s",(char*)ptrFromObj(o)); + ACLSetUser(DefaultUser,aclop,sdslen(aclop)); + sdsfree(aclop); + g_pserver->requirepass = sdsnew(szFromObj(o)); + } else { + ACLSetUser(DefaultUser,"nopass",-1); + } } config_set_special_field("save") { int vlen, j; sds *v = sdssplitlen(szFromObj(o),sdslen(szFromObj(o))," ",1,&vlen); @@ -845,7 +891,7 @@ void configSetCommand(client *c) { int success = 1; sds *v = sdssplitlen(szFromObj(o), sdslen(szFromObj(o)), " ", 1, &vlen); - if (vlen != CONFIG_OOM_COUNT || updateOOMScoreAdjValues(v, &errstr) == C_ERR) + if (vlen != CONFIG_OOM_COUNT || updateOOMScoreAdjValues(v, &errstr, 1) == C_ERR) success = 0; sdsfreesplitres(v, vlen); @@ -1356,7 +1402,7 @@ void rewriteConfigNumericalOption(struct rewriteConfigState *state, const char * rewriteConfigRewriteLine(state,option,line,force); } -/* Rewrite a octal option. */ +/* Rewrite an octal option. */ void rewriteConfigOctalOption(struct rewriteConfigState *state, const char *option, int value, int defvalue) { int force = value != defvalue; sds line = sdscatprintf(sdsempty(),"%s %o",option,value); @@ -1381,6 +1427,12 @@ void rewriteConfigSaveOption(struct rewriteConfigState *state) { int j; sds line; + /* In Sentinel mode we don't need to rewrite the save parameters */ + if (g_pserver->sentinel_mode) { + rewriteConfigMarkAsProcessed(state,"save"); + return; + } + /* Note that if there are no save parameters at all, all the current * config line with "save" will be detected as orphaned and deleted, * resulting into no RDB persistence as expected. */ @@ -1628,60 +1680,62 @@ void rewriteConfigRemoveOrphaned(struct rewriteConfigState *state) { dictReleaseIterator(di); } -/* This function overwrites the old configuration file with the new content. - * - * 1) The old file length is obtained. 
- * 2) If the new content is smaller, padding is added. - * 3) A single write(2) call is used to replace the content of the file. - * 4) Later the file is truncated to the length of the new content. - * - * This way we are sure the file is left in a consistent state even if the - * process is stopped between any of the four operations. +/* This function replaces the old configuration file with the new content + * in an atomic manner. * * The function returns 0 on success, otherwise -1 is returned and errno - * set accordingly. */ + * is set accordingly. */ int rewriteConfigOverwriteFile(char *configfile, sds content) { - int retval = 0; - int fd = open(configfile,O_RDWR|O_CREAT,0644); - int content_size = sdslen(content), padding = 0; - struct stat sb; - sds content_padded; + int fd = -1; + int retval = -1; + char tmp_conffile[PATH_MAX]; + const char *tmp_suffix = ".XXXXXX"; + size_t offset = 0; + ssize_t written_bytes = 0; - /* 1) Open the old file (or create a new one if it does not - * exist), get the size. */ - if (fd == -1) return -1; /* errno set by open(). */ - if (fstat(fd,&sb) == -1) { - close(fd); - return -1; /* errno set by fstat(). */ + int tmp_path_len = snprintf(tmp_conffile, sizeof(tmp_conffile), "%s%s", configfile, tmp_suffix); + if (tmp_path_len <= 0 || (unsigned int)tmp_path_len >= sizeof(tmp_conffile)) { + serverLog(LL_WARNING, "Config file full path is too long"); + errno = ENAMETOOLONG; + return retval; } - /* 2) Pad the content at least match the old file size. */ - content_padded = sdsdup(content); - if (content_size < sb.st_size) { - /* If the old file was bigger, pad the content with - * a newline plus as many "#" chars as required. */ - padding = sb.st_size - content_size; - content_padded = sdsgrowzero(content_padded,sb.st_size); - content_padded[content_size] = '\n'; - memset(content_padded+content_size+1,'#',padding-1); +#ifdef _GNU_SOURCE + fd = mkostemp(tmp_conffile, O_CLOEXEC); +#else + /* There's a theoretical chance here to leak the FD if a module thread forks & execv in the middle */ + fd = mkstemp(tmp_conffile); +#endif + + if (fd == -1) { + serverLog(LL_WARNING, "Could not create tmp config file (%s)", strerror(errno)); + return retval; } - /* 3) Write the new content using a single write(2). */ - if (write(fd,content_padded,strlen(content_padded)) == -1) { - retval = -1; - goto cleanup; + while (offset < sdslen(content)) { + written_bytes = write(fd, content + offset, sdslen(content) - offset); + if (written_bytes <= 0) { + if (errno == EINTR) continue; /* FD is blocking, no other retryable errors */ + serverLog(LL_WARNING, "Failed after writing (%zd) bytes to tmp config file (%s)", offset, strerror(errno)); + goto cleanup; + } + offset+=written_bytes; } - /* 4) Truncate the file to the right length if we used padding. */ - if (padding) { - if (ftruncate(fd,content_size) == -1) { - /* Non critical error... 
*/ - } + if (fsync(fd)) + serverLog(LL_WARNING, "Could not sync tmp config file to disk (%s)", strerror(errno)); + else if (fchmod(fd, 0644) == -1) + serverLog(LL_WARNING, "Could not chmod config file (%s)", strerror(errno)); + else if (rename(tmp_conffile, configfile) == -1) + serverLog(LL_WARNING, "Could not rename tmp config file (%s)", strerror(errno)); + else { + retval = 0; + serverLog(LL_DEBUG, "Rewritten config file (%s) successfully", configfile); } cleanup: - sdsfree(content_padded); close(fd); + if (retval) unlink(tmp_conffile); return retval; } @@ -1890,7 +1944,7 @@ static int enumConfigSet(typeData data, sds value, int update, const char **err) } sdsrange(enumerr,0,-3); /* Remove final ", ". */ - strncpy(loadbuf, enumerr, LOADBUF_SIZE); + strncpy(loadbuf, enumerr, LOADBUF_SIZE-1); loadbuf[LOADBUF_SIZE - 1] = '\0'; sdsfree(enumerr); @@ -2195,7 +2249,7 @@ static int isValidAOFfilename(char *val, const char **err) { static int updateHZ(long long val, long long prev, const char **err) { UNUSED(prev); UNUSED(err); - /* Hz is more an hint from the user, so we accept values out of range + /* Hz is more a hint from the user, so we accept values out of range * but cap them to reasonable values. */ g_pserver->config_hz = val; if (g_pserver->config_hz < CONFIG_MIN_HZ) g_pserver->config_hz = CONFIG_MIN_HZ; @@ -2213,7 +2267,7 @@ static int updateJemallocBgThread(int val, int prev, const char **err) { static int updateReplBacklogSize(long long val, long long prev, const char **err) { /* resizeReplicationBacklog sets g_pserver->repl_backlog_size, and relies on - * being able to tell when the size changes, so restore prev becore calling it. */ + * being able to tell when the size changes, so restore prev before calling it. */ UNUSED(err); g_pserver->repl_backlog_size = prev; resizeReplicationBacklog(val); @@ -2403,7 +2457,6 @@ standardConfig configs[] = { createBoolConfig("multi-master-no-forward", NULL, MODIFIABLE_CONFIG, cserver.multimaster_no_forward, 0, validateMultiMasterNoForward, NULL), createBoolConfig("allow-write-during-load", NULL, MODIFIABLE_CONFIG, g_pserver->fWriteDuringActiveLoad, 0, NULL, NULL), createBoolConfig("io-threads-do-reads", NULL, IMMUTABLE_CONFIG, fDummy, 0, NULL, NULL), - createBoolConfig("oom-score-adj", NULL, MODIFIABLE_CONFIG, g_pserver->oom_score_adj, 0, NULL, updateOOMScoreAdj), /* String Configs */ createStringConfig("aclfile", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, g_pserver->acl_filename, "", NULL, NULL), @@ -2420,6 +2473,7 @@ standardConfig configs[] = { createStringConfig("bio_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->bio_cpulist, NULL, NULL, NULL), createStringConfig("aof_rewrite_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->aof_rewrite_cpulist, NULL, NULL, NULL), createStringConfig("bgsave_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->bgsave_cpulist, NULL, NULL, NULL), + createStringConfig("ignore-warnings", NULL, MODIFIABLE_CONFIG, ALLOW_EMPTY_STRING, g_pserver->ignore_warnings, "ARM64-COW-BUG", NULL, NULL), /* Enum Configs */ createEnumConfig("supervised", NULL, IMMUTABLE_CONFIG, supervised_mode_enum, cserver.supervised_mode, SUPERVISED_NONE, NULL, NULL), @@ -2428,6 +2482,7 @@ standardConfig configs[] = { createEnumConfig("loglevel", NULL, MODIFIABLE_CONFIG, loglevel_enum, cserver.verbosity, LL_NOTICE, NULL, NULL), createEnumConfig("maxmemory-policy", NULL, MODIFIABLE_CONFIG, maxmemory_policy_enum, g_pserver->maxmemory_policy, MAXMEMORY_NO_EVICTION, NULL, NULL), 
createEnumConfig("appendfsync", NULL, MODIFIABLE_CONFIG, aof_fsync_enum, g_pserver->aof_fsync, AOF_FSYNC_EVERYSEC, NULL, NULL), + createEnumConfig("oom-score-adj", NULL, MODIFIABLE_CONFIG, oom_score_adj_enum, g_pserver->oom_score_adj, OOM_SCORE_ADJ_NO, NULL, updateOOMScoreAdj), /* Integer configs */ createIntConfig("databases", NULL, IMMUTABLE_CONFIG, 1, INT_MAX, cserver.dbnum, 16, INTEGER_CONFIG, NULL, NULL), diff --git a/src/config.h b/src/config.h index b0ffe908e..51fe6e253 100644 --- a/src/config.h +++ b/src/config.h @@ -64,7 +64,7 @@ /* Test for backtrace() */ #if defined(__APPLE__) || (defined(__linux__) && defined(__GLIBC__)) || \ - defined(__FreeBSD__) || (defined(__OpenBSD__) && defined(USE_BACKTRACE))\ + defined(__FreeBSD__) || ((defined(__OpenBSD__) || defined(__NetBSD__)) && defined(USE_BACKTRACE))\ || defined(__DragonFly__) #define HAVE_BACKTRACE 1 #endif @@ -124,6 +124,10 @@ #define USE_SETPROCTITLE #endif +#if defined(__HAIKU__) +#define ESOCKTNOSUPPORT 0 +#endif + #if ((defined __linux && defined(__GLIBC__)) || defined __APPLE__) #define USE_SETPROCTITLE #define INIT_SETPROCTITLE_REPLACEMENT @@ -172,7 +176,7 @@ void setproctitle(const char *fmt, ...); #endif /* BYTE_ORDER */ /* Sometimes after including an OS-specific header that defines the - * endianess we end with __BYTE_ORDER but not with BYTE_ORDER that is what + * endianness we end with __BYTE_ORDER but not with BYTE_ORDER that is what * the Redis code uses. In this case let's define everything without the * underscores. */ #ifndef BYTE_ORDER @@ -242,7 +246,7 @@ void setproctitle(const char *fmt, ...); #define redis_set_thread_title(name) pthread_set_name_np(pthread_self(), name) #elif defined __NetBSD__ #include -#define redis_set_thread_title(name) pthread_setname_np(pthread_self(), name, NULL) +#define redis_set_thread_title(name) pthread_setname_np(pthread_self(), "%s", name) #else #if (defined __APPLE__ && defined(MAC_OS_X_VERSION_10_7)) #ifdef __cplusplus @@ -258,7 +262,7 @@ int pthread_setname_np(const char *name); #endif /* Check if we can use setcpuaffinity(). */ -#if (defined __linux || defined __NetBSD__ || defined __FreeBSD__) +#if (defined __linux || defined __NetBSD__ || defined __FreeBSD__ || defined __DragonFly__) #define USE_SETCPUAFFINITY #ifdef __cplusplus extern "C" diff --git a/src/connection.cpp b/src/connection.cpp index bb3106a7e..8ba75264e 100644 --- a/src/connection.cpp +++ b/src/connection.cpp @@ -168,7 +168,12 @@ static int connSocketWrite(connection *conn, const void *data, size_t data_len) int ret = write(conn->fd, data, data_len); if (ret < 0 && errno != EAGAIN) { conn->last_errno = errno; - conn->state.store(CONN_STATE_ERROR, std::memory_order_relaxed); + + /* Don't overwrite the state of a connection that is not already + * connected, not to mess with handler callbacks. + */ + ConnectionState expected = CONN_STATE_CONNECTED; + conn->state.compare_exchange_strong(expected, CONN_STATE_ERROR, std::memory_order_relaxed); } return ret; @@ -180,7 +185,12 @@ static int connSocketRead(connection *conn, void *buf, size_t buf_len) { conn->state.store(CONN_STATE_CLOSED, std::memory_order_release); } else if (ret < 0 && errno != EAGAIN) { conn->last_errno = errno; - conn->state.store(CONN_STATE_ERROR, std::memory_order_release); + + /* Don't overwrite the state of a connection that is not already + * connected, not to mess with handler callbacks. 
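+ * Concretely, the compare-exchange below only moves a connection that is + * still CONN_STATE_CONNECTED into CONN_STATE_ERROR; any other state (such + * as CONN_STATE_CLOSED) is left untouched.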
+ */ + ConnectionState expected = CONN_STATE_CONNECTED; + conn->state.compare_exchange_strong(expected, CONN_STATE_ERROR, std::memory_order_release); } return ret; @@ -260,8 +270,9 @@ static void connSocketEventHandler(struct aeEventLoop *el, int fd, void *clientD if (conn->state.load(std::memory_order_relaxed) == CONN_STATE_CONNECTING && (mask & AE_WRITABLE) && conn->conn_handler) { - if (connGetSocketError(conn)) { - conn->last_errno = errno; + int conn_error = connGetSocketError(conn); + if (conn_error) { + conn->last_errno = conn_error; conn->state.store(CONN_STATE_ERROR, std::memory_order_release); } else { conn->state.store(CONN_STATE_CONNECTED, std::memory_order_release); diff --git a/src/connection.h b/src/connection.h index 18a13f3de..606137229 100644 --- a/src/connection.h +++ b/src/connection.h @@ -111,7 +111,7 @@ static inline int connAccept(connection *conn, ConnectionCallbackFunc accept_han } /* Establish a connection. The connect_handler will be called when the connection - * is established, or if an error has occured. + * is established, or if an error has occurred. * * The connection handler will be responsible to set up any read/write handlers * as needed. @@ -173,7 +173,7 @@ static inline int connSetReadHandler(connection *conn, ConnectionCallbackFunc fu /* Set a write handler, and possibly enable a write barrier, this flag is * cleared when write handler is changed or removed. - * With barroer enabled, we never fire the event if the read handler already + * With barrier enabled, we never fire the event if the read handler already * fired in the same event loop iteration. Useful when you want to persist * things to disk before sending replies, and want to do that in a group fashion. */ static inline int connSetWriteHandlerWithBarrier(connection *conn, ConnectionCallbackFunc func, int barrier, bool fThreadSafe = false) { @@ -241,6 +241,7 @@ int connSockName(connection *conn, char *ip, size_t ip_len, int *port); const char *connGetInfo(connection *conn, char *buf, size_t buf_len); /* Helpers for tls special considerations */ +sds connTLSGetPeerCert(connection *conn); int tlsHasPendingData(); int tlsProcessPendingData(); diff --git a/src/crcspeed.c b/src/crcspeed.c index d2d97a8c7..81a80ce8e 100644 --- a/src/crcspeed.c +++ b/src/crcspeed.c @@ -35,7 +35,8 @@ void crcspeed64little_init(crcfn64 crcfn, uint64_t table[8][256]) { /* generate CRCs for all single byte sequences */ for (int n = 0; n < 256; n++) { - table[0][n] = crcfn(0, &n, 1); + unsigned char v = n; + table[0][n] = crcfn(0, &v, 1); } /* generate nested CRC table for future slice-by-8 lookup */ diff --git a/src/db.cpp b/src/db.cpp index 81e8a25af..de2ed2754 100644 --- a/src/db.cpp +++ b/src/db.cpp @@ -35,6 +35,13 @@ #include <signal.h> #include <ctype.h> +/* Database backup. */ +struct dbBackup { + redisDb *dbarray; + rax *slots_to_keys; + uint64_t slots_keys_count[CLUSTER_SLOTS]; +}; + /*----------------------------------------------------------------------------- * C-level DB API *----------------------------------------------------------------------------*/ @@ -86,7 +93,7 @@ void updateDbValAccess(dictEntry *de, int flags) * implementations that should instead rely on lookupKeyRead(), * lookupKeyWrite() and lookupKeyReadWithFlags().
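The crcspeed.c hunk above fixes a subtle portability bug behind the 6.0.10 big-endian RDB CRC64 fix: crcfn() is handed a pointer to a single byte, but &n pointed at an int, and the lowest-addressed byte of an int on a big-endian host is the high-order byte (always 0 here). A sketch of the corrected table build; build_first_row and the function-pointer signature are illustrative, not the crcspeed API:

    #include <cstddef>
    #include <cstdint>

    /* Build the first row of a slice-by-8 CRC table. Copying n into an
     * unsigned char guarantees crcfn sees the byte value 0..255 on any
     * endianness; passing &n directly feeds the wrong byte on big-endian. */
    static void build_first_row(uint64_t table0[256],
                                uint64_t (*crcfn)(uint64_t, const void *, size_t)) {
        for (int n = 0; n < 256; n++) {
            unsigned char v = (unsigned char)n;
            table0[n] = crcfn(0, &v, 1);
        }
    }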
*/ static robj *lookupKey(redisDb *db, robj *key, int flags) { - dictEntry *de = dictFind(db->pdict,ptrFromObj(key)); + dictEntry *de = dictFind(db->dict,ptrFromObj(key)); if (de) { robj *val = (robj*)dictGetVal(de); @@ -131,11 +138,8 @@ robj_roptr lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) { /* Key expired. If we are in the context of a master, expireIfNeeded() * returns 0 only when the key does not exist at all, so it's safe * to return NULL ASAP. */ - if (listLength(g_pserver->masters) == 0) { - g_pserver->stat_keyspace_misses++; - notifyKeyspaceEvent(NOTIFY_KEY_MISS, "keymiss", key, db->id); - return NULL; - } + if (listLength(g_pserver->masters) == 0) + goto keymiss; /* However if we are in the context of a replica, expireIfNeeded() will * not really try to expire the key, it only returns information @@ -145,7 +149,7 @@ robj_roptr lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) { * However, if the command caller is not the master, and as additional * safety measure, the command invoked is a read-only command, we can * safely return NULL here, and provide a more consistent behavior - * to clients accessign expired values in a read-only fashion, that + * to clients accessing expired values in a read-only fashion, that * will say the key as non existing. * * Notably this covers GETs when slaves are used to scale reads. */ @@ -154,19 +158,21 @@ robj_roptr lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) { serverTL->current_client->cmd && serverTL->current_client->cmd->flags & CMD_READONLY) { - g_pserver->stat_keyspace_misses++; - notifyKeyspaceEvent(NOTIFY_KEY_MISS, "keymiss", key, db->id); - return NULL; + goto keymiss; } } val = lookupKey(db,key,flags); - if (val == NULL) { + if (val == NULL) + goto keymiss; + g_pserver->stat_keyspace_hits++; + return val; + +keymiss: + if (!(flags & LOOKUP_NONOTIFY)) { g_pserver->stat_keyspace_misses++; notifyKeyspaceEvent(NOTIFY_KEY_MISS, "keymiss", key, db->id); } - else - g_pserver->stat_keyspace_hits++; - return val; + return NULL; } /* Like lookupKeyReadWithFlags(), but does not use any flag, which is the @@ -205,7 +211,7 @@ robj *lookupKeyWriteOrReply(client *c, robj *key, robj *reply) { int dbAddCore(redisDb *db, robj *key, robj *val, bool fUpdateMvcc) { serverAssert(!val->FExpires()); sds copy = sdsdup(szFromObj(key)); - int retval = dictAdd(db->pdict, copy, val); + int retval = dictAdd(db->dict, copy, val); uint64_t mvcc = getMvccTstamp(); if (fUpdateMvcc) { setMvccTstamp(key, mvcc); @@ -263,14 +269,14 @@ void dbOverwriteCore(redisDb *db, dictEntry *de, robj *key, robj *val, bool fUpd setMvccTstamp(val, getMvccTstamp()); } - dictSetVal(db->pdict, de, val); + dictSetVal(db->dict, de, val); if (g_pserver->lazyfree_lazy_server_del) { freeObjAsync(old); - dictSetVal(db->pdict, &auxentry, NULL); + dictSetVal(db->dict, &auxentry, NULL); } - dictFreeVal(db->pdict, &auxentry); + dictFreeVal(db->dict, &auxentry); } /* Overwrite an existing key with a new value. Incrementing the reference @@ -279,7 +285,7 @@ void dbOverwriteCore(redisDb *db, dictEntry *de, robj *key, robj *val, bool fUpd * * The program is aborted if the key was not already present. 
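The lookup refactor above funnels every miss through a single keymiss label, so the miss counter and the "keymiss" keyspace event can never drift apart, and a LOOKUP_NONOTIFY flag suppresses both (which is what lets EXISTS and OBJECT avoid side effects). The same shape in a self-contained sketch; the map, Stats, and flag value are illustrative:

    #include <cstdint>
    #include <string>
    #include <unordered_map>

    struct Stats { uint64_t hits = 0, misses = 0; };
    enum { LOOKUP_NONOTIFY = 1 };

    const std::string *lookupForRead(std::unordered_map<std::string, std::string> &db,
                                     const std::string &key, int flags, Stats &st) {
        auto it = db.find(key);
        if (it == db.end()) goto keymiss;
        st.hits++;
        return &it->second;

    keymiss:
        if (!(flags & LOOKUP_NONOTIFY)) st.misses++; /* and fire "keymiss" here */
        return nullptr;
    }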
*/ void dbOverwrite(redisDb *db, robj *key, robj *val) { - dictEntry *de = dictFind(db->pdict,ptrFromObj(key)); + dictEntry *de = dictFind(db->dict,ptrFromObj(key)); serverAssertWithInfo(NULL,key,de != NULL); dbOverwriteCore(db, de, key, val, !!g_pserver->fActiveReplica, false); @@ -290,7 +296,7 @@ int dbMerge(redisDb *db, robj *key, robj *val, int fReplace) { if (fReplace) { - dictEntry *de = dictFind(db->pdict, ptrFromObj(key)); + dictEntry *de = dictFind(db->dict, ptrFromObj(key)); if (de == nullptr) return (dbAddCore(db, key, val, false /* fUpdateMvcc */) == DICT_OK); @@ -321,7 +327,7 @@ int dbMerge(redisDb *db, robj *key, robj *val, int fReplace) * The client 'c' argument may be set to NULL if the operation is performed * in a context where there is no clear client performing the operation. */ void genericSetKey(client *c, redisDb *db, robj *key, robj *val, int keepttl, int signal) { - dictEntry *de = dictFind(db->pdict, ptrFromObj(key)); + dictEntry *de = dictFind(db->dict, ptrFromObj(key)); if (de == NULL) { dbAdd(db,key,val); } else { @@ -340,7 +346,7 @@ void setKey(client *c, redisDb *db, robj *key, robj *val) { /* Return true if the specified key exists in the specified database. * LRU/LFU info is not updated in any way. */ int dbExists(redisDb *db, robj *key) { - return dictFind(db->pdict,ptrFromObj(key)) != NULL; + return dictFind(db->dict,ptrFromObj(key)) != NULL; } /* Return a random key, in form of a Redis object. @@ -350,13 +356,13 @@ int dbExists(redisDb *db, robj *key) { robj *dbRandomKey(redisDb *db) { dictEntry *de; int maxtries = 100; - int allvolatile = dictSize(db->pdict) == db->setexpire->size(); + int allvolatile = dictSize(db->dict) == db->setexpire->size(); while(1) { sds key; robj *keyobj; - de = dictGetRandomKey(db->pdict); + de = dictGetRandomKey(db->dict); if (de == NULL) return NULL; key = (sds)dictGetKey(de); @@ -394,10 +400,10 @@ int dbSyncDelete(redisDb *db, robj *key) { /* Deleting an entry from the expires dict will not free the sds of * the key, because it is shared with the main dictionary. */ - dictEntry *de = dictFind(db->pdict, szFromObj(key)); + dictEntry *de = dictFind(db->dict, szFromObj(key)); if (de != nullptr && ((robj*)dictGetVal(de))->FExpires()) removeExpireCore(db, key, de); - if (dictDelete(db->pdict,ptrFromObj(key)) == DICT_OK) { + if (dictDelete(db->dict,ptrFromObj(key)) == DICT_OK) { if (g_pserver->cluster_enabled) slotToKeyDel(szFromObj(key)); return 1; } else { @@ -450,48 +456,18 @@ robj *dbUnshareStringValue(redisDb *db, robj *key, robj *o) { return o; } -/* Remove all keys from all the databases in a Redis g_pserver-> - * If callback is given the function is called from time to time to - * signal that work is in progress. +/* Remove all keys from the database(s) structure. The dbarray argument + * may not be the server main DBs (could be a backup). * - * The dbnum can be -1 if all the DBs should be flushed, or the specified - * DB number if we want to flush only a single Redis database number. - * - * Flags are be EMPTYDB_NO_FLAGS if no special flags are specified or - * 1. EMPTYDB_ASYNC if we want the memory to be freed in a different thread. - * 2. EMPTYDB_BACKUP if we want to empty the backup dictionaries created by - * disklessLoadMakeBackups. In that case we only free memory and avoid - * firing module events. - * and the function to return ASAP. - * - * On success the fuction returns the number of keys removed from the - * database(s). 
Otherwise -1 is returned in the specific case the - * DB number is out of range, and errno is set to EINVAL. */ -long long emptyDbGeneric(redisDb *dbarray, int dbnum, int flags, void(callback)(void*)) { - int async = (flags & EMPTYDB_ASYNC); - int backup = (flags & EMPTYDB_BACKUP); /* Just free the memory, nothing else */ - RedisModuleFlushInfoV1 fi = {REDISMODULE_FLUSHINFO_VERSION,!async,dbnum}; + * The dbnum can be -1 if all the DBs should be emptied, or the specified + * DB index if we want to empty only a single database. + * The function returns the number of keys removed from the database(s). */ +long long emptyDbStructure(redisDb *dbarray, int dbnum, int async, + void(callback)(void*)) +{ long long removed = 0; - - if (dbnum < -1 || dbnum >= cserver.dbnum) { - errno = EINVAL; - return -1; - } - - /* Pre-flush actions */ - if (!backup) { - /* Fire the flushdb modules event. */ - moduleFireServerEvent(REDISMODULE_EVENT_FLUSHDB, - REDISMODULE_SUBEVENT_FLUSHDB_START, - &fi); - - /* Make sure the WATCHed keys are affected by the FLUSH* commands. - * Note that we need to call the function while the keys are still - * there. */ - signalFlushedDb(dbnum); - } - int startdb, enddb; + if (dbnum == -1) { startdb = 0; enddb = cserver.dbnum-1; @@ -500,38 +476,147 @@ long long emptyDbGeneric(redisDb *dbarray, int dbnum, int flags, void(callback)( } for (int j = startdb; j <= enddb; j++) { - removed += dictSize(dbarray[j].pdict); + removed += dictSize(dbarray[j].dict); if (async) { emptyDbAsync(&dbarray[j]); } else { - dictEmpty(dbarray[j].pdict,callback); + dictEmpty(dbarray[j].dict,callback); dbarray[j].setexpire->clear(); } - } - - /* Post-flush actions */ - if (!backup) { - if (g_pserver->cluster_enabled) { - if (async) { - slotToKeyFlushAsync(); - } else { - slotToKeyFlush(); - } - } - if (dbnum == -1) flushSlaveKeysWithExpireList(); - - /* Also fire the end event. Note that this event will fire almost - * immediately after the start event if the flush is asynchronous. */ - moduleFireServerEvent(REDISMODULE_EVENT_FLUSHDB, - REDISMODULE_SUBEVENT_FLUSHDB_END, - &fi); + /* Because all keys of database are removed, reset average ttl. */ + dbarray[j].avg_ttl = 0; + dbarray[j].last_expire_set = 0; } return removed; } +/* Remove all keys from all the databases in a Redis server. + * If callback is given the function is called from time to time to + * signal that work is in progress. + * + * The dbnum can be -1 if all the DBs should be flushed, or the specified + * DB number if we want to flush only a single Redis database number. + * + * Flags are be EMPTYDB_NO_FLAGS if no special flags are specified or + * EMPTYDB_ASYNC if we want the memory to be freed in a different thread + * and the function to return ASAP. + * + * On success the function returns the number of keys removed from the + * database(s). Otherwise -1 is returned in the specific case the + * DB number is out of range, and errno is set to EINVAL. */ long long emptyDb(int dbnum, int flags, void(callback)(void*)) { - return emptyDbGeneric(g_pserver->db, dbnum, flags, callback); + int async = (flags & EMPTYDB_ASYNC); + RedisModuleFlushInfoV1 fi = {REDISMODULE_FLUSHINFO_VERSION,!async,dbnum}; + long long removed = 0; + + if (dbnum < -1 || dbnum >= cserver.dbnum) { + errno = EINVAL; + return -1; + } + + /* Fire the flushdb modules event. */ + moduleFireServerEvent(REDISMODULE_EVENT_FLUSHDB, + REDISMODULE_SUBEVENT_FLUSHDB_START, + &fi); + + /* Make sure the WATCHed keys are affected by the FLUSH* commands. 
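The new emptyDbStructure() above (and signalFlushedDb() further down) share a small idiom: dbnum == -1 selects every database, any other value exactly one. Factored out as a sketch, with dbCount standing in for cserver.dbnum:

    #include <utility>

    /* Map a FLUSHDB-style selector onto an inclusive [start, end] range:
     * -1 means all databases, any other value names a single one. */
    static std::pair<int, int> dbRange(int dbnum, int dbCount) {
        if (dbnum == -1) return {0, dbCount - 1};
        return {dbnum, dbnum};
    }

Callers then loop for (int j = range.first; j <= range.second; j++), matching both call sites in these hunks.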
+ * Note that we need to call the function while the keys are still + * there. */ + signalFlushedDb(dbnum); + + /* Empty redis database structure. */ + removed = emptyDbStructure(g_pserver->db, dbnum, async, callback); + + /* Flush slots to keys map if enable cluster, we can flush entire + * slots to keys map whatever dbnum because only support one DB + * in cluster mode. */ + if (g_pserver->cluster_enabled) slotToKeyFlush(async); + + if (dbnum == -1) flushSlaveKeysWithExpireList(); + + /* Also fire the end event. Note that this event will fire almost + * immediately after the start event if the flush is asynchronous. */ + moduleFireServerEvent(REDISMODULE_EVENT_FLUSHDB, + REDISMODULE_SUBEVENT_FLUSHDB_END, + &fi); + + return removed; +} + +/* Store a backup of the database for later use, and put an empty one + * instead of it. */ +dbBackup *backupDb(void) { + dbBackup *backup = (dbBackup*)zmalloc(sizeof(dbBackup)); + + /* Backup main DBs. */ + backup->dbarray = (redisDb*)zmalloc(sizeof(redisDb)*cserver.dbnum); + for (int i=0; i<cserver.dbnum; i++) { + backup->dbarray[i] = g_pserver->db[i]; + g_pserver->db[i].dict = dictCreate(&dbDictType,NULL); + g_pserver->db[i].setexpire = new(MALLOC_LOCAL) expireset; + g_pserver->db[i].expireitr = g_pserver->db[i].setexpire->end(); + } + + /* Backup cluster slots to keys map if enable cluster. */ + if (g_pserver->cluster_enabled) { + backup->slots_to_keys = g_pserver->cluster->slots_to_keys; + memcpy(backup->slots_keys_count, g_pserver->cluster->slots_keys_count, + sizeof(g_pserver->cluster->slots_keys_count)); + g_pserver->cluster->slots_to_keys = raxNew(); + memset(g_pserver->cluster->slots_keys_count, 0, + sizeof(g_pserver->cluster->slots_keys_count)); + } + + return backup; +} + +/* Discard a previously created backup, this can be slow (similar to FLUSHALL) + * Arguments are similar to the ones of emptyDb, see EMPTYDB_ flags. */ +void discardDbBackup(dbBackup *buckup, int flags, void(callback)(void*)) { + int async = (flags & EMPTYDB_ASYNC); + + /* Release main DBs backup . */ + emptyDbStructure(buckup->dbarray, -1, async, callback); + for (int i=0; i<cserver.dbnum; i++) { + dictRelease(buckup->dbarray[i].dict); + delete buckup->dbarray[i].setexpire; + } + + /* Release slots to keys map backup if enable cluster. */ + if (g_pserver->cluster_enabled) freeSlotsToKeysMap(buckup->slots_to_keys, async); + + /* Release buckup. */ + zfree(buckup->dbarray); + zfree(buckup); +} + +/* Restore the previously created backup (discarding what currently resides + * in the db). + * This function should be called after the current contents of the database + * was emptied with a previous call to emptyDb (possibly using the async mode). */ +void restoreDbBackup(dbBackup *buckup) { + /* Restore main DBs. */ + for (int i=0; i<cserver.dbnum; i++) { + serverAssert(dictSize(g_pserver->db[i].dict) == 0); + serverAssert(g_pserver->db[i].setexpire->empty()); + dictRelease(g_pserver->db[i].dict); + delete g_pserver->db[i].setexpire; + g_pserver->db[i] = buckup->dbarray[i]; + } + + /* Restore slots to keys map backup if enable cluster. */ + if (g_pserver->cluster_enabled) { + serverAssert(g_pserver->cluster->slots_to_keys->numele == 0); + raxFree(g_pserver->cluster->slots_to_keys); + g_pserver->cluster->slots_to_keys = buckup->slots_to_keys; + memcpy(g_pserver->cluster->slots_keys_count, buckup->slots_keys_count, + sizeof(g_pserver->cluster->slots_keys_count)); + } + + /* Release buckup.
*/ + zfree(buckup->dbarray); + zfree(buckup); } int selectDb(client *c, int id) { @@ -545,7 +630,7 @@ long long dbTotalServerKeyCount() { long long total = 0; int j; for (j = 0; j < cserver.dbnum; j++) { - total += dictSize(g_pserver->db[j].pdict); + total += dictSize(g_pserver->db[j].dict); } return total; } @@ -567,7 +652,18 @@ void signalModifiedKey(client *c, redisDb *db, robj *key) { } void signalFlushedDb(int dbid) { - touchWatchedKeysOnFlush(dbid); + int startdb, enddb; + if (dbid == -1) { + startdb = 0; + enddb = cserver.dbnum-1; + } else { + startdb = enddb = dbid; + } + + for (int j = startdb; j <= enddb; j++) { + touchAllWatchedKeysInDb(&g_pserver->db[j], NULL); + } + trackingInvalidateKeysOnFlush(dbid); } @@ -682,7 +778,7 @@ void existsCommand(client *c) { int j; for (j = 1; j < c->argc; j++) { - if (lookupKeyRead(c->db,c->argv[j])) count++; + if (lookupKeyReadWithFlags(c->db,c->argv[j],LOOKUP_NOTOUCH)) count++; } addReplyLongLong(c,count); } @@ -732,7 +828,7 @@ void keysCommand(client *c) { unsigned long numkeys = 0; void *replylen = addReplyDeferredLen(c); - di = dictGetSafeIterator(c->db->pdict); + di = dictGetSafeIterator(c->db->dict); allkeys = (pattern[0] == '*' && plen == 1); while((de = dictNext(di)) != NULL) { sds key = (sds)dictGetKey(de); @@ -876,7 +972,7 @@ void scanGenericCommand(client *c, robj_roptr o, unsigned long cursor) { /* Handle the case of a hash table. */ ht = NULL; if (o == nullptr) { - ht = c->db->pdict; + ht = c->db->dict; } else if (o->type == OBJ_SET && o->encoding == OBJ_ENCODING_HT) { ht = (dict*)ptrFromObj(o); } else if (o->type == OBJ_HASH && o->encoding == OBJ_ENCODING_HT) { @@ -884,7 +980,7 @@ void scanGenericCommand(client *c, robj_roptr o, unsigned long cursor) { count *= 2; /* We return key / value for this type. */ } else if (o->type == OBJ_ZSET && o->encoding == OBJ_ENCODING_SKIPLIST) { zset *zs = (zset*)ptrFromObj(o); - ht = zs->pdict; + ht = zs->dict; count *= 2; /* We return key / value for this type. */ } @@ -963,7 +1059,7 @@ void scanGenericCommand(client *c, robj_roptr o, unsigned long cursor) { /* Filter element if it is an expired key. */ if (!filter && o == nullptr && expireIfNeeded(c->db, kobj)) filter = 1; - /* Remove the element and its associted value if needed. */ + /* Remove the element and its associated value if needed. */ if (filter) { decrRefCount(kobj); listDelNode(keys, node); @@ -1009,7 +1105,7 @@ void scanCommand(client *c) { } void dbsizeCommand(client *c) { - addReplyLongLong(c,dictSize(c->db->pdict)); + addReplyLongLong(c,dictSize(c->db->dict)); } void lastsaveCommand(client *c) { @@ -1222,23 +1318,16 @@ int dbSwapDatabases(long id1, long id2) { if (id1 < 0 || id1 >= cserver.dbnum || id2 < 0 || id2 >= cserver.dbnum) return C_ERR; if (id1 == id2) return C_OK; - redisDb aux = g_pserver->db[id1]; redisDb *db1 = &g_pserver->db[id1], *db2 = &g_pserver->db[id2]; /* Swap hash tables. Note that we don't swap blocking_keys, * ready_keys and watched_keys, since we want clients to * remain in the same DB they were. 
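backupDb()/restoreDbBackup() above implement a move-out/move-back protocol: detach the live structures into a backup object, install fresh empty ones, and later either discard the backup or assert the live side is empty and move it back (what repl-diskless-load=swapdb relies on). The shape, reduced to standard containers; DbBackup and Database are illustrative stand-ins, not the KeyDB types:

    #include <cassert>
    #include <string>
    #include <unordered_map>
    #include <vector>

    using Database = std::unordered_map<std::string, std::string>;
    struct DbBackup { std::vector<Database> dbs; };

    /* Steal the live databases and leave fresh empty ones in their place. */
    static DbBackup backupAll(std::vector<Database> &live) {
        DbBackup b;
        b.dbs = std::move(live);
        live.assign(b.dbs.size(), Database{});
        return b;
    }

    /* Move the backup back; the caller must have emptied the live side first
     * (mirroring the serverAssert(dictSize(...) == 0) checks above). */
    static void restoreAll(std::vector<Database> &live, DbBackup &&b) {
        for (const Database &d : live) assert(d.empty());
        live = std::move(b.dbs);
    }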
*/ - db1->pdict = db2->pdict; - db1->setexpire = db2->setexpire; - db1->expireitr = db2->expireitr; - db1->avg_ttl = db2->avg_ttl; - db1->last_expire_set = db2->last_expire_set; - - db2->pdict = aux.pdict; - db2->setexpire = aux.setexpire; - db2->expireitr = aux.expireitr; - db2->avg_ttl = aux.avg_ttl; - db2->last_expire_set = aux.last_expire_set; + std::swap(db1->dict, db2->dict); + std::swap(db1->setexpire, db2->setexpire); + std::swap(db1->expireitr, db2->expireitr); + std::swap(db1->avg_ttl, db2->avg_ttl); + std::swap(db1->last_expire_set, db2->last_expire_set); /* Now we need to handle clients blocked on lists: as an effect * of swapping the two DBs, a client that was waiting for list @@ -1248,9 +1337,14 @@ int dbSwapDatabases(long id1, long id2) { * However normally we only do this check for efficiency reasons * in dbAdd() when a list is created. So here we need to rescan * the list of clients blocked on lists and signal lists as ready - * if needed. */ + * if needed. + * + * Also the swapdb should make transaction fail if there is any + * client watching keys */ scanDatabaseForReadyLists(db1); + touchAllWatchedKeysInDb(db1, db2); scanDatabaseForReadyLists(db2); + touchAllWatchedKeysInDb(db2, db1); return C_OK; } @@ -1278,6 +1372,8 @@ void swapdbCommand(client *c) { addReplyError(c,"DB index is out of range"); return; } else { + RedisModuleSwapDbInfo si = {REDISMODULE_SWAPDBINFO_VERSION,(int32_t)id1,(int32_t)id2}; + moduleFireServerEvent(REDISMODULE_EVENT_SWAPDB,0,&si); g_pserver->dirty++; addReply(c,shared.ok); } @@ -1287,7 +1383,7 @@ void swapdbCommand(client *c) { * Expires API *----------------------------------------------------------------------------*/ int removeExpire(redisDb *db, robj *key) { - dictEntry *de = dictFind(db->pdict,ptrFromObj(key)); + dictEntry *de = dictFind(db->dict,ptrFromObj(key)); return removeExpireCore(db, key, de); } int removeExpireCore(redisDb *db, robj *key, dictEntry *de) { @@ -1308,7 +1404,7 @@ int removeExpireCore(redisDb *db, robj *key, dictEntry *de) { } int removeSubkeyExpire(redisDb *db, robj *key, robj *subkey) { - dictEntry *de = dictFind(db->pdict,ptrFromObj(key)); + dictEntry *de = dictFind(db->dict,ptrFromObj(key)); serverAssertWithInfo(NULL,key,de != NULL); robj *val = (robj*)dictGetVal(de); @@ -1350,13 +1446,13 @@ void setExpire(client *c, redisDb *db, robj *key, robj *subkey, long long when) serverAssert(GlobalLocksAcquired()); /* Reuse the sds from the main dict in the expire dict */ - kde = dictFind(db->pdict,ptrFromObj(key)); + kde = dictFind(db->dict,ptrFromObj(key)); serverAssertWithInfo(NULL,key,kde != NULL); if (((robj*)dictGetVal(kde))->getrefcount(std::memory_order_relaxed) == OBJ_SHARED_REFCOUNT) { // shared objects cannot have the expire bit set, create a real object - dictSetVal(db->pdict, kde, dupStringObject((robj*)dictGetVal(kde))); + dictSetVal(db->dict, kde, dupStringObject((robj*)dictGetVal(kde))); } /* Update TTL stats (exponential moving average) */ @@ -1409,13 +1505,13 @@ void setExpire(client *c, redisDb *db, robj *key, expireEntry &&e) serverAssert(GlobalLocksAcquired()); /* Reuse the sds from the main dict in the expire dict */ - kde = dictFind(db->pdict,ptrFromObj(key)); + kde = dictFind(db->dict,ptrFromObj(key)); serverAssertWithInfo(NULL,key,kde != NULL); if (((robj*)dictGetVal(kde))->getrefcount(std::memory_order_relaxed) == OBJ_SHARED_REFCOUNT) { // shared objects cannot have the expire bit set, create a real object - dictSetVal(db->pdict, kde, dupStringObject((robj*)dictGetVal(kde))); + 
dictSetVal(db->dict, kde, dupStringObject((robj*)dictGetVal(kde))); } if (((robj*)dictGetVal(kde))->FExpires()) @@ -1440,7 +1536,7 @@ expireEntry *getExpire(redisDb *db, robj_roptr key) { if (db->setexpire->size() == 0) return nullptr; - de = dictFind(db->pdict, ptrFromObj(key)); + de = dictFind(db->dict, ptrFromObj(key)); if (de == NULL) return nullptr; robj *obj = (robj*)dictGetVal(de); @@ -1617,27 +1713,54 @@ int expireIfNeeded(redisDb *db, robj *key) { /* ----------------------------------------------------------------------------- * API to get key arguments from commands * ---------------------------------------------------------------------------*/ -#define MAX_KEYS_BUFFER 256 -thread_local static int getKeysTempBuffer[MAX_KEYS_BUFFER]; + +/* Prepare the getKeysResult struct to hold numkeys, either by using the + * pre-allocated keysbuf or by allocating a new array on the heap. + * + * This function must be called at least once before starting to populate + * the result, and can be called repeatedly to enlarge the result array. + */ +int *getKeysPrepareResult(getKeysResult *result, int numkeys) { + /* GETKEYS_RESULT_INIT initializes keys to NULL, point it to the pre-allocated stack + * buffer here. */ + if (!result->keys) { + serverAssert(!result->numkeys); + result->keys = result->keysbuf; + } + + /* Resize if necessary */ + if (numkeys > result->size) { + if (result->keys != result->keysbuf) { + /* We're not using a static buffer, just (re)alloc */ + result->keys = (int*)zrealloc(result->keys, numkeys * sizeof(int)); + } else { + /* We are using a static buffer, copy its contents */ + result->keys = (int*)zmalloc(numkeys * sizeof(int)); + if (result->numkeys) + memcpy(result->keys, result->keysbuf, result->numkeys * sizeof(int)); + } + result->size = numkeys; + } + + return result->keys; +} /* The base case is to use the keys position as given in the command table * (firstkey, lastkey, step). */ -int *getKeysUsingCommandTable(struct redisCommand *cmd,robj **argv, int argc, int *numkeys) { +int getKeysUsingCommandTable(struct redisCommand *cmd,robj **argv, int argc, getKeysResult *result) { int j, i = 0, last, *keys; UNUSED(argv); if (cmd->firstkey == 0) { - *numkeys = 0; - return NULL; + result->numkeys = 0; + return 0; } last = cmd->lastkey; if (last < 0) last = argc+last; int count = ((last - cmd->firstkey)+1); - keys = getKeysTempBuffer; - if (count > MAX_KEYS_BUFFER) - keys = (int*)zmalloc(sizeof(int)*count); + keys = getKeysPrepareResult(result, count); for (j = cmd->firstkey; j <= last; j += cmd->keystep) { if (j >= argc) { @@ -1648,23 +1771,23 @@ int *getKeysUsingCommandTable(struct redisCommand *cmd,robj **argv, int argc, in * return no keys and expect the command implementation to report * an arity or syntax error. */ if (cmd->flags & CMD_MODULE || cmd->arity < 0) { - getKeysFreeResult(keys); - *numkeys = 0; - return NULL; + getKeysFreeResult(result); + result->numkeys = 0; + return 0; } else { serverPanic("Redis built-in command declared keys positions not matching the arity requirements."); } } keys[i++] = j; } - *numkeys = i; - return keys; + result->numkeys = i; + return i; } /* Return all the arguments that are keys in the command passed via argc / argv. * * The command returns the positions of all the key arguments inside the array, - * so the actual return value is an heap allocated array of integers. The + * so the actual return value is a heap allocated array of integers. The * length of the array is returned by reference into *numkeys. 
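getKeysPrepareResult() above replaces the old thread-local getKeysTempBuffer with a per-result small-buffer optimization: key positions live in a fixed array until a command needs more, then spill to the heap while preserving already-written entries. The same pattern as a standalone sketch; KeyResult, STATIC_CAP, and the helper names are illustrative:

    #include <cstdlib>
    #include <cstring>

    enum { STATIC_CAP = 256 };

    struct KeyResult {
        int numkeys = 0;          /* entries written so far */
        int size = STATIC_CAP;    /* current capacity */
        int *keys = nullptr;      /* keysbuf, or a heap array after a spill */
        int keysbuf[STATIC_CAP];
    };

    static int *prepareKeys(KeyResult *r, int need) {
        if (!r->keys) r->keys = r->keysbuf;      /* first use: point at the buffer */
        if (need > r->size) {
            if (r->keys == r->keysbuf) {         /* spill: copy what we have */
                r->keys = (int *)malloc(need * sizeof(int));
                memcpy(r->keys, r->keysbuf, r->numkeys * sizeof(int));
            } else {
                r->keys = (int *)realloc(r->keys, need * sizeof(int));
            }
            r->size = need;
        }
        return r->keys;
    }

    static void freeKeys(KeyResult *r) {
        if (r->keys && r->keys != r->keysbuf) free(r->keys);
    }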
* * 'cmd' must be point to the corresponding entry into the redisCommand @@ -1672,26 +1795,26 @@ int *getKeysUsingCommandTable(struct redisCommand *cmd,robj **argv, int argc, in * * This function uses the command table if a command-specific helper function * is not required, otherwise it calls the command-specific function. */ -int *getKeysFromCommand(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) { +int getKeysFromCommand(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) { if (cmd->flags & CMD_MODULE_GETKEYS) { - return moduleGetCommandKeysViaAPI(cmd,argv,argc,numkeys); + return moduleGetCommandKeysViaAPI(cmd,argv,argc,result); } else if (!(cmd->flags & CMD_MODULE) && cmd->getkeys_proc) { - return cmd->getkeys_proc(cmd,argv,argc,numkeys); + return cmd->getkeys_proc(cmd,argv,argc,result); } else { - return getKeysUsingCommandTable(cmd,argv,argc,numkeys); + return getKeysUsingCommandTable(cmd,argv,argc,result); } } /* Free the result of getKeysFromCommand. */ -void getKeysFreeResult(int *result) { - if (result != getKeysTempBuffer) - zfree(result); +void getKeysFreeResult(getKeysResult *result) { + if (result && result->keys != result->keysbuf) + zfree(result->keys); } /* Helper function to extract keys from following commands: * ZUNIONSTORE ... * ZINTERSTORE ... */ -int *zunionInterGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) { +int zunionInterGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) { int i, num, *keys; UNUSED(cmd); @@ -1699,30 +1822,30 @@ int *zunionInterGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *nu /* Sanity check. Don't return any key if the command is going to * reply with syntax error. */ if (num < 1 || num > (argc-3)) { - *numkeys = 0; - return NULL; + result->numkeys = 0; + return 0; } /* Keys in z{union,inter}store come from two places: * argv[1] = storage key, * argv[3...n] = keys to intersect */ - keys = getKeysTempBuffer; - if (num+1>MAX_KEYS_BUFFER) - keys = (int*)zmalloc(sizeof(int)*(num+1)); + /* Total keys = {union,inter} keys + storage key */ + keys = getKeysPrepareResult(result, num+1); + result->numkeys = num+1; /* Add all key positions for argv[3...n] to keys[] */ for (i = 0; i < num; i++) keys[i] = 3+i; /* Finally add the argv[1] key position (the storage key target). */ keys[num] = 1; - *numkeys = num+1; /* Total keys = {union,inter} keys + storage key */ - return keys; + + return result->numkeys; } /* Helper function to extract keys from the following commands: * EVAL
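zunionInterGetKeys() above validates the client-supplied numkeys before allocating anything, then records the source keys at argv[3..] plus the destination at argv[1]. A hedged sketch of that order of operations; the positions mirror ZUNIONSTORE dst numkeys key [key ...], and the function name is illustrative:

    #include <vector>

    /* Return false (no keys) when numkeys cannot fit in argc, so the command
     * path reports a syntax error instead of reading bogus key positions. */
    static bool zunionStoreKeyPositions(int numkeys, int argc, std::vector<int> &out) {
        if (numkeys < 1 || numkeys > argc - 3) return false;
        out.clear();
        for (int i = 0; i < numkeys; i++) out.push_back(3 + i); /* source keys */
        out.push_back(1);                                       /* STORE target */
        return true;
    }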