diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9ca185e89..cb41880c9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,7 +7,7 @@ jobs: test-ubuntu-latest: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v2 - name: make run: | sudo apt-get update @@ -17,7 +17,7 @@ jobs: run: ./utils/gen-test-certs.sh - name: test-tls run: | - sudo apt-get -y install tcl8.5 tcl-tls + sudo apt-get -y install tcl tcl-tls ./runtest --clients 2 --verbose --tls - name: cluster-test run: | @@ -42,14 +42,14 @@ jobs: build-macos-latest: runs-on: macos-latest steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v2 - name: make run: make -j2 build-libc-malloc: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v2 - name: make run: | sudo apt-get update diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index 3c10236b4..6e4f88ef3 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -1,16 +1,21 @@ name: Daily on: + pull_request: + branches: + # any PR to a release branch. 
+ - '[0-9].[0-9]' schedule: - - cron: '0 7 * * *' + - cron: '0 0 * * *' jobs: - test-jemalloc: + test-ubuntu-jemalloc: runs-on: ubuntu-latest - timeout-minutes: 1200 + if: github.repository == 'redis/redis' + timeout-minutes: 14400 steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v2 - name: make run: | sudo apt-get -y install uuid-dev libcurl4-openssl-dev @@ -21,12 +26,17 @@ jobs: ./runtest --accurate --verbose - name: module api test run: ./runtest-moduleapi --verbose + - name: sentinel tests + run: ./runtest-sentinel + - name: cluster tests + run: ./runtest-cluster - test-libc-malloc: + test-ubuntu-libc-malloc: runs-on: ubuntu-latest - timeout-minutes: 1200 + if: github.repository == 'redis/redis' + timeout-minutes: 14400 steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v2 - name: make run: | sudo apt-get -y install uuid-dev libcurl4-openssl-dev @@ -37,11 +47,17 @@ jobs: ./runtest --accurate --verbose - name: module api test run: ./runtest-moduleapi --verbose + - name: sentinel tests + run: ./runtest-sentinel + - name: cluster tests + run: ./runtest-cluster test: runs-on: ubuntu-latest + if: github.repository == 'redis/redis' + timeout-minutes: 14400 steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v2 - name: make run: | sudo apt-get -y install uuid-dev libcurl4-openssl-dev @@ -59,7 +75,7 @@ jobs: test-ubuntu-arm: runs-on: [self-hosted, linux, arm] steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v2 - name: make run: | sudo apt-get -y install uuid-dev libcurl4-openssl-dev @@ -74,16 +90,91 @@ jobs: test-valgrind: runs-on: ubuntu-latest + if: github.repository == 'redis/redis' timeout-minutes: 14400 steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v2 - name: make run: | sudo apt-get -y install uuid-dev libcurl4-openssl-dev make valgrind - name: test run: | + sudo apt-get update sudo apt-get install tcl8.5 valgrind -y ./runtest --valgrind --verbose --clients 1 - name: module api test run: 
./runtest-moduleapi --valgrind --verbose --clients 1 + + test-centos7-jemalloc: + runs-on: ubuntu-latest + if: github.repository == 'redis/redis' + container: centos:7 + timeout-minutes: 14400 + steps: + - uses: actions/checkout@v2 + - name: make + run: | + yum -y install centos-release-scl + yum -y install devtoolset-7 + scl enable devtoolset-7 "make" + - name: test + run: | + yum -y install tcl + ./runtest --accurate --verbose + - name: module api test + run: ./runtest-moduleapi --verbose + - name: sentinel tests + run: ./runtest-sentinel + - name: cluster tests + run: ./runtest-cluster + + test-centos7-tls: + runs-on: ubuntu-latest + if: github.repository == 'redis/redis' + container: centos:7 + timeout-minutes: 14400 + steps: + - uses: actions/checkout@v2 + - name: make + run: | + yum -y install centos-release-scl epel-release + yum -y install devtoolset-7 openssl-devel openssl + scl enable devtoolset-7 "make BUILD_TLS=yes" + - name: test + run: | + yum -y install tcl tcltls + ./utils/gen-test-certs.sh + ./runtest --accurate --verbose --tls + ./runtest --accurate --verbose + - name: module api test + run: | + ./runtest-moduleapi --verbose --tls + ./runtest-moduleapi --verbose + - name: sentinel tests + run: | + ./runtest-sentinel --tls + ./runtest-sentinel + - name: cluster tests + run: | + ./runtest-cluster --tls + ./runtest-cluster + + test-macos-latest: + runs-on: macos-latest + if: github.repository == 'redis/redis' + timeout-minutes: 14400 + steps: + - uses: actions/checkout@v2 + - name: make + run: make + - name: test + run: | + ./runtest --accurate --verbose --no-latency + - name: module api test + run: ./runtest-moduleapi --verbose + - name: sentinel tests + run: ./runtest-sentinel + - name: cluster tests + run: ./runtest-cluster + diff --git a/.gitignore b/.gitignore index c169eca17..21f903288 100644 --- a/.gitignore +++ b/.gitignore @@ -48,9 +48,13 @@ src/nodes.conf deps/lua/src/lua deps/lua/src/luac deps/lua/src/liblua.a +tests/tls/* .make-* 
.prerequisites *.dSYM Makefile.dep .vscode/* .idea/* +.ccls +.ccls-cache/* +compile_commands.json diff --git a/00-RELEASENOTES b/00-RELEASENOTES index c6ee44246..bff270e77 100644 --- a/00-RELEASENOTES +++ b/00-RELEASENOTES @@ -11,6 +11,1139 @@ CRITICAL: There is a critical bug affecting MOST USERS. Upgrade ASAP. SECURITY: There are security fixes in the release. -------------------------------------------------------------------------------- +================================================================================ +Redis 6.0.10 Released Tue Jan 12 16:20:20 IST 2021 +================================================================================ + +Upgrade urgency MODERATE: several bugs with moderate impact are fixed, +Here is a comprehensive list of changes in this release compared to 6.0.9. + +Command behavior changes: +* SWAPDB invalidates WATCHed keys (#8239) +* SORT command behaves differently when used on a writable replica (#8283) +* EXISTS should not alter LRU (#8016) + In Redis 5.0 and 6.0 it would have touched the LRU/LFU of the key. +* OBJECT should not reveal logically expired keys (#8016) + Will now behave the same TYPE or any other non-DEBUG command. +* GEORADIUS[BYMEMBER] can fail with -OOM if Redis is over the memory limit (#8107) + +Other behavior changes: +* Sentinel: Fix missing updates to the config file after SENTINEL SET command (#8229) +* CONFIG REWRITE is atomic and safer, but requires write access to the config file's folder (#7824, #8051) + This change was already present in 6.0.9, but was missing from the release notes. + +Bug fixes with compatibility implications (bugs introduced in Redis 6.0): +* Fix RDB CRC64 checksum on big-endian systems (#8270) + If you're using big-endian please consider the compatibility implications with + RESTORE, replication and persistence. 
+* Fix wrong order of key/value in Lua's map response (#8266) + If your scripts use redis.setresp() or return a map (new in Redis 6.0), please + consider the implications. + +Bug fixes: +* Fix an issue where a forked process deletes the parent's pidfile (#8231) +* Fix crashes when enabling io-threads-do-reads (#8230) +* Fix a crash in redis-cli after executing cluster backup (#8267) +* Handle output buffer limits for module blocked clients (#8141) + Could result in a module sending reply to a blocked client to go beyond the limit. +* Fix setproctitle related crashes. (#8150, #8088) + Caused various crashes on startup, mainly on Apple M1 chips or under instrumentation. +* Backup/restore cluster mode keys to slots map for repl-diskless-load=swapdb (#8108) + In cluster mode with repl-diskless-load, when loading failed, slot map wouldn't + have been restored. +* Fix oom-score-adj-values range, and bug when used in config file (#8046) + Enabling setting this in the config file in a line after enabling it, would + have been buggy. +* Reset average ttl when empty databases (#8106) + Just causing misleading metric in INFO +* Disable rehash when Redis has child process (#8007) + This could have caused excessive CoW during BGSAVE, replication or AOFRW. +* Further improved ACL algorithm for picking categories (#7966) + Output of ACL GETUSER is now more similar to the one provided by ACL SETUSER. +* Fix bug with module GIL being released prematurely (#8061) + Could in theory (and rarely) cause multi-threaded modules to corrupt memory. +* Reduce effect of client tracking causing feedback loop in key eviction (#8100) +* Fix cluster access to unaligned memory (SIGBUS on old ARM) (#7958) +* Fix saving of strings larger than 2GB into RDB files (#8306) + +Additional improvements: +* Avoid wasteful transient memory allocation in certain cases (#8286, #5954) + +Platform / toolchain support related improvements: +* Fix crash log registers output on ARM. 
(#8020) +* Add a check for an ARM64 Linux kernel bug (#8224) + Due to the potential severity of this issue, Redis will print log warning on startup. +* Raspberry build fix. (#8095) + +New configuration options: +* oom-score-adj-values config can now take absolute values (besides relative ones) (#8046) + +Module related fixes: +* Moved RMAPI_FUNC_SUPPORTED so that it's usable (#8037) +* Improve timer accuracy (#7987) +* Allow '\0' inside of result of RM_CreateStringPrintf (#6260) + +================================================================================ +Redis 6.0.9 Released Mon Oct 26 10:37:47 IST 2020 +================================================================================ + +Upgrade urgency: SECURITY if you use an affected platform (see below). + Otherwise the upgrade urgency is MODERATE. + +This release fixes a potential heap overflow when using a heap allocator other +than jemalloc or glibc's malloc. See: +https://github.com/redis/redis/pull/7963 + +Other fixes in this release: + +New: +* Memory reporting of clients argv (#7874) +* Add redis-cli control on raw format line delimiter (#7841) +* Add redis-cli support for rediss:// -u prefix (#7900) +* Get rss size support for NetBSD and DragonFlyBSD + +Behavior changes: +* WATCH no longer ignores keys which have expired for MULTI/EXEC (#7920) +* Correct OBJECT ENCODING response for stream type (#7797) +* Allow blocked XREAD on a cluster replica (#7881) +* TLS: Do not require CA config if not used (#7862) + +Bug fixes: +* INFO report real peak memory (before eviction) (#7894) +* Allow requirepass config to clear the password (#7899) +* Fix config rewrite file handling to make it really atomic (#7824) +* Fix excessive categories being displayed from ACLs (#7889) +* Add fsync in replica when full RDB payload was received (#7839) +* Don't write replies to socket when output buffer limit reached (#7202) +* Fix redis-check-rdb support for modules aux data (#7826) +* Other smaller bug fixes + +Modules 
API: +* Add APIs for version and compatibility checks (#7865) +* Add RM_GetClientCertificate (#7866) +* Add RM_GetDetachedThreadSafeContext (#7886) +* Add RM_GetCommandKeys (#7884) +* Add Swapdb Module Event (#7804) +* RM_GetContextFlags provides indication of being in a fork child (#7783) +* RM_GetContextFlags document missing flags: MULTI_DIRTY, IS_CHILD (#7821) +* Expose real client on connection events (#7867) +* Minor improvements to module blocked on keys (#7903) + +Full list of commits: + +Yossi Gottlieb in commit ce0d74d8f: + Fix wrong zmalloc_size() assumption. (#7963) + 1 file changed, 3 deletions(-) + +Oran Agra in commit d3ef26822: + Attempt to fix sporadic test failures due to wait_for_log_messages (#7955) + 1 file changed, 2 insertions(+) + +David CARLIER in commit 76993a0d4: + cpu affinity: DragonFlyBSD support (#7956) + 2 files changed, 9 insertions(+), 2 deletions(-) + +Zach Fewtrell in commit b23cdc14a: + fix invalid 'failover' identifier in cluster slave selection test (#7942) + 1 file changed, 1 insertion(+), 1 deletion(-) + +WuYunlong in commit 99a4cb401: + Update rdb_last_bgsave_time_sec in INFO on diskless replication (#7917) + 1 file changed, 11 insertions(+), 14 deletions(-) + +Wen Hui in commit 258287c35: + do not add save parameter during config rewrite in sentinel mode (#7945) + 1 file changed, 6 insertions(+) + +Qu Chen in commit 6134279e2: + WATCH no longer ignores keys which have expired for MULTI/EXEC. (#7920) + 2 files changed, 3 insertions(+), 3 deletions(-) + +Oran Agra in commit d15ec67c6: + improve verbose logging on failed test. print log file lines (#7938) + 1 file changed, 4 insertions(+) + +Yossi Gottlieb in commit 8a2e6d24f: + Add a --no-latency tests flag. 
(#7939) + 5 files changed, 23 insertions(+), 9 deletions(-) + +filipe oliveira in commit 0a1737dc5: + Fixed bug concerning redis-benchmark non clustered benchmark forcing always the same hash tag {tag} (#7931) + 1 file changed, 31 insertions(+), 24 deletions(-) + +Oran Agra in commit 6d9b3df71: + fix 32bit build warnings (#7926) + 2 files changed, 3 insertions(+), 3 deletions(-) + +Wen Hui in commit ed6f7a55e: + fix double fclose in aofrewrite (#7919) + 1 file changed, 6 insertions(+), 5 deletions(-) + +Oran Agra in commit 331d73c92: + INFO report peak memory before eviction (#7894) + 1 file changed, 11 insertions(+), 1 deletion(-) + +Yossi Gottlieb in commit e88e13528: + Fix tests failure on busybox systems. (#7916) + 2 files changed, 2 insertions(+), 2 deletions(-) + +Oran Agra in commit b7f53738e: + Allow requirepass config to clear the password (#7899) + 1 file changed, 18 insertions(+), 8 deletions(-) + +Wang Yuan in commit 2ecb28b68: + Remove temporary aof and rdb files in a background thread (#7905) + 2 files changed, 3 insertions(+), 3 deletions(-) + +guybe7 in commit 7bc605e6b: + Minor improvements to module blocked on keys (#7903) + 3 files changed, 15 insertions(+), 9 deletions(-) + +Andreas Lind in commit 1b484608d: + Support redis-cli -u rediss://... (#7900) + 1 file changed, 9 insertions(+), 1 deletion(-) + +Yossi Gottlieb in commit 95095d680: + Modules: fix RM_GetCommandKeys API. (#7901) + 3 files changed, 4 insertions(+), 7 deletions(-) + +Meir Shpilraien (Spielrein) in commit cd3ae2f2c: + Add Module API for version and compatibility checks (#7865) + 9 files changed, 180 insertions(+), 3 deletions(-) + +Yossi Gottlieb in commit 1d723f734: + Module API: Add RM_GetClientCertificate(). (#7866) + 6 files changed, 88 insertions(+) + +Yossi Gottlieb in commit d72172752: + Modules: Add RM_GetDetachedThreadSafeContext(). (#7886) + 4 files changed, 52 insertions(+), 2 deletions(-) + +Yossi Gottlieb in commit e4f9aff19: + Modules: add RM_GetCommandKeys(). 
+ 6 files changed, 238 insertions(+), 1 deletion(-) + +Yossi Gottlieb in commit 6682b913e: + Introduce getKeysResult for getKeysFromCommand. + 7 files changed, 170 insertions(+), 121 deletions(-) + +Madelyn Olson in commit 9db65919c: + Fixed excessive categories being displayed from acls (#7889) + 2 files changed, 29 insertions(+), 2 deletions(-) + +Oran Agra in commit f34c50cf6: + Add some additional signal info to the crash log (#7891) + 1 file changed, 4 insertions(+), 1 deletion(-) + +Oran Agra in commit 300bb4701: + Allow blocked XREAD on a cluster replica (#7881) + 3 files changed, 43 insertions(+) + +Oran Agra in commit bc5cf0f1a: + memory reporting of clients argv (#7874) + 5 files changed, 55 insertions(+), 5 deletions(-) + +DvirDukhan in commit 13d2e6a57: + redis-cli add control on raw format line delimiter (#7841) + 1 file changed, 8 insertions(+), 6 deletions(-) + +Oran Agra in commit d54e25620: + Include internal sds fragmentation in MEMORY reporting (#7864) + 2 files changed, 7 insertions(+), 7 deletions(-) + +Oran Agra in commit ac2c2b74e: + Fix crash in script timeout during AOF loading (#7870) + 2 files changed, 47 insertions(+), 4 deletions(-) + +Rafi Einstein in commit 00d2082e7: + Makefile: enable program suffixes via PROG_SUFFIX (#7868) + 2 files changed, 10 insertions(+), 6 deletions(-) + +nitaicaro in commit d2c2c26e7: + Fixed Tracking test “The other connection is able to get invalidations” (#7871) + 1 file changed, 3 insertions(+), 2 deletions(-) + +Yossi Gottlieb in commit 2c172556f: + Modules: expose real client on conn events. + 1 file changed, 11 insertions(+), 2 deletions(-) + +Yossi Gottlieb in commit 2972d0c1f: + Module API: Fail ineffective auth calls. + 1 file changed, 5 insertions(+) + +Yossi Gottlieb in commit aeb2a3b6a: + TLS: Do not require CA config if not used. 
(#7862) + 1 file changed, 5 insertions(+), 3 deletions(-) + +Oran Agra in commit d8e64aeb8: + warning: comparison between signed and unsigned integer in 32bit build (#7838) + 1 file changed, 2 insertions(+), 2 deletions(-) + +David CARLIER in commit 151209982: + Add support for Haiku OS (#7435) + 3 files changed, 16 insertions(+) + +Gavrie Philipson in commit b1d3e169f: + Fix typo in module API docs (#7861) + 1 file changed, 2 insertions(+), 2 deletions(-) + +David CARLIER in commit 08e3b8d13: + getting rss size implementation for netbsd (#7293) + 1 file changed, 20 insertions(+) + +Oran Agra in commit 0377a889b: + Fix new obuf-limits tests to work with TLS (#7848) + 2 files changed, 29 insertions(+), 13 deletions(-) + +caozb in commit a057ad9b1: + ignore slaveof no one in redis.conf (#7842) + 1 file changed, 10 insertions(+), 1 deletion(-) + +Wang Yuan in commit 87ecee645: + Don't support Gopher if enable io threads to read queries (#7851) + 2 files changed, 8 insertions(+), 5 deletions(-) + +Wang Yuan in commit b92902236: + Set 'loading' and 'shutdown_asap' to volatile sig_atomic_t type (#7845) + 1 file changed, 2 insertions(+), 2 deletions(-) + +Uri Shachar in commit ee0875a02: + Fix config rewrite file handling to make it really atomic (#7824) + 1 file changed, 49 insertions(+), 47 deletions(-) + +WuYunlong in commit d577519e1: + Add fsync to readSyncBulkPayload(). 
(#7839) + 1 file changed, 11 insertions(+) + +Wen Hui in commit 104e0ea3e: + rdb.c: handle fclose error case differently to avoid double fclose (#7307) + 1 file changed, 7 insertions(+), 6 deletions(-) + +Wang Yuan in commit 0eb015ac6: + Don't write replies if close the client ASAP (#7202) + 7 files changed, 144 insertions(+), 2 deletions(-) + +Guy Korland in commit 08a03e32c: + Fix RedisModule_HashGet examples (#6697) + 1 file changed, 4 insertions(+), 4 deletions(-) + +Oran Agra in commit 09551645d: + fix recently broken TLS build error, and add coverage for CI (#7833) + 2 files changed, 4 insertions(+), 3 deletions(-) + +David CARLIER in commit c545ba5d0: + Further NetBSD update and build fixes. (#7831) + 3 files changed, 72 insertions(+), 3 deletions(-) + +WuYunlong in commit ec9050053: + Fix redundancy use of semicolon in do-while macros in ziplist.c. (#7832) + 1 file changed, 3 insertions(+), 3 deletions(-) + +yixiang in commit 27a4d1314: + Fix connGetSocketError usage (#7811) + 2 files changed, 6 insertions(+), 4 deletions(-) + +Oran Agra in commit 30795dcae: + RM_GetContextFlags - document missing flags (#7821) + 1 file changed, 6 insertions(+) + +Yossi Gottlieb in commit 14a12849f: + Fix occasional hangs on replication reconnection. (#7830) + 2 files changed, 14 insertions(+), 3 deletions(-) + +Ariel Shtul in commit d5a1b06dc: + Fix redis-check-rdb support for modules aux data (#7826) + 3 files changed, 21 insertions(+), 1 deletion(-) + +Wen Hui in commit 39f793693: + refactor rewriteStreamObject code for adding missing streamIteratorStop call (#7829) + 1 file changed, 36 insertions(+), 18 deletions(-) + +WuYunlong in commit faad29bfb: + Make IO threads killable so that they can be canceled at any time. + 1 file changed, 1 insertion(+) + +WuYunlong in commit b3f1b5830: + Make main thread killable so that it can be canceled at any time. Refine comment of makeThreadKillable(). 
+ 3 files changed, 11 insertions(+), 4 deletions(-) + +Oran Agra in commit 0f43d1f55: + RM_GetContextFlags provides indication that we're in a fork child (#7783) + 8 files changed, 28 insertions(+), 18 deletions(-) + +Wen Hui in commit a55ea9cdf: + Add Swapdb Module Event (#7804) + 5 files changed, 52 insertions(+) + +Daniel Dai in commit 1d8f72bef: + fix make warnings in debug.c MacOS (#7805) + 2 files changed, 3 insertions(+), 2 deletions(-) + +David CARLIER in commit 556953d93: + debug.c: NetBSD build warning fix. (#7810) + 1 file changed, 4 insertions(+), 3 deletions(-) + +Wang Yuan in commit d02435b66: + Remove tmp rdb file in background thread (#7762) + 6 files changed, 82 insertions(+), 8 deletions(-) + +Oran Agra in commit 1bd7bfdc0: + Add printf attribute and fix warnings and a minor bug (#7803) + 2 files changed, 12 insertions(+), 4 deletions(-) + +WuYunlong in commit d25147b4c: + bio: doFastMemoryTest should try to kill io threads as well. + 3 files changed, 19 insertions(+) + +WuYunlong in commit 4489ba081: + bio: fix doFastMemoryTest. + 4 files changed, 25 insertions(+), 3 deletions(-) + +Wen Hui in commit cf85def67: + correct OBJECT ENCODING response for stream type (#7797) + 1 file changed, 1 insertion(+) + +WuYunlong in commit cf5bcf892: + Clarify help text of tcl scripts. 
(#7798) + 1 file changed, 1 insertion(+) + +Mykhailo Pylyp in commit f72665c65: + Recalculate hardcoded variables from $::instances_count in sentinel tests (#7561) + 3 files changed, 15 insertions(+), 13 deletions(-) + +Oran Agra in commit c67b19e7a: + Fix failing valgrind installation in github actions (#7792) + 1 file changed, 1 insertion(+) + +Oran Agra in commit 92763fd2a: + fix broken PEXPIREAT test (#7791) + 1 file changed, 10 insertions(+), 6 deletions(-) + +Wang Yuan in commit f5b4c0ccb: + Remove dead global variable 'lru_clock' (#7782) + 1 file changed, 1 deletion(-) + +Oran Agra in commit 82d431fd6: + Squash merging 125 typo/grammar/comment/doc PRs (#7773) + 80 files changed, 436 insertions(+), 416 deletions(-) + +================================================================================ +Redis 6.0.8 Released Wed Sep 09 23:34:17 IDT 2020 +================================================================================ + +Upgrade urgency HIGH: Anyone who's using Redis 6.0.7 with Sentinel or +CONFIG REWRITE command is affected and should upgrade ASAP, see #7760. 
+ +Bug fixes: + +* CONFIG REWRITE after setting oom-score-adj-values either via CONFIG SET or + loading it from a config file, will generate a corrupt config file that will + cause Redis to fail to start +* Fix issue with redis-cli --pipe on MacOS +* Fix RESP3 response for HKEYS/HVALS on non-existing key +* Various small bug fixes + +New features / Changes: + +* Remove THP warning when set to madvise +* Allow EXEC with read commands on readonly replica in cluster +* Add masters/replicas options to redis-cli --cluster call command + +Module API: + +* Add RedisModule_ThreadSafeContextTryLock + +Full list of commits: + +Oran Agra in commit cdabf696a: + Fix RESP3 response for HKEYS/HVALS on non-existing key + 1 file changed, 3 insertions(+), 1 deletion(-) + +Oran Agra in commit ec633c716: + Fix leak in new blockedclient module API test + 1 file changed, 3 insertions(+) + +Yossi Gottlieb in commit 6bac07c5c: + Tests: fix oom-score-adj false positives. (#7772) + 1 file changed, 1 insertion(+), 1 deletion(-) + +杨博东 in commit 6043dc614: + Tests: Add aclfile load and save tests (#7765) + 2 files changed, 41 insertions(+) + +Roi Lipman in commit c0b5f9bf0: + RM_ThreadSafeContextTryLock a non-blocking method for acquiring GIL (#7738) + 7 files changed, 122 insertions(+), 1 deletion(-) + +Yossi Gottlieb in commit 5780a1599: + Tests: validate CONFIG REWRITE for all params. (#7764) + 6 files changed, 43 insertions(+), 6 deletions(-) + +Oran Agra in commit e3c14b25d: + Change THP warning to use madvise rather than never (#7771) + 1 file changed, 1 insertion(+), 1 deletion(-) + +Itamar Haber in commit 28929917b: + Documents RM_Call's fmt (#5448) + 1 file changed, 25 insertions(+) + +Jan-Erik Rediger in commit 9146402c2: + Check that THP is not set to always (madvise is ok) (#4001) + 1 file changed, 1 insertion(+), 1 deletion(-) + +Yossi Gottlieb in commit d05089429: + Tests: clean up stale .cli files. 
(#7768) + 1 file changed, 2 insertions(+) + +Eran Liberty in commit 8861c1bae: + Allow exec with read commands on readonly replica in cluster (#7766) + 3 files changed, 59 insertions(+), 3 deletions(-) + +Yossi Gottlieb in commit 2cf2ff2f6: + Fix CONFIG REWRITE of oom-score-adj-values. (#7761) + 1 file changed, 2 insertions(+), 1 deletion(-) + +Oran Agra in commit 1386c80f7: + handle cur_test for nested tests + 1 file changed, 3 insertions(+) + +Oran Agra in commit c7d4945f0: + Add daily CI for MacOS (#7759) + 1 file changed, 18 insertions(+) + +bodong.ybd in commit 32548264c: + Tests: Some fixes for macOS + 3 files changed, 26 insertions(+), 11 deletions(-) + +Oran Agra in commit 1e17f9812: + Fix cluster consistency-check test (#7754) + 1 file changed, 55 insertions(+), 29 deletions(-) + +Yossi Gottlieb in commit f4ecdf86a: + Tests: fix unmonitored servers. (#7756) + 1 file changed, 5 insertions(+) + +Oran Agra in commit 9f020050d: + fix broken cluster/sentinel tests by recent commit (#7752) + 1 file changed, 1 insertion(+), 1 deletion(-) + +Oran Agra in commit fdbabb496: + Improve valgrind support for cluster tests (#7725) + 3 files changed, 83 insertions(+), 23 deletions(-) + +Oran Agra in commit 35a6a0bbc: + test infra - add durable mode to work around test suite crashing + 3 files changed, 35 insertions(+), 3 deletions(-) + +Oran Agra in commit e3136b13f: + test infra - wait_done_loading + 2 files changed, 16 insertions(+), 36 deletions(-) + +Oran Agra in commit 83c75dbd9: + test infra - flushall between tests in external mode + 1 file changed, 1 insertion(+) + +Oran Agra in commit 265f5d3cf: + test infra - improve test skipping ability + 3 files changed, 91 insertions(+), 36 deletions(-) + +Oran Agra in commit fcd3a9908: + test infra - reduce disk space usage + 3 files changed, 33 insertions(+), 11 deletions(-) + +Oran Agra in commit b6ea4699f: + test infra - write test name to logfile + 3 files changed, 35 insertions(+) + +Yossi Gottlieb in commit 4a4b07fc6: 
+ redis-cli: fix writeConn() buffer handling. (#7749) + 1 file changed, 37 insertions(+), 6 deletions(-) + +Oran Agra in commit f2d08de2e: + Print server startup messages after daemonization (#7743) + 1 file changed, 4 insertions(+), 4 deletions(-) + +Thandayuthapani in commit 77541d555: + Add masters/replicas options to redis-cli --cluster call command (#6491) + 1 file changed, 13 insertions(+), 2 deletions(-) + +Oran Agra in commit 91d13a854: + fix README about BUILD_WITH_SYSTEMD usage (#7739) + 1 file changed, 1 insertion(+), 1 deletion(-) + +Yossi Gottlieb in commit 88d03d965: + Fix double-make issue with make && make install. (#7734) + 1 file changed, 2 insertions(+) + +================================================================================ +Redis 6.0.7 Released Fri Aug 28 11:05:09 IDT 2020 +================================================================================ + +Upgrade urgency MODERATE: several bugs with moderate impact are fixed, +Specifically the first two listed below which cause protocol errors for clients. 
+ +Bug fixes: + +* CONFIG SET could hung the client when arrives during RDB/ROF loading (When + processed after another command that was also rejected with -LOADING error) +* LPOS command when RANK is greater than matches responded wiht broken protocol + (negative multi-bulk count) +* UNLINK / Lazyfree for stream type key would have never do async freeing +* PERSIST should invalidate WATCH (Like EXPIRE does) +* EXEC with only read commands could have be rejected when OOM +* TLS: relax verification on CONFIG SET (Don't error if some configs are set + and tls isn't enabled) +* TLS: support cluster/replication without tls-port +* Systemd startup after network is online +* Redis-benchmark improvements +* Various small bug fixes + +New features: + +* Add oom-score-adj configuration option to control Linux OOM killer +* Show IO threads statistics and status in INFO output +* Add optional tls verification mode (see tls-auth-clients) + +Module API: + +* Add RedisModule_HoldString +* Add loaded keyspace event +* Fix RedisModuleEvent_LoadingProgress +* Fix RedisModuleEvent_MasterLinkChange hook missing on successful psync +* Fix missing RM_CLIENTINFO_FLAG_SSL +* Refactor redismodule.h for use with -fno-common / extern + +Full list of commits: + +Oran Agra in commit c26394e4f: + Reduce the probability of failure when start redis in runtest-cluster #7554 (#7635) + 1 file changed, 23 insertions(+), 5 deletions(-) + +Leoš Literák in commit 745d5e802: + Update README.md with instructions how to build with systemd support (#7730) + 1 file changed, 5 insertions(+) + +Yossi Gottlieb in commit 03f1d208a: + Fix oom-score-adj on older distros. (#7724) + 1 file changed, 2 insertions(+), 2 deletions(-) + +Yossi Gottlieb in commit 941174d9c: + Backport Lua 5.2.2 stack overflow fix. 
(#7733) + 1 file changed, 1 insertion(+), 1 deletion(-) + +Wang Yuan in commit c897dba14: + Fix wrong format specifiers of 'sdscatfmt' for the INFO command (#7706) + 1 file changed, 1 insertion(+), 1 deletion(-) + +Wen Hui in commit 5e3fab5e7: + fix make warnings (#7692) + 1 file changed, 4 insertions(+), 3 deletions(-) + +Nathan Scott in commit a2b09c13f: + Annotate module API functions in redismodule.h for use with -fno-common (#6900) + 1 file changed, 265 insertions(+), 241 deletions(-) + +Yossi Gottlieb in commit bf244273f: + Add oom-score-adj configuration option to control Linux OOM killer. (#1690) + 8 files changed, 306 insertions(+), 1 deletion(-) + +Meir Shpilraien (Spielrein) in commit b5a6ab98f: + see #7544, added RedisModule_HoldString api. (#7577) + 4 files changed, 83 insertions(+), 8 deletions(-) + +ShooterIT in commit ff04cf62b: + [Redis-benchmark] Remove zrem test, add zpopmin test + 1 file changed, 5 insertions(+), 5 deletions(-) + +ShooterIT in commit 0f3260f31: + [Redis-benchmark] Support zset type + 1 file changed, 16 insertions(+) + +Arun Ranganathan in commit 45d0b94fc: + Show threading configuration in INFO output (#7446) + 3 files changed, 46 insertions(+), 14 deletions(-) + +Meir Shpilraien (Spielrein) in commit a22f61e12: + This PR introduces a new loaded keyspace event (#7536) + 8 files changed, 135 insertions(+), 4 deletions(-) + +Oran Agra in commit 1c9ca1030: + Fix rejectCommand trims newline in shared error objects, hung clients (#7714) + 4 files changed, 42 insertions(+), 23 deletions(-) + +valentinogeron in commit 217471795: + EXEC with only read commands should not be rejected when OOM (#7696) + 2 files changed, 51 insertions(+), 8 deletions(-) + +Itamar Haber in commit 6e6c47d16: + Expands lazyfree's effort estimate to include Streams (#5794) + 1 file changed, 24 insertions(+) + +Yossi Gottlieb in commit da6813623: + Add language servers stuff, test/tls to gitignore. 
(#7698) + 1 file changed, 4 insertions(+) + +Valentino Geron in commit de7fb126e: + Assert that setDeferredAggregateLen isn't called with negative value + 1 file changed, 1 insertion(+) + +Valentino Geron in commit 6cf27f25f: + Fix LPOS command when RANK is greater than matches + 2 files changed, 9 insertions(+), 2 deletions(-) + +Yossi Gottlieb in commit 9bba54ace: + Tests: fix redis-cli with remote hosts. (#7693) + 3 files changed, 5 insertions(+), 5 deletions(-) + +huangzhw in commit 0fec2cb81: + RedisModuleEvent_LoadingProgress always at 100% progress (#7685) + 1 file changed, 2 insertions(+), 2 deletions(-) + +guybe7 in commit 931e19aa6: + Modules: Invalidate saved_oparray after use (#7688) + 1 file changed, 2 insertions(+) + +杨博东 in commit 6f2065570: + Fix flock cluster config may cause failure to restart after kill -9 (#7674) + 4 files changed, 31 insertions(+), 7 deletions(-) + +Raghav Muddur in commit 200149a2a: + Update clusterMsgDataPublish to clusterMsgModule (#7682) + 1 file changed, 1 insertion(+), 1 deletion(-) + +Madelyn Olson in commit 72daa1b4e: + Fixed hset error since it's shared with hmset (#7678) + 1 file changed, 1 insertion(+), 1 deletion(-) + +guybe7 in commit 3bf9ac994: + PERSIST should signalModifiedKey (Like EXPIRE does) (#7671) + 1 file changed, 1 insertion(+) + +Oran Agra in commit b37501684: + OOM Crash log include size of allocation attempt. (#7670) + 1 file changed, 2 insertions(+), 1 deletion(-) + +Wen Hui in commit 2136cb68f: + [module] using predefined REDISMODULE_NO_EXPIRE in RM_GetExpire (#7669) + 1 file changed, 2 insertions(+), 1 deletion(-) + +Oran Agra in commit f56aee4bc: + Trim trailing spaces in error replies coming from rejectCommand (#7668) + 1 file changed, 5 insertions(+), 1 deletion(-) + +Yossi Gottlieb in commit 012d7506a: + Module API: fix missing RM_CLIENTINFO_FLAG_SSL. (#7666) + 6 files changed, 82 insertions(+), 1 deletion(-) + +Yossi Gottlieb in commit a0adbc857: + TLS: relax verification on CONFIG SET. 
(#7665) + 2 files changed, 24 insertions(+), 7 deletions(-) + +Madelyn Olson in commit 2ef29715b: + Fixed timer warning (#5953) + 1 file changed, 1 insertion(+), 1 deletion(-) + +Wagner Francisco Mezaroba in commit b76f171f5: + allow --pattern to be used along with --bigkeys (#3586) + 1 file changed, 9 insertions(+), 2 deletions(-) + +zhaozhao.zz in commit cc7b57765: + redis-benchmark: fix wrong random key for hset (#4895) + 1 file changed, 1 insertion(+), 1 deletion(-) + +zhaozhao.zz in commit 479c1ba77: + CLIENT_MASTER should ignore server.proto_max_bulk_len + 1 file changed, 2 insertions(+), 1 deletion(-) + +zhaozhao.zz in commit f61ce8a52: + config: proto-max-bulk-len must be 1mb or greater + 2 files changed, 2 insertions(+), 2 deletions(-) + +zhaozhao.zz in commit 0350f597a: + using proto-max-bulk-len in checkStringLength for SETRANGE and APPEND + 1 file changed, 2 insertions(+), 2 deletions(-) + +YoongHM in commit eea63548d: + Start redis after network is online (#7639) + 1 file changed, 2 insertions(+) + +Yossi Gottlieb in commit aef6d74fb: + Run daily workflow on main repo only (no forks). 
(#7646) + 1 file changed, 7 insertions(+) + +WuYunlong in commit 917b4d241: + see #7250, fix signature of RedisModule_DeauthenticateAndCloseClient (#7645) + 1 file changed, 1 insertion(+), 1 deletion(-) + +Wang Yuan in commit efab7fd54: + Print error info if failed opening config file (#6943) + 1 file changed, 2 insertions(+), 1 deletion(-) + +Wen Hui in commit 8c4468bcf: + fix memory leak in ACLLoadFromFile error handling (#7623) + 1 file changed, 1 insertion(+) + +Oran Agra in commit 89724e1d2: + redis-cli --cluster-yes - negate force flag for clarity + 1 file changed, 9 insertions(+), 9 deletions(-) + +Frank Meier in commit c813739af: + reintroduce REDISCLI_CLUSTER_YES env variable in redis-cli + 1 file changed, 6 insertions(+) + +Frank Meier in commit 7e3b86c18: + add force option to 'create-cluster create' script call (#7612) + 1 file changed, 6 insertions(+), 2 deletions(-) + +Oran Agra in commit 3f7fa4312: + fix new rdb test failing on timing issues (#7604) + 1 file changed, 2 insertions(+), 2 deletions(-) + +Yossi Gottlieb in commit 417976d7a: + Fix test-centos7-tls daily job. (#7598) + 1 file changed, 2 insertions(+), 2 deletions(-) + +Oran Agra in commit c41818c51: + module hook for master link up missing on successful psync (#7584) + 2 files changed, 22 insertions(+), 2 deletions(-) + +Yossi Gottlieb in commit 6ef3fc185: + CI: Add daily CentOS 7.x jobs. (#7582) + 1 file changed, 50 insertions(+), 4 deletions(-) + +WuYunlong in commit 002c37482: + Fix running single test 14-consistency-check.tcl (#7587) + 1 file changed, 1 insertion(+) + +Yossi Gottlieb in commit 66cbbb6ad: + Clarify RM_BlockClient() error condition. 
(#6093) + 1 file changed, 9 insertions(+) + +namtsui in commit 22aba2207: + Avoid an out-of-bounds read in the redis-sentinel (#7443) + 1 file changed, 2 insertions(+), 2 deletions(-) + +Wen Hui in commit af08887dc: + Add SignalModifiedKey hook in XGROUP CREATE with MKSTREAM option (#7562) + 1 file changed, 1 insertion(+) + +Wen Hui in commit a5e0a64b0: + fix leak in error handling of debug populate command (#7062) + 1 file changed, 3 insertions(+), 4 deletions(-) + +Yossi Gottlieb in commit cbfdfa231: + Fix TLS cluster tests. (#7578) + 1 file changed, 4 insertions(+), 1 deletion(-) + +Yossi Gottlieb in commit 6d5376d30: + TLS: Propagate and handle SSL_new() failures. (#7576) + 4 files changed, 48 insertions(+), 6 deletions(-) + +Oran Agra in commit a662cd577: + Fix failing tests due to issues with wait_for_log_message (#7572) + 3 files changed, 38 insertions(+), 34 deletions(-) + +Jiayuan Chen in commit 2786a4b5e: + Add optional tls verification (#7502) + 6 files changed, 40 insertions(+), 5 deletions(-) + +Oran Agra in commit 3ef3d3612: + Daily github action: run cluster and sentinel tests with tls (#7575) + 1 file changed, 2 insertions(+), 2 deletions(-) + +Yossi Gottlieb in commit f20f63322: + TLS: support cluster/replication without tls-port. + 2 files changed, 5 insertions(+), 4 deletions(-) + +grishaf in commit 3c9ae059d: + Fix prepareForShutdown function declaration (#7566) + 1 file changed, 1 insertion(+), 1 deletion(-) + +Oran Agra in commit 3f4803af9: + Stabilize bgsave test that sometimes fails with valgrind (#7559) + 1 file changed, 20 insertions(+), 2 deletions(-) + +Madelyn Olson in commit 1a3c51a1f: + Properly reset errno for rdbLoad (#7542) + 1 file changed, 1 insertion(+) + +Oran Agra in commit 92d80b13a: + testsuite may leave servers alive on error (#7549) + 1 file changed, 3 insertions(+) + +Yossi Gottlieb in commit 245582ba7: + Tests: drop TCL 8.6 dependency. 
(#7548) + 1 file changed, 27 insertions(+), 22 deletions(-) + +Oran Agra in commit f20e1ba2d: + Fixes to release scripts (#7547) + 2 files changed, 2 insertions(+), 2 deletions(-) + +Remi Collet in commit 60ff56993: + Fix deprecated tail syntax in tests (#7543) + 1 file changed, 1 insertion(+), 1 deletion(-) + +Wen Hui in commit 34e8541b9: + Add missing calls to raxStop (#7532) + 4 files changed, 63 insertions(+), 19 deletions(-) + +Wen Hui in commit 2f7bc5435: + add missing caching command in client help (#7399) + 1 file changed, 1 insertion(+) + +zhaozhao.zz in commit c15be9ffe: + replication: need handle -NOPERM error after send ping (#7538) + 1 file changed, 1 insertion(+) + +Scott Brenner in commit 1b29152c3: + GitHub Actions workflows - use latest version of actions/checkout (#7534) + 2 files changed, 10 insertions(+), 10 deletions(-) + +================================================================================ +Redis 6.0.6 Released Mon Jul 20 09:31:30 IDT 2020 +================================================================================ + +Upgrade urgency MODERATE: several bugs with moderate impact are fixed here. 
+ +The most important issues are listed here: + +* Fix crash when enabling CLIENT TRACKING with prefix +* EXEC always fails with EXECABORT and multi-state is cleared +* RESTORE ABSTTL won't store expired keys into the db +* redis-cli better handling of non-printable key names +* TLS: Ignore client cert when tls-auth-clients off +* Tracking: fix invalidation message on flush +* Notify systemd on Sentinel startup +* Fix crash on a misuse of STRALGO +* Few fixes in module API +* Fix a few rare leaks (STRALGO error misuse, Sentinel) +* Fix a possible invalid access in defrag of scripts (unlikely to cause real harm) + +New features: + +* LPOS command to search in a list +* Use user+pass for MIGRATE in redis-cli and redis-benchmark in cluster mode +* redis-cli support TLS for --pipe, --rdb and --replica options +* TLS: Session caching configuration support + +And this is the full list of commits: + +Itamar Haber in commit 50548cafc: + Adds SHA256SUM to redis-stable tarball upload + 1 file changed, 1 insertion(+) + +yoav-steinberg in commit 3a4c6684f: + Support passing stack allocated module strings to moduleCreateArgvFromUserFormat (#7528) + 1 file changed, 4 insertions(+), 1 deletion(-) + +Luke Palmer in commit 2fd0b2bd6: + Send null for invalidate on flush (#7469) + 1 file changed, 14 insertions(+), 10 deletions(-) + +dmurnane in commit c3c81e1a8: + Notify systemd on sentinel startup (#7168) + 1 file changed, 4 insertions(+) + +Developer-Ecosystem-Engineering in commit e2770f29b: + Add registers dump support for Apple silicon (#7453) + 1 file changed, 54 insertions(+), 2 deletions(-) + +Wen Hui in commit b068eae97: + correct error msg for num connections reaching maxclients in cluster mode (#7444) + 1 file changed, 2 insertions(+), 2 deletions(-) + +WuYunlong in commit e6169ae5c: + Fix command help for unexpected options (#7476) + 6 files changed, 20 insertions(+), 3 deletions(-) + +WuYunlong in commit abf08fc02: + Refactor RM_KeyType() by using macro. 
(#7486) + 1 file changed, 1 insertion(+), 1 deletion(-) + +Oran Agra in commit 11b83076a: + diskless master disconnect replicas when rdb child failed (#7518) + 1 file changed, 6 insertions(+), 5 deletions(-) + +Oran Agra in commit 8f27f2f7d: + redis-cli tests, fix valgrind timing issue (#7519) + 1 file changed, 1 insertion(+), 1 deletion(-) + +WuYunlong in commit 180b588e8: + Fix out of update help info in tcl tests. (#7516) + 1 file changed, 2 deletions(-) + +Qu Chen in commit 417c60bdc: + Replica always reports master's config epoch in CLUSTER NODES output. (#7235) + 1 file changed, 5 insertions(+), 1 deletion(-) + +Oran Agra in commit 72a242419: + RESTORE ABSTTL skip expired keys - leak (#7511) + 1 file changed, 1 insertion(+) + +Oran Agra in commit 2ca45239f: + fix recently added time sensitive tests failing with valgrind (#7512) + 2 files changed, 12 insertions(+), 6 deletions(-) + +Oran Agra in commit 123dc8b21: + runtest --stop pause stops before terminating the redis server (#7513) + 2 files changed, 8 insertions(+), 2 deletions(-) + +Oran Agra in commit a6added45: + update release scripts for new hosts, and CI to run more tests (#7480) + 5 files changed, 68 insertions(+), 26 deletions(-) + +jimgreen2013 in commit cf4869f9e: + fix description about ziplist, the code is ok (#6318) + 1 file changed, 2 insertions(+), 2 deletions(-) + +马永泽 in commit d548f219b: + fix benchmark in cluster mode fails to authenticate (#7488) + 1 file changed, 56 insertions(+), 40 deletions(-) + +Abhishek Soni in commit e58eb7b89: + fix: typo in CI job name (#7466) + 1 file changed, 1 insertion(+), 1 deletion(-) + +Jiayuan Chen in commit 6def10a2b: + Fix typo in deps README (#7500) + 1 file changed, 1 insertion(+), 1 deletion(-) + +WuYunlong in commit 8af61afef: + Add missing latency-monitor tcl test to test_helper.tcl. (#6782) + 1 file changed, 1 insertion(+) + +Yossi Gottlieb in commit a419f400e: + TLS: Session caching configuration support. 
(#7420) + 6 files changed, 56 insertions(+), 16 deletions(-) + +Yossi Gottlieb in commit 2e4bb2667: + TLS: Ignore client cert when tls-auth-clients off. (#7457) + 1 file changed, 1 insertion(+), 3 deletions(-) + +James Hilliard in commit f0b1aee9e: + Use pkg-config to properly detect libssl and libcrypto libraries (#7452) + 1 file changed, 15 insertions(+), 3 deletions(-) + +Yossi Gottlieb in commit e92b99564: + TLS: Add missing redis-cli options. (#7456) + 3 files changed, 166 insertions(+), 52 deletions(-) + +Oran Agra in commit 1f3db5bf5: + redis-cli --hotkeys fixed to handle non-printable key names + 1 file changed, 11 insertions(+), 5 deletions(-) + +Oran Agra in commit c3044f369: + redis-cli --bigkeys fixed to handle non-printable key names + 1 file changed, 24 insertions(+), 16 deletions(-) + +Oran Agra in commit b3f75527b: + RESTORE ABSTTL won't store expired keys into the db (#7472) + 4 files changed, 46 insertions(+), 16 deletions(-) + +huangzhw in commit 6f87fc92f: + defrag.c activeDefragSdsListAndDict when defrag sdsele, We can't use (#7492) + 1 file changed, 1 insertion(+), 1 deletion(-) + +Oran Agra in commit d8e6a3e5b: + skip a test that uses +inf on valgrind (#7440) + 1 file changed, 12 insertions(+), 9 deletions(-) + +Oran Agra in commit 28fd1a110: + stabilize tests that look for log lines (#7367) + 3 files changed, 33 insertions(+), 11 deletions(-) + +Oran Agra in commit a513b4ed9: + tests/valgrind: don't use debug restart (#7404) + 4 files changed, 114 insertions(+), 57 deletions(-) + +Oran Agra in commit 70e72fc1b: + change references to the github repo location (#7479) + 5 files changed, 7 insertions(+), 7 deletions(-) + +zhaozhao.zz in commit c63e533cc: + BITOP: propagate only when it really SET or DEL targetkey (#5783) + 1 file changed, 2 insertions(+), 1 deletion(-) + +antirez in commit 31040ff54: + Update comment to clarify change in #7398. 
+ 1 file changed, 4 insertions(+), 1 deletion(-) + +antirez in commit b605fe827: + LPOS: option FIRST renamed RANK. + 2 files changed, 19 insertions(+), 19 deletions(-) + +Dave Nielsen in commit 8deb24954: + updated copyright year + 1 file changed, 1 insertion(+), 1 deletion(-) + +Oran Agra in commit a61c2930c: + EXEC always fails with EXECABORT and multi-state is cleared + 6 files changed, 204 insertions(+), 91 deletions(-) + +antirez in commit 3c8041637: + Include cluster.h for getClusterConnectionsCount(). + 1 file changed, 1 insertion(+) + +antirez in commit 5be673ee8: + Fix BITFIELD i64 type handling, see #7417. + 1 file changed, 8 insertions(+), 6 deletions(-) + +antirez in commit 5f289df9b: + Clarify maxclients and cluster in conf. Remove myself too. + 2 files changed, 9 insertions(+), 1 deletion(-) + +hwware in commit 000f928d6: + fix memory leak in sentinel connection sharing + 1 file changed, 1 insertion(+) + +chenhui0212 in commit d9a3c0171: + Fix comments in function raxLowWalk of listpack.c + 1 file changed, 2 insertions(+), 2 deletions(-) + +Tomasz Poradowski in commit 7526e4506: + ensure SHUTDOWN_NOSAVE in Sentinel mode + 2 files changed, 9 insertions(+), 8 deletions(-) + +chenhui0212 in commit 6487cbc33: + fix comments in listpack.c + 1 file changed, 2 insertions(+), 2 deletions(-) + +antirez in commit 69b66bfca: + Use cluster connections too, to limit maxclients. + 3 files changed, 23 insertions(+), 8 deletions(-) + +antirez in commit 5a960a033: + Tracking: fix enableBcastTrackingForPrefix() invalid sdslen() call. + 1 file changed, 1 insertion(+), 1 deletion(-) + +root in commit 1c2e50de3: + cluster.c remove if of clusterSendFail in markNodeAsFailingIfNeeded + 1 file changed, 1 insertion(+), 1 deletion(-) + +meir@redislabs.com in commit 040efb697: + Fix RM_ScanKey module api not to return int encoded strings + 3 files changed, 24 insertions(+), 7 deletions(-) + +antirez in commit 1b8b7941d: + Fix LCS object type checking. Related to #7379. 
+ 1 file changed, 17 insertions(+), 10 deletions(-) + +hwware in commit 6b571b45a: + fix memory leak + 1 file changed, 11 insertions(+), 12 deletions(-) + +hwware in commit 674759062: + fix server crash in STRALGO command + 1 file changed, 7 insertions(+) + +Benjamin Sergeant in commit a05ffefdc: + Update redis-cli.c + 1 file changed, 19 insertions(+), 6 deletions(-) + +Jamie Scott in commit 870b63733: + minor fix + 1 file changed, 2 insertions(+), 3 deletions(-) + ================================================================================ Redis 6.0.5 Released Tue Jun 09 11:56:08 CEST 2020 ================================================================================ diff --git a/BUGS b/BUGS index a8e936892..7af259340 100644 --- a/BUGS +++ b/BUGS @@ -1 +1 @@ -Please check https://github.com/antirez/redis/issues +Please check https://github.com/redis/redis/issues diff --git a/CONTRIBUTING b/CONTRIBUTING deleted file mode 100644 index 000edbeaf..000000000 --- a/CONTRIBUTING +++ /dev/null @@ -1,50 +0,0 @@ -Note: by contributing code to the Redis project in any form, including sending -a pull request via Github, a code fragment or patch via private email or -public discussion groups, you agree to release your code under the terms -of the BSD license that you can find in the COPYING file included in the Redis -source distribution. You will include BSD license in the COPYING file within -each source file that you contribute. - -# IMPORTANT: HOW TO USE REDIS GITHUB ISSUES - -* Github issues SHOULD ONLY BE USED to report bugs, and for DETAILED feature - requests. Everything else belongs to the Redis Google Group: - - https://groups.google.com/forum/m/#!forum/Redis-db - - PLEASE DO NOT POST GENERAL QUESTIONS that are not about bugs or suspected - bugs in the Github issues system. We'll be very happy to help you and provide - all the support in the mailing list. 
- - There is also an active community of Redis users at Stack Overflow: - - http://stackoverflow.com/questions/tagged/redis - -# How to provide a patch for a new feature - -1. If it is a major feature or a semantical change, please don't start coding -straight away: if your feature is not a conceptual fit you'll lose a lot of -time writing the code without any reason. Start by posting in the mailing list -and creating an issue at Github with the description of, exactly, what you want -to accomplish and why. Use cases are important for features to be accepted. -Here you'll see if there is consensus about your idea. - -2. If in step 1 you get an acknowledgment from the project leaders, use the - following procedure to submit a patch: - - a. Fork Redis on github ( http://help.github.com/fork-a-repo/ ) - b. Create a topic branch (git checkout -b my_branch) - c. Push to your branch (git push origin my_branch) - d. Initiate a pull request on github ( https://help.github.com/articles/creating-a-pull-request/ ) - e. Done :) - -3. Keep in mind that we are very overloaded, so issues and PRs sometimes wait -for a *very* long time. However this is not lack of interest, as the project -gets more and more users, we find ourselves in a constant need to prioritize -certain issues/PRs over others. If you think your issue/PR is very important -try to popularize it, have other users commenting and sharing their point of -view and so forth. This helps. - -4. For minor fixes just open a pull request on Github. - -Thanks! diff --git a/COPYING b/COPYING index 3a9a7a66f..e1e5dd654 100644 --- a/COPYING +++ b/COPYING @@ -1,5 +1,5 @@ -Copyright (c) 2006-2015, Salvatore Sanfilippo -Copyright (C) 2019, John Sully +Copyright (c) 2006-2020, Salvatore Sanfilippo +Copyright (C) 2019-2020, John Sully All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/README.md b/README.md index fe0030dff..1decad4b0 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,8 @@ ##### Have feedback? Take our quick survey: https://www.surveymonkey.com/r/Y9XNS93 +##### KeyDB is Hiring! We are currently building out our dev team. If you are interested please see the posting here: https://keydb.dev/careers.html + What is KeyDB? -------------- @@ -15,11 +17,13 @@ KeyDB is a high performance fork of Redis with a focus on multithreading, memory KeyDB maintains full compatibility with the Redis protocol, modules, and scripts. This includes the atomicity guarantees for scripts and transactions. Because KeyDB keeps in sync with Redis development KeyDB is a superset of Redis functionality, making KeyDB a drop in replacement for existing Redis deployments. -On the same hardware KeyDB can perform twice as many queries per second as Redis, with 60% lower latency. Active-Replication simplifies hot-spare failover allowing you to easily distribute writes over replicas and use simple TCP based load balancing/failover. KeyDB's higher performance allows you to do more on less hardware which reduces operation costs and complexity. +On the same hardware KeyDB can achieve significantly higher throughput than Redis. Active-Replication simplifies hot-spare failover allowing you to easily distribute writes over replicas and use simple TCP based load balancing/failover. KeyDB's higher performance allows you to do more on less hardware which reduces operation costs and complexity. - +The chart below compares several KeyDB and Redis setups, including the latest Redis6 io-threads option, and TLS benchmarks. 
-See the full benchmark results and setup information here: https://docs.keydb.dev/blog/2019/10/07/blog-post/ + + +See the full benchmark results and setup information here: https://docs.keydb.dev/blog/2020/09/29/blog-post/ Why fork Redis? --------------- @@ -82,6 +86,8 @@ Building KeyDB KeyDB can be compiled and is tested for use on Linux. KeyDB currently relies on SO_REUSEPORT's load balancing behavior which is available only in Linux. When we support marshalling connections across threads we plan to support other operating systems such as FreeBSD. +More on CentOS/Archlinux/Alpine/Debian/Ubuntu dependencies and builds can be found here: https://docs.keydb.dev/docs/build/ + Install dependencies: % sudo apt install build-essential nasm autotools-dev autoconf libjemalloc-dev tcl tcl-dev uuid-dev libcurl4-openssl-dev @@ -95,9 +101,14 @@ libssl-dev on Debian/Ubuntu) and run: % make BUILD_TLS=yes -You can enable flash support with: +To build with systemd support, you'll need systemd development libraries (such +as libsystemd-dev on Debian/Ubuntu or systemd-devel on CentOS) and run: - % make MALLOC=memkind + % make USE_SYSTEMD=yes + +To append a suffix to KeyDB program names, use: + + % make PROG_SUFFIX="-alt" ***Note that the following dependencies may be needed: % sudo apt-get install autoconf autotools-dev libnuma-dev libtool @@ -112,7 +123,7 @@ installed): Fixing build problems with dependencies or cached build options --------- -KeyDB has some dependencies which are included into the `deps` directory. +KeyDB has some dependencies which are included in the `deps` directory. `make` does not automatically rebuild dependencies even if something in the source code of dependencies changes. @@ -139,7 +150,7 @@ with a 64 bit target, or the other way around, you need to perform a In case of build errors when trying to build a 32 bit binary of KeyDB, try the following steps: -* Install the packages libc6-dev-i386 (also try g++-multilib). 
+* Install the package libc6-dev-i386 (also try g++-multilib). * Try using the following command line instead of `make 32bit`: `make CFLAGS="-m32 -march=native" LDFLAGS="-m32"` @@ -164,14 +175,14 @@ Verbose build ------------- KeyDB will build with a user friendly colorized output by default. -If you want to see a more verbose output use the following: +If you want to see a more verbose output, use the following: % make V=1 Running KeyDB ------------- -To run KeyDB with the default configuration just type: +To run KeyDB with the default configuration, just type: % cd src % ./keydb-server @@ -224,7 +235,7 @@ You can find the list of all the available commands at https://docs.keydb.dev/do Installing KeyDB ----------------- -In order to install KeyDB binaries into /usr/local/bin just use: +In order to install KeyDB binaries into /usr/local/bin, just use: % make install @@ -233,8 +244,8 @@ different destination. Make install will just install binaries in your system, but will not configure init scripts and configuration files in the appropriate place. This is not -needed if you want just to play a bit with KeyDB, but if you are installing -it the proper way for a production system, we have a script doing this +needed if you just want to play a bit with KeyDB, but if you are installing +it the proper way for a production system, we have a script that does this for Ubuntu and Debian systems: % cd utils diff --git a/deps/README.md b/deps/README.md index 685dbb40d..02c99052f 100644 --- a/deps/README.md +++ b/deps/README.md @@ -21,7 +21,7 @@ just following tose steps: 1. Remove the jemalloc directory. 2. Substitute it with the new jemalloc source tree. -3. Edit the Makefile localted in the same directory as the README you are +3. Edit the Makefile located in the same directory as the README you are reading, and change the --with-version in the Jemalloc configure script options with the version you are using. 
This is required because otherwise Jemalloc configuration script is broken and will not work nested in another @@ -33,7 +33,7 @@ If you want to upgrade Jemalloc while also providing support for active defragmentation, in addition to the above steps you need to perform the following additional steps: -5. In Jemalloc three, file `include/jemalloc/jemalloc_macros.h.in`, make sure +5. In Jemalloc tree, file `include/jemalloc/jemalloc_macros.h.in`, make sure to add `#define JEMALLOC_FRAG_HINT`. 6. Implement the function `je_get_defrag_hint()` inside `src/jemalloc.c`. You can see how it is implemented in the current Jemalloc source tree shipped @@ -47,9 +47,9 @@ Hiredis Hiredis uses the SDS string library, that must be the same version used inside Redis itself. Hiredis is also very critical for Sentinel. Historically Redis often used forked versions of hiredis in a way or the other. In order to upgrade it is advised to take a lot of care: 1. Check with diff if hiredis API changed and what impact it could have in Redis. -2. Make sure thet the SDS library inside Hiredis and inside Redis are compatible. +2. Make sure that the SDS library inside Hiredis and inside Redis are compatible. 3. After the upgrade, run the Redis Sentinel test. -4. Check manually that redis-cli and redis-benchmark behave as expecteed, since we have no tests for CLI utilities currently. +4. Check manually that redis-cli and redis-benchmark behave as expected, since we have no tests for CLI utilities currently. Linenoise --- @@ -77,6 +77,6 @@ and our version: 1. Makefile is modified to allow a different compiler than GCC. 2. We have the implementation source code, and directly link to the following external libraries: `lua_cjson.o`, `lua_struct.o`, `lua_cmsgpack.o` and `lua_bit.o`. -3. There is a security fix in `ldo.c`, line 498: The check for `LUA_SIGNATURE[0]` is removed in order toa void direct bytecode execution. +3. 
There is a security fix in `ldo.c`, line 498: The check for `LUA_SIGNATURE[0]` is removed in order to avoid direct bytecode execution. diff --git a/deps/linenoise/linenoise.c b/deps/linenoise/linenoise.c index cfe51e768..ccf5c5548 100644 --- a/deps/linenoise/linenoise.c +++ b/deps/linenoise/linenoise.c @@ -625,7 +625,7 @@ static void refreshMultiLine(struct linenoiseState *l) { rpos2 = (plen+l->pos+l->cols)/l->cols; /* current cursor relative row. */ lndebug("rpos2 %d", rpos2); - /* Go up till we reach the expected positon. */ + /* Go up till we reach the expected position. */ if (rows-rpos2 > 0) { lndebug("go-up %d", rows-rpos2); snprintf(seq,64,"\x1b[%dA", rows-rpos2); @@ -767,7 +767,7 @@ void linenoiseEditBackspace(struct linenoiseState *l) { } } -/* Delete the previosu word, maintaining the cursor at the start of the +/* Delete the previous word, maintaining the cursor at the start of the * current word. */ void linenoiseEditDeletePrevWord(struct linenoiseState *l) { size_t old_pos = l->pos; diff --git a/deps/lua/src/ldo.c b/deps/lua/src/ldo.c index 514f7a2a3..939940a4c 100644 --- a/deps/lua/src/ldo.c +++ b/deps/lua/src/ldo.c @@ -274,7 +274,7 @@ int luaD_precall (lua_State *L, StkId func, int nresults) { CallInfo *ci; StkId st, base; Proto *p = cl->p; - luaD_checkstack(L, p->maxstacksize); + luaD_checkstack(L, p->maxstacksize + p->numparams); func = restorestack(L, funcr); if (!p->is_vararg) { /* no varargs? */ base = func + 1; diff --git a/keydb.conf b/keydb.conf index e8ca634b7..3874c5acd 100644 --- a/keydb.conf +++ b/keydb.conf @@ -24,7 +24,7 @@ # to customize a few per-server settings. Include files can include # other files, so use this wisely. # -# Notice option "include" won't be rewritten by command "CONFIG REWRITE" +# Note that option "include" won't be rewritten by command "CONFIG REWRITE" # from admin or KeyDB Sentinel. 
Since KeyDB always uses the last processed # line as value of a configuration directive, you'd better put includes # at the beginning of this file to avoid overwriting config change at runtime. @@ -46,7 +46,7 @@ ################################## NETWORK ##################################### # By default, if no "bind" configuration directive is specified, KeyDB listens -# for connections from all the network interfaces available on the server. +# for connections from all available network interfaces on the host machine. # It is possible to listen to just one or multiple selected interfaces using # the "bind" configuration directive, followed by one or more IP addresses. # @@ -58,13 +58,12 @@ # ~~~ WARNING ~~~ If the computer running KeyDB is directly exposed to the # internet, binding to all the interfaces is dangerous and will expose the # instance to everybody on the internet. So by default we uncomment the -# following bind directive, that will force KeyDB to listen only into -# the IPv4 loopback interface address (this means KeyDB will be able to -# accept connections only from clients running into the same computer it -# is running). +# following bind directive, that will force KeyDB to listen only on the +# IPv4 loopback interface address (this means KeyDB will only be able to +# accept client connections from the same host that it is running on). # # IF YOU ARE SURE YOU WANT YOUR INSTANCE TO LISTEN TO ALL THE INTERFACES -# JUST COMMENT THE FOLLOWING LINE. +# JUST COMMENT OUT THE FOLLOWING LINE. # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ bind 127.0.0.1 @@ -93,8 +92,8 @@ port 6379 # TCP listen() backlog. # -# In high requests-per-second environments you need an high backlog in order -# to avoid slow clients connections issues. Note that the Linux kernel +# In high requests-per-second environments you need a high backlog in order +# to avoid slow clients connection issues. 
Note that the Linux kernel # will silently truncate it to the value of /proc/sys/net/core/somaxconn so # make sure to raise both the value of somaxconn and tcp_max_syn_backlog # in order to get the desired effect. @@ -118,8 +117,8 @@ timeout 0 # of communication. This is useful for two reasons: # # 1) Detect dead peers. -# 2) Take the connection alive from the point of view of network -# equipment in the middle. +# 2) Force network equipment in the middle to consider the connection to be +# alive. # # On Linux, the specified value (in seconds) is the period used to send ACKs. # Note that to close the connection the double of the time is needed. @@ -159,11 +158,14 @@ tcp-keepalive 300 # By default, clients (including replica servers) on a TLS port are required # to authenticate using valid client side certificates. # -# It is possible to disable authentication using this directive. +# If "no" is specified, client certificates are not required and not accepted. +# If "optional" is specified, client certificates are accepted and must be +# valid if provided, but are not required. # # tls-auth-clients no +# tls-auth-clients optional -# By default, a Redis replica does not attempt to establish a TLS connection +# By default, a KeyDB replica does not attempt to establish a TLS connection # with its master. # # Use the following directive to enable TLS on replication links. @@ -225,11 +227,12 @@ daemonize no # supervision tree. Options: # supervised no - no supervision interaction # supervised upstart - signal upstart by putting KeyDB into SIGSTOP mode +# requires "expect stop" in your upstart job config # supervised systemd - signal systemd by writing READY=1 to $NOTIFY_SOCKET # supervised auto - detect upstart or systemd method based on # UPSTART_JOB or NOTIFY_SOCKET environment variables # Note: these supervision methods only signal "process is ready." -# They do not enable continuous liveness pings back to your supervisor. 
+# They do not enable continuous pings back to your supervisor. supervised no # If a pid file is specified, KeyDB writes it where specified at startup @@ -279,6 +282,9 @@ databases 16 # ASCII art logo in startup logs by setting the following option to yes. always-show-logo yes +# Retrieving "message of today" using CURL requests. +#enable-motd yes + ################################ SNAPSHOTTING ################################ # # Save the DB on disk: @@ -288,7 +294,7 @@ always-show-logo yes # Will save the DB if both the given number of seconds and the given # number of write operations against the DB occurred. # -# In the example below the behaviour will be to save: +# In the example below the behavior will be to save: # after 900 sec (15 min) if at least 1 key changed # after 300 sec (5 min) if at least 10 keys changed # after 60 sec if at least 10000 keys changed @@ -321,7 +327,7 @@ save 60 10000 stop-writes-on-bgsave-error yes # Compress string objects using LZF when dump .rdb databases? -# For default that's set to 'yes' as it's almost always a win. +# By default compression is enabled as it's almost always a win. # If you want to save some CPU in the saving child set it to 'no' but # the dataset will likely be bigger if you have compressible values or keys. rdbcompression yes @@ -409,11 +415,11 @@ dir ./ # still reply to client requests, possibly with out of date data, or the # data set may just be empty if this is the first synchronization. # -# 2) if replica-serve-stale-data is set to 'no' the replica will reply with -# an error "SYNC with master in progress" to all the kind of commands -# but to INFO, replicaOF, AUTH, PING, SHUTDOWN, REPLCONF, ROLE, CONFIG, -# SUBSCRIBE, UNSUBSCRIBE, PSUBSCRIBE, PUNSUBSCRIBE, PUBLISH, PUBSUB, -# COMMAND, POST, HOST: and LATENCY. 
+# 2) If replica-serve-stale-data is set to 'no' the replica will reply with +# an error "SYNC with master in progress" to all commands except: +# INFO, REPLICAOF, AUTH, PING, SHUTDOWN, REPLCONF, ROLE, CONFIG, SUBSCRIBE, +# UNSUBSCRIBE, PSUBSCRIBE, PUNSUBSCRIBE, PUBLISH, PUBSUB, COMMAND, POST, +# HOST and LATENCY. # replica-serve-stale-data yes @@ -491,14 +497,14 @@ repl-diskless-sync-delay 5 # ----------------------------------------------------------------------------- # WARNING: RDB diskless load is experimental. Since in this setup the replica # does not immediately store an RDB on disk, it may cause data loss during -# failovers. RDB diskless load + Redis modules not handling I/O reads may also -# cause Redis to abort in case of I/O errors during the initial synchronization +# failovers. RDB diskless load + KeyDB modules not handling I/O reads may also +# cause KeyDB to abort in case of I/O errors during the initial synchronization # stage with the master. Use only if your do what you are doing. # ----------------------------------------------------------------------------- # # Replica can load the RDB it reads from the replication link directly from the # socket, or store the RDB to a file and read that file after it was completely -# recived from the master. +# received from the master. # # In many cases the disk is slower than the network, and storing and loading # the RDB file may increase replication time (and even increase the master's @@ -528,7 +534,8 @@ repl-diskless-load disabled # # It is important to make sure that this value is greater than the value # specified for repl-ping-replica-period otherwise a timeout will be detected -# every time there is low traffic between the master and the replica. +# every time there is low traffic between the master and the replica. The default +# value is 60 seconds. 
# # repl-timeout 60 @@ -553,28 +560,28 @@ repl-disable-tcp-nodelay no # partial resync is enough, just passing the portion of data the replica # missed while disconnected. # -# The bigger the replication backlog, the longer the time the replica can be -# disconnected and later be able to perform a partial resynchronization. +# The bigger the replication backlog, the longer the replica can endure the +# disconnect and later be able to perform a partial resynchronization. # -# The backlog is only allocated once there is at least a replica connected. +# The backlog is only allocated if there is at least one replica connected. # # repl-backlog-size 1mb -# After a master has no longer connected replicas for some time, the backlog -# will be freed. The following option configures the amount of seconds that -# need to elapse, starting from the time the last replica disconnected, for -# the backlog buffer to be freed. +# After a master has no connected replicas for some time, the backlog will be +# freed. The following option configures the amount of seconds that need to +# elapse, starting from the time the last replica disconnected, for the backlog +# buffer to be freed. # # Note that replicas never free the backlog for timeout, since they may be # promoted to masters later, and should be able to correctly "partially -# resynchronize" with the replicas: hence they should always accumulate backlog. +# resynchronize" with other replicas: hence they should always accumulate backlog. # # A value of 0 means to never release the backlog. # # repl-backlog-ttl 3600 # The replica priority is an integer number published by KeyDB in the INFO -# output. It is used by Redis Sentinel in order to select a replica to promote +# output. It is used by KeyDB Sentinel in order to select a replica to promote # into a master if the master is no longer working correctly. 
# # A replica with a low priority number is considered better for promotion, so @@ -617,8 +624,8 @@ replica-priority 100 # Another place where this info is available is in the output of the # "ROLE" command of a master. # -# The listed IP and address normally reported by a replica is obtained -# in the following way: +# The listed IP address and port normally reported by a replica is +# obtained in the following way: # # IP: The address is auto detected by checking the peer address # of the socket used by the replica to connect with the master. @@ -628,7 +635,7 @@ replica-priority 100 # listen for connections. # # However when port forwarding or Network Address Translation (NAT) is -# used, the replica may be actually reachable via different IP and port +# used, the replica may actually be reachable via different IP and port # pairs. The following two options can be used by a replica in order to # report to its master a specific set of IP and port, so that both INFO # and ROLE will report those values. @@ -641,31 +648,31 @@ replica-priority 100 ############################### KEYS TRACKING ################################# -# Redis implements server assisted support for client side caching of values. +# KeyDB implements server assisted support for client side caching of values. # This is implemented using an invalidation table that remembers, using # 16 millions of slots, what clients may have certain subsets of keys. In turn # this is used in order to send invalidation messages to clients. Please -# to understand more about the feature check this page: +# check this page to understand more about the feature: # # https://redis.io/topics/client-side-caching # # When tracking is enabled for a client, all the read only queries are assumed -# to be cached: this will force Redis to store information in the invalidation +# to be cached: this will force KeyDB to store information in the invalidation # table. 
When keys are modified, such information is flushed away, and # invalidation messages are sent to the clients. However if the workload is -# heavily dominated by reads, Redis could use more and more memory in order +# heavily dominated by reads, KeyDB could use more and more memory in order # to track the keys fetched by many clients. # # For this reason it is possible to configure a maximum fill value for the # invalidation table. By default it is set to 1M of keys, and once this limit -# is reached, Redis will start to evict keys in the invalidation table +# is reached, KeyDB will start to evict keys in the invalidation table # even if they were not modified, just to reclaim memory: this will in turn # force the clients to invalidate the cached values. Basically the table # maximum size is a trade off between the memory you want to spend server # side to track information about who cached what, and the ability of clients # to retain cached objects in memory. # -# If you set the value to 0, it means there are no limits, and Redis will +# If you set the value to 0, it means there are no limits, and KeyDB will # retain as many keys as needed in the invalidation table. # In the "stats" INFO section, you can find information about the number of # keys in the invalidation table at every given moment. @@ -677,7 +684,7 @@ replica-priority 100 ################################## SECURITY ################################### -# Warning: since KeyDB is pretty fast an outside user can try up to +# Warning: since KeyDB is pretty fast, an outside user can try up to # 1 million passwords per second against a modern box. This means that you # should use very strong passwords, otherwise they will be very easy to break. # Note that because the password is really a shared secret between the client @@ -701,7 +708,7 @@ replica-priority 100 # AUTH (or the HELLO command AUTH option) in order to be authenticated and # start to work. 
# -# The ACL rules that describe what an user can do are the following: +# The ACL rules that describe what a user can do are the following: # # on Enable the user: it is possible to authenticate as this user. # off Disable the user: it's no longer possible to authenticate @@ -711,7 +718,7 @@ replica-priority 100 # - Disallow the execution of that command # +@ Allow the execution of all the commands in such category # with valid categories are like @admin, @set, @sortedset, ... -# and so forth, see the full list in the server.c file where +# and so forth, see the full list in the server.cpp file where # the KeyDB command table is described and defined. # The special category @all means all the commands, but currently # present in the server, and that will be loaded in the future @@ -729,7 +736,7 @@ replica-priority 100 # It is possible to specify multiple patterns. # allkeys Alias for ~* # resetkeys Flush the list of allowed keys patterns. -# > Add this passowrd to the list of valid password for the user. +# > Add this password to the list of valid password for the user. # For example >mypass will add "mypass" to the list. # This directive clears the "nopass" flag (see later). # < Remove this password from the list of valid passwords. @@ -775,16 +782,15 @@ replica-priority 100 # # The ACL Log tracks failed commands and authentication events associated # with ACLs. The ACL Log is useful to troubleshoot failed commands blocked -# by ACLs. The ACL Log is stored in and consumes memory. There is no limit -# to its length.You can reclaim memory with ACL LOG RESET or set a maximum -# length below. +# by ACLs. The ACL Log is stored in memory. You can reclaim memory with +# ACL LOG RESET. Define the maximum entry length of the ACL Log below. acllog-max-len 128 # Using an external ACL file # # Instead of configuring users here in this file, it is possible to use # a stand-alone file just listing users. 
The two methods cannot be mixed: -# if you configure users here and at the same time you activate the exteranl +# if you configure users here and at the same time you activate the external # ACL file, the server will refuse to start. # # The format of the external ACL user file is exactly the same as the @@ -792,7 +798,7 @@ acllog-max-len 128 # # aclfile /etc/keydb/users.acl -# IMPORTANT NOTE: starting with Redis 6 "requirepass" is just a compatiblity +# IMPORTANT NOTE: starting with Redis 6 "requirepass" is just a compatibility # layer on top of the new ACL system. The option effect will be just setting # the password for the default user. Clients will still authenticate using # AUTH as usually, or more explicitly with AUTH default @@ -836,6 +842,11 @@ acllog-max-len 128 # Once the limit is reached KeyDB will close all the new connections sending # an error 'max number of clients reached'. # +# IMPORTANT: When Redis Cluster is used, the max number of connections is also +# shared with the cluster bus: every node in the cluster will use two +# connections, one incoming and another outgoing. It is important to size the +# limit accordingly in case of very large clusters. +# # maxclients 10000 ############################## MEMORY MANAGEMENT ################################ @@ -898,8 +909,8 @@ acllog-max-len 128 # LRU, LFU and minimal TTL algorithms are not precise algorithms but approximated # algorithms (in order to save memory), so you can tune it for speed or -# accuracy. For default KeyDB will check five keys and pick the one that was -# used less recently, you can change the sample size using the following +# accuracy. By default KeyDB will check five keys and pick the one that was +# used least recently, you can change the sample size using the following # configuration directive. # # The default of 5 produces good enough results. 
10 Approximates very closely @@ -927,7 +938,7 @@ acllog-max-len 128 # # replica-ignore-maxmemory yes -# Redis reclaims expired keys in two ways: upon access when those keys are +# KeyDB reclaims expired keys in two ways: upon access when those keys are # found to be expired, and also in background, in what is called the # "active expire key". The key space is slowly and interactively scanned # looking for expired keys to reclaim, so that it is possible to free memory @@ -939,8 +950,8 @@ acllog-max-len 128 # it is possible to increase the expire "effort" that is normally set to # "1", to a greater value, up to the value "10". At its maximum value the # system will use more CPU, longer cycles (and technically may introduce -# more latency), and will tollerate less already expired keys still present -# in the system. It's a tradeoff betweeen memory, CPU and latecy. +# more latency), and will tolerate less already expired keys still present +# in the system. It's a tradeoff between memory, CPU and latency. # # active-expire-effort 1 @@ -1000,51 +1011,36 @@ replica-lazy-flush no lazyfree-lazy-user-del no -################################ THREADED I/O ################################# +############################ KERNEL OOM CONTROL ############################## -# Redis is mostly single threaded, however there are certain threaded -# operations such as UNLINK, slow I/O accesses and other things that are -# performed on side threads. +# On Linux, it is possible to hint the kernel OOM killer on what processes +# should be killed first when out of memory. # -# Now it is also possible to handle Redis clients socket reads and writes -# in different I/O threads. Since especially writing is so slow, normally -# Redis users use pipelining in order to speedup the Redis performances per -# core, and spawn multiple instances in order to scale more. 
Using I/O -# threads it is possible to easily speedup two times Redis without resorting -# to pipelining nor sharding of the instance. +# Enabling this feature makes KeyDB actively control the oom_score_adj value +# for all its processes, depending on their role. The default scores will +# attempt to have background child processes killed before all others, and +# replicas killed before masters. +# -# By default threading is disabled, we suggest enabling it only in machines -# that have at least 4 or more cores, leaving at least one spare core. -# Using more than 8 threads is unlikely to help much. We also recommend using -# threaded I/O only if you actually have performance problems, with Redis -# instances being able to use a quite big percentage of CPU time, otherwise -# there is no point in using this feature. +# KeyDB supports the following options: # -# So for instance if you have a four cores boxes, try to use 2 or 3 I/O -# threads, if you have a 8 cores, try to use 6 threads. In order to -# enable I/O threads use the following configuration directive: +# no: Don't make changes to oom-score-adj (default). +# yes: Alias to "relative", see below. +# absolute: Values in oom-score-adj-values are written as is to the kernel. +# relative: Values are used relative to the initial value of oom_score_adj when +# the server starts and are then clamped to a range of -1000 to 1000. +# Because typically the initial value is 0, they will often match the +# absolute values. +oom-score-adj no + +# When oom-score-adj is used, this directive controls the specific values used +# for master, replica and background child processes. Values range -2000 to +# 2000 (higher means more likely to be killed). # -# io-threads 4 -# -# Setting io-threads to 1 will just use the main thread as usually. -# When I/O threads are enabled, we only use threads for writes, that is -# to thread the write(2) syscall and transfer the client buffers to the -# socket.
However it is also possible to enable threading of reads and -# protocol parsing using the following configuration directive, by setting -# it to yes: -# -# io-threads-do-reads no -# -# Usually threading reads doesn't help much. -# -# NOTE 1: This configuration directive cannot be changed at runtime via -# CONFIG SET. Aso this feature currently does not work when SSL is -# enabled. -# -# NOTE 2: If you want to test the Redis speedup using redis-benchmark, make -# sure you also run the benchmark itself in threaded mode, using the -# --threads option to match the number of Redis theads, otherwise you'll not -# be able to notice the improvements. +# Unprivileged processes (not root, and without CAP_SYS_RESOURCE capabilities) +# can freely increase their value, but not decrease it below its initial +# settings. This means that setting oom-score-adj to "relative" and setting the +# oom-score-adj-values to positive values will always succeed. +oom-score-adj-values 0 200 800 ############################## APPEND ONLY MODE ############################### @@ -1170,8 +1166,8 @@ aof-load-truncated yes # # [RDB file][AOF tail] # -# When loading KeyDB recognizes that the AOF file starts with the "REDIS" -# string and loads the prefixed RDB file, and continues loading the AOF +# When loading, KeyDB recognizes that the AOF file starts with the "REDIS" +# string and loads the prefixed RDB file, then continues loading the AOF # tail. aof-use-rdb-preamble yes @@ -1185,7 +1181,7 @@ aof-use-rdb-preamble yes # # When a long running script exceeds the maximum execution time only the # SCRIPT KILL and SHUTDOWN NOSAVE commands are available. The first can be -# used to stop a script that did not yet called write commands. The second +# used to stop a script that did not yet call any write commands. 
The second # is the only way to shut down the server in the case a write command was # already issued by the script but the user doesn't want to wait for the natural # termination of the script. @@ -1211,7 +1207,7 @@ lua-time-limit 5000 # Cluster node timeout is the amount of milliseconds a node must be unreachable # for it to be considered in failure state. -# Most other internal time limits are multiple of the node timeout. +# Most other internal time limits are a multiple of the node timeout. # # cluster-node-timeout 15000 @@ -1238,18 +1234,18 @@ lua-time-limit 5000 # the failover if, since the last interaction with the master, the time # elapsed is greater than: # -# (node-timeout * replica-validity-factor) + repl-ping-replica-period +# (node-timeout * cluster-replica-validity-factor) + repl-ping-replica-period # -# So for example if node-timeout is 30 seconds, and the replica-validity-factor +# So for example if node-timeout is 30 seconds, and the cluster-replica-validity-factor # is 10, and assuming a default repl-ping-replica-period of 10 seconds, the # replica will not try to failover if it was not able to talk with the master # for longer than 310 seconds. # -# A large replica-validity-factor may allow replicas with too old data to failover +# A large cluster-replica-validity-factor may allow replicas with too old data to failover # a master, while a too small value may prevent the cluster from being able to # elect a replica at all. # -# For maximum availability, it is possible to set the replica-validity-factor +# For maximum availability, it is possible to set the cluster-replica-validity-factor # to a value of 0, which means, that replicas will always try to failover the # master regardless of the last time they interacted with the master. 
# (However they'll always try to apply a delay proportional to their @@ -1280,7 +1276,7 @@ lua-time-limit 5000 # cluster-migration-barrier 1 # By default KeyDB Cluster nodes stop accepting queries if they detect there -# is at least an hash slot uncovered (no available node is serving it). +# is at least a hash slot uncovered (no available node is serving it). # This way if the cluster is partially down (for example a range of hash slots # are no longer covered) all the cluster becomes, eventually, unavailable. # It automatically returns available as soon as all the slots are covered again. @@ -1335,7 +1331,7 @@ lua-time-limit 5000 # * cluster-announce-port # * cluster-announce-bus-port # -# Each instruct the node about its address, client port, and cluster message +# Each instructs the node about its address, client port, and cluster message # bus port. The information is then published in the header of the bus packets # so that other nodes will be able to correctly map the address of the node # publishing the information. @@ -1346,7 +1342,7 @@ lua-time-limit 5000 # Note that when remapped, the bus port may not be at the fixed offset of # clients port + 10000, so you can specify any port and bus-port depending # on how they get remapped. If the bus-port is not set, a fixed offset of -# 10000 will be used as usually. +# 10000 will be used as usual. # # Example: # @@ -1449,61 +1445,6 @@ latency-monitor-threshold 0 # specify at least one of K or E, no events will be delivered. notify-keyspace-events "" -############################### GOPHER SERVER ################################# - -# KeyDB contains an implementation of the Gopher protocol, as specified in -# the RFC 1436 (https://www.ietf.org/rfc/rfc1436.txt). -# -# The Gopher protocol was very popular in the late '90s. It is an alternative -# to the web, and the implementation both server and client side is so simple -# that the KeyDB server has just 100 lines of code in order to implement this -# support. 
-# -# What do you do with Gopher nowadays? Well Gopher never *really* died, and -# lately there is a movement in order for the Gopher more hierarchical content -# composed of just plain text documents to be resurrected. Some want a simpler -# internet, others believe that the mainstream internet became too much -# controlled, and it's cool to create an alternative space for people that -# want a bit of fresh air. -# -# Anyway for the 10nth birthday of the KeyDB, we gave it the Gopher protocol -# as a gift. -# -# --- HOW IT WORKS? --- -# -# The KeyDB Gopher support uses the inline protocol of KeyDB, and specifically -# two kind of inline requests that were anyway illegal: an empty request -# or any request that starts with "/" (there are no KeyDB commands starting -# with such a slash). Normal RESP2/RESP3 requests are completely out of the -# path of the Gopher protocol implementation and are served as usually as well. -# -# If you open a connection to KeyDB when Gopher is enabled and send it -# a string like "/foo", if there is a key named "/foo" it is served via the -# Gopher protocol. -# -# In order to create a real Gopher "hole" (the name of a Gopher site in Gopher -# talking), you likely need a script like the following: -# -# https://github.com/antirez/gopher2redis -# -# --- SECURITY WARNING --- -# -# If you plan to put KeyDB on the internet in a publicly accessible address -# to server Gopher pages MAKE SURE TO SET A PASSWORD to the instance. -# Once a password is set: -# -# 1. The Gopher server (when enabled, not by default) will still serve -# content via Gopher. -# 2. However other commands cannot be called before the client will -# authenticate. -# -# So use the 'requirepass' option to protect your instance. -# -# To enable Gopher support uncomment the following line and set -# the option from no (the default) to yes. 
-# -# gopher-enabled no - ############################### ADVANCED CONFIG ############################### # Hashes are encoded using a memory efficient data structure when they have a @@ -1647,8 +1588,8 @@ client-output-buffer-limit pubsub 32mb 8mb 60 # client-query-buffer-limit 1gb # In the KeyDB protocol, bulk requests, that are, elements representing single -# strings, are normally limited ot 512 mb. However you can change this limit -# here. +# strings, are normally limited to 512 mb. However you can change this limit +# here, but must be 1mb or greater # # proto-max-bulk-len 512mb @@ -1676,7 +1617,7 @@ hz 10 # # Since the default HZ value by default is conservatively set to 10, KeyDB # offers, and enables by default, the ability to use an adaptive HZ value -# which will temporary raise when there are many connected clients. +# which will temporarily raise when there are many connected clients. # # When dynamic HZ is enabled, the actual configured HZ will be used # as a baseline, but multiples of the configured HZ value will be actually @@ -1743,7 +1684,7 @@ rdb-save-incremental-fsync yes # for the key counter to be divided by two (or decremented if it has a value # less <= 10). # -# The default value for the lfu-decay-time is 1. A Special value of 0 means to +# The default value for the lfu-decay-time is 1. A special value of 0 means to # decay the counter every time it happens to be scanned. # # lfu-log-factor 10 @@ -1763,7 +1704,7 @@ rdb-save-incremental-fsync yes # restart is needed in order to lower the fragmentation, or at least to flush # away all the data and create it again. However thanks to this feature # implemented by Oran Agra for Redis 4.0 this process can happen at runtime -# in an "hot" way, while the server is running. +# in a "hot" way, while the server is running. 
# # Basically when the fragmentation is over a certain level (see the # configuration options below) KeyDB will start to create new copies of the @@ -1816,14 +1757,14 @@ rdb-save-incremental-fsync yes # Jemalloc background thread for purging will be enabled by default jemalloc-bg-thread yes -# It is possible to pin different threads and processes of Redis to specific +# It is possible to pin different threads and processes of KeyDB to specific # CPUs in your system, in order to maximize the performances of the server. -# This is useful both in order to pin different Redis threads in different -# CPUs, but also in order to make sure that multiple Redis instances running +# This is useful both in order to pin different KeyDB threads in different +# CPUs, but also in order to make sure that multiple KeyDB instances running # in the same host will be pinned to different CPUs. # # Normally you can do this using the "taskset" command, however it is also -# possible to this via Redis configuration directly, both in Linux and FreeBSD. +# possible to this via KeyDB configuration directly, both in Linux and FreeBSD. # # You can pin the server/IO threads, bio threads, aof rewrite child process, and # the bgsave child process. The syntax to specify the cpu list is the same as @@ -1841,10 +1782,25 @@ jemalloc-bg-thread yes # Set bgsave child process to cpu affinity 1,10,11 # bgsave_cpulist 1,10-11 +# In some cases KeyDB will emit warnings and even refuse to start if it detects +# that the system is in bad state, it is possible to suppress these warnings +# by setting the following config which takes a space delimited list of warnings +# to suppress +# +# ignore-warnings ARM64-COW-BUG + # The minimum number of clients on a thread before KeyDB assigns new connections to a different thread # Tuning this parameter is a tradeoff between locking overhead and distributing the workload over multiple cores # min-clients-per-thread 50 +# How often to run RDB load progress callback? 
+# The callback runs during key load to ping other servers and prevent timeouts. +# It also updates load time estimates. +# Change these values to run it more or less often. It will run when either condition is true. +# Either when x bytes have been processed, or when x keys have been loaded. +# loading-process-events-interval-bytes 2097152 +# loading-process-events-interval-keys 8192 + # Avoid forwarding RREPLAY messages to other masters? # WARNING: This setting is dangerous! You must be certain all masters are connected to each # other in a true mesh topology or data loss will occur! @@ -1859,6 +1815,8 @@ jemalloc-bg-thread yes # Number of worker threads serving requests. This number should be related to the performance # of your network hardware, not the number of cores on your machine. We don't recommend going # above 4 at this time. By default this is set 1. +# +# Note: KeyDB does not use io-threads, but io-threads is a config alias for server-threads server-threads 2 # Should KeyDB pin threads to CPUs? By default this is disabled, and KeyDB will not bind threads. diff --git a/runtest-moduleapi b/runtest-moduleapi index f6cc0a258..268506160 100755 --- a/runtest-moduleapi +++ b/runtest-moduleapi @@ -25,4 +25,8 @@ $TCLSH tests/test_helper.tcl \ --single unit/moduleapi/scan \ --single unit/moduleapi/datatype \ --single unit/moduleapi/auth \ +--single unit/moduleapi/keyspace_events \ +--single unit/moduleapi/blockedclient \ +--single unit/moduleapi/moduleloadsave \ +--single unit/moduleapi/getkeys \ "${@}" diff --git a/sentinel.conf b/sentinel.conf index 7cec3c356..2ec6717d5 100644 --- a/sentinel.conf +++ b/sentinel.conf @@ -259,6 +259,6 @@ sentinel deny-scripts-reconfig yes # SENTINEL SET can also be used in order to perform this configuration at runtime. 
# # In order to set a command back to its original name (undo the renaming), it -# is possible to just rename a command to itsef: +# is possible to just rename a command to itself: # # SENTINEL rename-command mymaster CONFIG CONFIG diff --git a/src/.gitignore b/src/.gitignore index aee7aacf0..a9c50bee3 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -2,4 +2,5 @@ *.gcno *.gcov redis.info +KeyDB.info lcov-html diff --git a/src/Makefile b/src/Makefile index 038ac51f8..3af99b3b4 100644 --- a/src/Makefile +++ b/src/Makefile @@ -47,7 +47,7 @@ endif USEASM?=true -ifneq ($(SANITIZE),) +ifneq ($(strip $(SANITIZE)),) CFLAGS+= -fsanitize=$(SANITIZE) -DSANITIZE CXXFLAGS+= -fsanitize=$(SANITIZE) -DSANITIZE LDFLAGS+= -fsanitize=$(SANITIZE) @@ -107,7 +107,7 @@ endif FINAL_CFLAGS=$(STD) $(WARN) $(OPT) $(DEBUG) $(CFLAGS) $(REDIS_CFLAGS) FINAL_CXXFLAGS=$(CXX_STD) $(WARN) $(OPT) $(DEBUG) $(CXXFLAGS) $(REDIS_CFLAGS) FINAL_LDFLAGS=$(LDFLAGS) $(REDIS_LDFLAGS) $(DEBUG) -FINAL_LIBS+=-lm -lcurl +FINAL_LIBS+=-lm DEBUG=-g -ggdb ifneq ($(uname_S),Darwin) @@ -152,12 +152,21 @@ ifeq ($(uname_S),OpenBSD) endif else +ifeq ($(uname_S),NetBSD) + # NetBSD + FINAL_LIBS+= -lpthread + ifeq ($(USE_BACKTRACE),yes) + FINAL_CFLAGS+= -DUSE_BACKTRACE -I/usr/pkg/include + FINAL_LDFLAGS+= -L/usr/pkg/lib + FINAL_LIBS+= -lexecinfo + endif +else ifeq ($(uname_S),FreeBSD) # FreeBSD FINAL_LIBS+= -lpthread -lexecinfo else ifeq ($(uname_S),DragonFly) - # FreeBSD + # DragonFly FINAL_LIBS+= -lpthread -lexecinfo else ifeq ($(uname_S),OpenBSD) @@ -167,12 +176,23 @@ else ifeq ($(uname_S),NetBSD) # NetBSD FINAL_LIBS+= -lpthread -lexecinfo +else +ifeq ($(uname_S),Haiku) + # Haiku + FINAL_CFLAGS+= -DBSD_SOURCE + FINAL_LDFLAGS+= -lbsd -lnetwork + FINAL_LIBS+= -lpthread else # All the other OSes (notably Linux) FINAL_LDFLAGS+= -rdynamic FINAL_LIBS+=-ldl -pthread -lrt -luuid +ifneq ($(NO_MOTD),yes) FINAL_CFLAGS += -DMOTD FINAL_CXXFLAGS += -DMOTD + FINAL_LIBS+=-lcurl +endif +endif +endif endif endif endif @@ -236,10 +256,23 
@@ ifeq ($(MALLOC),memkind) endif ifeq ($(BUILD_TLS),yes) - FINAL_CFLAGS+=-DUSE_OPENSSL $(OPENSSL_CXXFLAGS) - FINAL_CXXFLAGS+=-DUSE_OPENSSL $(OPENSSL_CXXFLAGS) - FINAL_LDFLAGS+=$(OPENSSL_LDFLAGS) - FINAL_LIBS += ../deps/hiredis/libhiredis_ssl.a -lssl -lcrypto + FINAL_CFLAGS+=-DUSE_OPENSSL $(OPENSSL_CXXFLAGS) + FINAL_CXXFLAGS+=-DUSE_OPENSSL $(OPENSSL_CXXFLAGS) + FINAL_LDFLAGS+=$(OPENSSL_LDFLAGS) + FINAL_LIBS += ../deps/hiredis/libhiredis_ssl.a -lssl -lcrypto + LIBSSL_PKGCONFIG := $(shell $(PKG_CONFIG) --exists libssl && echo $$?) +ifeq ($(LIBSSL_PKGCONFIG),0) + LIBSSL_LIBS=$(shell $(PKG_CONFIG) --libs libssl) +else + LIBSSL_LIBS=-lssl +endif + LIBCRYPTO_PKGCONFIG := $(shell $(PKG_CONFIG) --exists libcrypto && echo $$?) +ifeq ($(LIBCRYPTO_PKGCONFIG),0) + LIBCRYPTO_LIBS=$(shell $(PKG_CONFIG) --libs libcrypto) +else + LIBCRYPTO_LIBS=-lcrypto +endif + FINAL_LIBS += ../deps/hiredis/libhiredis_ssl.a $(LIBSSL_LIBS) $(LIBCRYPTO_LIBS) endif REDIS_CC=$(QUIET_CC)$(CC) $(FINAL_CFLAGS) @@ -261,15 +294,15 @@ QUIET_LINK = @printf ' %b %b\n' $(LINKCOLOR)LINK$(ENDCOLOR) $(BINCOLOR)$@$(EN QUIET_INSTALL = @printf ' %b %b\n' $(LINKCOLOR)INSTALL$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR); endif -REDIS_SERVER_NAME=keydb-server -REDIS_SENTINEL_NAME=keydb-sentinel -REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crcspeed.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o acl.o storage.o rdb-s3.o fastlock.o new.o tracking.o cron.o 
connection.o tls.o sha256.o motd.o timeout.o setcpuaffinity.o $(ASM_OBJ) -REDIS_CLI_NAME=keydb-cli +REDIS_SERVER_NAME=keydb-server$(PROG_SUFFIX) +REDIS_SENTINEL_NAME=keydb-sentinel$(PROG_SUFFIX) +REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o t_nhash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crcspeed.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o acl.o storage.o rdb-s3.o fastlock.o new.o tracking.o cron.o connection.o tls.o sha256.o motd.o timeout.o setcpuaffinity.o $(ASM_OBJ) +REDIS_CLI_NAME=keydb-cli$(PROG_SUFFIX) REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o redis-cli-cpphelper.o zmalloc.o release.o anet.o ae.o crcspeed.o crc64.o siphash.o crc16.o storage-lite.o fastlock.o new.o motd.o $(ASM_OBJ) -REDIS_BENCHMARK_NAME=keydb-benchmark +REDIS_BENCHMARK_NAME=keydb-benchmark$(PROG_SUFFIX) REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o dict.o zmalloc.o siphash.o redis-benchmark.o storage-lite.o fastlock.o new.o $(ASM_OBJ) -REDIS_CHECK_RDB_NAME=keydb-check-rdb -REDIS_CHECK_AOF_NAME=keydb-check-aof +REDIS_CHECK_RDB_NAME=keydb-check-rdb$(PROG_SUFFIX) +REDIS_CHECK_AOF_NAME=keydb-check-aof$(PROG_SUFFIX) all: $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_RDB_NAME) $(REDIS_CHECK_AOF_NAME) @echo "" @@ -290,6 +323,8 @@ persist-settings: distclean echo WARN=$(WARN) >> .make-settings echo OPT=$(OPT) >> .make-settings echo MALLOC=$(MALLOC) >> .make-settings + echo BUILD_TLS=$(BUILD_TLS) 
>> .make-settings + echo USE_SYSTEMD=$(USE_SYSTEMD) >> .make-settings echo CFLAGS=$(CFLAGS) >> .make-settings echo CXXFLAGS=$(CXXFLAGS) >> .make-settings echo LDFLAGS=$(LDFLAGS) >> .make-settings @@ -360,7 +395,7 @@ DEP = $(REDIS_SERVER_OBJ:%.o=%.d) $(REDIS_CLI_OBJ:%.o=%.d) $(REDIS_BENCHMARK_OBJ $(KEYDB_AS) $< -o $@ clean: - rm -rf $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_RDB_NAME) $(REDIS_CHECK_AOF_NAME) *.o *.gcda *.gcno *.gcov redis.info lcov-html Makefile.dep dict-benchmark + rm -rf $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_RDB_NAME) $(REDIS_CHECK_AOF_NAME) *.o *.gcda *.gcno *.gcov KeyDB.info lcov-html Makefile.dep dict-benchmark rm -f $(DEP) .PHONY: clean @@ -382,9 +417,10 @@ check: test lcov: $(MAKE) gcov - @(set -e; cd ..; ./runtest --clients 1) - @geninfo -o redis.info . - @genhtml --legend -o lcov-html redis.info + @(set -e; cd ..; ./runtest --config server-threads 3; ./runtest-sentinel; ./runtest-cluster; ./runtest-moduleapi) + @geninfo -o KeyDB.info --no-external . + @genhtml --legend -o lcov-html KeyDB.info + @genhtml --legend -o lcov-html KeyDB.info | grep lines | awk '{print $$2;}' | sed 's/%//g' test-sds: sds.c sds.h $(REDIS_CC) sds.c zmalloc.cpp -DSDS_TEST_MAIN $(FINAL_LIBS) -o /tmp/sds_test diff --git a/src/acl.cpp b/src/acl.cpp index 509dd0776..7f8ab74bd 100644 --- a/src/acl.cpp +++ b/src/acl.cpp @@ -300,7 +300,13 @@ void ACLFreeUserAndKillClients(user *u) { * it in non authenticated mode. */ c->puser = DefaultUser; c->authenticated = 0; - freeClientAsync(c); + /* We will write replies to this client later, so we can't + * close it directly even if async. 
*/ + if (c == serverTL->current_client) { + c->flags |= CLIENT_CLOSE_AFTER_COMMAND; + } else { + freeClientAsync(c); + } } } ACLFreeUser(u); @@ -377,7 +383,7 @@ int ACLUserCanExecuteFutureCommands(user *u) { * zero, the user flag ALLCOMMANDS is cleared since it is no longer possible * to skip the command bit explicit test. */ void ACLSetUserCommandBit(user *u, unsigned long id, int value) { - uint64_t word, bit; + uint64_t word=0, bit=0; if (ACLGetCommandBitCoordinates(id,&word,&bit) == C_ERR) return; if (value) { u->allowed_commands[word] |= bit; @@ -472,21 +478,68 @@ sds ACLDescribeUserCommandRules(user *u) { ACLSetUser(fakeuser,"-@all",-1); } - /* Try to add or subtract each category one after the other. Often a - * single category will not perfectly match the set of commands into - * it, so at the end we do a final pass adding/removing the single commands - * needed to make the bitmap exactly match. */ - for (int j = 0; ACLCommandCategories[j].flag != 0; j++) { - unsigned long on, off; - ACLCountCategoryBitsForUser(u,&on,&off,ACLCommandCategories[j].name); - if ((additive && on > off) || (!additive && off > on)) { - sds op = sdsnewlen(additive ? "+@" : "-@", 2); - op = sdscat(op,ACLCommandCategories[j].name); - ACLSetUser(fakeuser,op,-1); - rules = sdscatsds(rules,op); - rules = sdscatlen(rules," ",1); - sdsfree(op); + /* Attempt to find a good approximation for categories and commands + * based on the current bits used, by looping over the category list + * and applying the best fit each time. Often a set of categories will not + * perfectly match the set of commands into it, so at the end we do a + * final pass adding/removing the single commands needed to make the bitmap + * exactly match. A temp user is maintained to keep track of categories + * already applied. */ + user tu = {0}; + user *tempuser = &tu; + + /* Keep track of the categories that have been applied, to prevent + * applying them twice. 
*/ + char applied[sizeof(ACLCommandCategories)/sizeof(ACLCommandCategories[0])]; + memset(applied, 0, sizeof(applied)); + + memcpy(tempuser->allowed_commands, + u->allowed_commands, + sizeof(u->allowed_commands)); + while (1) { + int best = -1; + unsigned long mindiff = INT_MAX, maxsame = 0; + for (int j = 0; ACLCommandCategories[j].flag != 0; j++) { + if (applied[j]) continue; + + unsigned long on, off, diff, same; + ACLCountCategoryBitsForUser(tempuser,&on,&off,ACLCommandCategories[j].name); + /* Check if the current category is the best this loop: + * * It has more commands in common with the user than commands + * that are different. + * AND EITHER + * * It has the fewest number of differences + * than the best match we have found so far. + * * OR it matches the fewest number of differences + * that we've seen but it has more in common. */ + diff = additive ? off : on; + same = additive ? on : off; + if (same > diff && + ((diff < mindiff) || (diff == mindiff && same > maxsame))) + { + best = j; + mindiff = diff; + maxsame = same; + } } + + /* We didn't find a match */ + if (best == -1) break; + + sds op = sdsnewlen(additive ? "+@" : "-@", 2); + op = sdscat(op,ACLCommandCategories[best].name); + ACLSetUser(fakeuser,op,-1); + + sds invop = sdsnewlen(additive ? "-@" : "+@", 2); + invop = sdscat(invop,ACLCommandCategories[best].name); + ACLSetUser(tempuser,invop,-1); + + rules = sdscatsds(rules,op); + rules = sdscatlen(rules," ",1); + sdsfree(op); + sdsfree(invop); + + applied[best] = 1; } /* Fix the final ACLs with single commands differences. */ @@ -670,8 +723,8 @@ void ACLAddAllowedSubcommand(user *u, unsigned long id, const char *sub) { * - Disallow the execution of that command * +@ Allow the execution of all the commands in such category * with valid categories are like @admin, @set, @sortedset, ... - * and so forth, see the full list in the server.c file where - * the Redis command table is described and defined. 
+ * and so forth, see the full list in the server.cpp file where + * the KeyDB command table is described and defined. * The special category @all means all the commands, but currently * present in the server, and that will be loaded in the future * via modules. @@ -1099,8 +1152,9 @@ int ACLCheckCommandPerm(client *c, int *keyidxptr) { if (!(c->puser->flags & USER_FLAG_ALLKEYS) && (c->cmd->getkeys_proc || c->cmd->firstkey)) { - int numkeys; - int *keyidx = getKeysFromCommand(c->cmd,c->argv,c->argc,&numkeys); + getKeysResult result = GETKEYS_RESULT_INIT; + int numkeys = getKeysFromCommand(c->cmd,c->argv,c->argc,&result); + int *keyidx = result.keys; for (int j = 0; j < numkeys; j++) { listIter li; listNode *ln; @@ -1121,11 +1175,11 @@ int ACLCheckCommandPerm(client *c, int *keyidxptr) { } if (!match) { if (keyidxptr) *keyidxptr = keyidx[j]; - getKeysFreeResult(keyidx); + getKeysFreeResult(&result); return ACL_DENIED_KEY; } } - getKeysFreeResult(keyidx); + getKeysFreeResult(&result); } /* If we survived all the above checks, the user can execute the @@ -1330,6 +1384,7 @@ sds ACLLoadFromFile(const char *filename) { errors = sdscatprintf(errors, "'%s:%d: username '%s' contains invalid characters. ", g_pserver->acl_filename, linenum, argv[1]); + sdsfreesplitres(argv,argc); continue; } @@ -1914,7 +1969,7 @@ void aclCommand(client *c) { addReplyBulkCString(c,"client-info"); addReplyBulkCBuffer(c,le->cinfo,sdslen(le->cinfo)); } - } else if (!strcasecmp(sub,"help")) { + } else if (c->argc == 2 && !strcasecmp(sub,"help")) { const char *help[] = { "LOAD -- Reload users from the ACL file.", "SAVE -- Save the current config to the ACL file.", diff --git a/src/adlist.c b/src/adlist.c index 7b7b012ce..6d5d77fb3 100644 --- a/src/adlist.c +++ b/src/adlist.c @@ -34,8 +34,9 @@ #include "zmalloc.h" /* Create a new list. The created list can be freed with - * AlFreeList(), but private value of every node need to be freed - * by the user before to call AlFreeList(). 
+ * listRelease(), but private value of every node need to be freed + * by the user before to call listRelease(), or by setting a free method using + * listSetFreeMethod. * * On error, NULL is returned. Otherwise the pointer to the new list. */ list *listCreate(void) @@ -217,8 +218,8 @@ void listRewindTail(list *list, listIter *li) { * listDelNode(), but not to remove other elements. * * The function returns a pointer to the next element of the list, - * or NULL if there are no more elements, so the classical usage patter - * is: + * or NULL if there are no more elements, so the classical usage + * pattern is: * * iter = listGetIterator(list,); * while ((node = listNext(iter)) != NULL) { diff --git a/src/ae.cpp b/src/ae.cpp index 789a6888b..125179c89 100644 --- a/src/ae.cpp +++ b/src/ae.cpp @@ -48,6 +48,7 @@ #include "fastlock.h" #include "zmalloc.h" #include "config.h" +#include "serverassert.h" #ifdef USE_MUTEX thread_local int cOwnLock = 0; @@ -84,8 +85,6 @@ fastlock g_lock("AE (global)"); #endif thread_local aeEventLoop *g_eventLoopThisThread = NULL; -#define AE_ASSERT(x) if (!(x)) do { fprintf(stderr, "AE_ASSERT FAILURE %s: %d\n", __FILE__, __LINE__); *((volatile int*)1) = 1; } while(0) - /* Include the best multiplexing layer supported by this system. * The following should be ordered by performances, descending. 
*/ #ifdef HAVE_EVPORT @@ -140,7 +139,7 @@ void aeProcessCmd(aeEventLoop *eventLoop, int fd, void *, int ) auto cb = read(fd, &cmd, sizeof(aeCommand)); if (cb != sizeof(cmd)) { - AE_ASSERT(errno == EAGAIN); + serverAssert(errno == EAGAIN); break; } switch (cmd.op) @@ -251,8 +250,8 @@ int aeCreateRemoteFileEvent(aeEventLoop *eventLoop, int fd, int mask, auto size = safe_write(eventLoop->fdCmdWrite, &cmd, sizeof(cmd)); if (size != sizeof(cmd)) { - AE_ASSERT(size == sizeof(cmd) || size <= 0); - AE_ASSERT(errno == EAGAIN); + serverAssert(size == sizeof(cmd) || size <= 0); + serverAssert(errno == EAGAIN); ret = AE_ERR; } @@ -307,9 +306,14 @@ int aePostFunction(aeEventLoop *eventLoop, std::function fn, bool fSynch } auto size = write(eventLoop->fdCmdWrite, &cmd, sizeof(cmd)); - if (size != sizeof(cmd)) + if (!(!size || size == sizeof(cmd))) { + printf("Last error: %d\n", errno); + } + serverAssert(!size || size == sizeof(cmd)); + + if (size == 0) return AE_ERR; - AE_ASSERT(size == sizeof(cmd)); + int ret = AE_OK; if (fSynchronous) { @@ -352,7 +356,7 @@ aeEventLoop *aeCreateEventLoop(int setsize) { goto err; eventLoop->fdCmdRead = rgfd[0]; eventLoop->fdCmdWrite = rgfd[1]; - fcntl(eventLoop->fdCmdWrite, F_SETFL, O_NONBLOCK); + //fcntl(eventLoop->fdCmdWrite, F_SETFL, O_NONBLOCK); fcntl(eventLoop->fdCmdRead, F_SETFL, O_NONBLOCK); eventLoop->cevents = 0; aeCreateFileEvent(eventLoop, eventLoop->fdCmdRead, AE_READABLE|AE_READ_THREADSAFE, aeProcessCmd, NULL); @@ -373,6 +377,11 @@ int aeGetSetSize(aeEventLoop *eventLoop) { return eventLoop->setsize; } +/* Return the current EventLoop. */ +aeEventLoop *aeGetCurrentEventLoop(){ + return g_eventLoopThisThread; +} + /* Tells the next iteration/s of the event processing to set timeout of 0. */ void aeSetDontWait(aeEventLoop *eventLoop, int noWait) { if (noWait) @@ -389,7 +398,7 @@ void aeSetDontWait(aeEventLoop *eventLoop, int noWait) { * * Otherwise AE_OK is returned and the operation is successful. 
*/ int aeResizeSetSize(aeEventLoop *eventLoop, int setsize) { - AE_ASSERT(g_eventLoopThisThread == NULL || g_eventLoopThisThread == eventLoop); + serverAssert(g_eventLoopThisThread == NULL || g_eventLoopThisThread == eventLoop); int i; if (setsize == eventLoop->setsize) return AE_OK; @@ -427,14 +436,14 @@ extern "C" void aeDeleteEventLoop(aeEventLoop *eventLoop) { } extern "C" void aeStop(aeEventLoop *eventLoop) { - AE_ASSERT(g_eventLoopThisThread == NULL || g_eventLoopThisThread == eventLoop); + serverAssert(g_eventLoopThisThread == NULL || g_eventLoopThisThread == eventLoop); eventLoop->stop = 1; } extern "C" int aeCreateFileEvent(aeEventLoop *eventLoop, int fd, int mask, aeFileProc *proc, void *clientData) { - AE_ASSERT(g_eventLoopThisThread == NULL || g_eventLoopThisThread == eventLoop); + serverAssert(g_eventLoopThisThread == NULL || g_eventLoopThisThread == eventLoop); if (fd >= eventLoop->setsize) { errno = ERANGE; return AE_ERR; @@ -463,12 +472,12 @@ void aeDeleteFileEventAsync(aeEventLoop *eventLoop, int fd, int mask) cmd.mask = mask; cmd.fLock = true; auto cb = write(eventLoop->fdCmdWrite, &cmd, sizeof(cmd)); - AE_ASSERT(cb == sizeof(cmd)); + serverAssert(cb == sizeof(cmd)); } extern "C" void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask) { - AE_ASSERT(g_eventLoopThisThread == NULL || g_eventLoopThisThread == eventLoop); + serverAssert(g_eventLoopThisThread == NULL || g_eventLoopThisThread == eventLoop); if (fd >= eventLoop->setsize) return; aeFileEvent *fe = &eventLoop->events[fd]; if (fe->mask == AE_NONE) return; @@ -526,7 +535,7 @@ extern "C" long long aeCreateTimeEvent(aeEventLoop *eventLoop, long long millise aeTimeProc *proc, void *clientData, aeEventFinalizerProc *finalizerProc) { - AE_ASSERT(g_eventLoopThisThread == NULL || g_eventLoopThisThread == eventLoop); + serverAssert(g_eventLoopThisThread == NULL || g_eventLoopThisThread == eventLoop); long long id = eventLoop->timeEventNextId++; aeTimeEvent *te; @@ -548,7 +557,7 @@ extern "C" 
long long aeCreateTimeEvent(aeEventLoop *eventLoop, long long millise extern "C" int aeDeleteTimeEvent(aeEventLoop *eventLoop, long long id) { - AE_ASSERT(g_eventLoopThisThread == NULL || g_eventLoopThisThread == eventLoop); + serverAssert(g_eventLoopThisThread == NULL || g_eventLoopThisThread == eventLoop); aeTimeEvent *te = eventLoop->timeEventHead; while(te) { if (te->id == id) { @@ -573,7 +582,7 @@ extern "C" int aeDeleteTimeEvent(aeEventLoop *eventLoop, long long id) */ static aeTimeEvent *aeSearchNearestTimer(aeEventLoop *eventLoop) { - AE_ASSERT(g_eventLoopThisThread == NULL || g_eventLoopThisThread == eventLoop); + serverAssert(g_eventLoopThisThread == NULL || g_eventLoopThisThread == eventLoop); aeTimeEvent *te = eventLoop->timeEventHead; aeTimeEvent *nearest = NULL; @@ -589,7 +598,7 @@ static aeTimeEvent *aeSearchNearestTimer(aeEventLoop *eventLoop) /* Process time events */ static int processTimeEvents(aeEventLoop *eventLoop) { - std::unique_lock ulock(g_lock); + std::unique_lock ulock(g_lock, std::defer_lock); int processed = 0; aeTimeEvent *te; long long maxId; @@ -634,8 +643,10 @@ static int processTimeEvents(aeEventLoop *eventLoop) { eventLoop->timeEventHead = te->next; if (te->next) te->next->prev = te->prev; - if (te->finalizerProc) + if (te->finalizerProc) { + if (!ulock.owns_lock()) ulock.lock(); te->finalizerProc(eventLoop, te->clientData); + } zfree(te); te = next; continue; @@ -654,6 +665,7 @@ static int processTimeEvents(aeEventLoop *eventLoop) { if (now_sec > te->when_sec || (now_sec == te->when_sec && now_ms >= te->when_ms)) { + if (!ulock.owns_lock()) ulock.lock(); int retval; id = te->id; @@ -746,7 +758,7 @@ extern "C" void ProcessEventCore(aeEventLoop *eventLoop, aeFileEvent *fe, int ma * The function returns the number of events processed. 
*/ int aeProcessEvents(aeEventLoop *eventLoop, int flags) { - AE_ASSERT(g_eventLoopThisThread == NULL || g_eventLoopThisThread == eventLoop); + serverAssert(g_eventLoopThisThread == NULL || g_eventLoopThisThread == eventLoop); int processed = 0, numevents; /* Nothing to do? return ASAP */ @@ -870,9 +882,9 @@ void aeMain(aeEventLoop *eventLoop) { ulock.lock(); eventLoop->beforesleep(eventLoop); } - AE_ASSERT(!aeThreadOwnsLock()); // we should have relinquished it after processing + serverAssert(!aeThreadOwnsLock()); // we should have relinquished it after processing aeProcessEvents(eventLoop, AE_ALL_EVENTS|AE_CALL_AFTER_SLEEP); - AE_ASSERT(!aeThreadOwnsLock()); // we should have relinquished it after processing + serverAssert(!aeThreadOwnsLock()); // we should have relinquished it after processing } } diff --git a/src/ae.h b/src/ae.h index fdd444d3a..e77abb01f 100644 --- a/src/ae.h +++ b/src/ae.h @@ -160,6 +160,7 @@ const char *aeGetApiName(void); void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep, int flags); void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *aftersleep, int flags); int aeGetSetSize(aeEventLoop *eventLoop); +aeEventLoop *aeGetCurrentEventLoop(); int aeResizeSetSize(aeEventLoop *eventLoop, int setsize); void aeSetDontWait(aeEventLoop *eventLoop, int noWait); diff --git a/src/ae_evport.c b/src/ae_evport.c index 7b7fbe28e..744e1a6bb 100644 --- a/src/ae_evport.c +++ b/src/ae_evport.c @@ -232,7 +232,7 @@ static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int mask) { /* * ENOMEM is a potentially transient condition, but the kernel won't * generally return it unless things are really bad. EAGAIN indicates - * we've reached an resource limit, for which it doesn't make sense to + * we've reached a resource limit, for which it doesn't make sense to * retry (counter-intuitively). All other errors indicate a bug. In any * of these cases, the best we can do is to abort. 
*/ diff --git a/src/aelocker.h b/src/aelocker.h index e854f907b..777be4832 100644 --- a/src/aelocker.h +++ b/src/aelocker.h @@ -9,7 +9,7 @@ public: { } - void arm(client *c, bool fIfNeeded = false) // if a client is passed, then the client is already locked + void arm(client *c = nullptr, bool fIfNeeded = false) // if a client is passed, then the client is already locked { if (m_fArmed) return; diff --git a/src/aof.cpp b/src/aof.cpp index e37bc67d6..48ac1ac4d 100644 --- a/src/aof.cpp +++ b/src/aof.cpp @@ -566,7 +566,7 @@ sds catAppendOnlyGenericCommand(sds dst, int argc, robj **argv) { return dst; } -/* Create the sds representation of an PEXPIREAT command, using +/* Create the sds representation of a PEXPIREAT command, using * 'seconds' as time to live and 'cmd' to understand what command * we are translating into a PEXPIREAT. * @@ -642,7 +642,7 @@ sds catAppendOnlyExpireMemberAtCommand(sds buf, struct redisCommand *cmd, robj * when += mstime(); robj *argvNew[4]; - argvNew[0] = createStringObject("PEXPIREMEMBERAT",15); + argvNew[0] = shared.pexpirememberat; argvNew[1] = argv[1]; argvNew[2] = argv[2]; argvNew[3] = createStringObjectFromLongLong(when); @@ -752,6 +752,7 @@ struct client *createAOFClient(void) { c->querybuf_peak = 0; c->argc = 0; c->argv = NULL; + c->argv_len_sum = 0; c->bufpos = 0; c->flags = 0; c->fPendingAsyncWrite = FALSE; @@ -781,6 +782,7 @@ void freeFakeClientArgv(struct client *c) { for (j = 0; j < c->argc; j++) decrRefCount(c->argv[j]); zfree(c->argv); + c->argv_len_sum = 0; } void freeFakeClient(struct client *c) { @@ -1159,7 +1161,7 @@ int rewriteSortedSetObject(rio *r, robj *key, robj *o) { } } else if (o->encoding == OBJ_ENCODING_SKIPLIST) { zset *zs = (zset*)ptrFromObj(o); - dictIterator *di = dictGetIterator(zs->pdict); + dictIterator *di = dictGetIterator(zs->dict); dictEntry *de; while((de = dictNext(di)) != NULL) { @@ -1292,16 +1294,24 @@ int rewriteStreamObject(rio *r, robj *key, robj *o) { * the ID, the second is an array of 
field-value pairs. */ /* Emit the XADD ...fields... command. */ - if (rioWriteBulkCount(r,'*',3+numfields*2) == 0) return 0; - if (rioWriteBulkString(r,"XADD",4) == 0) return 0; - if (rioWriteBulkObject(r,key) == 0) return 0; - if (rioWriteBulkStreamID(r,&id) == 0) return 0; + if (!rioWriteBulkCount(r,'*',3+numfields*2) || + !rioWriteBulkString(r,"XADD",4) || + !rioWriteBulkObject(r,key) || + !rioWriteBulkStreamID(r,&id)) + { + streamIteratorStop(&si); + return 0; + } while(numfields--) { unsigned char *field, *value; int64_t field_len, value_len; streamIteratorGetField(&si,&field,&value,&field_len,&value_len); - if (rioWriteBulkString(r,(char*)field,field_len) == 0) return 0; - if (rioWriteBulkString(r,(char*)value,value_len) == 0) return 0; + if (!rioWriteBulkString(r,(char*)field,field_len) || + !rioWriteBulkString(r,(char*)value,value_len)) + { + streamIteratorStop(&si); + return 0; + } } } } else { @@ -1309,22 +1319,30 @@ int rewriteStreamObject(rio *r, robj *key, robj *o) { * the key we are serializing is an empty string, which is possible * for the Stream type. */ id.ms = 0; id.seq = 1; - if (rioWriteBulkCount(r,'*',7) == 0) return 0; - if (rioWriteBulkString(r,"XADD",4) == 0) return 0; - if (rioWriteBulkObject(r,key) == 0) return 0; - if (rioWriteBulkString(r,"MAXLEN",6) == 0) return 0; - if (rioWriteBulkString(r,"0",1) == 0) return 0; - if (rioWriteBulkStreamID(r,&id) == 0) return 0; - if (rioWriteBulkString(r,"x",1) == 0) return 0; - if (rioWriteBulkString(r,"y",1) == 0) return 0; + if (!rioWriteBulkCount(r,'*',7) || + !rioWriteBulkString(r,"XADD",4) || + !rioWriteBulkObject(r,key) || + !rioWriteBulkString(r,"MAXLEN",6) || + !rioWriteBulkString(r,"0",1) || + !rioWriteBulkStreamID(r,&id) || + !rioWriteBulkString(r,"x",1) || + !rioWriteBulkString(r,"y",1)) + { + streamIteratorStop(&si); + return 0; + } } /* Append XSETID after XADD, make sure lastid is correct, * in case of XDEL lastid. 
*/ - if (rioWriteBulkCount(r,'*',3) == 0) return 0; - if (rioWriteBulkString(r,"XSETID",6) == 0) return 0; - if (rioWriteBulkObject(r,key) == 0) return 0; - if (rioWriteBulkStreamID(r,&s->last_id) == 0) return 0; + if (!rioWriteBulkCount(r,'*',3) || + !rioWriteBulkString(r,"XSETID",6) || + !rioWriteBulkObject(r,key) || + !rioWriteBulkStreamID(r,&s->last_id)) + { + streamIteratorStop(&si); + return 0; + } /* Create all the stream consumer groups. */ @@ -1335,12 +1353,17 @@ int rewriteStreamObject(rio *r, robj *key, robj *o) { while(raxNext(&ri)) { streamCG *group = (streamCG*)ri.data; /* Emit the XGROUP CREATE in order to create the group. */ - if (rioWriteBulkCount(r,'*',5) == 0) return 0; - if (rioWriteBulkString(r,"XGROUP",6) == 0) return 0; - if (rioWriteBulkString(r,"CREATE",6) == 0) return 0; - if (rioWriteBulkObject(r,key) == 0) return 0; - if (rioWriteBulkString(r,(char*)ri.key,ri.key_len) == 0) return 0; - if (rioWriteBulkStreamID(r,&group->last_id) == 0) return 0; + if (!rioWriteBulkCount(r,'*',5) || + !rioWriteBulkString(r,"XGROUP",6) || + !rioWriteBulkString(r,"CREATE",6) || + !rioWriteBulkObject(r,key) || + !rioWriteBulkString(r,(char*)ri.key,ri.key_len) || + !rioWriteBulkStreamID(r,&group->last_id)) + { + raxStop(&ri); + streamIteratorStop(&si); + return 0; + } /* Generate XCLAIMs for each consumer that happens to * have pending entries. Empty consumers have no semantical @@ -1361,6 +1384,10 @@ int rewriteStreamObject(rio *r, robj *key, robj *o) { ri.key_len,consumer, ri_pel.key,nack) == 0) { + raxStop(&ri_pel); + raxStop(&ri_cons); + raxStop(&ri); + streamIteratorStop(&si); return 0; } } @@ -1415,7 +1442,7 @@ int rewriteAppendOnlyFileRio(rio *aof) { for (j = 0; j < cserver.dbnum; j++) { char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n"; redisDb *db = g_pserver->db+j; - dict *d = db->pdict; + dict *d = db->dict; if (dictSize(d) == 0) continue; di = dictGetSafeIterator(d); @@ -1502,7 +1529,7 @@ werr: * are inserted using a single command. 
*/ int rewriteAppendOnlyFile(char *filename) { rio aof; - FILE *fp; + FILE *fp = NULL; char tmpfile[256]; char byte; int nodata = 0; @@ -1580,9 +1607,10 @@ int rewriteAppendOnlyFile(char *filename) { goto werr; /* Make sure data will not remain on the OS's output buffers */ - if (fflush(fp) == EOF) goto werr; - if (fsync(fileno(fp)) == -1) goto werr; - if (fclose(fp) == EOF) goto werr; + if (fflush(fp)) goto werr; + if (fsync(fileno(fp))) goto werr; + if (fclose(fp)) { fp = NULL; goto werr; } + fp = NULL; /* Use RENAME to make sure the DB file is changed atomically only * if the generate DB file is ok. */ @@ -1598,7 +1626,7 @@ int rewriteAppendOnlyFile(char *filename) { werr: serverLog(LL_WARNING,"Write error writing append only file on disk: %s", strerror(errno)); - fclose(fp); + if (fp) fclose(fp); unlink(tmpfile); stopSaving(0); return C_ERR; @@ -1712,7 +1740,7 @@ int rewriteAppendOnlyFileBackground(void) { if (hasActiveChildProcess()) return C_ERR; if (aofCreatePipes() != C_OK) return C_ERR; openChildInfoPipe(); - if ((childpid = redisFork()) == 0) { + if ((childpid = redisFork(CHILD_TYPE_AOF)) == 0) { char tmpfile[256]; /* Child */ @@ -1720,7 +1748,7 @@ int rewriteAppendOnlyFileBackground(void) { redisSetCpuAffinity(g_pserver->aof_rewrite_cpulist); snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) getpid()); if (rewriteAppendOnlyFile(tmpfile) == C_OK) { - sendChildCOWInfo(CHILD_INFO_TYPE_AOF, "AOF rewrite"); + sendChildCOWInfo(CHILD_TYPE_AOF, "AOF rewrite"); exitFromChild(0); } else { exitFromChild(1); @@ -1740,6 +1768,7 @@ int rewriteAppendOnlyFileBackground(void) { g_pserver->aof_rewrite_scheduled = 0; g_pserver->aof_rewrite_time_start = time(NULL); g_pserver->aof_child_pid = childpid; + updateDictResizePolicy(); /* We set appendseldb to -1 in order to force the next call to the * feedAppendOnlyFile() to issue a SELECT command, so the differences * accumulated by the parent into g_pserver->aof_rewrite_buf will start @@ -1769,10 +1798,10 @@ void 
aofRemoveTempFile(pid_t childpid) { char tmpfile[256]; snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) childpid); - unlink(tmpfile); + bg_unlink(tmpfile); snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) childpid); - unlink(tmpfile); + bg_unlink(tmpfile); } /* Update the g_pserver->aof_current_size field explicitly using stat(2) @@ -1927,7 +1956,7 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) { "Background AOF rewrite terminated with error"); } else { /* SIGUSR1 is whitelisted, so we have a way to kill a child without - * tirggering an error condition. */ + * triggering an error condition. */ if (bysignal != SIGUSR1) g_pserver->aof_lastbgrewrite_status = C_ERR; diff --git a/src/asciilogo.h b/src/asciilogo.h index 1cbcce142..43ad70e79 100644 --- a/src/asciilogo.h +++ b/src/asciilogo.h @@ -28,14 +28,21 @@ */ const char *ascii_logo = -" \n" -" \n" -" KeyDB %s (%s/%d) %s bit\n" -" \n" -" Running in %s mode\n" -" Port: %d\n" -" PID: %ld\n" -" \n" -" %s\n" -" \n" +" \n" +" _ \n" +" _-(+)-_ \n" +" _-- / \\ --_ \n" +" _-- / \\ --_ KeyDB %s (%s/%d) %s bit \n" +" __-- / \\ --__ \n" +" (+) _ / \\ _ (+) Running in %s mode\n" +" | -- / \\ -- | Port: %d\n" +" | /--_ _ _--\\ | PID: %ld\n" +" | / -(+)- \\ | \n" +" | / | \\ | https://docs.keydb.dev \n" +" | / | \\ | \n" +" | / | \\ | \n" +" (+)_ -- -- -- | -- -- -- _(+) \n" +" --_ | _-- \n" +" --_ | _-- \n" +" -(+)- %s\n" " \n"; diff --git a/src/atomicvar.h b/src/atomicvar.h index 160056cd7..ecd26ad70 100644 --- a/src/atomicvar.h +++ b/src/atomicvar.h @@ -21,7 +21,7 @@ * * Never use return value from the macros, instead use the AtomicGetIncr() * if you need to get the current value and increment it atomically, like - * in the followign example: + * in the following example: * * long oldvalue; * atomicGetIncr(myvar,oldvalue,1); diff --git a/src/bio.cpp b/src/bio.cpp index b3e9d0927..2c28e57a0 100644 --- a/src/bio.cpp +++ b/src/bio.cpp @@ -168,10 +168,7 @@ void *bioProcessBackgroundJobs(void *arg) { 
redisSetCpuAffinity(g_pserver->bio_cpulist); - /* Make the thread killable at any time, so that bioKillThreads() - * can work reliably. */ - pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); - pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); + makeThreadKillable(); pthread_mutex_lock(&bio_mutex[type]); /* Block SIGALRM so we are sure that only the main thread will @@ -206,7 +203,7 @@ void *bioProcessBackgroundJobs(void *arg) { /* What we free changes depending on what arguments are set: * arg1 -> free the object at pointer. * arg2 & arg3 -> free two dictionaries (a Redis DB). - * only arg3 -> free the skiplist. */ + * only arg3 -> free the radix tree. */ if (job->arg1) lazyfreeFreeObjectFromBioThread((robj*)job->arg1); else if (job->arg2 && job->arg3) @@ -268,10 +265,11 @@ void bioKillThreads(void) { int err, j; for (j = 0; j < BIO_NUM_OPS; j++) { + if (bio_threads[j] == pthread_self()) continue; if (bio_threads[j] && pthread_cancel(bio_threads[j]) == 0) { if ((err = pthread_join(bio_threads[j],NULL)) != 0) { serverLog(LL_WARNING, - "Bio thread for job type #%d can be joined: %s", + "Bio thread for job type #%d can not be joined: %s", j, strerror(err)); } else { serverLog(LL_WARNING, diff --git a/src/bitops.cpp b/src/bitops.cpp index 3643a4f49..8b26114db 100644 --- a/src/bitops.cpp +++ b/src/bitops.cpp @@ -36,7 +36,7 @@ /* Count number of bits set in the binary array pointed by 's' and long * 'count' bytes. The implementation of this function is required to - * work with a input string length up to 512 MB. */ + * work with an input string length up to 512 MB. 
*/ size_t redisPopcount(const void *s, long count) { size_t bits = 0; unsigned char *p = (unsigned char*)s; @@ -107,7 +107,7 @@ long redisBitpos(const void *s, unsigned long count, int bit) { int found; /* Process whole words first, seeking for first word that is not - * all ones or all zeros respectively if we are lookig for zeros + * all ones or all zeros respectively if we are looking for zeros * or ones. This is much faster with large strings having contiguous * blocks of 1 or 0 bits compared to the vanilla bit per bit processing. * @@ -257,7 +257,7 @@ int64_t getSignedBitfield(unsigned char *p, uint64_t offset, uint64_t bits) { /* If the top significant bit is 1, propagate it to all the * higher bits for two's complement representation of signed * integers. */ - if (value & ((uint64_t)1 << (bits-1))) + if (bits < 64 && (value & ((uint64_t)1 << (bits-1)))) value |= ((uint64_t)-1) << bits; return value; } @@ -356,7 +356,6 @@ int checkSignedBitfieldOverflow(int64_t value, int64_t incr, uint64_t bits, int handle_wrap: { - uint64_t mask = ((uint64_t)-1) << bits; uint64_t msb = (uint64_t)1 << (bits-1); uint64_t a = value, b = incr, c; c = a+b; /* Perform addition as unsigned so that's defined. */ @@ -364,10 +363,13 @@ handle_wrap: /* If the sign bit is set, propagate to all the higher order * bits, to cap the negative value. If it's clear, mask to * the positive integer limit. */ - if (c & msb) { - c |= mask; - } else { - c &= ~mask; + if (bits < 64) { + uint64_t mask = ((uint64_t)-1) << bits; + if (c & msb) { + c |= mask; + } else { + c &= ~mask; + } } *limit = c; } @@ -496,7 +498,7 @@ robj *lookupStringForBitCommand(client *c, size_t maxbit) { * in 'len'. The user is required to pass (likely stack allocated) buffer * 'llbuf' of at least LONG_STR_SIZE bytes. Such a buffer is used in the case * the object is integer encoded in order to provide the representation - * without usign heap allocation. + * without using heap allocation. 
* * The function returns the pointer to the object array of bytes representing * the string it contains, that may be a pointer to 'llbuf' or to the @@ -831,11 +833,12 @@ void bitopCommand(client *c) { setKey(c,c->db,targetkey,o); notifyKeyspaceEvent(NOTIFY_STRING,"set",targetkey,c->db->id); decrRefCount(o); + g_pserver->dirty++; } else if (dbDelete(c->db,targetkey)) { signalModifiedKey(c,c->db,targetkey); notifyKeyspaceEvent(NOTIFY_GENERIC,"del",targetkey,c->db->id); + g_pserver->dirty++; } - g_pserver->dirty++; addReplyLongLong(c,maxlen); /* Return the output string length in bytes. */ } diff --git a/src/blocked.cpp b/src/blocked.cpp index 7f96fcfec..ecde109dc 100644 --- a/src/blocked.cpp +++ b/src/blocked.cpp @@ -53,7 +53,7 @@ * to 0, no timeout is processed). * It usually just needs to send a reply to the client. * - * When implementing a new type of blocking opeation, the implementation + * When implementing a new type of blocking operation, the implementation * should modify unblockClient() and replyToBlockedClientTimedOut() in order * to handle the btype-specific behavior of this two functions. * If the blocking operation waits for certain keys to change state, the @@ -128,7 +128,7 @@ void processUnblockedClients(int iel) { /* This function will schedule the client for reprocessing at a safe time. 
* - * This is useful when a client was blocked for some reason (blocking opeation, + * This is useful when a client was blocked for some reason (blocking operation, * CLIENT PAUSE, or whatever), because it may end with some accumulated query * buffer that needs to be processed ASAP: * @@ -188,9 +188,9 @@ void replyToBlockedClientTimedOut(client *c) { if (c->btype == BLOCKED_LIST || c->btype == BLOCKED_ZSET || c->btype == BLOCKED_STREAM) { - addReplyNullArrayAsync(c); + addReplyNullArray(c); } else if (c->btype == BLOCKED_WAIT) { - addReplyLongLongAsync(c,replicationCountAcksByOffset(c->bpop.reploffset)); + addReplyLongLong(c,replicationCountAcksByOffset(c->bpop.reploffset)); } else if (c->btype == BLOCKED_MODULE) { moduleBlockedClientTimedOut(c); } else { @@ -216,7 +216,7 @@ void disconnectAllBlockedClients(void) { fastlock_lock(&c->lock); if (c->flags & CLIENT_BLOCKED) { - addReplySdsAsync(c,sdsnew( + addReplySds(c,sdsnew( "-UNBLOCKED force unblock from blocking operation, " "instance state changed (master -> replica?)\r\n")); unblockClient(c); @@ -373,7 +373,7 @@ void serveClientsBlockedOnStreamKey(robj *o, readyList *rl) { /* If the group was not found, send an error * to the consumer. */ if (!group) { - addReplyErrorAsync(receiver, + addReplyError(receiver, "-NOGROUP the consumer group this client " "was blocked on no longer exists"); unblockClient(receiver); @@ -404,12 +404,12 @@ void serveClientsBlockedOnStreamKey(robj *o, readyList *rl) { * extracted from it. Wrapped in a single-item * array, since we have just one key. 
*/ if (receiver->resp == 2) { - addReplyArrayLenAsync(receiver,1); - addReplyArrayLenAsync(receiver,2); + addReplyArrayLen(receiver,1); + addReplyArrayLen(receiver,2); } else { - addReplyMapLenAsync(receiver,1); + addReplyMapLen(receiver,1); } - addReplyBulkAsync(receiver,rl->key); + addReplyBulk(receiver,rl->key); streamPropInfo pi = { rl->key, @@ -522,7 +522,7 @@ void handleClientsBlockedOnKeys(void) { serverTL->fixed_time_expire++; updateCachedTime(0); - /* Serve clients blocked on list key. */ + /* Serve clients blocked on the key. */ robj *o = lookupKeyWrite(rl->db,rl->key); if (o != NULL) { @@ -672,6 +672,13 @@ void signalKeyAsReady(redisDb *db, robj *key) { /* Key was already signaled? No need to queue it again. */ if (dictFind(db->ready_keys,key) != NULL) return; + if (key->getrefcount() == OBJ_STATIC_REFCOUNT) { + // Sometimes a key may be stack allocated, we'll need to dupe it + robj *newKey = createStringObject(szFromObj(key), sdslen(szFromObj(key))); + newKey->setrefcount(0); // Start with 0 but don't free + key = newKey; + } + /* Ok, we need to queue this key into g_pserver->ready_keys. 
*/ rl = (readyList*)zmalloc(sizeof(*rl), MALLOC_SHARED); rl->key = key; diff --git a/src/childinfo.cpp b/src/childinfo.cpp index 66ad8b8fd..77900ac11 100644 --- a/src/childinfo.cpp +++ b/src/childinfo.cpp @@ -76,11 +76,11 @@ void receiveChildInfo(void) { if (read(g_pserver->child_info_pipe[0],&g_pserver->child_info_data,wlen) == wlen && g_pserver->child_info_data.magic == CHILD_INFO_MAGIC) { - if (g_pserver->child_info_data.process_type == CHILD_INFO_TYPE_RDB) { + if (g_pserver->child_info_data.process_type == CHILD_TYPE_RDB) { g_pserver->stat_rdb_cow_bytes = g_pserver->child_info_data.cow_size; - } else if (g_pserver->child_info_data.process_type == CHILD_INFO_TYPE_AOF) { + } else if (g_pserver->child_info_data.process_type == CHILD_TYPE_AOF) { g_pserver->stat_aof_cow_bytes = g_pserver->child_info_data.cow_size; - } else if (g_pserver->child_info_data.process_type == CHILD_INFO_TYPE_MODULE) { + } else if (g_pserver->child_info_data.process_type == CHILD_TYPE_MODULE) { g_pserver->stat_module_cow_bytes = g_pserver->child_info_data.cow_size; } } diff --git a/src/cluster.cpp b/src/cluster.cpp index a91d0d62a..e60807180 100644 --- a/src/cluster.cpp +++ b/src/cluster.cpp @@ -77,6 +77,9 @@ uint64_t clusterGetMaxEpoch(void); int clusterBumpConfigEpochWithoutConsensus(void); void moduleCallClusterReceivers(const char *sender_id, uint64_t module_id, uint8_t type, const unsigned char *payload, uint32_t len); +#define RCVBUF_INIT_LEN 1024 +#define RCVBUF_MAX_PREALLOC (1<<20) /* 1MB */ + struct redisMaster *getFirstMaster() { serverAssert(listLength(g_pserver->masters) <= 1); @@ -394,7 +397,7 @@ void clusterSaveConfigOrDie(int do_fsync) { } } -/* Lock the cluster config using flock(), and leaks the file descritor used to +/* Lock the cluster config using flock(), and leaks the file descriptor used to * acquire the lock so that the file will be locked forever. 
* * This works because we always update nodes.conf with a new version @@ -435,7 +438,15 @@ int clusterLockConfig(char *filename) { return C_ERR; } /* Lock acquired: leak the 'fd' by not closing it, so that we'll retain the - * lock to the file as long as the process exists. */ + * lock to the file as long as the process exists. + * + * After fork, the child process will get the fd opened by the parent process, + * we need save `fd` to `cluster_config_file_lock_fd`, so that in redisFork(), + * it will be closed in the child process. + * If it is not closed, when the main process is killed -9, but the child process + * (redis-aof-rewrite) is still alive, the fd(lock) will still be held by the + * child process, and the main process will fail to get lock, means fail to start. */ + g_pserver->cluster_config_file_lock_fd = fd; #endif /* __sun */ return C_OK; @@ -490,6 +501,7 @@ void clusterInit(void) { /* Lock the cluster config file to make sure every node uses * its own nodes.conf. */ + g_pserver->cluster_config_file_lock_fd = -1; if (clusterLockConfig(g_pserver->cluster_configfile) == C_ERR) exit(1); @@ -557,13 +569,13 @@ void clusterInit(void) { /* Reset a node performing a soft or hard reset: * - * 1) All other nodes are forget. + * 1) All other nodes are forgotten. * 2) All the assigned / open slots are released. * 3) If the node is a slave, it turns into a master. - * 5) Only for hard reset: a new Node ID is generated. - * 6) Only for hard reset: currentEpoch and configEpoch are set to 0. - * 7) The new configuration is saved and the cluster state updated. - * 8) If the node was a slave, the whole data set is flushed away. */ + * 4) Only for hard reset: a new Node ID is generated. + * 5) Only for hard reset: currentEpoch and configEpoch are set to 0. + * 6) The new configuration is saved and the cluster state updated. + * 7) If the node was a slave, the whole data set is flushed away. 
*/ void clusterReset(int hard) { dictIterator *di; dictEntry *de; @@ -630,7 +642,8 @@ clusterLink *createClusterLink(clusterNode *node) { clusterLink *link = (clusterLink*)zmalloc(sizeof(*link), MALLOC_LOCAL); link->ctime = mstime(); link->sndbuf = sdsempty(); - link->rcvbuf = sdsempty(); + link->rcvbuf = (char*)zmalloc(link->rcvbuf_alloc = RCVBUF_INIT_LEN); + link->rcvbuf_len = 0; link->node = node; link->conn = NULL; return link; @@ -657,7 +670,7 @@ void freeClusterLink(clusterLink *link) { link->conn = NULL; } sdsfree(link->sndbuf); - sdsfree(link->rcvbuf); + zfree(link->rcvbuf); if (link->node) link->node->link = NULL; zfree(link); @@ -675,7 +688,7 @@ static void clusterConnAcceptHandler(connection *conn) { /* Create a link object we use to handle the connection. * It gets passed to the readable handler when data is available. - * Initiallly the link->node pointer is set to NULL as we don't know + * Initially the link->node pointer is set to NULL as we don't know * which node is, but the right node is references once we know the * node identity. */ link = createClusterLink(NULL); @@ -708,7 +721,17 @@ void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) { return; } - connection *conn = g_pserver->tls_cluster ? connCreateAcceptedTLS(cfd,1) : connCreateAcceptedSocket(cfd); + connection *conn = g_pserver->tls_cluster ? + connCreateAcceptedTLS(cfd, TLS_CLIENT_AUTH_YES) : connCreateAcceptedSocket(cfd); + + /* Make sure connection is not in an error state */ + if (connGetState(conn) != CONN_STATE_ACCEPTING) { + serverLog(LL_VERBOSE, + "Error creating an accepting connection for cluster node: %s", + connGetLastError(conn)); + connClose(conn); + return; + } connNonBlock(conn); connEnableTcpNoDelay(conn); @@ -729,6 +752,16 @@ void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) { } } +/* Return the approximated number of sockets we are using in order to + * take the cluster bus connections. 
*/ +unsigned long getClusterConnectionsCount(void) { + /* We decrement the number of nodes by one, since there is the + * "myself" node too in the list. Each node uses two file descriptors, + * one incoming and one outgoing, thus the multiplication by 2. */ + return g_pserver->cluster_enabled ? + ((dictSize(g_pserver->cluster->nodes)-1)*2) : 0; +} + /* ----------------------------------------------------------------------------- * Key space handling * -------------------------------------------------------------------------- */ @@ -1069,7 +1102,7 @@ uint64_t clusterGetMaxEpoch(void) { * 3) Persist the configuration on disk before sending packets with the * new configuration. * - * If the new config epoch is generated and assigend, C_OK is returned, + * If the new config epoch is generated and assigned, C_OK is returned, * otherwise C_ERR is returned (since the node has already the greatest * configuration around) and no operation is performed. * @@ -1142,7 +1175,7 @@ int clusterBumpConfigEpochWithoutConsensus(void) { * * In general we want a system that eventually always ends with different * masters having different configuration epochs whatever happened, since - * nothign is worse than a split-brain condition in a distributed system. + * nothing is worse than a split-brain condition in a distributed system. * * BEHAVIOR * @@ -1201,7 +1234,7 @@ void clusterHandleConfigEpochCollision(clusterNode *sender) { * entries from the black list. This is an O(N) operation but it is not a * problem since add / exists operations are called very infrequently and * the hash table is supposed to contain very little elements at max. - * However without the cleanup during long uptimes and with some automated + * However without the cleanup during long uptime and with some automated * node add/removal procedures, entries could accumulate. 
*/ void clusterBlacklistCleanup(void) { dictIterator *di; @@ -1292,8 +1325,11 @@ void markNodeAsFailingIfNeeded(clusterNode *node) { node->fail_time = mstime(); /* Broadcast the failing node name to everybody, forcing all the other - * reachable nodes to flag the node as FAIL. */ - if (nodeIsMaster(myself)) clusterSendFail(node->name); + * reachable nodes to flag the node as FAIL. + * We do that even if this node is a replica and not a master: anyway + * the failing state is triggered collecting failure reports from masters, + * so here the replica is only helping propagating this status. */ + clusterSendFail(node->name); clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE|CLUSTER_TODO_SAVE_CONFIG); } @@ -1352,12 +1388,12 @@ int clusterHandshakeInProgress(char *ip, int port, int cport) { return de != NULL; } -/* Start an handshake with the specified address if there is not one +/* Start a handshake with the specified address if there is not one * already in progress. Returns non-zero if the handshake was actually * started. On error zero is returned and errno is set to one of the * following values: * - * EAGAIN - There is already an handshake in progress for this address. + * EAGAIN - There is already a handshake in progress for this address. * EINVAL - IP or port are not valid. */ int clusterStartHandshake(char *ip, int port, int cport) { clusterNode *n; @@ -1738,7 +1774,7 @@ int clusterProcessPacket(clusterLink *link) { /* Perform sanity checks */ if (totlen < 16) return 1; /* At least signature, version, totlen, count. */ - if (totlen > sdslen(link->rcvbuf)) return 1; + if (totlen > link->rcvbuf_len) return 1; if (ntohs(hdr->ver) != CLUSTER_PROTO_VER) { /* Can't handle messages of different versions. 
*/ @@ -1786,7 +1822,7 @@ int clusterProcessPacket(clusterLink *link) { } else if (type == CLUSTERMSG_TYPE_MODULE) { uint32_t explen = sizeof(clusterMsg)-sizeof(union clusterMsgData); - explen += sizeof(clusterMsgDataPublish) - + explen += sizeof(clusterMsgModule) - 3 + ntohl(hdr->data.module.msg.len); if (totlen != explen) return 1; } @@ -1803,7 +1839,7 @@ int clusterProcessPacket(clusterLink *link) { if (sender) sender->data_received = now; if (sender && !nodeInHandshake(sender)) { - /* Update our curretEpoch if we see a newer epoch in the cluster. */ + /* Update our currentEpoch if we see a newer epoch in the cluster. */ senderCurrentEpoch = ntohu64(hdr->currentEpoch); senderConfigEpoch = ntohu64(hdr->configEpoch); if (senderCurrentEpoch > g_pserver->cluster->currentEpoch) @@ -2295,7 +2331,7 @@ void clusterReadHandler(connection *conn) { unsigned int readlen, rcvbuflen; while(1) { /* Read as long as there is data to read. */ - rcvbuflen = sdslen(link->rcvbuf); + rcvbuflen = link->rcvbuf_len; if (rcvbuflen < 8) { /* First, obtain the first 8 bytes to get the full message * length. */ @@ -2331,7 +2367,15 @@ void clusterReadHandler(connection *conn) { return; } else { /* Read data and recast the pointer to the new buffer. */ - link->rcvbuf = sdscatlen(link->rcvbuf,buf,nread); + size_t unused = link->rcvbuf_alloc - link->rcvbuf_len; + if ((size_t)nread > unused) { + size_t required = link->rcvbuf_len + nread; + /* If less than 1mb, grow to twice the needed size, if larger grow by 1mb. */ + link->rcvbuf_alloc = required < RCVBUF_MAX_PREALLOC ? required * 2: required + RCVBUF_MAX_PREALLOC; + link->rcvbuf = (char*)zrealloc(link->rcvbuf, link->rcvbuf_alloc); + } + memcpy(link->rcvbuf + link->rcvbuf_len, buf, nread); + link->rcvbuf_len += nread; hdr = (clusterMsg*) link->rcvbuf; rcvbuflen += nread; } @@ -2339,8 +2383,11 @@ void clusterReadHandler(connection *conn) { /* Total length obtained? Process this packet. 
*/ if (rcvbuflen >= 8 && rcvbuflen == ntohl(hdr->totlen)) { if (clusterProcessPacket(link)) { - sdsfree(link->rcvbuf); - link->rcvbuf = sdsempty(); + if (link->rcvbuf_alloc > RCVBUF_INIT_LEN) { + zfree(link->rcvbuf); + link->rcvbuf = (char*)zmalloc(link->rcvbuf_alloc = RCVBUF_INIT_LEN); + } + link->rcvbuf_len = 0; } else { return; /* Link no longer valid. */ } @@ -2421,7 +2468,7 @@ void clusterBuildMessageHdr(clusterMsg *hdr, int type) { * first byte is zero, they'll do auto discovery. */ memset(hdr->myip,0,NET_IP_STR_LEN); if (g_pserver->cluster_announce_ip) { - strncpy(hdr->myip,g_pserver->cluster_announce_ip,NET_IP_STR_LEN); + strncpy(hdr->myip,g_pserver->cluster_announce_ip,NET_IP_STR_LEN-1); hdr->myip[NET_IP_STR_LEN-1] = '\0'; } @@ -2498,7 +2545,7 @@ void clusterSetGossipEntry(clusterMsg *hdr, int i, clusterNode *n) { } /* Send a PING or PONG packet to the specified node, making sure to add enough - * gossip informations. */ + * gossip information. */ void clusterSendPing(clusterLink *link, int type) { unsigned char *buf; clusterMsg *hdr; @@ -2518,7 +2565,7 @@ void clusterSendPing(clusterLink *link, int type) { * node_timeout we exchange with each other node at least 4 packets * (we ping in the worst case in node_timeout/2 time, and we also * receive two pings from the host), we have a total of 8 packets - * in the node_timeout*2 falure reports validity time. So we have + * in the node_timeout*2 failure reports validity time. So we have * that, for a single PFAIL node, we can expect to receive the following * number of failure reports (in the specified window of time): * @@ -2545,7 +2592,7 @@ void clusterSendPing(clusterLink *link, int type) { * faster to propagate to go from PFAIL to FAIL state. */ int pfail_wanted = g_pserver->cluster->stats_pfail_nodes; - /* Compute the maxium totlen to allocate our buffer. We'll fix the totlen + /* Compute the maximum totlen to allocate our buffer. 
We'll fix the totlen * later according to the number of gossip sections we really were able * to put inside the packet. */ totlen = sizeof(clusterMsg)-sizeof(union clusterMsgData); @@ -2582,7 +2629,7 @@ void clusterSendPing(clusterLink *link, int type) { if (thisNode->flags & (CLUSTER_NODE_HANDSHAKE|CLUSTER_NODE_NOADDR) || (thisNode->link == NULL && thisNode->numslots == 0)) { - freshnodes--; /* Tecnically not correct, but saves CPU. */ + freshnodes--; /* Technically not correct, but saves CPU. */ continue; } @@ -3167,7 +3214,7 @@ void clusterHandleSlaveFailover(void) { } } - /* If the previous failover attempt timedout and the retry time has + /* If the previous failover attempt timeout and the retry time has * elapsed, we can setup a new one. */ if (auth_age > auth_retry_time) { g_pserver->cluster->failover_auth_time = mstime() + @@ -3273,7 +3320,7 @@ void clusterHandleSlaveFailover(void) { * * Slave migration is the process that allows a slave of a master that is * already covered by at least another slave, to "migrate" to a master that - * is orpaned, that is, left with no working slaves. + * is orphaned, that is, left with no working slaves. * ------------------------------------------------------------------------- */ /* This function is responsible to decide if this replica should be migrated @@ -3290,7 +3337,7 @@ void clusterHandleSlaveFailover(void) { * the nodes anyway, so we spend time into clusterHandleSlaveMigration() * if definitely needed. * - * The fuction is called with a pre-computed max_slaves, that is the max + * The function is called with a pre-computed max_slaves, that is the max * number of working (not in FAIL state) slaves for a single master. * * Additional conditions for migration are examined inside the function. @@ -3409,7 +3456,7 @@ void clusterHandleSlaveMigration(int max_slaves) { * data loss due to the asynchronous master-slave replication. 
* -------------------------------------------------------------------------- */ -/* Reset the manual failover state. This works for both masters and slavesa +/* Reset the manual failover state. This works for both masters and slaves * as all the state about manual failover is cleared. * * The function can be used both to initialize the manual failover state at @@ -3495,7 +3542,7 @@ void clusterCron(void) { * duplicating the string. This way later we can check if * the address really changed. */ prev_ip = zstrdup(prev_ip); - strncpy(myself->ip,g_pserver->cluster_announce_ip,NET_IP_STR_LEN); + strncpy(myself->ip,g_pserver->cluster_announce_ip,NET_IP_STR_LEN-1); myself->ip[NET_IP_STR_LEN-1] = '\0'; } else { myself->ip[0] = '\0'; /* Force autodetection. */ @@ -3701,7 +3748,7 @@ void clusterCron(void) { replicationAddMaster(myself->slaveof->ip, myself->slaveof->port); } - /* Abourt a manual failover if the timeout is reached. */ + /* Abort a manual failover if the timeout is reached. */ manualFailoverCheckTimeout(); if (nodeIsSlave(myself)) { @@ -3806,12 +3853,12 @@ int clusterNodeSetSlotBit(clusterNode *n, int slot) { * target for replicas migration, if and only if at least one of * the other masters has slaves right now. * - * Normally masters are valid targerts of replica migration if: + * Normally masters are valid targets of replica migration if: * 1. The used to have slaves (but no longer have). * 2. They are slaves failing over a master that used to have slaves. * * However new masters with slots assigned are considered valid - * migration tagets if the rest of the cluster is not a slave-less. + * migration targets if the rest of the cluster is not a slave-less. * * See https://github.com/antirez/redis/issues/3043 for more info. */ if (n->numslots == 1 && clusterMastersHaveSlaves()) @@ -3995,7 +4042,7 @@ void clusterUpdateState(void) { * A) If no other node is in charge according to the current cluster * configuration, we add these slots to our node. 
* B) If according to our config other nodes are already in charge for - * this lots, we set the slots as IMPORTING from our point of view + * this slots, we set the slots as IMPORTING from our point of view * in order to justify we have those slots, and in order to make * keydb-trib aware of the issue, so that it can try to fix it. * 2) If we find data in a DB different than DB0 we return C_ERR to @@ -4024,7 +4071,7 @@ int verifyClusterConfigWithData(void) { /* Make sure we only have keys in DB0. */ for (j = 1; j < cserver.dbnum; j++) { - if (dictSize(g_pserver->db[j].pdict)) return C_ERR; + if (dictSize(g_pserver->db[j].dict)) return C_ERR; } /* Check that all the slots we see populated memory have a corresponding @@ -4141,11 +4188,15 @@ sds clusterGenNodeDescription(clusterNode *node) { else ci = sdscatlen(ci," - ",3); + unsigned long long nodeEpoch = node->configEpoch; + if (nodeIsSlave(node) && node->slaveof) { + nodeEpoch = node->slaveof->configEpoch; + } /* Latency from the POV of this node, config epoch, link status */ ci = sdscatprintf(ci,"%lld %lld %llu %s", (long long) node->ping_sent, (long long) node->pong_received, - (unsigned long long) node->configEpoch, + nodeEpoch, (node->link || node->flags & CLUSTER_NODE_MYSELF) ? "connected" : "disconnected"); @@ -4401,7 +4452,7 @@ NULL clusterReplyMultiBulkSlots(c); } else if (!strcasecmp(szFromObj(c->argv[1]),"flushslots") && c->argc == 2) { /* CLUSTER FLUSHSLOTS */ - if (dictSize(g_pserver->db[0].pdict) != 0) { + if (dictSize(g_pserver->db[0].dict) != 0) { addReplyError(c,"DB must be empty to perform CLUSTER FLUSHSLOTS."); return; } @@ -4521,7 +4572,7 @@ NULL } /* If this slot is in migrating status but we have no keys * for it assigning the slot to another node will clear - * the migratig status. */ + * the migrating status. 
*/ if (countKeysInSlot(slot) == 0 && g_pserver->cluster->migrating_slots_to[slot]) g_pserver->cluster->migrating_slots_to[slot] = NULL; @@ -4734,7 +4785,7 @@ NULL * slots nor keys to accept to replicate some other node. * Slaves can switch to another master without issues. */ if (nodeIsMaster(myself) && - (myself->numslots != 0 || dictSize(g_pserver->db[0].pdict) != 0)) { + (myself->numslots != 0 || dictSize(g_pserver->db[0].dict) != 0)) { addReplyError(c, "To set a master the node must be empty and " "without assigned slots."); @@ -4866,7 +4917,7 @@ NULL g_pserver->cluster->currentEpoch = epoch; /* No need to fsync the config here since in the unlucky event * of a failure to persist the config, the conflict resolution code - * will assign an unique config to this node. */ + * will assign a unique config to this node. */ clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE| CLUSTER_TODO_SAVE_CONFIG); addReply(c,shared.ok); @@ -4891,7 +4942,7 @@ NULL /* Slaves can be reset while containing data, but not master nodes * that must be empty. */ - if (nodeIsMaster(myself) && dictSize(c->db->pdict) != 0) { + if (nodeIsMaster(myself) && dictSize(c->db->dict) != 0) { addReplyError(c,"CLUSTER RESET can't be called with " "master nodes containing keys"); return; @@ -4914,7 +4965,7 @@ void createDumpPayload(rio *payload, robj_roptr o, robj *key) { unsigned char buf[2]; uint64_t crc; - /* Serialize the object in a RDB-like format. It consist of an object type + /* Serialize the object in an RDB-like format. It consist of an object type * byte followed by the serialized object. This is understood by RESTORE. 
*/ rioInitWithBuffer(payload,sdsempty()); serverAssert(rdbSaveObjectType(payload,o)); @@ -4983,6 +5034,48 @@ void dumpCommand(client *c) { return; } +/* KEYDB.MVCCRESTORE key mvcc expire serialized-value */ +void mvccrestoreCommand(client *c) { + long long mvcc, expire; + robj *key = c->argv[1], *obj = nullptr; + int type; + + if (getLongLongFromObjectOrReply(c, c->argv[2], &mvcc, "Invalid MVCC Tstamp") != C_OK) + return; + + if (getLongLongFromObjectOrReply(c, c->argv[3], &expire, "Invalid expire") != C_OK) + return; + + /* Verify RDB version and data checksum unles the client is already a replica or master */ + if (!(c->flags & (CLIENT_SLAVE | CLIENT_MASTER))) { + if (verifyDumpPayload((unsigned char*)ptrFromObj(c->argv[4]),sdslen(szFromObj(c->argv[4]))) == C_ERR) + { + addReplyError(c,"DUMP payload version or checksum are wrong"); + return; + } + } + + rio payload; + rioInitWithBuffer(&payload,szFromObj(c->argv[4])); + if (((type = rdbLoadObjectType(&payload)) == -1) || + ((obj = rdbLoadObject(type,&payload,szFromObj(key), OBJ_MVCC_INVALID)) == NULL)) + { + addReplyError(c,"Bad data format"); + return; + } + setMvccTstamp(obj, mvcc); + + /* Create the key and set the TTL if any */ + dbMerge(c->db,key,obj,true); + if (expire >= 0) { + setExpire(c,c->db,key,nullptr,expire); + } + signalModifiedKey(c,c->db,key); + notifyKeyspaceEvent(NOTIFY_GENERIC,"restore",key,c->db->id); + addReply(c,shared.ok); + g_pserver->dirty++; +} + /* RESTORE key ttl serialized-value [REPLACE] */ void restoreCommand(client *c) { long long ttl, lfu_freq = -1, lru_idle = -1, lru_clock = -1; @@ -5025,7 +5118,8 @@ void restoreCommand(client *c) { } /* Make sure this key does not already exist here... 
*/ - if (!replace && lookupKeyWrite(c->db,c->argv[1]) != NULL) { + robj *key = c->argv[1]; + if (!replace && lookupKeyWrite(c->db,key) != NULL) { addReply(c,shared.busykeyerr); return; } @@ -5047,24 +5141,38 @@ void restoreCommand(client *c) { rioInitWithBuffer(&payload,szFromObj(c->argv[3])); if (((type = rdbLoadObjectType(&payload)) == -1) || - ((obj = rdbLoadObject(type,&payload,szFromObj(c->argv[1]), OBJ_MVCC_INVALID)) == NULL)) + ((obj = rdbLoadObject(type,&payload,szFromObj(key), OBJ_MVCC_INVALID)) == NULL)) { addReplyError(c,"Bad data format"); return; } /* Remove the old key if needed. */ - if (replace) dbDelete(c->db,c->argv[1]); + int deleted = 0; + if (replace) + deleted = dbDelete(c->db,key); + + if (ttl && !absttl) ttl+=mstime(); + if (ttl && checkAlreadyExpired(ttl)) { + if (deleted) { + rewriteClientCommandVector(c,2,shared.del,key); + signalModifiedKey(c,c->db,key); + notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id); + g_pserver->dirty++; + } + decrRefCount(obj); + addReply(c, shared.ok); + return; + } /* Create the key and set the TTL if any */ - dbAdd(c->db,c->argv[1],obj); + dbAdd(c->db,key,obj); if (ttl) { - if (!absttl) ttl+=mstime(); - setExpire(c,c->db,c->argv[1],nullptr,ttl); + setExpire(c,c->db,key,nullptr,ttl); } objectSetLRUOrLFU(obj,lfu_freq,lru_idle,lru_clock,1000); - signalModifiedKey(c,c->db,c->argv[1]); - notifyKeyspaceEvent(NOTIFY_GENERIC,"restore",c->argv[1],c->db->id); + signalModifiedKey(c,c->db,key); + notifyKeyspaceEvent(NOTIFY_GENERIC,"restore",key,c->db->id); addReply(c,shared.ok); g_pserver->dirty++; } @@ -5572,7 +5680,7 @@ void readwriteCommand(client *c) { * resharding in progress). * * On success the function returns the node that is able to serve the request. - * If the node is not 'myself' a redirection must be perfomed. The kind of + * If the node is not 'myself' a redirection must be performed. 
The kind of * redirection is specified setting the integer passed by reference * 'error_code', which will be set to CLUSTER_REDIR_ASK or * CLUSTER_REDIR_MOVED. @@ -5650,7 +5758,10 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in margc = ms->commands[i].argc; margv = ms->commands[i].argv; - keyindex = getKeysFromCommand(mcmd,margv,margc,&numkeys); + getKeysResult result = GETKEYS_RESULT_INIT; + numkeys = getKeysFromCommand(mcmd,margv,margc,&result); + keyindex = result.keys; + for (j = 0; j < numkeys; j++) { robj *thiskey = margv[keyindex[j]]; int thisslot = keyHashSlot((char*)ptrFromObj(thiskey), @@ -5668,7 +5779,7 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in * not trapped earlier in processCommand(). Report the same * error to the client. */ if (n == NULL) { - getKeysFreeResult(keyindex); + getKeysFreeResult(&result); if (error_code) *error_code = CLUSTER_REDIR_DOWN_UNBOUND; return NULL; @@ -5692,7 +5803,7 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in if (!equalStringObjects(firstkey,thiskey)) { if (slot != thisslot) { /* Error: multiple keys from different slots. */ - getKeysFreeResult(keyindex); + getKeysFreeResult(&result); if (error_code) *error_code = CLUSTER_REDIR_CROSS_SLOT; return NULL; @@ -5704,14 +5815,14 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in } } - /* Migarting / Improrting slot? Count keys we don't have. */ + /* Migrating / Importing slot? Count keys we don't have. */ if ((migrating_slot || importing_slot) && lookupKeyRead(&g_pserver->db[0],thiskey) == nullptr) { missing_keys++; } } - getKeysFreeResult(keyindex); + getKeysFreeResult(&result); } /* No key at all in command? 
then we can serve the request @@ -5773,10 +5884,12 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in } /* Handle the read-only client case reading from a slave: if this - * node is a slave and the request is about an hash slot our master + * node is a slave and the request is about a hash slot our master * is serving, we can reply without redirection. */ + int is_readonly_command = (c->cmd->flags & CMD_READONLY) || + (c->cmd->proc == execCommand && !(c->mstate.cmd_inv_flags & CMD_READONLY)); if (c->flags & CLIENT_READONLY && - (cmd->flags & CMD_READONLY || cmd->proc == evalCommand || + (is_readonly_command || cmd->proc == evalCommand || cmd->proc == evalShaCommand) && nodeIsSlave(myself) && myself->slaveof == n) @@ -5785,7 +5898,7 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in } /* Base case: just return the right node. However if this node is not - * myself, set error_code to MOVED since we need to issue a rediretion. */ + * myself, set error_code to MOVED since we need to issue a redirection. */ if (n != myself && error_code) *error_code = CLUSTER_REDIR_MOVED; return n; } @@ -5831,7 +5944,7 @@ void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_co * 3) The client may remain blocked forever (or up to the max timeout time) * waiting for a key change that will never happen. * - * If the client is found to be blocked into an hash slot this node no + * If the client is found to be blocked into a hash slot this node no * longer handles, the client is sent a redirection error, and the function * returns 1. Otherwise 0 is returned and no operation is performed. 
*/ int clusterRedirectBlockedClientIfNeeded(client *c) { @@ -5860,6 +5973,15 @@ int clusterRedirectBlockedClientIfNeeded(client *c) { int slot = keyHashSlot((char*)ptrFromObj(key), sdslen(szFromObj(key))); clusterNode *node = g_pserver->cluster->slots[slot]; + /* if the client is read-only and attempting to access key that our + * replica can handle, allow it. */ + if ((c->flags & CLIENT_READONLY) && + (c->lastcmd->flags & CMD_READONLY) && + nodeIsSlave(myself) && myself->slaveof == node) + { + node = myself; + } + /* We send an error and unblock the client if: * 1) The slot is unassigned, emitting a cluster down error. * 2) The slot is not handled by this node, nor being imported. */ diff --git a/src/cluster.h b/src/cluster.h index c6d714cf3..6dfe318e2 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -42,7 +42,9 @@ typedef struct clusterLink { mstime_t ctime; /* Link creation time */ connection *conn; /* Connection to remote node */ sds sndbuf; /* Packet send buffer */ - sds rcvbuf; /* Packet reception buffer */ + char *rcvbuf; /* Packet reception buffer */ + size_t rcvbuf_len; /* Used size of rcvbuf */ + size_t rcvbuf_alloc; /* Used size of rcvbuf */ struct clusterNode *node; /* Node related to this link if any, or NULL */ } clusterLink; @@ -55,8 +57,8 @@ typedef struct clusterLink { #define CLUSTER_NODE_HANDSHAKE 32 /* We have still to exchange the first ping */ #define CLUSTER_NODE_NOADDR 64 /* We don't know the address of this node */ #define CLUSTER_NODE_MEET 128 /* Send a MEET message to this node */ -#define CLUSTER_NODE_MIGRATE_TO 256 /* Master elegible for replica migration. */ -#define CLUSTER_NODE_NOFAILOVER 512 /* Slave will not try to failver. */ +#define CLUSTER_NODE_MIGRATE_TO 256 /* Master eligible for replica migration. */ +#define CLUSTER_NODE_NOFAILOVER 512 /* Slave will not try to failover. 
*/ #define CLUSTER_NODE_NULL_NAME "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" #define nodeIsMaster(n) ((n)->flags & CLUSTER_NODE_MASTER) @@ -168,10 +170,10 @@ typedef struct clusterState { clusterNode *mf_slave; /* Slave performing the manual failover. */ /* Manual failover state of slave. */ long long mf_master_offset; /* Master offset the slave needs to start MF - or zero if stil not received. */ + or zero if still not received. */ int mf_can_start; /* If non-zero signal that the manual failover can start requesting masters vote. */ - /* The followign fields are used by masters to take state on elections. */ + /* The following fields are used by masters to take state on elections. */ uint64_t lastVoteEpoch; /* Epoch of the last vote granted. */ int todo_before_sleep; /* Things to do in clusterBeforeSleep(). */ /* Messages received and sent by type. */ @@ -287,6 +289,7 @@ typedef struct { clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, int argc, int *hashslot, int *ask); int clusterRedirectBlockedClientIfNeeded(client *c); void clusterRedirectClient(client *c, clusterNode *n, int hashslot, int error_code); +unsigned long getClusterConnectionsCount(void); #ifdef __cplusplus } diff --git a/src/config.cpp b/src/config.cpp index ea8d7a507..1237c1d43 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -100,6 +100,21 @@ configEnum repl_diskless_load_enum[] = { {NULL, 0} }; +configEnum tls_auth_clients_enum[] = { + {"no", TLS_CLIENT_AUTH_NO}, + {"yes", TLS_CLIENT_AUTH_YES}, + {"optional", TLS_CLIENT_AUTH_OPTIONAL}, + {NULL, 0} +}; + +configEnum oom_score_adj_enum[] = { + {"no", OOM_SCORE_ADJ_NO}, + {"yes", OOM_SCORE_RELATIVE}, + {"relative", OOM_SCORE_RELATIVE}, + {"absolute", OOM_SCORE_ADJ_ABSOLUTE}, + {NULL, 0} +}; + /* Output buffer limits presets. 
*/ clientBufferLimitsConfig clientBufferLimitsDefaults[CLIENT_TYPE_OBUF_COUNT] = { {0, 0, 0}, /* normal */ @@ -107,6 +122,9 @@ clientBufferLimitsConfig clientBufferLimitsDefaults[CLIENT_TYPE_OBUF_COUNT] = { {1024*1024*32, 1024*1024*8, 60} /* pubsub */ }; +/* OOM Score defaults */ +int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT] = { 0, 200, 800 }; + /* Generic config infrastructure function pointers * int is_valid_fn(val, err) * Return 1 when val is valid, and 0 when invalid. @@ -289,6 +307,63 @@ void queueLoadModule(sds path, sds *argv, int argc) { listAddNodeTail(g_pserver->loadmodule_queue,loadmod); } +/* Parse an array of CONFIG_OOM_COUNT sds strings, validate and populate + * g_pserver->oom_score_adj_values if valid. + */ + +static int updateOOMScoreAdjValues(sds *args, const char **err, int apply) { + int i; + int values[CONFIG_OOM_COUNT]; + + for (i = 0; i < CONFIG_OOM_COUNT; i++) { + char *eptr; + long long val = strtoll(args[i], &eptr, 10); + + if (*eptr != '\0' || val < -2000 || val > 2000) { + if (err) *err = "Invalid oom-score-adj-values, elements must be between -2000 and 2000."; + return C_ERR; + } + + values[i] = val; + } + + /* Verify that the values make sense. If they don't omit a warning but + * keep the configuration, which may still be valid for privileged processes. + */ + + if (values[CONFIG_OOM_REPLICA] < values[CONFIG_OOM_MASTER] || + values[CONFIG_OOM_BGCHILD] < values[CONFIG_OOM_REPLICA]) { + serverLog(LOG_WARNING, + "The oom-score-adj-values configuration may not work for non-privileged processes! " + "Please consult the documentation."); + } + + /* Store values, retain previous config for rollback in case we fail. */ + int old_values[CONFIG_OOM_COUNT]; + for (i = 0; i < CONFIG_OOM_COUNT; i++) { + old_values[i] = g_pserver->oom_score_adj_values[i]; + g_pserver->oom_score_adj_values[i] = values[i]; + } + + /* When parsing the config file, we want to apply only when all is done. 
*/ + if (!apply) + return C_OK; + + /* Update */ + if (setOOMScoreAdj(-1) == C_ERR) { + /* Roll back */ + for (i = 0; i < CONFIG_OOM_COUNT; i++) + g_pserver->oom_score_adj_values[i] = old_values[i]; + + if (err) + *err = "Failed to apply oom-score-adj-values configuration, check server logs."; + + return C_ERR; + } + + return C_OK; +} + void initConfigValues() { for (standardConfig *config = configs; config->name != NULL; config++) { config->interface.init(config->data); @@ -411,7 +486,30 @@ void loadServerConfigFromString(char *config) { } else if ((!strcasecmp(argv[0],"slaveof") || !strcasecmp(argv[0],"replicaof")) && argc == 3) { slaveof_linenum = linenum; - replicationAddMaster(argv[1], atoi(argv[2])); + if (!strcasecmp(argv[1], "no") && !strcasecmp(argv[2], "one")) { + if (listLength(g_pserver->masters)) { + listIter li; + listNode *ln; + listRewind(g_pserver->masters, &li); + while ((ln = listNext(&li))) + { + struct redisMaster *mi = (struct redisMaster*)listNodeValue(ln); + zfree(mi->masterauth); + zfree(mi->masteruser); + zfree(mi->repl_transfer_tmpfile); + delete mi->staleKeyMap; + zfree(mi); + listDelNode(g_pserver->masters, ln); + } + } + continue; + } + char *ptr; + int port = strtol(argv[2], &ptr, 10); + if (port < 0 || port > 65535 || *ptr != '\0') { + err= "Invalid master port"; goto loaderr; + } + replicationAddMaster(argv[1], port); } else if (!strcasecmp(argv[0],"requirepass") && argc == 2) { if (strlen(argv[1]) > CONFIG_AUTHPASS_MAX_LEN) { err = "Password is longer than CONFIG_AUTHPASS_MAX_LEN"; @@ -422,11 +520,16 @@ void loadServerConfigFromString(char *config) { * additionally is to remember the cleartext password in this * case, for backward compatibility with Redis <= 5. 
*/ ACLSetUser(DefaultUser,"resetpass",-1); - sds aclop = sdscatprintf(sdsempty(),">%s",argv[1]); - ACLSetUser(DefaultUser,aclop,sdslen(aclop)); - sdsfree(aclop); sdsfree(g_pserver->requirepass); - g_pserver->requirepass = sdsnew(argv[1]); + g_pserver->requirepass = NULL; + if (sdslen(argv[1])) { + sds aclop = sdscatprintf(sdsempty(),">%s",argv[1]); + ACLSetUser(DefaultUser,aclop,sdslen(aclop)); + sdsfree(aclop); + g_pserver->requirepass = sdsnew(argv[1]); + } else { + ACLSetUser(DefaultUser,"nopass",-1); + } } else if (!strcasecmp(argv[0],"list-max-ziplist-entries") && argc == 2){ /* DEAD OPTION */ } else if (!strcasecmp(argv[0],"list-max-ziplist-value") && argc == 2) { @@ -480,6 +583,8 @@ void loadServerConfigFromString(char *config) { cserver.client_obuf_limits[type].hard_limit_bytes = hard; cserver.client_obuf_limits[type].soft_limit_bytes = soft; cserver.client_obuf_limits[type].soft_limit_seconds = soft_seconds; + } else if (!strcasecmp(argv[0],"oom-score-adj-values") && argc == 1 + CONFIG_OOM_COUNT) { + if (updateOOMScoreAdjValues(&argv[1], &err, 0) == C_ERR) goto loaderr; } else if (!strcasecmp(argv[0],"notify-keyspace-events") && argc == 2) { int flags = keyspaceEventsStringToFlags(argv[1]); @@ -541,7 +646,7 @@ void loadServerConfigFromString(char *config) { } } else if (!strcasecmp(argv[0], "active-replica") && argc == 2) { g_pserver->fActiveReplica = yesnotoi(argv[1]); - if (g_pserver->repl_slave_ro) { + if (g_pserver->fActiveReplica && g_pserver->repl_slave_ro) { g_pserver->repl_slave_ro = FALSE; serverLog(LL_NOTICE, "Notice: \"active-replica yes\" implies \"replica-read-only no\""); } @@ -577,7 +682,7 @@ void loadServerConfigFromString(char *config) { return; loaderr: - fprintf(stderr, "\n*** FATAL CONFIG FILE ERROR (Redis %s) ***\n", + fprintf(stderr, "\n*** FATAL CONFIG FILE ERROR (KeyDB %s) ***\n", KEYDB_REAL_VERSION); fprintf(stderr, "Reading the configuration file, at line %d\n", linenum); fprintf(stderr, ">>> '%s'\n", lines[i]); @@ -605,7 +710,8 
@@ void loadServerConfig(char *filename, char *options) { } else { if ((fp = fopen(filename,"r")) == NULL) { serverLog(LL_WARNING, - "Fatal error, can't open config file '%s'", filename); + "Fatal error, can't open config file '%s': %s", + filename, strerror(errno)); exit(1); } } @@ -686,11 +792,16 @@ void configSetCommand(client *c) { * additionally is to remember the cleartext password in this * case, for backward compatibility with Redis <= 5. */ ACLSetUser(DefaultUser,"resetpass",-1); - sds aclop = sdscatprintf(sdsempty(),">%s",(char*)ptrFromObj(o)); - ACLSetUser(DefaultUser,aclop,sdslen(aclop)); - sdsfree(aclop); sdsfree(g_pserver->requirepass); - g_pserver->requirepass = sdsnew(szFromObj(o)); + g_pserver->requirepass = NULL; + if (sdslen(szFromObj(o))) { + sds aclop = sdscatprintf(sdsempty(),">%s",(char*)ptrFromObj(o)); + ACLSetUser(DefaultUser,aclop,sdslen(aclop)); + sdsfree(aclop); + g_pserver->requirepass = sdsnew(szFromObj(o)); + } else { + ACLSetUser(DefaultUser,"nopass",-1); + } } config_set_special_field("save") { int vlen, j; sds *v = sdssplitlen(szFromObj(o),sdslen(szFromObj(o))," ",1,&vlen); @@ -775,6 +886,17 @@ void configSetCommand(client *c) { cserver.client_obuf_limits[type].soft_limit_seconds = soft_seconds; } sdsfreesplitres(v,vlen); + } config_set_special_field("oom-score-adj-values") { + int vlen; + int success = 1; + + sds *v = sdssplitlen(szFromObj(o), sdslen(szFromObj(o)), " ", 1, &vlen); + if (vlen != CONFIG_OOM_COUNT || updateOOMScoreAdjValues(v, &errstr, 1) == C_ERR) + success = 0; + + sdsfreesplitres(v, vlen); + if (!success) + goto badfmt; } config_set_special_field("notify-keyspace-events") { int flags = keyspaceEventsStringToFlags(szFromObj(o)); @@ -987,6 +1109,26 @@ void configGetCommand(client *c) { } matches++; } + if (stringmatch(pattern,"oom-score-adj-values",0)) { + sds buf = sdsempty(); + int j; + + for (j = 0; j < CONFIG_OOM_COUNT; j++) { + buf = sdscatprintf(buf,"%d", g_pserver->oom_score_adj_values[j]); + if (j != 
CONFIG_OOM_COUNT-1) + buf = sdscatlen(buf," ",1); + } + + addReplyBulkCString(c,"oom-score-adj-values"); + addReplyBulkCString(c,buf); + sdsfree(buf); + matches++; + } + if (stringmatch(pattern,"active-replica",1)) { + addReplyBulkCString(c,"active-replica"); + addReplyBulkCString(c, g_pserver->fActiveReplica ? "yes" : "no"); + matches++; + } setDeferredMapLen(c,replylen,matches); } @@ -1035,6 +1177,8 @@ struct rewriteConfigState { sds *lines; /* Current lines as an array of sds strings */ int has_tail; /* True if we already added directives that were not present in the original config file. */ + int force_all; /* True if we want all keywords to be force + written. Currently only used for testing. */ }; /* Append the new line to the current configuration state. */ @@ -1082,6 +1226,7 @@ struct rewriteConfigState *rewriteConfigReadOldFile(char *path) { state->numlines = 0; state->lines = NULL; state->has_tail = 0; + state->force_all = 0; if (fp == NULL) return state; /* Read the old file line by line, populate the state. */ @@ -1160,7 +1305,7 @@ void rewriteConfigRewriteLine(struct rewriteConfigState *state, const char *opti rewriteConfigMarkAsProcessed(state,option); - if (!l && !force) { + if (!l && !force && !state->force_all) { /* Option not used previously, and we are not forced to use it. */ sdsfree(line); sdsfree(o); @@ -1257,7 +1402,7 @@ void rewriteConfigNumericalOption(struct rewriteConfigState *state, const char * rewriteConfigRewriteLine(state,option,line,force); } -/* Rewrite a octal option. */ +/* Rewrite an octal option. 
*/ void rewriteConfigOctalOption(struct rewriteConfigState *state, const char *option, int value, int defvalue) { int force = value != defvalue; sds line = sdscatprintf(sdsempty(),"%s %o",option,value); @@ -1282,6 +1427,12 @@ void rewriteConfigSaveOption(struct rewriteConfigState *state) { int j; sds line; + /* In Sentinel mode we don't need to rewrite the save parameters */ + if (g_pserver->sentinel_mode) { + rewriteConfigMarkAsProcessed(state,"save"); + return; + } + /* Note that if there are no save parameters at all, all the current * config line with "save" will be detected as orphaned and deleted, * resulting into no RDB persistence as expected. */ @@ -1404,6 +1555,26 @@ void rewriteConfigClientoutputbufferlimitOption(struct rewriteConfigState *state } } +/* Rewrite the oom-score-adj-values option. */ +void rewriteConfigOOMScoreAdjValuesOption(struct rewriteConfigState *state) { + int force = 0; + int j; + const char *option = "oom-score-adj-values"; + sds line; + + line = sdsnew(option); + line = sdscatlen(line, " ", 1); + for (j = 0; j < CONFIG_OOM_COUNT; j++) { + if (g_pserver->oom_score_adj_values[j] != configOOMScoreAdjValuesDefaults[j]) + force = 1; + + line = sdscatprintf(line, "%d", g_pserver->oom_score_adj_values[j]); + if (j+1 != CONFIG_OOM_COUNT) + line = sdscatlen(line, " ", 1); + } + rewriteConfigRewriteLine(state,option,line,force); +} + /* Rewrite the bind option. */ void rewriteConfigBindOption(struct rewriteConfigState *state) { int force = 1; @@ -1509,60 +1680,62 @@ void rewriteConfigRemoveOrphaned(struct rewriteConfigState *state) { dictReleaseIterator(di); } -/* This function overwrites the old configuration file with the new content. - * - * 1) The old file length is obtained. - * 2) If the new content is smaller, padding is added. - * 3) A single write(2) call is used to replace the content of the file. - * 4) Later the file is truncated to the length of the new content. 
- * - * This way we are sure the file is left in a consistent state even if the - * process is stopped between any of the four operations. +/* This function replaces the old configuration file with the new content + * in an atomic manner. * * The function returns 0 on success, otherwise -1 is returned and errno - * set accordingly. */ + * is set accordingly. */ int rewriteConfigOverwriteFile(char *configfile, sds content) { - int retval = 0; - int fd = open(configfile,O_RDWR|O_CREAT,0644); - int content_size = sdslen(content), padding = 0; - struct stat sb; - sds content_padded; + int fd = -1; + int retval = -1; + char tmp_conffile[PATH_MAX]; + const char *tmp_suffix = ".XXXXXX"; + size_t offset = 0; + ssize_t written_bytes = 0; - /* 1) Open the old file (or create a new one if it does not - * exist), get the size. */ - if (fd == -1) return -1; /* errno set by open(). */ - if (fstat(fd,&sb) == -1) { - close(fd); - return -1; /* errno set by fstat(). */ + int tmp_path_len = snprintf(tmp_conffile, sizeof(tmp_conffile), "%s%s", configfile, tmp_suffix); + if (tmp_path_len <= 0 || (unsigned int)tmp_path_len >= sizeof(tmp_conffile)) { + serverLog(LL_WARNING, "Config file full path is too long"); + errno = ENAMETOOLONG; + return retval; } - /* 2) Pad the content at least match the old file size. */ - content_padded = sdsdup(content); - if (content_size < sb.st_size) { - /* If the old file was bigger, pad the content with - * a newline plus as many "#" chars as required. 
*/ - padding = sb.st_size - content_size; - content_padded = sdsgrowzero(content_padded,sb.st_size); - content_padded[content_size] = '\n'; - memset(content_padded+content_size+1,'#',padding-1); +#ifdef _GNU_SOURCE + fd = mkostemp(tmp_conffile, O_CLOEXEC); +#else + /* There's a theoretical chance here to leak the FD if a module thread forks & execv in the middle */ + fd = mkstemp(tmp_conffile); +#endif + + if (fd == -1) { + serverLog(LL_WARNING, "Could not create tmp config file (%s)", strerror(errno)); + return retval; } - /* 3) Write the new content using a single write(2). */ - if (write(fd,content_padded,strlen(content_padded)) == -1) { - retval = -1; - goto cleanup; + while (offset < sdslen(content)) { + written_bytes = write(fd, content + offset, sdslen(content) - offset); + if (written_bytes <= 0) { + if (errno == EINTR) continue; /* FD is blocking, no other retryable errors */ + serverLog(LL_WARNING, "Failed after writing (%zd) bytes to tmp config file (%s)", offset, strerror(errno)); + goto cleanup; + } + offset+=written_bytes; } - /* 4) Truncate the file to the right length if we used padding. */ - if (padding) { - if (ftruncate(fd,content_size) == -1) { - /* Non critical error... */ - } + if (fsync(fd)) + serverLog(LL_WARNING, "Could not sync tmp config file to disk (%s)", strerror(errno)); + else if (fchmod(fd, 0644) == -1) + serverLog(LL_WARNING, "Could not chmod config file (%s)", strerror(errno)); + else if (rename(tmp_conffile, configfile) == -1) + serverLog(LL_WARNING, "Could not rename tmp config file (%s)", strerror(errno)); + else { + retval = 0; + serverLog(LL_DEBUG, "Rewritten config file (%s) successfully", configfile); } cleanup: - sdsfree(content_padded); close(fd); + if (retval) unlink(tmp_conffile); return retval; } @@ -1572,15 +1745,18 @@ cleanup: * * Configuration parameters that are at their default value, unless already * explicitly included in the old configuration file, are not rewritten. 
+ * The force_all flag overrides this behavior and forces everything to be + * written. This is currently only used for testing purposes. * * On error -1 is returned and errno is set accordingly, otherwise 0. */ -int rewriteConfig(char *path) { +int rewriteConfig(char *path, int force_all) { struct rewriteConfigState *state; sds newcontent; int retval; /* Step 1: read the old config into our rewrite state. */ if ((state = rewriteConfigReadOldFile(path)) == NULL) return -1; + if (force_all) state->force_all = 1; /* Step 2: rewrite every single option, replacing or appending it inside * the rewrite state. */ @@ -1604,6 +1780,7 @@ int rewriteConfig(char *path) { rewriteConfigClientoutputbufferlimitOption(state); rewriteConfigYesNoOption(state,"active-replica",g_pserver->fActiveReplica,CONFIG_DEFAULT_ACTIVE_REPLICA); rewriteConfigStringOption(state, "version-override",KEYDB_SET_VERSION,KEYDB_REAL_VERSION); + rewriteConfigOOMScoreAdjValuesOption(state); /* Rewrite Sentinel config if in Sentinel mode. */ if (g_pserver->sentinel_mode) rewriteConfigSentinelOption(state); @@ -1767,7 +1944,7 @@ static int enumConfigSet(typeData data, sds value, int update, const char **err) } sdsrange(enumerr,0,-3); /* Remove final ", ". */ - strncpy(loadbuf, enumerr, LOADBUF_SIZE); + strncpy(loadbuf, enumerr, LOADBUF_SIZE-1); loadbuf[LOADBUF_SIZE - 1] = '\0'; sdsfree(enumerr); @@ -2072,7 +2249,7 @@ static int isValidAOFfilename(char *val, const char **err) { static int updateHZ(long long val, long long prev, const char **err) { UNUSED(prev); UNUSED(err); - /* Hz is more an hint from the user, so we accept values out of range + /* Hz is more a hint from the user, so we accept values out of range * but cap them to reasonable values. 
*/ g_pserver->config_hz = val; if (g_pserver->config_hz < CONFIG_MIN_HZ) g_pserver->config_hz = CONFIG_MIN_HZ; @@ -2090,7 +2267,7 @@ static int updateJemallocBgThread(int val, int prev, const char **err) { static int updateReplBacklogSize(long long val, long long prev, const char **err) { /* resizeReplicationBacklog sets g_pserver->repl_backlog_size, and relies on - * being able to tell when the size changes, so restore prev becore calling it. */ + * being able to tell when the size changes, so restore prev before calling it. */ UNUSED(err); g_pserver->repl_backlog_size = prev; resizeReplicationBacklog(val); @@ -2150,17 +2327,42 @@ static int updateMaxclients(long long val, long long prev, const char **err) { } return 0; } - for (int iel = 0; iel < MAX_EVENT_LOOPS; ++iel) + /* Change the SetSize for the current thread first. If any error, return the error message to the client, + * otherwise, continue to do the same for other threads */ + if ((unsigned int) aeGetSetSize(aeGetCurrentEventLoop()) < + g_pserver->maxclients + CONFIG_FDSET_INCR) { + if (aeResizeSetSize(aeGetCurrentEventLoop(), + g_pserver->maxclients + CONFIG_FDSET_INCR) == AE_ERR) + { + *err = "The event loop API used by Redis is not able to handle the specified number of clients"; + return 0; + } + serverLog(LL_DEBUG,"Successfully changed the setsize for current thread %d", ielFromEventLoop(aeGetCurrentEventLoop())); + } + + for (int iel = 0; iel < cserver.cthreads; ++iel) + { + if (g_pserver->rgthreadvar[iel].el == aeGetCurrentEventLoop()){ + continue; + } + if ((unsigned int) aeGetSetSize(g_pserver->rgthreadvar[iel].el) < g_pserver->maxclients + CONFIG_FDSET_INCR) { - if (aeResizeSetSize(g_pserver->rgthreadvar[iel].el, - g_pserver->maxclients + CONFIG_FDSET_INCR) == AE_ERR) - { - *err = "The event loop API used by Redis is not able to handle the specified number of clients"; + int res = aePostFunction(g_pserver->rgthreadvar[iel].el, [iel] { + if (aeResizeSetSize(g_pserver->rgthreadvar[iel].el, 
g_pserver->maxclients + CONFIG_FDSET_INCR) == AE_ERR) { + serverLog(LL_WARNING,"Failed to change the setsize for Thread %d", iel); + } + }); + + if (res != AE_OK){ + static char msg[128]; + sprintf(msg, "Failed to post the request to change setsize for Thread %d", iel); + *err = msg; return 0; } + serverLog(LL_DEBUG,"Successfully post the request to change the setsize for thread %d", iel); } } } @@ -2174,12 +2376,28 @@ static int validateMultiMasterNoForward(int val, const char **) { return 1; } +static int updateOOMScoreAdj(int val, int prev, const char **err) { + UNUSED(prev); + + if (val) { + if (setOOMScoreAdj(-1) == C_ERR) { + *err = "Failed to set current oom_score_adj. Check server logs."; + return 0; + } + } + + return 1; +} + #ifdef USE_OPENSSL static int updateTlsCfg(char *val, char *prev, const char **err) { UNUSED(val); UNUSED(prev); UNUSED(err); - if (tlsConfigure(&g_pserver->tls_ctx_config) == C_ERR) { + + /* If TLS is enabled, try to configure OpenSSL. */ + if ((g_pserver->tls_port || g_pserver->tls_replication || g_pserver->tls_cluster) + && tlsConfigure(&g_pserver->tls_ctx_config) == C_ERR) { *err = "Unable to update TLS configuration. 
Check server logs."; return 0; } @@ -2205,6 +2423,7 @@ standardConfig configs[] = { createBoolConfig("daemonize", NULL, IMMUTABLE_CONFIG, cserver.daemonize, 0, NULL, NULL), createBoolConfig("lua-replicate-commands", NULL, MODIFIABLE_CONFIG, g_pserver->lua_always_replicate_commands, 1, NULL, NULL), createBoolConfig("always-show-logo", NULL, IMMUTABLE_CONFIG, g_pserver->always_show_logo, 0, NULL, NULL), + createBoolConfig("enable-motd", NULL, IMMUTABLE_CONFIG, cserver.enable_motd, 1, NULL, NULL), createBoolConfig("protected-mode", NULL, MODIFIABLE_CONFIG, g_pserver->protected_mode, 1, NULL, NULL), createBoolConfig("rdbcompression", NULL, MODIFIABLE_CONFIG, g_pserver->rdb_compression, 1, NULL, NULL), createBoolConfig("rdb-del-sync-files", NULL, MODIFIABLE_CONFIG, g_pserver->rdb_del_sync_files, 0, NULL, NULL), @@ -2254,6 +2473,7 @@ standardConfig configs[] = { createStringConfig("bio_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->bio_cpulist, NULL, NULL, NULL), createStringConfig("aof_rewrite_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->aof_rewrite_cpulist, NULL, NULL, NULL), createStringConfig("bgsave_cpulist", NULL, IMMUTABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->bgsave_cpulist, NULL, NULL, NULL), + createStringConfig("ignore-warnings", NULL, MODIFIABLE_CONFIG, ALLOW_EMPTY_STRING, g_pserver->ignore_warnings, "ARM64-COW-BUG", NULL, NULL), /* Enum Configs */ createEnumConfig("supervised", NULL, IMMUTABLE_CONFIG, supervised_mode_enum, cserver.supervised_mode, SUPERVISED_NONE, NULL, NULL), @@ -2262,6 +2482,7 @@ standardConfig configs[] = { createEnumConfig("loglevel", NULL, MODIFIABLE_CONFIG, loglevel_enum, cserver.verbosity, LL_NOTICE, NULL, NULL), createEnumConfig("maxmemory-policy", NULL, MODIFIABLE_CONFIG, maxmemory_policy_enum, g_pserver->maxmemory_policy, MAXMEMORY_NO_EVICTION, NULL, NULL), createEnumConfig("appendfsync", NULL, MODIFIABLE_CONFIG, aof_fsync_enum, g_pserver->aof_fsync, AOF_FSYNC_EVERYSEC, NULL, NULL), + 
createEnumConfig("oom-score-adj", NULL, MODIFIABLE_CONFIG, oom_score_adj_enum, g_pserver->oom_score_adj, OOM_SCORE_ADJ_NO, NULL, updateOOMScoreAdj), /* Integer configs */ createIntConfig("databases", NULL, IMMUTABLE_CONFIG, 1, INT_MAX, cserver.dbnum, 16, INTEGER_CONFIG, NULL, NULL), @@ -2298,8 +2519,10 @@ standardConfig configs[] = { createIntConfig("replica-quorum", NULL, MODIFIABLE_CONFIG, -1, INT_MAX, g_pserver->repl_quorum, -1, INTEGER_CONFIG, NULL, NULL), /* Unsigned int configs */ createUIntConfig("maxclients", NULL, MODIFIABLE_CONFIG, 1, UINT_MAX, g_pserver->maxclients, 10000, INTEGER_CONFIG, NULL, updateMaxclients), + createUIntConfig("loading-process-events-interval-keys", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, g_pserver->loading_process_events_interval_keys, 8192, MEMORY_CONFIG, NULL, NULL), /* Unsigned Long configs */ + createULongConfig("loading-process-events-interval-bytes", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, g_pserver->loading_process_events_interval_bytes, 2*1024*1024, MEMORY_CONFIG, NULL, NULL), createULongConfig("active-defrag-max-scan-fields", NULL, MODIFIABLE_CONFIG, 1, LONG_MAX, cserver.active_defrag_max_scan_fields, 1000, INTEGER_CONFIG, NULL, NULL), /* Default: keys with more than 1000 fields will be processed separately */ createULongConfig("slowlog-max-len", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, g_pserver->slowlog_max_len, 128, INTEGER_CONFIG, NULL, NULL), createULongConfig("acllog-max-len", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, g_pserver->acllog_max_len, 128, INTEGER_CONFIG, NULL, NULL), @@ -2309,7 +2532,7 @@ standardConfig configs[] = { createLongLongConfig("cluster-node-timeout", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, g_pserver->cluster_node_timeout, 15000, INTEGER_CONFIG, NULL, NULL), createLongLongConfig("slowlog-log-slower-than", NULL, MODIFIABLE_CONFIG, -1, LLONG_MAX, g_pserver->slowlog_log_slower_than, 10000, INTEGER_CONFIG, NULL, NULL), createLongLongConfig("latency-monitor-threshold", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, 
g_pserver->latency_monitor_threshold, 0, INTEGER_CONFIG, NULL, NULL), - createLongLongConfig("proto-max-bulk-len", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, g_pserver->proto_max_bulk_len, 512ll*1024*1024, MEMORY_CONFIG, NULL, NULL), /* Bulk request max size */ + createLongLongConfig("proto-max-bulk-len", NULL, MODIFIABLE_CONFIG, 1024*1024, LLONG_MAX, g_pserver->proto_max_bulk_len, 512ll*1024*1024, MEMORY_CONFIG, NULL, NULL), /* Bulk request max size */ createLongLongConfig("stream-node-max-entries", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, g_pserver->stream_node_max_entries, 100, INTEGER_CONFIG, NULL, NULL), createLongLongConfig("repl-backlog-size", NULL, MODIFIABLE_CONFIG, 1, LLONG_MAX, g_pserver->repl_backlog_size, 1024*1024, MEMORY_CONFIG, NULL, updateReplBacklogSize), /* Default: 1mb */ @@ -2337,7 +2560,7 @@ standardConfig configs[] = { createIntConfig("tls-session-cache-timeout", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, g_pserver->tls_ctx_config.session_cache_timeout, 300, INTEGER_CONFIG, NULL, updateTlsCfgInt), createBoolConfig("tls-cluster", NULL, MODIFIABLE_CONFIG, g_pserver->tls_cluster, 0, NULL, NULL), createBoolConfig("tls-replication", NULL, MODIFIABLE_CONFIG, g_pserver->tls_replication, 0, NULL, NULL), - createBoolConfig("tls-auth-clients", NULL, MODIFIABLE_CONFIG, g_pserver->tls_auth_clients, 1, NULL, NULL), + createEnumConfig("tls-auth-clients", NULL, MODIFIABLE_CONFIG, tls_auth_clients_enum, g_pserver->tls_auth_clients, TLS_CLIENT_AUTH_YES, NULL, NULL), createBoolConfig("tls-prefer-server-ciphers", NULL, MODIFIABLE_CONFIG, g_pserver->tls_ctx_config.prefer_server_ciphers, 0, NULL, updateTlsCfgBool), createBoolConfig("tls-session-caching", NULL, MODIFIABLE_CONFIG, g_pserver->tls_ctx_config.session_caching, 1, NULL, updateTlsCfgBool), createStringConfig("tls-cert-file", NULL, MODIFIABLE_CONFIG, EMPTY_STRING_IS_NULL, g_pserver->tls_ctx_config.cert_file, NULL, NULL, updateTlsCfg), @@ -2387,7 +2610,7 @@ NULL addReplyError(c,"The server is running without a config 
file"); return; } - if (rewriteConfig(cserver.configfile) == -1) { + if (rewriteConfig(cserver.configfile, 0) == -1) { serverLog(LL_WARNING,"CONFIG REWRITE failed: %s", strerror(errno)); addReplyErrorFormat(c,"Rewriting config file: %s", strerror(errno)); } else { diff --git a/src/config.h b/src/config.h index 022cb0033..51fe6e253 100644 --- a/src/config.h +++ b/src/config.h @@ -54,6 +54,7 @@ #define HAVE_PROC_MAPS 1 #define HAVE_PROC_SMAPS 1 #define HAVE_PROC_SOMAXCONN 1 +#define HAVE_PROC_OOM_SCORE_ADJ 1 #endif /* Test for task_info() */ @@ -63,7 +64,7 @@ /* Test for backtrace() */ #if defined(__APPLE__) || (defined(__linux__) && defined(__GLIBC__)) || \ - defined(__FreeBSD__) || (defined(__OpenBSD__) && defined(USE_BACKTRACE))\ + defined(__FreeBSD__) || ((defined(__OpenBSD__) || defined(__NetBSD__)) && defined(USE_BACKTRACE))\ || defined(__DragonFly__) #define HAVE_BACKTRACE 1 #endif @@ -123,6 +124,10 @@ #define USE_SETPROCTITLE #endif +#if defined(__HAIKU__) +#define ESOCKTNOSUPPORT 0 +#endif + #if ((defined __linux && defined(__GLIBC__)) || defined __APPLE__) #define USE_SETPROCTITLE #define INIT_SETPROCTITLE_REPLACEMENT @@ -171,7 +176,7 @@ void setproctitle(const char *fmt, ...); #endif /* BYTE_ORDER */ /* Sometimes after including an OS-specific header that defines the - * endianess we end with __BYTE_ORDER but not with BYTE_ORDER that is what + * endianness we end with __BYTE_ORDER but not with BYTE_ORDER that is what * the Redis code uses. In this case let's define everything without the * underscores. 
*/ #ifndef BYTE_ORDER @@ -241,7 +246,7 @@ void setproctitle(const char *fmt, ...); #define redis_set_thread_title(name) pthread_set_name_np(pthread_self(), name) #elif defined __NetBSD__ #include -#define redis_set_thread_title(name) pthread_setname_np(pthread_self(), name, NULL) +#define redis_set_thread_title(name) pthread_setname_np(pthread_self(), "%s", name) #else #if (defined __APPLE__ && defined(MAC_OS_X_VERSION_10_7)) #ifdef __cplusplus @@ -257,7 +262,7 @@ int pthread_setname_np(const char *name); #endif /* Check if we can use setcpuaffinity(). */ -#if (defined __linux || defined __NetBSD__ || defined __FreeBSD__) +#if (defined __linux || defined __NetBSD__ || defined __FreeBSD__ || defined __DragonFly__) #define USE_SETCPUAFFINITY #ifdef __cplusplus extern "C" diff --git a/src/connection.cpp b/src/connection.cpp index defd5eb9d..8ba75264e 100644 --- a/src/connection.cpp +++ b/src/connection.cpp @@ -85,8 +85,12 @@ connection *connCreateSocket() { /* Create a new socket-type connection that is already associated with * an accepted connection. * - * The socket is not read for I/O until connAccept() was called and + * The socket is not ready for I/O until connAccept() was called and * invoked the connection-level accept handler. + * + * Callers should use connGetState() and verify the created connection + * is not in an error state (which is not possible for a socket connection, + * but could but possible with other protocols). */ connection *connCreateAcceptedSocket(int fd) { connection *conn = connCreateSocket(); @@ -164,7 +168,12 @@ static int connSocketWrite(connection *conn, const void *data, size_t data_len) int ret = write(conn->fd, data, data_len); if (ret < 0 && errno != EAGAIN) { conn->last_errno = errno; - conn->state.store(CONN_STATE_ERROR, std::memory_order_relaxed); + + /* Don't overwrite the state of a connection that is not already + * connected, not to mess with handler callbacks. 
+ */ + ConnectionState expected = CONN_STATE_CONNECTED; + conn->state.compare_exchange_strong(expected, CONN_STATE_ERROR, std::memory_order_relaxed); } return ret; @@ -176,7 +185,12 @@ static int connSocketRead(connection *conn, void *buf, size_t buf_len) { conn->state.store(CONN_STATE_CLOSED, std::memory_order_release); } else if (ret < 0 && errno != EAGAIN) { conn->last_errno = errno; - conn->state.store(CONN_STATE_ERROR, std::memory_order_release); + + /* Don't overwrite the state of a connection that is not already + * connected, not to mess with handler callbacks. + */ + ConnectionState expected = CONN_STATE_CONNECTED; + conn->state.compare_exchange_strong(expected, CONN_STATE_ERROR, std::memory_order_release); } return ret; @@ -256,8 +270,9 @@ static void connSocketEventHandler(struct aeEventLoop *el, int fd, void *clientD if (conn->state.load(std::memory_order_relaxed) == CONN_STATE_CONNECTING && (mask & AE_WRITABLE) && conn->conn_handler) { - if (connGetSocketError(conn)) { - conn->last_errno = errno; + int conn_error = connGetSocketError(conn); + if (conn_error) { + conn->last_errno = conn_error; conn->state.store(CONN_STATE_ERROR, std::memory_order_release); } else { conn->state.store(CONN_STATE_CONNECTED, std::memory_order_release); @@ -334,6 +349,11 @@ static ssize_t connSocketSyncReadLine(connection *conn, char *ptr, ssize_t size, return syncReadLine(conn->fd, ptr, size, timeout); } +static int connSocketGetType(struct connection *conn) { + (void) conn; + + return CONN_TYPE_SOCKET; +} ConnectionType CT_Socket = { connSocketEventHandler, @@ -348,7 +368,9 @@ ConnectionType CT_Socket = { connSocketBlockingConnect, connSocketSyncWrite, connSocketSyncRead, - connSocketSyncReadLine + connSocketSyncReadLine, + nullptr, + connSocketGetType }; diff --git a/src/connection.h b/src/connection.h index 515229d6a..606137229 100644 --- a/src/connection.h +++ b/src/connection.h @@ -52,6 +52,9 @@ typedef enum { #define CONN_FLAG_READ_THREADSAFE (1<<2) #define 
CONN_FLAG_WRITE_THREADSAFE (1<<3) +#define CONN_TYPE_SOCKET 1 +#define CONN_TYPE_TLS 2 + typedef void (*ConnectionCallbackFunc)(struct connection *conn); typedef struct ConnectionType { @@ -69,6 +72,7 @@ typedef struct ConnectionType { ssize_t (*sync_read)(struct connection *conn, char *ptr, ssize_t size, long long timeout); ssize_t (*sync_readline)(struct connection *conn, char *ptr, ssize_t size, long long timeout); void (*marshal_thread)(struct connection *conn); + int (*get_type)(struct connection *conn); } ConnectionType; struct connection { @@ -107,7 +111,7 @@ static inline int connAccept(connection *conn, ConnectionCallbackFunc accept_han } /* Establish a connection. The connect_handler will be called when the connection - * is established, or if an error has occured. + * is established, or if an error has occurred. * * The connection handler will be responsible to set up any read/write handlers * as needed. @@ -169,7 +173,7 @@ static inline int connSetReadHandler(connection *conn, ConnectionCallbackFunc fu /* Set a write handler, and possibly enable a write barrier, this flag is * cleared when write handler is changed or removed. - * With barroer enabled, we never fire the event if the read handler already + * With barrier enabled, we never fire the event if the read handler already * fired in the same event loop iteration. Useful when you want to persist * things to disk before sending replies, and want to do that in a group fashion. 
*/ static inline int connSetWriteHandlerWithBarrier(connection *conn, ConnectionCallbackFunc func, int barrier, bool fThreadSafe = false) { @@ -204,6 +208,11 @@ static inline void connMarshalThread(connection *conn) { conn->type->marshal_thread(conn); } +/* Return CONN_TYPE_* for the specified connection */ +static inline int connGetType(connection *conn) { + return conn->type->get_type(conn); +} + connection *connCreateSocket(); connection *connCreateAcceptedSocket(int fd); @@ -232,6 +241,7 @@ int connSockName(connection *conn, char *ip, size_t ip_len, int *port); const char *connGetInfo(connection *conn, char *buf, size_t buf_len); /* Helpers for tls special considerations */ +sds connTLSGetPeerCert(connection *conn); int tlsHasPendingData(); int tlsProcessPendingData(); diff --git a/src/crcspeed.c b/src/crcspeed.c index d2d97a8c7..81a80ce8e 100644 --- a/src/crcspeed.c +++ b/src/crcspeed.c @@ -35,7 +35,8 @@ void crcspeed64little_init(crcfn64 crcfn, uint64_t table[8][256]) { /* generate CRCs for all single byte sequences */ for (int n = 0; n < 256; n++) { - table[0][n] = crcfn(0, &n, 1); + unsigned char v = n; + table[0][n] = crcfn(0, &v, 1); } /* generate nested CRC table for future slice-by-8 lookup */ diff --git a/src/db.cpp b/src/db.cpp index 0beee32b3..de2ed2754 100644 --- a/src/db.cpp +++ b/src/db.cpp @@ -35,6 +35,13 @@ #include #include +/* Database backup. */ +struct dbBackup { + redisDb *dbarray; + rax *slots_to_keys; + uint64_t slots_keys_count[CLUSTER_SLOTS]; +}; + /*----------------------------------------------------------------------------- * C-level DB API *----------------------------------------------------------------------------*/ @@ -86,7 +93,7 @@ void updateDbValAccess(dictEntry *de, int flags) * implementations that should instead rely on lookupKeyRead(), * lookupKeyWrite() and lookupKeyReadWithFlags(). 
*/ static robj *lookupKey(redisDb *db, robj *key, int flags) { - dictEntry *de = dictFind(db->pdict,ptrFromObj(key)); + dictEntry *de = dictFind(db->dict,ptrFromObj(key)); if (de) { robj *val = (robj*)dictGetVal(de); @@ -131,11 +138,8 @@ robj_roptr lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) { /* Key expired. If we are in the context of a master, expireIfNeeded() * returns 0 only when the key does not exist at all, so it's safe * to return NULL ASAP. */ - if (listLength(g_pserver->masters) == 0) { - g_pserver->stat_keyspace_misses++; - notifyKeyspaceEvent(NOTIFY_KEY_MISS, "keymiss", key, db->id); - return NULL; - } + if (listLength(g_pserver->masters) == 0) + goto keymiss; /* However if we are in the context of a replica, expireIfNeeded() will * not really try to expire the key, it only returns information @@ -145,7 +149,7 @@ robj_roptr lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) { * However, if the command caller is not the master, and as additional * safety measure, the command invoked is a read-only command, we can * safely return NULL here, and provide a more consistent behavior - * to clients accessign expired values in a read-only fashion, that + * to clients accessing expired values in a read-only fashion, that * will say the key as non existing. * * Notably this covers GETs when slaves are used to scale reads. 
*/ @@ -154,19 +158,21 @@ robj_roptr lookupKeyReadWithFlags(redisDb *db, robj *key, int flags) { serverTL->current_client->cmd && serverTL->current_client->cmd->flags & CMD_READONLY) { - g_pserver->stat_keyspace_misses++; - notifyKeyspaceEvent(NOTIFY_KEY_MISS, "keymiss", key, db->id); - return NULL; + goto keymiss; } } val = lookupKey(db,key,flags); - if (val == NULL) { + if (val == NULL) + goto keymiss; + g_pserver->stat_keyspace_hits++; + return val; + +keymiss: + if (!(flags & LOOKUP_NONOTIFY)) { g_pserver->stat_keyspace_misses++; notifyKeyspaceEvent(NOTIFY_KEY_MISS, "keymiss", key, db->id); } - else - g_pserver->stat_keyspace_hits++; - return val; + return NULL; } /* Like lookupKeyReadWithFlags(), but does not use any flag, which is the @@ -202,13 +208,15 @@ robj *lookupKeyWriteOrReply(client *c, robj *key, robj *reply) { return o; } -int dbAddCore(redisDb *db, robj *key, robj *val) { +int dbAddCore(redisDb *db, robj *key, robj *val, bool fUpdateMvcc) { serverAssert(!val->FExpires()); sds copy = sdsdup(szFromObj(key)); - int retval = dictAdd(db->pdict, copy, val); + int retval = dictAdd(db->dict, copy, val); uint64_t mvcc = getMvccTstamp(); - setMvccTstamp(key, mvcc); - setMvccTstamp(val, mvcc); + if (fUpdateMvcc) { + setMvccTstamp(key, mvcc); + setMvccTstamp(val, mvcc); + } if (retval == DICT_OK) { @@ -232,7 +240,7 @@ int dbAddCore(redisDb *db, robj *key, robj *val) { * The program is aborted if the key already exists. 
*/ void dbAdd(redisDb *db, robj *key, robj *val) { - int retval = dbAddCore(db, key, val); + int retval = dbAddCore(db, key, val, true /* fUpdateMvcc */); serverAssertWithInfo(NULL,key,retval == DICT_OK); } @@ -261,14 +269,14 @@ void dbOverwriteCore(redisDb *db, dictEntry *de, robj *key, robj *val, bool fUpd setMvccTstamp(val, getMvccTstamp()); } - dictSetVal(db->pdict, de, val); + dictSetVal(db->dict, de, val); if (g_pserver->lazyfree_lazy_server_del) { freeObjAsync(old); - dictSetVal(db->pdict, &auxentry, NULL); + dictSetVal(db->dict, &auxentry, NULL); } - dictFreeVal(db->pdict, &auxentry); + dictFreeVal(db->dict, &auxentry); } /* Overwrite an existing key with a new value. Incrementing the reference @@ -277,7 +285,7 @@ void dbOverwriteCore(redisDb *db, dictEntry *de, robj *key, robj *val, bool fUpd * * The program is aborted if the key was not already present. */ void dbOverwrite(redisDb *db, robj *key, robj *val) { - dictEntry *de = dictFind(db->pdict,ptrFromObj(key)); + dictEntry *de = dictFind(db->dict,ptrFromObj(key)); serverAssertWithInfo(NULL,key,de != NULL); dbOverwriteCore(db, de, key, val, !!g_pserver->fActiveReplica, false); @@ -288,9 +296,9 @@ int dbMerge(redisDb *db, robj *key, robj *val, int fReplace) { if (fReplace) { - dictEntry *de = dictFind(db->pdict, ptrFromObj(key)); + dictEntry *de = dictFind(db->dict, ptrFromObj(key)); if (de == nullptr) - return (dbAddCore(db, key, val) == DICT_OK); + return (dbAddCore(db, key, val, false /* fUpdateMvcc */) == DICT_OK); robj *old = (robj*)dictGetVal(de); if (mvccFromObj(old) <= mvccFromObj(val)) @@ -303,7 +311,7 @@ int dbMerge(redisDb *db, robj *key, robj *val, int fReplace) } else { - return (dbAddCore(db, key, val) == DICT_OK); + return (dbAddCore(db, key, val, true /* fUpdateMvcc */) == DICT_OK); } } @@ -319,7 +327,7 @@ int dbMerge(redisDb *db, robj *key, robj *val, int fReplace) * The client 'c' argument may be set to NULL if the operation is performed * in a context where there is no clear client 
performing the operation. */ void genericSetKey(client *c, redisDb *db, robj *key, robj *val, int keepttl, int signal) { - dictEntry *de = dictFind(db->pdict, ptrFromObj(key)); + dictEntry *de = dictFind(db->dict, ptrFromObj(key)); if (de == NULL) { dbAdd(db,key,val); } else { @@ -338,7 +346,7 @@ void setKey(client *c, redisDb *db, robj *key, robj *val) { /* Return true if the specified key exists in the specified database. * LRU/LFU info is not updated in any way. */ int dbExists(redisDb *db, robj *key) { - return dictFind(db->pdict,ptrFromObj(key)) != NULL; + return dictFind(db->dict,ptrFromObj(key)) != NULL; } /* Return a random key, in form of a Redis object. @@ -348,13 +356,13 @@ int dbExists(redisDb *db, robj *key) { robj *dbRandomKey(redisDb *db) { dictEntry *de; int maxtries = 100; - int allvolatile = dictSize(db->pdict) == db->setexpire->size(); + int allvolatile = dictSize(db->dict) == db->setexpire->size(); while(1) { sds key; robj *keyobj; - de = dictGetRandomKey(db->pdict); + de = dictGetRandomKey(db->dict); if (de == NULL) return NULL; key = (sds)dictGetKey(de); @@ -392,10 +400,10 @@ int dbSyncDelete(redisDb *db, robj *key) { /* Deleting an entry from the expires dict will not free the sds of * the key, because it is shared with the main dictionary. */ - dictEntry *de = dictFind(db->pdict, szFromObj(key)); + dictEntry *de = dictFind(db->dict, szFromObj(key)); if (de != nullptr && ((robj*)dictGetVal(de))->FExpires()) removeExpireCore(db, key, de); - if (dictDelete(db->pdict,ptrFromObj(key)) == DICT_OK) { + if (dictDelete(db->dict,ptrFromObj(key)) == DICT_OK) { if (g_pserver->cluster_enabled) slotToKeyDel(szFromObj(key)); return 1; } else { @@ -448,48 +456,18 @@ robj *dbUnshareStringValue(redisDb *db, robj *key, robj *o) { return o; } -/* Remove all keys from all the databases in a Redis g_pserver-> - * If callback is given the function is called from time to time to - * signal that work is in progress. 
+/* Remove all keys from the database(s) structure. The dbarray argument + * may not be the server main DBs (could be a backup). * - * The dbnum can be -1 if all the DBs should be flushed, or the specified - * DB number if we want to flush only a single Redis database number. - * - * Flags are be EMPTYDB_NO_FLAGS if no special flags are specified or - * 1. EMPTYDB_ASYNC if we want the memory to be freed in a different thread. - * 2. EMPTYDB_BACKUP if we want to empty the backup dictionaries created by - * disklessLoadMakeBackups. In that case we only free memory and avoid - * firing module events. - * and the function to return ASAP. - * - * On success the fuction returns the number of keys removed from the - * database(s). Otherwise -1 is returned in the specific case the - * DB number is out of range, and errno is set to EINVAL. */ -long long emptyDbGeneric(redisDb *dbarray, int dbnum, int flags, void(callback)(void*)) { - int async = (flags & EMPTYDB_ASYNC); - int backup = (flags & EMPTYDB_BACKUP); /* Just free the memory, nothing else */ - RedisModuleFlushInfoV1 fi = {REDISMODULE_FLUSHINFO_VERSION,!async,dbnum}; + * The dbnum can be -1 if all the DBs should be emptied, or the specified + * DB index if we want to empty only a single database. + * The function returns the number of keys removed from the database(s). */ +long long emptyDbStructure(redisDb *dbarray, int dbnum, int async, + void(callback)(void*)) +{ long long removed = 0; - - if (dbnum < -1 || dbnum >= cserver.dbnum) { - errno = EINVAL; - return -1; - } - - /* Pre-flush actions */ - if (!backup) { - /* Fire the flushdb modules event. */ - moduleFireServerEvent(REDISMODULE_EVENT_FLUSHDB, - REDISMODULE_SUBEVENT_FLUSHDB_START, - &fi); - - /* Make sure the WATCHed keys are affected by the FLUSH* commands. - * Note that we need to call the function while the keys are still - * there. 
*/ - signalFlushedDb(dbnum); - } - int startdb, enddb; + if (dbnum == -1) { startdb = 0; enddb = cserver.dbnum-1; @@ -498,38 +476,147 @@ long long emptyDbGeneric(redisDb *dbarray, int dbnum, int flags, void(callback)( } for (int j = startdb; j <= enddb; j++) { - removed += dictSize(dbarray[j].pdict); + removed += dictSize(dbarray[j].dict); if (async) { emptyDbAsync(&dbarray[j]); } else { - dictEmpty(dbarray[j].pdict,callback); + dictEmpty(dbarray[j].dict,callback); dbarray[j].setexpire->clear(); } - } - - /* Post-flush actions */ - if (!backup) { - if (g_pserver->cluster_enabled) { - if (async) { - slotToKeyFlushAsync(); - } else { - slotToKeyFlush(); - } - } - if (dbnum == -1) flushSlaveKeysWithExpireList(); - - /* Also fire the end event. Note that this event will fire almost - * immediately after the start event if the flush is asynchronous. */ - moduleFireServerEvent(REDISMODULE_EVENT_FLUSHDB, - REDISMODULE_SUBEVENT_FLUSHDB_END, - &fi); + /* Because all keys of database are removed, reset average ttl. */ + dbarray[j].avg_ttl = 0; + dbarray[j].last_expire_set = 0; } return removed; } +/* Remove all keys from all the databases in a Redis server. + * If callback is given the function is called from time to time to + * signal that work is in progress. + * + * The dbnum can be -1 if all the DBs should be flushed, or the specified + * DB number if we want to flush only a single Redis database number. + * + * Flags are be EMPTYDB_NO_FLAGS if no special flags are specified or + * EMPTYDB_ASYNC if we want the memory to be freed in a different thread + * and the function to return ASAP. + * + * On success the function returns the number of keys removed from the + * database(s). Otherwise -1 is returned in the specific case the + * DB number is out of range, and errno is set to EINVAL. 
*/ long long emptyDb(int dbnum, int flags, void(callback)(void*)) { - return emptyDbGeneric(g_pserver->db, dbnum, flags, callback); + int async = (flags & EMPTYDB_ASYNC); + RedisModuleFlushInfoV1 fi = {REDISMODULE_FLUSHINFO_VERSION,!async,dbnum}; + long long removed = 0; + + if (dbnum < -1 || dbnum >= cserver.dbnum) { + errno = EINVAL; + return -1; + } + + /* Fire the flushdb modules event. */ + moduleFireServerEvent(REDISMODULE_EVENT_FLUSHDB, + REDISMODULE_SUBEVENT_FLUSHDB_START, + &fi); + + /* Make sure the WATCHed keys are affected by the FLUSH* commands. + * Note that we need to call the function while the keys are still + * there. */ + signalFlushedDb(dbnum); + + /* Empty redis database structure. */ + removed = emptyDbStructure(g_pserver->db, dbnum, async, callback); + + /* Flush slots to keys map if enable cluster, we can flush entire + * slots to keys map whatever dbnum because only support one DB + * in cluster mode. */ + if (g_pserver->cluster_enabled) slotToKeyFlush(async); + + if (dbnum == -1) flushSlaveKeysWithExpireList(); + + /* Also fire the end event. Note that this event will fire almost + * immediately after the start event if the flush is asynchronous. */ + moduleFireServerEvent(REDISMODULE_EVENT_FLUSHDB, + REDISMODULE_SUBEVENT_FLUSHDB_END, + &fi); + + return removed; +} + +/* Store a backup of the database for later use, and put an empty one + * instead of it. */ +dbBackup *backupDb(void) { + dbBackup *backup = (dbBackup*)zmalloc(sizeof(dbBackup)); + + /* Backup main DBs. */ + backup->dbarray = (redisDb*)zmalloc(sizeof(redisDb)*cserver.dbnum); + for (int i=0; idbarray[i] = g_pserver->db[i]; + g_pserver->db[i].dict = dictCreate(&dbDictType,NULL); + g_pserver->db[i].setexpire = new(MALLOC_LOCAL) expireset; + g_pserver->db[i].expireitr = g_pserver->db[i].setexpire->end(); + } + + /* Backup cluster slots to keys map if enable cluster. 
*/ + if (g_pserver->cluster_enabled) { + backup->slots_to_keys = g_pserver->cluster->slots_to_keys; + memcpy(backup->slots_keys_count, g_pserver->cluster->slots_keys_count, + sizeof(g_pserver->cluster->slots_keys_count)); + g_pserver->cluster->slots_to_keys = raxNew(); + memset(g_pserver->cluster->slots_keys_count, 0, + sizeof(g_pserver->cluster->slots_keys_count)); + } + + return backup; +} + +/* Discard a previously created backup, this can be slow (similar to FLUSHALL) + * Arguments are similar to the ones of emptyDb, see EMPTYDB_ flags. */ +void discardDbBackup(dbBackup *buckup, int flags, void(callback)(void*)) { + int async = (flags & EMPTYDB_ASYNC); + + /* Release main DBs backup . */ + emptyDbStructure(buckup->dbarray, -1, async, callback); + for (int i=0; idbarray[i].dict); + delete buckup->dbarray[i].setexpire; + } + + /* Release slots to keys map backup if enable cluster. */ + if (g_pserver->cluster_enabled) freeSlotsToKeysMap(buckup->slots_to_keys, async); + + /* Release buckup. */ + zfree(buckup->dbarray); + zfree(buckup); +} + +/* Restore the previously created backup (discarding what currently resides + * in the db). + * This function should be called after the current contents of the database + * was emptied with a previous call to emptyDb (possibly using the async mode). */ +void restoreDbBackup(dbBackup *buckup) { + /* Restore main DBs. */ + for (int i=0; idb[i].dict) == 0); + serverAssert(g_pserver->db[i].setexpire->empty()); + dictRelease(g_pserver->db[i].dict); + delete g_pserver->db[i].setexpire; + g_pserver->db[i] = buckup->dbarray[i]; + } + + /* Restore slots to keys map backup if enable cluster. 
*/ + if (g_pserver->cluster_enabled) { + serverAssert(g_pserver->cluster->slots_to_keys->numele == 0); + raxFree(g_pserver->cluster->slots_to_keys); + g_pserver->cluster->slots_to_keys = buckup->slots_to_keys; + memcpy(g_pserver->cluster->slots_keys_count, buckup->slots_keys_count, + sizeof(g_pserver->cluster->slots_keys_count)); + } + + /* Release buckup. */ + zfree(buckup->dbarray); + zfree(buckup); } int selectDb(client *c, int id) { @@ -543,7 +630,7 @@ long long dbTotalServerKeyCount() { long long total = 0; int j; for (j = 0; j < cserver.dbnum; j++) { - total += dictSize(g_pserver->db[j].pdict); + total += dictSize(g_pserver->db[j].dict); } return total; } @@ -565,7 +652,18 @@ void signalModifiedKey(client *c, redisDb *db, robj *key) { } void signalFlushedDb(int dbid) { - touchWatchedKeysOnFlush(dbid); + int startdb, enddb; + if (dbid == -1) { + startdb = 0; + enddb = cserver.dbnum-1; + } else { + startdb = enddb = dbid; + } + + for (int j = startdb; j <= enddb; j++) { + touchAllWatchedKeysInDb(&g_pserver->db[j], NULL); + } + trackingInvalidateKeysOnFlush(dbid); } @@ -680,7 +778,7 @@ void existsCommand(client *c) { int j; for (j = 1; j < c->argc; j++) { - if (lookupKeyRead(c->db,c->argv[j])) count++; + if (lookupKeyReadWithFlags(c->db,c->argv[j],LOOKUP_NOTOUCH)) count++; } addReplyLongLong(c,count); } @@ -730,7 +828,7 @@ void keysCommand(client *c) { unsigned long numkeys = 0; void *replylen = addReplyDeferredLen(c); - di = dictGetSafeIterator(c->db->pdict); + di = dictGetSafeIterator(c->db->dict); allkeys = (pattern[0] == '*' && plen == 1); while((de = dictNext(di)) != NULL) { sds key = (sds)dictGetKey(de); @@ -874,7 +972,7 @@ void scanGenericCommand(client *c, robj_roptr o, unsigned long cursor) { /* Handle the case of a hash table. 
*/ ht = NULL; if (o == nullptr) { - ht = c->db->pdict; + ht = c->db->dict; } else if (o->type == OBJ_SET && o->encoding == OBJ_ENCODING_HT) { ht = (dict*)ptrFromObj(o); } else if (o->type == OBJ_HASH && o->encoding == OBJ_ENCODING_HT) { @@ -882,7 +980,7 @@ void scanGenericCommand(client *c, robj_roptr o, unsigned long cursor) { count *= 2; /* We return key / value for this type. */ } else if (o->type == OBJ_ZSET && o->encoding == OBJ_ENCODING_SKIPLIST) { zset *zs = (zset*)ptrFromObj(o); - ht = zs->pdict; + ht = zs->dict; count *= 2; /* We return key / value for this type. */ } @@ -961,7 +1059,7 @@ void scanGenericCommand(client *c, robj_roptr o, unsigned long cursor) { /* Filter element if it is an expired key. */ if (!filter && o == nullptr && expireIfNeeded(c->db, kobj)) filter = 1; - /* Remove the element and its associted value if needed. */ + /* Remove the element and its associated value if needed. */ if (filter) { decrRefCount(kobj); listDelNode(keys, node); @@ -1007,7 +1105,7 @@ void scanCommand(client *c) { } void dbsizeCommand(client *c) { - addReplyLongLong(c,dictSize(c->db->pdict)); + addReplyLongLong(c,dictSize(c->db->dict)); } void lastsaveCommand(client *c) { @@ -1057,14 +1155,6 @@ void shutdownCommand(client *c) { return; } } - /* When SHUTDOWN is called while the server is loading a dataset in - * memory we need to make sure no attempt is performed to save - * the dataset on shutdown (otherwise it could overwrite the current DB - * with half-read data). - * - * Also when in Sentinel mode clear the SAVE flag and force NOSAVE. */ - if (g_pserver->loading || g_pserver->sentinel_mode) - flags = (flags & ~SHUTDOWN_SAVE) | SHUTDOWN_NOSAVE; if (prepareForShutdown(flags) == C_OK) throw ShutdownException(); addReplyError(c,"Errors trying to SHUTDOWN. 
Check logs."); } @@ -1228,23 +1318,16 @@ int dbSwapDatabases(long id1, long id2) { if (id1 < 0 || id1 >= cserver.dbnum || id2 < 0 || id2 >= cserver.dbnum) return C_ERR; if (id1 == id2) return C_OK; - redisDb aux(g_pserver->db[id1]); redisDb *db1 = &g_pserver->db[id1], *db2 = &g_pserver->db[id2]; /* Swap hash tables. Note that we don't swap blocking_keys, * ready_keys and watched_keys, since we want clients to * remain in the same DB they were. */ - db1->pdict = db2->pdict; - db1->setexpire = db2->setexpire; - db1->expireitr = db2->expireitr; - db1->avg_ttl = db2->avg_ttl; - db1->last_expire_set = db2->last_expire_set; - - db2->pdict = aux.pdict; - db2->setexpire = aux.setexpire; - db2->expireitr = aux.expireitr; - db2->avg_ttl = aux.avg_ttl; - db2->last_expire_set = aux.last_expire_set; + std::swap(db1->dict, db2->dict); + std::swap(db1->setexpire, db2->setexpire); + std::swap(db1->expireitr, db2->expireitr); + std::swap(db1->avg_ttl, db2->avg_ttl); + std::swap(db1->last_expire_set, db2->last_expire_set); /* Now we need to handle clients blocked on lists: as an effect * of swapping the two DBs, a client that was waiting for list @@ -1254,9 +1337,14 @@ int dbSwapDatabases(long id1, long id2) { * However normally we only do this check for efficiency reasons * in dbAdd() when a list is created. So here we need to rescan * the list of clients blocked on lists and signal lists as ready - * if needed. */ + * if needed. 
+ * + * Also the swapdb should make transaction fail if there is any + * client watching keys */ scanDatabaseForReadyLists(db1); + touchAllWatchedKeysInDb(db1, db2); scanDatabaseForReadyLists(db2); + touchAllWatchedKeysInDb(db2, db1); return C_OK; } @@ -1284,6 +1372,8 @@ void swapdbCommand(client *c) { addReplyError(c,"DB index is out of range"); return; } else { + RedisModuleSwapDbInfo si = {REDISMODULE_SWAPDBINFO_VERSION,(int32_t)id1,(int32_t)id2}; + moduleFireServerEvent(REDISMODULE_EVENT_SWAPDB,0,&si); g_pserver->dirty++; addReply(c,shared.ok); } @@ -1293,7 +1383,7 @@ void swapdbCommand(client *c) { * Expires API *----------------------------------------------------------------------------*/ int removeExpire(redisDb *db, robj *key) { - dictEntry *de = dictFind(db->pdict,ptrFromObj(key)); + dictEntry *de = dictFind(db->dict,ptrFromObj(key)); return removeExpireCore(db, key, de); } int removeExpireCore(redisDb *db, robj *key, dictEntry *de) { @@ -1314,7 +1404,7 @@ int removeExpireCore(redisDb *db, robj *key, dictEntry *de) { } int removeSubkeyExpire(redisDb *db, robj *key, robj *subkey) { - dictEntry *de = dictFind(db->pdict,ptrFromObj(key)); + dictEntry *de = dictFind(db->dict,ptrFromObj(key)); serverAssertWithInfo(NULL,key,de != NULL); robj *val = (robj*)dictGetVal(de); @@ -1356,13 +1446,13 @@ void setExpire(client *c, redisDb *db, robj *key, robj *subkey, long long when) serverAssert(GlobalLocksAcquired()); /* Reuse the sds from the main dict in the expire dict */ - kde = dictFind(db->pdict,ptrFromObj(key)); + kde = dictFind(db->dict,ptrFromObj(key)); serverAssertWithInfo(NULL,key,kde != NULL); if (((robj*)dictGetVal(kde))->getrefcount(std::memory_order_relaxed) == OBJ_SHARED_REFCOUNT) { // shared objects cannot have the expire bit set, create a real object - dictSetVal(db->pdict, kde, dupStringObject((robj*)dictGetVal(kde))); + dictSetVal(db->dict, kde, dupStringObject((robj*)dictGetVal(kde))); } /* Update TTL stats (exponential moving average) */ @@ -1415,13 
+1505,13 @@ void setExpire(client *c, redisDb *db, robj *key, expireEntry &&e) serverAssert(GlobalLocksAcquired()); /* Reuse the sds from the main dict in the expire dict */ - kde = dictFind(db->pdict,ptrFromObj(key)); + kde = dictFind(db->dict,ptrFromObj(key)); serverAssertWithInfo(NULL,key,kde != NULL); if (((robj*)dictGetVal(kde))->getrefcount(std::memory_order_relaxed) == OBJ_SHARED_REFCOUNT) { // shared objects cannot have the expire bit set, create a real object - dictSetVal(db->pdict, kde, dupStringObject((robj*)dictGetVal(kde))); + dictSetVal(db->dict, kde, dupStringObject((robj*)dictGetVal(kde))); } if (((robj*)dictGetVal(kde))->FExpires()) @@ -1446,7 +1536,7 @@ expireEntry *getExpire(redisDb *db, robj_roptr key) { if (db->setexpire->size() == 0) return nullptr; - de = dictFind(db->pdict, ptrFromObj(key)); + de = dictFind(db->dict, ptrFromObj(key)); if (de == NULL) return nullptr; robj *obj = (robj*)dictGetVal(de); @@ -1623,27 +1713,54 @@ int expireIfNeeded(redisDb *db, robj *key) { /* ----------------------------------------------------------------------------- * API to get key arguments from commands * ---------------------------------------------------------------------------*/ -#define MAX_KEYS_BUFFER 256 -thread_local static int getKeysTempBuffer[MAX_KEYS_BUFFER]; + +/* Prepare the getKeysResult struct to hold numkeys, either by using the + * pre-allocated keysbuf or by allocating a new array on the heap. + * + * This function must be called at least once before starting to populate + * the result, and can be called repeatedly to enlarge the result array. + */ +int *getKeysPrepareResult(getKeysResult *result, int numkeys) { + /* GETKEYS_RESULT_INIT initializes keys to NULL, point it to the pre-allocated stack + * buffer here. 
*/ + if (!result->keys) { + serverAssert(!result->numkeys); + result->keys = result->keysbuf; + } + + /* Resize if necessary */ + if (numkeys > result->size) { + if (result->keys != result->keysbuf) { + /* We're not using a static buffer, just (re)alloc */ + result->keys = (int*)zrealloc(result->keys, numkeys * sizeof(int)); + } else { + /* We are using a static buffer, copy its contents */ + result->keys = (int*)zmalloc(numkeys * sizeof(int)); + if (result->numkeys) + memcpy(result->keys, result->keysbuf, result->numkeys * sizeof(int)); + } + result->size = numkeys; + } + + return result->keys; +} /* The base case is to use the keys position as given in the command table * (firstkey, lastkey, step). */ -int *getKeysUsingCommandTable(struct redisCommand *cmd,robj **argv, int argc, int *numkeys) { +int getKeysUsingCommandTable(struct redisCommand *cmd,robj **argv, int argc, getKeysResult *result) { int j, i = 0, last, *keys; UNUSED(argv); if (cmd->firstkey == 0) { - *numkeys = 0; - return NULL; + result->numkeys = 0; + return 0; } last = cmd->lastkey; if (last < 0) last = argc+last; int count = ((last - cmd->firstkey)+1); - keys = getKeysTempBuffer; - if (count > MAX_KEYS_BUFFER) - keys = (int*)zmalloc(sizeof(int)*count); + keys = getKeysPrepareResult(result, count); for (j = cmd->firstkey; j <= last; j += cmd->keystep) { if (j >= argc) { @@ -1654,23 +1771,23 @@ int *getKeysUsingCommandTable(struct redisCommand *cmd,robj **argv, int argc, in * return no keys and expect the command implementation to report * an arity or syntax error. 
*/ if (cmd->flags & CMD_MODULE || cmd->arity < 0) { - getKeysFreeResult(keys); - *numkeys = 0; - return NULL; + getKeysFreeResult(result); + result->numkeys = 0; + return 0; } else { serverPanic("Redis built-in command declared keys positions not matching the arity requirements."); } } keys[i++] = j; } - *numkeys = i; - return keys; + result->numkeys = i; + return i; } /* Return all the arguments that are keys in the command passed via argc / argv. * * The command returns the positions of all the key arguments inside the array, - * so the actual return value is an heap allocated array of integers. The + * so the actual return value is a heap allocated array of integers. The * length of the array is returned by reference into *numkeys. * * 'cmd' must be point to the corresponding entry into the redisCommand @@ -1678,26 +1795,26 @@ int *getKeysUsingCommandTable(struct redisCommand *cmd,robj **argv, int argc, in * * This function uses the command table if a command-specific helper function * is not required, otherwise it calls the command-specific function. */ -int *getKeysFromCommand(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) { +int getKeysFromCommand(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) { if (cmd->flags & CMD_MODULE_GETKEYS) { - return moduleGetCommandKeysViaAPI(cmd,argv,argc,numkeys); + return moduleGetCommandKeysViaAPI(cmd,argv,argc,result); } else if (!(cmd->flags & CMD_MODULE) && cmd->getkeys_proc) { - return cmd->getkeys_proc(cmd,argv,argc,numkeys); + return cmd->getkeys_proc(cmd,argv,argc,result); } else { - return getKeysUsingCommandTable(cmd,argv,argc,numkeys); + return getKeysUsingCommandTable(cmd,argv,argc,result); } } /* Free the result of getKeysFromCommand. 
*/ -void getKeysFreeResult(int *result) { - if (result != getKeysTempBuffer) - zfree(result); +void getKeysFreeResult(getKeysResult *result) { + if (result && result->keys != result->keysbuf) + zfree(result->keys); } /* Helper function to extract keys from following commands: * ZUNIONSTORE ... * ZINTERSTORE ... */ -int *zunionInterGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys) { +int zunionInterGetKeys(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result) { int i, num, *keys; UNUSED(cmd); @@ -1705,30 +1822,30 @@ int *zunionInterGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *nu /* Sanity check. Don't return any key if the command is going to * reply with syntax error. */ if (num < 1 || num > (argc-3)) { - *numkeys = 0; - return NULL; + result->numkeys = 0; + return 0; } /* Keys in z{union,inter}store come from two places: * argv[1] = storage key, * argv[3...n] = keys to intersect */ - keys = getKeysTempBuffer; - if (num+1>MAX_KEYS_BUFFER) - keys = (int*)zmalloc(sizeof(int)*(num+1)); + /* Total keys = {union,inter} keys + storage key */ + keys = getKeysPrepareResult(result, num+1); + result->numkeys = num+1; /* Add all key positions for argv[3...n] to keys[] */ for (i = 0; i < num; i++) keys[i] = 3+i; /* Finally add the argv[1] key position (the storage key target). */ keys[num] = 1; - *numkeys = num+1; /* Total keys = {union,inter} keys + storage key */ - return keys; + + return result->numkeys; } /* Helper function to extract keys from the following commands: * EVAL