From bfd639d4851b9a014245df5a295fce1ee0f9db3e Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 29 May 2021 07:07:54 +0000 Subject: [PATCH 01/11] Fix cluster test failures Former-commit-id: 8cf1daf823be0a40301a5dc288ec18f5bb98d60b --- tests/cluster/tests/12.1-replica-migration-3.tcl | 4 ++-- tests/cluster/tests/includes/utils.tcl | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/cluster/tests/12.1-replica-migration-3.tcl b/tests/cluster/tests/12.1-replica-migration-3.tcl index 46a9f79e3..8b2558264 100644 --- a/tests/cluster/tests/12.1-replica-migration-3.tcl +++ b/tests/cluster/tests/12.1-replica-migration-3.tcl @@ -36,7 +36,7 @@ test "Set allow-replica-migration no" { set master0_id [dict get [get_myself 0] id] test "Resharding all the master #0 slots away from it" { set output [exec \ - ../../../src/redis-cli --cluster rebalance \ + ../../../src/keydb-cli --cluster rebalance \ 127.0.0.1:[get_instance_attrib redis 0 port] \ {*}[rediscli_tls_config "../../../tests"] \ --cluster-weight ${master0_id}=0 >@ stdout ] @@ -44,7 +44,7 @@ test "Resharding all the master #0 slots away from it" { test "Wait cluster to be stable" { wait_for_condition 1000 50 { - [catch {exec ../../../src/redis-cli --cluster \ + [catch {exec ../../../src/keydb-cli --cluster \ check 127.0.0.1:[get_instance_attrib redis 0 port] \ {*}[rediscli_tls_config "../../../tests"] \ }] == 0 diff --git a/tests/cluster/tests/includes/utils.tcl b/tests/cluster/tests/includes/utils.tcl index 48c40a050..855ca3c85 100644 --- a/tests/cluster/tests/includes/utils.tcl +++ b/tests/cluster/tests/includes/utils.tcl @@ -8,18 +8,18 @@ proc config_set_all_nodes {keyword value} { proc fix_cluster {addr} { set code [catch { - exec ../../../src/redis-cli {*}[rediscli_tls_config "../../../tests"] --cluster fix $addr << yes + exec ../../../src/keydb-cli {*}[rediscli_tls_config "../../../tests"] --cluster fix $addr << yes } result] if {$code != 0} { - puts "redis-cli --cluster fix returns non-zero exit code, output below:\n$result" + puts "keydb-cli --cluster fix returns non-zero exit code, output below:\n$result" } - # Note: redis-cli --cluster fix may return a non-zero exit code if nodes don't agree, + # Note: keydb-cli --cluster fix may return a non-zero exit code if nodes don't agree, # but we can ignore that and rely on the check below. assert_cluster_state ok wait_for_condition 100 100 { - [catch {exec ../../../src/redis-cli {*}[rediscli_tls_config "../../../tests"] --cluster check $addr} result] == 0 + [catch {exec ../../../src/keydb-cli {*}[rediscli_tls_config "../../../tests"] --cluster check $addr} result] == 0 } else { - puts "redis-cli --cluster check returns non-zero exit code, output below:\n$result" + puts "keydb-cli --cluster check returns non-zero exit code, output below:\n$result" fail "Cluster could not settle with configuration" } } From 7069fcc695e6c4fa550b15fcfbc030a1d3fd7f02 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 29 May 2021 07:08:01 +0000 Subject: [PATCH 02/11] Fix branding Former-commit-id: b20803b7669b10804fb0f355e302898ddaa19906 --- TLS.md | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/TLS.md b/TLS.md index 1afa2e8fa..6373ff871 100644 --- a/TLS.md +++ b/TLS.md @@ -13,13 +13,13 @@ Run `make BUILD_TLS=yes`. ### Tests -To run Redis test suite with TLS, you'll need TLS support for TCL (i.e. +To run KeyDB test suite with TLS, you'll need TLS support for TCL (i.e. `tcl-tls` package on Debian/Ubuntu). 1. Run `./utils/gen-test-certs.sh` to generate a root CA and a server certificate. -2. Run `./runtest --tls` or `./runtest-cluster --tls` to run Redis and Redis +2. Run `./runtest --tls` or `./runtest-cluster --tls` to run KeyDB and KeyDB Cluster tests in TLS mode. ### Running manually @@ -27,23 +27,23 @@ To run Redis test suite with TLS, you'll need TLS support for TCL (i.e. To manually run a Redis server with TLS mode (assuming `gen-test-certs.sh` was invoked so sample certificates/keys are available): - ./src/redis-server --tls-port 6379 --port 0 \ - --tls-cert-file ./tests/tls/redis.crt \ - --tls-key-file ./tests/tls/redis.key \ + ./src/keydb-server --tls-port 6379 --port 0 \ + --tls-cert-file ./tests/tls/keydb.crt \ + --tls-key-file ./tests/tls/keydb.key \ --tls-ca-cert-file ./tests/tls/ca.crt -To connect to this Redis server with `redis-cli`: +To connect to this Redis server with `keydb-cli`: - ./src/redis-cli --tls \ - --cert ./tests/tls/redis.crt \ - --key ./tests/tls/redis.key \ + ./src/keydb-cli --tls \ + --cert ./tests/tls/keydb.crt \ + --key ./tests/tls/keydb.key \ --cacert ./tests/tls/ca.crt This will disable TCP and enable TLS on port 6379. It's also possible to have both TCP and TLS available, but you'll need to assign different ports. To make a Replica connect to the master using TLS, use `--tls-replication yes`, -and to make Redis Cluster use TLS across nodes use `--tls-cluster yes`. +and to make KeyDB Cluster use TLS across nodes use `--tls-cluster yes`. Connections ----------- @@ -56,18 +56,18 @@ Note that unlike Redis, KeyDB fully supports multithreading of TLS connections. To-Do List ---------- -- [ ] redis-benchmark support. The current implementation is a mix of using +- [ ] keydb-benchmark support. The current implementation is a mix of using hiredis for parsing and basic networking (establishing connections), but directly manipulating sockets for most actions. This will need to be cleaned up for proper TLS support. The best approach is probably to migrate to hiredis async mode. -- [ ] redis-cli `--slave` and `--rdb` support. +- [ ] keydb-cli `--slave` and `--rdb` support. Multi-port ---------- Consider the implications of allowing TLS to be configured on a separate port, -making Redis listening on multiple ports: +making KeyDB listening on multiple ports: 1. Startup banner port notification 2. Proctitle From 4f690c1f813dce449abca6f994070b3d5daafe40 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 30 May 2021 02:06:20 +0000 Subject: [PATCH 03/11] Don't leak AE control pipes to fork children Former-commit-id: 1ed2e8b5bcb485f7d1e073545a190bb95405a7d6 --- src/ae.cpp | 8 ++++++++ src/ae.h | 1 + src/sentinel.cpp | 4 ++++ src/server.cpp | 5 +++++ 4 files changed, 18 insertions(+) diff --git a/src/ae.cpp b/src/ae.cpp index efd9b0cb1..5399b3967 100644 --- a/src/ae.cpp +++ b/src/ae.cpp @@ -808,3 +808,11 @@ int aeThreadOwnsLock() { return g_lock.fOwnLock(); } + +void aeClosePipesForForkChild(aeEventLoop *el) +{ + close(el->fdCmdRead); + el->fdCmdRead = -1; + close(el->fdCmdWrite); + el->fdCmdWrite = -1; +} \ No newline at end of file diff --git a/src/ae.h b/src/ae.h index 780083da8..9e7704703 100644 --- a/src/ae.h +++ b/src/ae.h @@ -161,6 +161,7 @@ int aeGetSetSize(aeEventLoop *eventLoop); aeEventLoop *aeGetCurrentEventLoop(); int aeResizeSetSize(aeEventLoop *eventLoop, int setsize); void aeSetDontWait(aeEventLoop *eventLoop, int noWait); +void aeClosePipesForForkChild(aeEventLoop *eventLoop); void setAeLockSetThreadSpinWorker(spin_worker worker); void aeAcquireLock(); diff --git a/src/sentinel.cpp b/src/sentinel.cpp index 2a8dfd739..c879f235d 100644 --- a/src/sentinel.cpp +++ b/src/sentinel.cpp @@ -878,6 +878,10 @@ void sentinelRunPendingScripts(void) { } else if (pid == 0) { /* Child */ tlsCleanup(); + for (int iel = 0; iel < cserver.cthreads; ++iel) { + aeClosePipesForForkChild(g_pserver->rgthreadvar[iel].el); + } + aeClosePipesForForkChild(g_pserver->modulethreadvar.el); execve(sj->argv[0],sj->argv,environ); /* If we are here an error occurred. */ _exit(2); /* Don't retry execution. */ diff --git a/src/server.cpp b/src/server.cpp index 5b481667d..3d5c70faa 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -6192,6 +6192,11 @@ void closeChildUnusedResourceAfterFork() { if (g_pserver->cluster_enabled && g_pserver->cluster_config_file_lock_fd != -1) close(g_pserver->cluster_config_file_lock_fd); /* don't care if this fails */ + for (int iel = 0; iel < cserver.cthreads; ++iel) { + aeClosePipesForForkChild(g_pserver->rgthreadvar[iel].el); + } + aeClosePipesForForkChild(g_pserver->modulethreadvar.el); + /* Clear cserver.pidfile, this is the parent pidfile which should not * be touched (or deleted) by the child (on exit / crash) */ zfree(cserver.pidfile); From eb1bf61a587a056fe6205597faef0aebdd94ce16 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 30 May 2021 02:06:47 +0000 Subject: [PATCH 04/11] Don't leave dangling client pointers in the process list Former-commit-id: 5d16f519508fcb96a8803fcefa69d6c75fa174ac --- src/networking.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/networking.cpp b/src/networking.cpp index b775e2435..202525e6d 100644 --- a/src/networking.cpp +++ b/src/networking.cpp @@ -1527,6 +1527,8 @@ void unlinkClient(client *c) { c->fPendingAsyncWrite = FALSE; } + serverTL->vecclientsProcess.erase(std::remove(serverTL->vecclientsProcess.begin(), serverTL->vecclientsProcess.end(), c), serverTL->vecclientsProcess.end()); + /* Clear the tracking status. */ if (c->flags & CLIENT_TRACKING) disableTracking(c); } From 24377fa8fc1c1e4da5d1a5792613da2447fc0a02 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 30 May 2021 02:07:31 +0000 Subject: [PATCH 05/11] Delete pipe on the right thread so event handlers are cleaned up Former-commit-id: 9742c44db52929b755d6ecce16f91bee07f495ff --- src/rdb.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/rdb.cpp b/src/rdb.cpp index 6edf4231e..3d954592a 100644 --- a/src/rdb.cpp +++ b/src/rdb.cpp @@ -2897,7 +2897,11 @@ static void backgroundSaveDoneHandlerSocket(int exitcode, int bysignal) { } if (g_pserver->rdb_child_exit_pipe!=-1) close(g_pserver->rdb_child_exit_pipe); - close(g_pserver->rdb_pipe_read); + auto pipeT = g_pserver->rdb_pipe_read; + aePostFunction(g_pserver->rgthreadvar[IDX_EVENT_LOOP_MAIN].el, [pipeT]{ + aeDeleteFileEvent(serverTL->el, pipeT, AE_READABLE); + close(pipeT); + }); g_pserver->rdb_child_exit_pipe = -1; g_pserver->rdb_pipe_read = -1; zfree(g_pserver->rdb_pipe_conns); From 542a83edecb6cbd4e47f32b9d38975943cda8cd3 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 30 May 2021 02:08:02 +0000 Subject: [PATCH 06/11] Handle race where a different thread cancels the replication but we haven't closed the connection Former-commit-id: 92e48f071792985c2fb7f4c722329e832cd4c77b --- src/replication.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/replication.cpp b/src/replication.cpp index 109af53e5..491e3b17f 100644 --- a/src/replication.cpp +++ b/src/replication.cpp @@ -1993,6 +1993,10 @@ void readSyncBulkPayload(connection *conn) { // Should we update our database, or create from scratch? int fUpdate = g_pserver->fActiveReplica || g_pserver->enable_multimaster; redisMaster *mi = (redisMaster*)connGetPrivateData(conn); + if (mi == nullptr) { + // We're about to be free'd so bail out + return; + } serverAssert(GlobalLocksAcquired()); serverAssert(mi->master == nullptr); From f3d74cf0755817d026539d79f02d1817c636437e Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 30 May 2021 02:08:30 +0000 Subject: [PATCH 07/11] Ensure our connections are touched only on the right thread Former-commit-id: e8a44821d016e9f9b8ca873757d4786c4c271b7c --- src/cluster.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/cluster.cpp b/src/cluster.cpp index b2391f6b4..98ae754d0 100644 --- a/src/cluster.cpp +++ b/src/cluster.cpp @@ -688,6 +688,7 @@ void freeClusterLink(clusterLink *link) { } static void clusterConnAcceptHandler(connection *conn) { + serverAssert(ielFromEventLoop(serverTL->el) == IDX_EVENT_LOOP_MAIN); clusterLink *link; if (connGetState(conn) != CONN_STATE_CONNECTED) { @@ -1791,6 +1792,8 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc * processing lead to some inconsistency error (for instance a PONG * received from the wrong sender ID). */ int clusterProcessPacket(clusterLink *link) { + serverAssert(ielFromEventLoop(serverTL->el) == IDX_EVENT_LOOP_MAIN); + clusterMsg *hdr = (clusterMsg*) link->rcvbuf; uint32_t totlen = ntohl(hdr->totlen); uint16_t type = ntohs(hdr->type); @@ -3546,6 +3549,7 @@ void clusterHandleManualFailover(void) { /* This is executed 10 times every second */ void clusterCron(void) { + serverAssert(ielFromEventLoop(serverTL->el) == IDX_EVENT_LOOP_MAIN); dictIterator *di; dictEntry *de; int update_state = 0; From 55c30c8c8115b383522136d9d600483c40f0ce89 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 30 May 2021 02:45:06 +0000 Subject: [PATCH 08/11] Fix merge issue with module test script Former-commit-id: 10d254ec1bcdd9c25e373bbf882db244fecb65d1 --- runtest-moduleapi | 1 - tests/unit/moduleapi/timers.tcl | 35 --------------------------------- 2 files changed, 36 deletions(-) delete mode 100644 tests/unit/moduleapi/timers.tcl diff --git a/runtest-moduleapi b/runtest-moduleapi index 2e5bffcba..dc4c9e1ea 100755 --- a/runtest-moduleapi +++ b/runtest-moduleapi @@ -31,7 +31,6 @@ $TCLSH tests/test_helper.tcl \ --single unit/moduleapi/blockedclient \ --single unit/moduleapi/moduleloadsave \ --single unit/moduleapi/getkeys \ ---single unit/moduleapi/timers \ --single unit/moduleapi/test_lazyfree \ --single unit/moduleapi/defrag \ --single unit/moduleapi/hash \ diff --git a/tests/unit/moduleapi/timers.tcl b/tests/unit/moduleapi/timers.tcl deleted file mode 100644 index 6d6113f6b..000000000 --- a/tests/unit/moduleapi/timers.tcl +++ /dev/null @@ -1,35 +0,0 @@ -set testmodule [file normalize tests/modules/timers.so] -set timercount 4000 - - -tags "modules" { - test {Ascending module timers can load in correctly} { - start_server [list overrides [list loadmodule "$testmodule ascending $timercount"]] { - wait_for_condition [expr round($timercount/10)] 20 { - [r timer.elapsed] == $timercount - } else { - fail "Server failed to load in timers with ascending periods" - } - } - } - - test {Descending module timers can load in correctly} { - start_server [list overrides [list loadmodule "$testmodule descending $timercount"]] { - wait_for_condition [expr round($timercount/10)] 20 { - [r timer.elapsed] == $timercount - } else { - fail "Server failed to load in timers with descending periods" - } - } - } - - test {Module timers with the same period can load in correctly} { - start_server [list overrides [list loadmodule "$testmodule same $timercount"]] { - wait_for_condition [expr round($timercount/10)] 20 { - [r timer.elapsed] == $timercount - } else { - fail "Server failed to load in timers with the same period" - } - } - } -} From 54f921aae902611da6ff98beb2e6508c0ccaf7d7 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 30 May 2021 02:55:39 +0000 Subject: [PATCH 09/11] Fix a leak in the benchmark Former-commit-id: aa5b27b040c69f0c7d166203bed7110bc8c0bc87 --- src/redis-benchmark.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/redis-benchmark.cpp b/src/redis-benchmark.cpp index d243fe6ca..bde103423 100644 --- a/src/redis-benchmark.cpp +++ b/src/redis-benchmark.cpp @@ -1981,6 +1981,7 @@ int main(int argc, const char **argv) { if (!config.csv) printf("\n"); } while(config.loop); + zfree(data); if (config.redis_config != NULL) freeRedisConfig(config.redis_config); From d1337c9bbe948da3690385d57d9219295f81d0d0 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 30 May 2021 05:11:18 +0000 Subject: [PATCH 10/11] Don't pass on flags indicating membership in a list Former-commit-id: b1f662d6393eea77c2870c8a1f955374b1a3b57f --- src/replication.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/replication.cpp b/src/replication.cpp index 491e3b17f..e01124fb1 100644 --- a/src/replication.cpp +++ b/src/replication.cpp @@ -1896,7 +1896,7 @@ void replicationCreateCachedMasterClone(redisMaster *mi) { } client *c = createClient(nullptr, ielFromEventLoop(serverTL->el)); - c->flags |= mi->master->flags; + c->flags |= mi->master->flags & ~(CLIENT_PENDING_WRITE | CLIENT_UNBLOCKED | CLIENT_CLOSE_ASAP); c->authenticated = mi->master->authenticated; c->reploff = mi->master->reploff; c->read_reploff = mi->master->read_reploff; From e4cf5f442aca94e200fbbc3d04490c0087f4ff85 Mon Sep 17 00:00:00 2001 From: John Sully Date: Wed, 2 Jun 2021 02:10:00 +0000 Subject: [PATCH 11/11] Fix merge issue with wrong return type Former-commit-id: 1640a2fefb8f09c7b6b335838f14a7fd3a51ae88 --- src/evict.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/evict.cpp b/src/evict.cpp index e3474d733..009713d73 100644 --- a/src/evict.cpp +++ b/src/evict.cpp @@ -777,11 +777,11 @@ cant_free: if (g_pserver->m_pstorageFactory) { if (mem_reported < g_pserver->maxmemory*1.2) { - return C_OK; // Allow us to temporarily go over without OOMing + return EVICT_OK; // Allow us to temporarily go over without OOMing } } - if (!cserver.delete_on_evict && result != C_OK) + if (!cserver.delete_on_evict && result == EVICT_FAIL) { for (int idb = 0; idb < cserver.dbnum; ++idb) { @@ -791,7 +791,7 @@ cant_free: serverLog(LL_WARNING, "Failed to evict keys, falling back to flushing entire cache. Consider increasing maxmemory-samples."); db->removeAllCachedValues(); if (((mem_reported - zmalloc_used_memory()) + mem_freed) >= mem_tofree) - result = C_OK; + result = EVICT_OK; } } }