From 71adde26d1f2512b4ca4ae95224e997a3501dc4f Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 29 May 2021 07:07:54 +0000 Subject: [PATCH 1/9] Fix cluster test failures Former-commit-id: 8cf1daf823be0a40301a5dc288ec18f5bb98d60b --- tests/cluster/tests/12.1-replica-migration-3.tcl | 4 ++-- tests/cluster/tests/includes/utils.tcl | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/cluster/tests/12.1-replica-migration-3.tcl b/tests/cluster/tests/12.1-replica-migration-3.tcl index 46a9f79e3..8b2558264 100644 --- a/tests/cluster/tests/12.1-replica-migration-3.tcl +++ b/tests/cluster/tests/12.1-replica-migration-3.tcl @@ -36,7 +36,7 @@ test "Set allow-replica-migration no" { set master0_id [dict get [get_myself 0] id] test "Resharding all the master #0 slots away from it" { set output [exec \ - ../../../src/redis-cli --cluster rebalance \ + ../../../src/keydb-cli --cluster rebalance \ 127.0.0.1:[get_instance_attrib redis 0 port] \ {*}[rediscli_tls_config "../../../tests"] \ --cluster-weight ${master0_id}=0 >@ stdout ] @@ -44,7 +44,7 @@ test "Resharding all the master #0 slots away from it" { test "Wait cluster to be stable" { wait_for_condition 1000 50 { - [catch {exec ../../../src/redis-cli --cluster \ + [catch {exec ../../../src/keydb-cli --cluster \ check 127.0.0.1:[get_instance_attrib redis 0 port] \ {*}[rediscli_tls_config "../../../tests"] \ }] == 0 diff --git a/tests/cluster/tests/includes/utils.tcl b/tests/cluster/tests/includes/utils.tcl index 48c40a050..855ca3c85 100644 --- a/tests/cluster/tests/includes/utils.tcl +++ b/tests/cluster/tests/includes/utils.tcl @@ -8,18 +8,18 @@ proc config_set_all_nodes {keyword value} { proc fix_cluster {addr} { set code [catch { - exec ../../../src/redis-cli {*}[rediscli_tls_config "../../../tests"] --cluster fix $addr << yes + exec ../../../src/keydb-cli {*}[rediscli_tls_config "../../../tests"] --cluster fix $addr << yes } result] if {$code != 0} { - puts "redis-cli --cluster fix returns non-zero exit code, output below:\n$result" + puts "keydb-cli --cluster fix returns non-zero exit code, output below:\n$result" } - # Note: redis-cli --cluster fix may return a non-zero exit code if nodes don't agree, + # Note: keydb-cli --cluster fix may return a non-zero exit code if nodes don't agree, # but we can ignore that and rely on the check below. assert_cluster_state ok wait_for_condition 100 100 { - [catch {exec ../../../src/redis-cli {*}[rediscli_tls_config "../../../tests"] --cluster check $addr} result] == 0 + [catch {exec ../../../src/keydb-cli {*}[rediscli_tls_config "../../../tests"] --cluster check $addr} result] == 0 } else { - puts "redis-cli --cluster check returns non-zero exit code, output below:\n$result" + puts "keydb-cli --cluster check returns non-zero exit code, output below:\n$result" fail "Cluster could not settle with configuration" } } From 4142b86ff81256538aefcebb7ba6e20c73ffead8 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sat, 29 May 2021 07:08:01 +0000 Subject: [PATCH 2/9] Fix branding Former-commit-id: b20803b7669b10804fb0f355e302898ddaa19906 --- TLS.md | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/TLS.md b/TLS.md index 1afa2e8fa..6373ff871 100644 --- a/TLS.md +++ b/TLS.md @@ -13,13 +13,13 @@ Run `make BUILD_TLS=yes`. ### Tests -To run Redis test suite with TLS, you'll need TLS support for TCL (i.e. +To run KeyDB test suite with TLS, you'll need TLS support for TCL (i.e. `tcl-tls` package on Debian/Ubuntu). 1. Run `./utils/gen-test-certs.sh` to generate a root CA and a server certificate. -2. Run `./runtest --tls` or `./runtest-cluster --tls` to run Redis and Redis +2. Run `./runtest --tls` or `./runtest-cluster --tls` to run KeyDB and KeyDB Cluster tests in TLS mode. ### Running manually @@ -27,23 +27,23 @@ To run Redis test suite with TLS, you'll need TLS support for TCL (i.e. To manually run a Redis server with TLS mode (assuming `gen-test-certs.sh` was invoked so sample certificates/keys are available): - ./src/redis-server --tls-port 6379 --port 0 \ - --tls-cert-file ./tests/tls/redis.crt \ - --tls-key-file ./tests/tls/redis.key \ + ./src/keydb-server --tls-port 6379 --port 0 \ + --tls-cert-file ./tests/tls/keydb.crt \ + --tls-key-file ./tests/tls/keydb.key \ --tls-ca-cert-file ./tests/tls/ca.crt -To connect to this Redis server with `redis-cli`: +To connect to this Redis server with `keydb-cli`: - ./src/redis-cli --tls \ - --cert ./tests/tls/redis.crt \ - --key ./tests/tls/redis.key \ + ./src/keydb-cli --tls \ + --cert ./tests/tls/keydb.crt \ + --key ./tests/tls/keydb.key \ --cacert ./tests/tls/ca.crt This will disable TCP and enable TLS on port 6379. It's also possible to have both TCP and TLS available, but you'll need to assign different ports. To make a Replica connect to the master using TLS, use `--tls-replication yes`, -and to make Redis Cluster use TLS across nodes use `--tls-cluster yes`. +and to make KeyDB Cluster use TLS across nodes use `--tls-cluster yes`. Connections ----------- @@ -56,18 +56,18 @@ Note that unlike Redis, KeyDB fully supports multithreading of TLS connections. To-Do List ---------- -- [ ] redis-benchmark support. The current implementation is a mix of using +- [ ] keydb-benchmark support. The current implementation is a mix of using hiredis for parsing and basic networking (establishing connections), but directly manipulating sockets for most actions. This will need to be cleaned up for proper TLS support. The best approach is probably to migrate to hiredis async mode. -- [ ] redis-cli `--slave` and `--rdb` support. +- [ ] keydb-cli `--slave` and `--rdb` support. Multi-port ---------- Consider the implications of allowing TLS to be configured on a separate port, -making Redis listening on multiple ports: +making KeyDB listening on multiple ports: 1. Startup banner port notification 2. Proctitle From 06560f9f85607a7641c823626e4b50e64c905376 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 30 May 2021 02:06:20 +0000 Subject: [PATCH 3/9] Don't leak AE control pipes to fork children Former-commit-id: 1ed2e8b5bcb485f7d1e073545a190bb95405a7d6 --- src/ae.cpp | 8 ++++++++ src/ae.h | 1 + src/sentinel.cpp | 4 ++++ src/server.cpp | 5 +++++ 4 files changed, 18 insertions(+) diff --git a/src/ae.cpp b/src/ae.cpp index efd9b0cb1..5399b3967 100644 --- a/src/ae.cpp +++ b/src/ae.cpp @@ -808,3 +808,11 @@ int aeThreadOwnsLock() { return g_lock.fOwnLock(); } + +void aeClosePipesForForkChild(aeEventLoop *el) +{ + close(el->fdCmdRead); + el->fdCmdRead = -1; + close(el->fdCmdWrite); + el->fdCmdWrite = -1; +} \ No newline at end of file diff --git a/src/ae.h b/src/ae.h index 780083da8..9e7704703 100644 --- a/src/ae.h +++ b/src/ae.h @@ -161,6 +161,7 @@ int aeGetSetSize(aeEventLoop *eventLoop); aeEventLoop *aeGetCurrentEventLoop(); int aeResizeSetSize(aeEventLoop *eventLoop, int setsize); void aeSetDontWait(aeEventLoop *eventLoop, int noWait); +void aeClosePipesForForkChild(aeEventLoop *eventLoop); void setAeLockSetThreadSpinWorker(spin_worker worker); void aeAcquireLock(); diff --git a/src/sentinel.cpp b/src/sentinel.cpp index 2a8dfd739..c879f235d 100644 --- a/src/sentinel.cpp +++ b/src/sentinel.cpp @@ -878,6 +878,10 @@ void sentinelRunPendingScripts(void) { } else if (pid == 0) { /* Child */ tlsCleanup(); + for (int iel = 0; iel < cserver.cthreads; ++iel) { + aeClosePipesForForkChild(g_pserver->rgthreadvar[iel].el); + } + aeClosePipesForForkChild(g_pserver->modulethreadvar.el); execve(sj->argv[0],sj->argv,environ); /* If we are here an error occurred. */ _exit(2); /* Don't retry execution. */ diff --git a/src/server.cpp b/src/server.cpp index 5b481667d..3d5c70faa 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -6192,6 +6192,11 @@ void closeChildUnusedResourceAfterFork() { if (g_pserver->cluster_enabled && g_pserver->cluster_config_file_lock_fd != -1) close(g_pserver->cluster_config_file_lock_fd); /* don't care if this fails */ + for (int iel = 0; iel < cserver.cthreads; ++iel) { + aeClosePipesForForkChild(g_pserver->rgthreadvar[iel].el); + } + aeClosePipesForForkChild(g_pserver->modulethreadvar.el); + /* Clear cserver.pidfile, this is the parent pidfile which should not * be touched (or deleted) by the child (on exit / crash) */ zfree(cserver.pidfile); From f82860104efa7ad323296b88a340ee652729de75 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 30 May 2021 02:06:47 +0000 Subject: [PATCH 4/9] Don't leave dangling client pointers in the process list Former-commit-id: 5d16f519508fcb96a8803fcefa69d6c75fa174ac --- src/networking.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/networking.cpp b/src/networking.cpp index b775e2435..202525e6d 100644 --- a/src/networking.cpp +++ b/src/networking.cpp @@ -1527,6 +1527,8 @@ void unlinkClient(client *c) { c->fPendingAsyncWrite = FALSE; } + serverTL->vecclientsProcess.erase(std::remove(serverTL->vecclientsProcess.begin(), serverTL->vecclientsProcess.end(), c), serverTL->vecclientsProcess.end()); + /* Clear the tracking status. */ if (c->flags & CLIENT_TRACKING) disableTracking(c); } From f1ba462df67f3787342833371885e8b96d1cf70a Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 30 May 2021 02:07:31 +0000 Subject: [PATCH 5/9] Delete pipe on the right thread so event handlers are cleaned up Former-commit-id: 9742c44db52929b755d6ecce16f91bee07f495ff --- src/rdb.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/rdb.cpp b/src/rdb.cpp index 6edf4231e..3d954592a 100644 --- a/src/rdb.cpp +++ b/src/rdb.cpp @@ -2897,7 +2897,11 @@ static void backgroundSaveDoneHandlerSocket(int exitcode, int bysignal) { } if (g_pserver->rdb_child_exit_pipe!=-1) close(g_pserver->rdb_child_exit_pipe); - close(g_pserver->rdb_pipe_read); + auto pipeT = g_pserver->rdb_pipe_read; + aePostFunction(g_pserver->rgthreadvar[IDX_EVENT_LOOP_MAIN].el, [pipeT]{ + aeDeleteFileEvent(serverTL->el, pipeT, AE_READABLE); + close(pipeT); + }); g_pserver->rdb_child_exit_pipe = -1; g_pserver->rdb_pipe_read = -1; zfree(g_pserver->rdb_pipe_conns); From 1a734223ca5ffc4860491a5d3a14dd59b010bc59 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 30 May 2021 02:08:02 +0000 Subject: [PATCH 6/9] Handle race where a different thread cancels the replication but we haven't closed the connection Former-commit-id: 92e48f071792985c2fb7f4c722329e832cd4c77b --- src/replication.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/replication.cpp b/src/replication.cpp index 109af53e5..491e3b17f 100644 --- a/src/replication.cpp +++ b/src/replication.cpp @@ -1993,6 +1993,10 @@ void readSyncBulkPayload(connection *conn) { // Should we update our database, or create from scratch? int fUpdate = g_pserver->fActiveReplica || g_pserver->enable_multimaster; redisMaster *mi = (redisMaster*)connGetPrivateData(conn); + if (mi == nullptr) { + // We're about to be free'd so bail out + return; + } serverAssert(GlobalLocksAcquired()); serverAssert(mi->master == nullptr); From 42f5d1a175c53f47564c9c3e1b2ed0642b5af65d Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 30 May 2021 02:08:30 +0000 Subject: [PATCH 7/9] Ensure our connections are touched only on the right thread Former-commit-id: e8a44821d016e9f9b8ca873757d4786c4c271b7c --- src/cluster.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/cluster.cpp b/src/cluster.cpp index b2391f6b4..98ae754d0 100644 --- a/src/cluster.cpp +++ b/src/cluster.cpp @@ -688,6 +688,7 @@ void freeClusterLink(clusterLink *link) { } static void clusterConnAcceptHandler(connection *conn) { + serverAssert(ielFromEventLoop(serverTL->el) == IDX_EVENT_LOOP_MAIN); clusterLink *link; if (connGetState(conn) != CONN_STATE_CONNECTED) { @@ -1791,6 +1792,8 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc * processing lead to some inconsistency error (for instance a PONG * received from the wrong sender ID). */ int clusterProcessPacket(clusterLink *link) { + serverAssert(ielFromEventLoop(serverTL->el) == IDX_EVENT_LOOP_MAIN); + clusterMsg *hdr = (clusterMsg*) link->rcvbuf; uint32_t totlen = ntohl(hdr->totlen); uint16_t type = ntohs(hdr->type); @@ -3546,6 +3549,7 @@ void clusterHandleManualFailover(void) { /* This is executed 10 times every second */ void clusterCron(void) { + serverAssert(ielFromEventLoop(serverTL->el) == IDX_EVENT_LOOP_MAIN); dictIterator *di; dictEntry *de; int update_state = 0; From e5691036d92a9c11cd87a8ab44ce4fe701c79715 Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 30 May 2021 02:45:06 +0000 Subject: [PATCH 8/9] Fix merge issue with module test script Former-commit-id: 10d254ec1bcdd9c25e373bbf882db244fecb65d1 --- runtest-moduleapi | 1 - tests/unit/moduleapi/timers.tcl | 35 --------------------------------- 2 files changed, 36 deletions(-) delete mode 100644 tests/unit/moduleapi/timers.tcl diff --git a/runtest-moduleapi b/runtest-moduleapi index 2e5bffcba..dc4c9e1ea 100755 --- a/runtest-moduleapi +++ b/runtest-moduleapi @@ -31,7 +31,6 @@ $TCLSH tests/test_helper.tcl \ --single unit/moduleapi/blockedclient \ --single unit/moduleapi/moduleloadsave \ --single unit/moduleapi/getkeys \ ---single unit/moduleapi/timers \ --single unit/moduleapi/test_lazyfree \ --single unit/moduleapi/defrag \ --single unit/moduleapi/hash \ diff --git a/tests/unit/moduleapi/timers.tcl b/tests/unit/moduleapi/timers.tcl deleted file mode 100644 index 6d6113f6b..000000000 --- a/tests/unit/moduleapi/timers.tcl +++ /dev/null @@ -1,35 +0,0 @@ -set testmodule [file normalize tests/modules/timers.so] -set timercount 4000 - - -tags "modules" { - test {Ascending module timers can load in correctly} { - start_server [list overrides [list loadmodule "$testmodule ascending $timercount"]] { - wait_for_condition [expr round($timercount/10)] 20 { - [r timer.elapsed] == $timercount - } else { - fail "Server failed to load in timers with ascending periods" - } - } - } - - test {Descending module timers can load in correctly} { - start_server [list overrides [list loadmodule "$testmodule descending $timercount"]] { - wait_for_condition [expr round($timercount/10)] 20 { - [r timer.elapsed] == $timercount - } else { - fail "Server failed to load in timers with descending periods" - } - } - } - - test {Module timers with the same period can load in correctly} { - start_server [list overrides [list loadmodule "$testmodule same $timercount"]] { - wait_for_condition [expr round($timercount/10)] 20 { - [r timer.elapsed] == $timercount - } else { - fail "Server failed to load in timers with the same period" - } - } - } -} From 6331caca6a6000af8d5717c85f6cb9b297c5f28c Mon Sep 17 00:00:00 2001 From: John Sully Date: Sun, 30 May 2021 02:55:39 +0000 Subject: [PATCH 9/9] Fix a leak in the benchmark Former-commit-id: aa5b27b040c69f0c7d166203bed7110bc8c0bc87 --- src/redis-benchmark.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/redis-benchmark.cpp b/src/redis-benchmark.cpp index d243fe6ca..bde103423 100644 --- a/src/redis-benchmark.cpp +++ b/src/redis-benchmark.cpp @@ -1981,6 +1981,7 @@ int main(int argc, const char **argv) { if (!config.csv) printf("\n"); } while(config.loop); + zfree(data); if (config.redis_config != NULL) freeRedisConfig(config.redis_config);