
Adds an option to build RDMA support as a module: make BUILD_RDMA=module To start valkey-server with RDMA, use a command line like the following: ./src/valkey-server --loadmodule src/valkey-rdma.so \ port=6379 bind=xx.xx.xx.xx * Implement the server side of the connection module only; this means we can *NOT* compile RDMA support as built-in. * Add necessary information in README.md * Support 'CONFIG SET/GET', for example, 'CONFIG SET rdma.port 6380', then check this by 'rdma res show cm_id' and valkey-cli (with RDMA support, but not implemented in this patch). * The full list of listeners looks like: listener0:name=tcp,bind=*,bind=-::*,port=6379 listener1:name=unix,bind=/var/run/valkey.sock listener2:name=rdma,bind=xx.xx.xx.xx,bind=yy.yy.yy.yy,port=6379 listener3:name=tls,bind=*,bind=-::*,port=16379 Because of the lack of RDMA support in TCL, use a simple C program to test Valkey Over RDMA (under tests/rdma/). This is a fairly raw version with basic library dependencies: libpthread, libibverbs, librdmacm. Run using the script: ./runtest-rdma [ OPTIONS ] To run RDMA in GitHub actions, a kernel module, RXE, for emulated soft RDMA needs to be installed. The kernel module source code is fetched from a repo containing only the RXE kernel driver from the Linux kernel, but stored in a separate repo to avoid cloning the whole Linux kernel repo. ---- In 2021/06, I created a [PR](https://github.com/redis/redis/pull/9161) for the *Redis Over RDMA* proposal. Then I did some work to [fully abstract connection and make TLS dynamically loadable](https://github.com/redis/redis/pull/9320); since Redis 7.2.0, a new connection type can be built into Redis statically, or as a separate shared library (loaded by Redis on startup). Based on the new connection framework, I created a new [PR](https://github.com/redis/redis/pull/11182), which some people (@xiezhq-hermann @zhangyiming1201 @JSpewock @uvletter @FujiZ) noticed, played with, and tested.
However, because of the lack of time and knowledge from the maintainers, this PR has been pending for about 2 years. Related doc: [Introduce *Valkey Over RDMA* specification](https://github.com/valkey-io/valkey-doc/pull/123). (the same as Redis, and this should be the same) Changes in this PR: - implement *Valkey Over RDMA*. (compact the Valkey style) Finally, if this feature is considered for merging, I volunteer to maintain it. --------- Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
146 lines
5.1 KiB
Python
Executable File
146 lines
5.1 KiB
Python
Executable File
#!/usr/bin/python3
|
|
"""
|
|
==========================================================================
|
|
run.py - script to setup/cleanup soft RDMA devices.
|
|
note that this script needs root privilege.
|
|
--------------------------------------------------------------------------
|
|
Copyright (C) 2024 zhenwei pi <pizhenwei@bytedance.com>
|
|
|
|
This work is licensed under BSD 3-Clause, License 1 of the COPYING file in
|
|
the top-level directory.
|
|
==========================================================================
|
|
"""
|
|
import os
|
|
import subprocess
|
|
import netifaces
|
|
import time
|
|
import argparse
|
|
|
|
|
|
def prepare_ib():
    """Load the kernel modules needed before a soft RDMA device can be set up.

    Runs ``modprobe`` for rdma_cm, udp_tunnel, ip6_udp_tunnel and ib_uverbs as
    one shell command chain. On failure the captured output of the chain is
    printed and the process terminates with exit status 1; on success a
    single ``[OK]`` line is printed.
    """
    cmd = "modprobe rdma_cm && modprobe udp_tunnel && modprobe ip6_udp_tunnel && modprobe ib_uverbs"
    # subprocess.run() reads the pipe while it waits, so the child can never
    # block on a full pipe (wait() followed by communicate() can). stderr is
    # merged into stdout so the failure dump below actually contains the
    # error text.
    result = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)
    if result.returncode:
        print("Valkey Over RDMA probe modules of IB [FAILED]")
        # os._exit() does not flush stdio buffers, so flush explicitly or the
        # diagnostics are lost whenever stdout is not a terminal.
        print("---------------\n" + result.stdout.decode(errors="replace") + "---------------\n",
              flush=True)
        os._exit(1)

    print("Valkey Over RDMA probe modules of IB [OK]", flush=True)
|
|
|
|
|
|
def prepare_rxe(interface):
    """Load the RXE soft RDMA driver and add an RXE device on *interface*.

    The driver is loaded with ``modprobe rdma_rxe``. If that fails, an
    out-of-tree ``rdma_rxe.ko`` is looked up under ``tests/rdma/rxe`` (resolved
    relative to two directories above this file) and loaded with ``insmod``;
    when the module file is missing, build instructions are printed instead.
    Afterwards ``rdma link add`` creates a device named ``rxe_<interface>``.

    Args:
        interface: name of the network device the RXE device is bound to.

    Any failing step prints a diagnostic and terminates the process with
    exit status 1.
    """
    # is there any builtin rdma_rxe.ko?
    probe = subprocess.run("modprobe rdma_rxe 2> /dev/null", shell=True,
                           stdout=subprocess.DEVNULL)
    if probe.returncode:
        valkeydir = os.path.dirname(os.path.abspath(__file__)) + "/../.."
        rxedir = valkeydir + "/tests/rdma/rxe"
        rxekmod = rxedir + "/rdma_rxe.ko"
        # show where the out-of-tree module is being looked for
        print(rxedir)
        print(rxekmod)
        if not os.path.exists(rxekmod):
            print("Neither kernel builtin nor out-of-tree rdma_rxe.ko found. Abort")
            print("Please run the following commands to build out-of-tree RXE on Linux-6.5, then retry:")
            print("\t~# mkdir -p " + rxedir)
            print("\t~# git clone https://github.com/pizhenwei/rxe.git " + rxedir)
            print("\t~# cd " + rxedir)
            # os._exit() skips stdio flushing; flush so the hints are not lost.
            print("\t~# make", flush=True)
            os._exit(1)

        # run() drains the pipe while waiting; stderr is merged in so that the
        # reason for an insmod failure is reported instead of exiting silently.
        insmod = subprocess.run("insmod " + rxekmod, shell=True,
                                stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        if insmod.returncode:
            print("Valkey Over RDMA install RXE [FAILED]")
            print("---------------\n" + insmod.stdout.decode(errors="replace") + "---------------\n",
                  flush=True)
            os._exit(1)

    print("Valkey Over RDMA install RXE [OK]")

    softrdma = "rxe_" + interface
    cmd = "rdma link add " + softrdma + " type rxe netdev " + interface
    link = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE,
                          stderr=subprocess.STDOUT)
    if link.returncode:
        # The driver itself was installed fine at this point; it is adding
        # the device that failed, so say so.
        print("Valkey Over RDMA add RXE device <%s> [FAILED]" % softrdma)
        print("---------------\n" + link.stdout.decode(errors="replace") + "---------------\n",
              flush=True)
        os._exit(1)

    print("Valkey Over RDMA add RXE device <%s> [OK]" % softrdma, flush=True)
|
|
|
|
|
|
# find any network interface that has an IPv4 address
|
|
def find_iface():
    """Return the first non-loopback interface that has an IPv4 address.

    Interfaces are examined in the order reported by
    ``netifaces.interfaces()``; the one named "lo" is always skipped.

    Returns:
        The interface name, or None when no interface qualifies.
    """
    candidates = (
        name
        for name in netifaces.interfaces()
        # "lo" is rejected before its addresses are ever queried
        if name != "lo" and netifaces.AF_INET in netifaces.ifaddresses(name)
    )
    return next(candidates, None)
|
|
|
|
|
|
def setup_rdma(driver, interface):
    """Set up a soft RDMA device using *driver* on *interface*.

    Args:
        driver: soft RDMA driver name; only "rxe" is accepted.
        interface: network interface name, or None to pick one via
            find_iface().

    An unsupported driver, or the absence of any usable interface, prints a
    message and terminates the process with exit status 1.
    """
    # Reject an unsupported driver before touching the kernel, so a bad
    # argument does not leave freshly loaded modules behind.
    if driver != "rxe":
        # os._exit() skips stdio flushing; flush so the message is not lost.
        print("rxe is currently supported only", flush=True)
        os._exit(1)

    if interface is None:
        interface = find_iface()
        if interface is None:
            # Without this check the device name would be built from None
            # and fail with a TypeError further down.
            print("No network interface with an IPv4 address found. Abort", flush=True)
            os._exit(1)

    prepare_ib()
    prepare_rxe(interface)
|
|
|
|
|
|
# iterate /sys/class/infiniband, find all virtual RDMA devices, and remove them
|
|
def cleanup_rdma():
    """Remove virtual RDMA devices and try to unload the soft RDMA drivers.

    Every entry of /sys/class/infiniband whose symlink target contains
    "virtual" is removed with ``rdma link del``, and the result of each
    removal is reported. Afterwards ``rmmod`` is attempted for rdma_rxe and
    rdma_siw, ignoring errors.

    Returns:
        None. If /sys/class/infiniband cannot be listed, the function returns
        right away without attempting to unload the drivers.
    """
    # Ex, /sys/class/infiniband/mlx5_0
    # Ex, /sys/class/infiniband/rxe_eth0
    # Ex, /sys/class/infiniband/siw_eth0
    ibclass = "/sys/class/infiniband/"
    try:
        devices = os.listdir(ibclass)
    except OSError:
        return None

    for dev in devices:
        try:
            # NOTE(review): soft devices are presumably linked below a
            # ".../virtual/..." sysfs path - confirm on the target kernel.
            origpath = os.readlink(ibclass + dev)
        except OSError:
            # One unreadable entry must not abort the rest of the cleanup.
            continue

        if "virtual" not in origpath:
            continue

        # Report the real outcome instead of claiming success unconditionally.
        # flush=True because the script leaves through os._exit(), which does
        # not flush stdio buffers.
        if subprocess.call("rdma link del " + dev, shell=True):
            print("Remove virtual RDMA device : " + dev + " [FAILED]", flush=True)
        else:
            print("Remove virtual RDMA device : " + dev + " [OK]", flush=True)

    # try to remove RXE driver from kernel, ignore error
    subprocess.call("rmmod rdma_rxe 2> /dev/null", shell=True)

    # try to remove SIW driver from kernel, ignore error
    subprocess.call("rmmod rdma_siw 2> /dev/null", shell=True)

    return None
|
|
|
|
|
|
if __name__ == "__main__":
    # Only this entry point needs sys (to flush stdout before os._exit).
    import sys

    parser = argparse.ArgumentParser(
        description="Script to setup/cleanup soft RDMA devices, note that root privilege is required",
        formatter_class=argparse.RawDescriptionHelpFormatter)
    # The operation is mandatory and restricted to the two known values, so a
    # missing or misspelled operation is reported instead of silently doing
    # nothing and exiting with status 0.
    parser.add_argument("-o", "--operation", type=str, required=True,
                        choices=("setup", "cleanup"),
                        help="[setup|cleanup] setup or cleanup soft RDMA environment")
    parser.add_argument("-d", "--driver", type=str, default="rxe",
                        help="[rxe|siw] specify soft RDMA driver, rxe by default")
    parser.add_argument("-i", "--interface", type=str,
                        help="[IFACE] network interface, auto-select any available interface by default")
    args = parser.parse_args()

    # Check the effective UID: a non-root user must stop here; show some
    # hints for a manual setup and exit.
    if os.geteuid():
        print("You are not root privileged. Abort.")
        print("Or you may setup RXE manually in root privileged by commands:")
        print("\t~# modprobe rdma_rxe")
        # os._exit() does not flush stdio buffers; flush so the hints are
        # visible even when stdout is a pipe or a file.
        print("\t~# rdma link add rxe0 type rxe netdev [IFACE]", flush=True)
        os._exit(1)

    if args.operation == "cleanup":
        cleanup_rdma()
    elif args.operation == "setup":
        setup_rdma(args.driver, args.interface)

    # Push out anything still buffered before leaving via os._exit(), which
    # would otherwise discard it.
    sys.stdout.flush()
    os._exit(0)
|