Improved hashing algorithm in luaS_newlstr (#1168)
**Overview** This PR introduces the use of [MurmurHash3](https://en.wikipedia.org/wiki/MurmurHash) as the hashing function for Lua's luaS_newlstr function, replacing the previous simple hash function. The change aims to improve performance, particularly for large strings. **Changes** Implemented MurmurHash3 algorithm in lstring.c Updated luaS_newlstr to use MurmurHash3 for string hashing **Performance Testing:** Test Setup: 1. Ran a valkey server 2. Loaded 1000 keys with large values (100KB each) to the server using a Lua script ``` local numKeys = 1000 for i = 1, numKeys do local key = "large_key_" .. i local largeValue = string.rep("x", 1024*100) redis.call("SET", key, largeValue) end ``` 3. Used a Lua script to randomly select and retrieve keys ``` local randomKey = redis.call("RANDOMKEY") local result = redis.call("GET", randomKey) ``` 4. Benchmarked using valkey-benchmark: `./valkey-benchmark -n 100000 evalsha c157a37967e69569339a39a953c046fc2ecb4258 0` Results: A | Unstable | This PR | Change -- | -- | -- | -- Throughput | 6,835.74 requests per second | 17,061.94 requests per second | **+150% increase** Avg Latency | 7.218 ms | 2.838 ms | **-61% decrease** Min Latency | 3.144 ms | 1.320 ms | **-58% decrease** P50 Latency | 8.463 ms | 3.167 ms | **-63% decrease** P95 Latency | 8.863 ms | 3.527 ms | **-60% decrease** P99 Latency | 9.063 ms | 3.663 ms | **-60% decrease** Max Latency | 63.871 ms | 55.327 ms | **-13% decrease** Summary: * Throughput: Improved by 150%. * Latency: Significant reductions in average, minimum, and percentile latencies (P50, P95, P99), leading to much faster response times. * Max Latency: Slightly decreased by 13%, indicating fewer outlier delays after the fix. --------- Signed-off-by: Shai Zarka <zarkash@amazon.com> Signed-off-by: zarkash-aws <zarkash@amazon.com> Signed-off-by: Madelyn Olson <madelyneolson@gmail.com> Co-authored-by: Madelyn Olson <madelyneolson@gmail.com> Co-authored-by: Viktor Söderqvist <viktor.soderqvist@est.tech>
This commit is contained in:
parent
b927fb09d4
commit
06cfe2c254
1
deps/README.md
vendored
1
deps/README.md
vendored
@ -94,6 +94,7 @@ and our version:
|
||||
1. Makefile is modified to allow a different compiler than GCC.
|
||||
2. We have the implementation source code, and directly link to the following external libraries: `lua_cjson.o`, `lua_struct.o`, `lua_cmsgpack.o` and `lua_bit.o`.
|
||||
3. There is a security fix in `ldo.c`, line 498: The check for `LUA_SIGNATURE[0]` is removed in order to avoid direct bytecode execution.
|
||||
4. In `lstring.c`, the luaS_newlstr function's hash calculation has been upgraded from a simple hash function to MurmurHash3, implemented within the same file, to enhance performance, particularly for operations involving large strings.
|
||||
|
||||
Hdr_Histogram
|
||||
---
|
||||
|
52
deps/lua/src/lstring.c
vendored
52
deps/lua/src/lstring.c
vendored
@ -6,6 +6,7 @@
|
||||
|
||||
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define lstring_c
|
||||
#define LUA_CORE
|
||||
@ -71,14 +72,55 @@ static TString *newlstr (lua_State *L, const char *str, size_t l,
|
||||
return ts;
|
||||
}
|
||||
|
||||
uint32_t murmur32(const uint8_t* key, size_t len, uint32_t seed) {
|
||||
static const uint32_t c1 = 0xcc9e2d51;
|
||||
static const uint32_t c2 = 0x1b873593;
|
||||
static const uint32_t r1 = 15;
|
||||
static const uint32_t r2 = 13;
|
||||
static const uint32_t m = 5;
|
||||
static const uint32_t n = 0xe6546b64;
|
||||
uint32_t hash = seed;
|
||||
|
||||
const int nblocks = len / 4;
|
||||
const uint32_t* blocks = (const uint32_t*) key;
|
||||
for (int i = 0; i < nblocks; i++) {
|
||||
uint32_t k = blocks[i];
|
||||
k *= c1;
|
||||
k = (k << r1) | (k >> (32 - r1));
|
||||
k *= c2;
|
||||
|
||||
hash ^= k;
|
||||
hash = ((hash << r2) | (hash >> (32 - r2))) * m + n;
|
||||
}
|
||||
|
||||
const uint8_t* tail = (const uint8_t*) (key + nblocks * 4);
|
||||
uint32_t k1 = 0;
|
||||
switch (len & 3) {
|
||||
case 3:
|
||||
k1 ^= tail[2] << 16;
|
||||
case 2:
|
||||
k1 ^= tail[1] << 8;
|
||||
case 1:
|
||||
k1 ^= tail[0];
|
||||
k1 *= c1;
|
||||
k1 = (k1 << r1) | (k1 >> (32 - r1));
|
||||
k1 *= c2;
|
||||
hash ^= k1;
|
||||
}
|
||||
|
||||
hash ^= len;
|
||||
hash ^= (hash >> 16);
|
||||
hash *= 0x85ebca6b;
|
||||
hash ^= (hash >> 13);
|
||||
hash *= 0xc2b2ae35;
|
||||
hash ^= (hash >> 16);
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
TString *luaS_newlstr (lua_State *L, const char *str, size_t l) {
|
||||
GCObject *o;
|
||||
unsigned int h = cast(unsigned int, l); /* seed */
|
||||
size_t step = 1;
|
||||
size_t l1;
|
||||
for (l1=l; l1>=step; l1-=step) /* compute hash */
|
||||
h = h ^ ((h<<5)+(h>>2)+cast(unsigned char, str[l1-1]));
|
||||
unsigned int h = murmur32((uint8_t *)str, l, (uint32_t)l);
|
||||
for (o = G(L)->strt.hash[lmod(h, G(L)->strt.size)];
|
||||
o != NULL;
|
||||
o = o->gch.next) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user