From 744b13e302559fc6da30749df266810882b5df99 Mon Sep 17 00:00:00 2001
From: mwish
Date: Wed, 28 Aug 2024 11:44:32 +0800
Subject: [PATCH] Using intrinsics to optimize counting HyperLogLog trailing bits (#846)

Godbolt link: https://godbolt.org/z/3YPvxsr5s

__builtin_ctz would generate shorter code than hand-written loop.

---------

Signed-off-by: mwish
Signed-off-by: Binbin
Signed-off-by: Madelyn Olson
Co-authored-by: Binbin
Co-authored-by: Madelyn Olson
---
 src/hyperloglog.c | 19 ++++++-------------
 src/intrinsics.h  | 21 +++++++++++++++++++++
 2 files changed, 27 insertions(+), 13 deletions(-)
 create mode 100644 src/intrinsics.h

diff --git a/src/hyperloglog.c b/src/hyperloglog.c
index 9769533d5..79d81ac8f 100644
--- a/src/hyperloglog.c
+++ b/src/hyperloglog.c
@@ -30,6 +30,7 @@
  */
 
 #include "server.h"
+#include "intrinsics.h"
 
 #include <stdint.h>
 #include <math.h>
@@ -455,7 +456,7 @@ uint64_t MurmurHash64A(const void *key, int len, unsigned int seed) {
  * of the pattern 000..1 of the element hash. As a side effect 'regp' is
  * set to the register index this element hashes to. */
 int hllPatLen(unsigned char *ele, size_t elesize, long *regp) {
-    uint64_t hash, bit, index;
+    uint64_t hash, index;
     int count;
 
     /* Count the number of zeroes starting from bit HLL_REGISTERS
@@ -465,21 +466,13 @@ int hllPatLen(unsigned char *ele, size_t elesize, long *regp) {
      * Note that the final "1" ending the sequence of zeroes must be
      * included in the count, so if we find "001" the count is 3, and
      * the smallest count possible is no zeroes at all, just a 1 bit
-     * at the first position, that is a count of 1.
-     *
-     * This may sound like inefficient, but actually in the average case
-     * there are high probabilities to find a 1 after a few iterations. */
+     * at the first position, that is a count of 1. */
     hash = MurmurHash64A(ele, elesize, 0xadc83b19ULL);
     index = hash & HLL_P_MASK; /* Register index. */
     hash >>= HLL_P; /* Remove bits used to address the register. */
-    hash |= ((uint64_t)1 << HLL_Q); /* Make sure the loop terminates
-                                       and count will be <= Q+1. */
-    bit = 1;
-    count = 1; /* Initialized to 1 since we count the "00000...1" pattern. */
-    while ((hash & bit) == 0) {
-        count++;
-        bit <<= 1;
-    }
+    hash |= ((uint64_t)1 << HLL_Q); /* Make sure count will be <= Q+1. */
+    count = 1; /* Initialized to 1 since we count the "00000...1" pattern. */
+    count += builtin_ctzll(hash);
     *regp = (int)index;
     return count;
 }
diff --git a/src/intrinsics.h b/src/intrinsics.h
new file mode 100644
index 000000000..3be419915
--- /dev/null
+++ b/src/intrinsics.h
@@ -0,0 +1,21 @@
+#ifndef __INTRINSICS_H
+#define __INTRINSICS_H
+
+#include <stdint.h>
+
+/* Count the number of trailing zero bits in a 64-bit integer; returns 64 when value is 0. */
+static inline int32_t builtin_ctzll(uint64_t value) {
+    if (value == 0) return 64;
+#if defined(__clang__) || defined(__GNUC__)
+    return __builtin_ctzll(value);
+#else
+    int bitpos = 0;
+    while ((value & 1) == 0) { /* Parenthesized: '==' binds tighter than '&'. */
+        value >>= 1;
+        ++bitpos;
+    }
+    return bitpos;
+#endif
+}
+
+#endif /* __INTRINSICS_H */