Using intrinsics to optimize counting HyperLogLog trailing bits (#846)
Godbolt link: https://godbolt.org/z/3YPvxsr5s __builtin_ctz would generate shorter code than a hand-written loop. --------- Signed-off-by: mwish <maplewish117@gmail.com> Signed-off-by: Binbin <binloveplay1314@qq.com> Signed-off-by: Madelyn Olson <madelyneolson@gmail.com> Co-authored-by: Binbin <binloveplay1314@qq.com> Co-authored-by: Madelyn Olson <madelyneolson@gmail.com>
This commit is contained in:
parent
4fe8320711
commit
744b13e302
@ -30,6 +30,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "server.h"
|
#include "server.h"
|
||||||
|
#include "intrinsics.h"
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
@ -455,7 +456,7 @@ uint64_t MurmurHash64A(const void *key, int len, unsigned int seed) {
|
|||||||
* of the pattern 000..1 of the element hash. As a side effect 'regp' is
|
* of the pattern 000..1 of the element hash. As a side effect 'regp' is
|
||||||
* set to the register index this element hashes to. */
|
* set to the register index this element hashes to. */
|
||||||
int hllPatLen(unsigned char *ele, size_t elesize, long *regp) {
|
int hllPatLen(unsigned char *ele, size_t elesize, long *regp) {
|
||||||
uint64_t hash, bit, index;
|
uint64_t hash, index;
|
||||||
int count;
|
int count;
|
||||||
|
|
||||||
/* Count the number of zeroes starting from bit HLL_REGISTERS
|
/* Count the number of zeroes starting from bit HLL_REGISTERS
|
||||||
@ -465,21 +466,13 @@ int hllPatLen(unsigned char *ele, size_t elesize, long *regp) {
|
|||||||
* Note that the final "1" ending the sequence of zeroes must be
|
* Note that the final "1" ending the sequence of zeroes must be
|
||||||
* included in the count, so if we find "001" the count is 3, and
|
* included in the count, so if we find "001" the count is 3, and
|
||||||
* the smallest count possible is no zeroes at all, just a 1 bit
|
* the smallest count possible is no zeroes at all, just a 1 bit
|
||||||
* at the first position, that is a count of 1.
|
* at the first position, that is a count of 1. */
|
||||||
*
|
|
||||||
* This may sound like inefficient, but actually in the average case
|
|
||||||
* there are high probabilities to find a 1 after a few iterations. */
|
|
||||||
hash = MurmurHash64A(ele, elesize, 0xadc83b19ULL);
|
hash = MurmurHash64A(ele, elesize, 0xadc83b19ULL);
|
||||||
index = hash & HLL_P_MASK; /* Register index. */
|
index = hash & HLL_P_MASK; /* Register index. */
|
||||||
hash >>= HLL_P; /* Remove bits used to address the register. */
|
hash >>= HLL_P; /* Remove bits used to address the register. */
|
||||||
hash |= ((uint64_t)1 << HLL_Q); /* Make sure the loop terminates
|
hash |= ((uint64_t)1 << HLL_Q); /* Make sure count will be <= Q+1. */
|
||||||
and count will be <= Q+1. */
|
count = 1; /* Initialized to 1 since we count the "00000...1" pattern. */
|
||||||
bit = 1;
|
count += builtin_ctzll(hash);
|
||||||
count = 1; /* Initialized to 1 since we count the "00000...1" pattern. */
|
|
||||||
while ((hash & bit) == 0) {
|
|
||||||
count++;
|
|
||||||
bit <<= 1;
|
|
||||||
}
|
|
||||||
*regp = (int)index;
|
*regp = (int)index;
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
21
src/intrinsics.h
Normal file
21
src/intrinsics.h
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
#ifndef __INTRINSICS_H
|
||||||
|
#define __INTRINSICS_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
/* Count the number of trailing zero bits in a 64-bit integer.
 *
 * Returns the index (0..63) of the least significant set bit of 'value',
 * or 64 when 'value' is zero. The zero case is handled up front because
 * the compiler builtin has an undefined result for a zero argument, and
 * the portable loop below would never terminate on it. */
static inline int32_t builtin_ctzll(uint64_t value) {
    if (value == 0) return 64;
#if defined(__clang__) || defined(__GNUC__)
    return __builtin_ctzll(value);
#else
    int32_t bitpos = 0;
    /* The parentheses matter: '==' binds tighter than '&', so the
     * unparenthesized form 'value & 1 == 0' is 'value & (1 == 0)',
     * which is always zero and would skip the loop entirely. */
    while ((value & 1) == 0) {
        value >>= 1;
        ++bitpos;
    }
    return bitpos;
#endif
}
|
||||||
|
|
||||||
|
#endif /* __INTRINSICS_H */
|
Loading…
x
Reference in New Issue
Block a user