Skip to content

Commit 3c2a952

Browse files
committed
Merge branch 'oertl-hyperloglog-improvement' into unstable
2 parents 87cc948 + 36b78e8 commit 3c2a952

File tree

1 file changed

+127
-120
lines changed

1 file changed

+127
-120
lines changed

src/hyperloglog.c

+127-120
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,8 @@ struct hllhdr {
192192
#define HLL_VALID_CACHE(hdr) (((hdr)->card[7] & (1<<7)) == 0)
193193

194194
#define HLL_P 14 /* The greater is P, the smaller the error. */
195+
#define HLL_Q (64-HLL_P) /* The number of bits of the hash value used for
196+
determining the number of leading zeros. */
195197
#define HLL_REGISTERS (1<<HLL_P) /* With P=14, 16384 registers. */
196198
#define HLL_P_MASK (HLL_REGISTERS-1) /* Mask to index register. */
197199
#define HLL_BITS 6 /* Enough to count up to 63 leading zeroes. */
@@ -384,6 +386,7 @@ static char *invalid_hll_err = "-INVALIDOBJ Corrupted HLL object detected\r\n";
384386
*(p) = (_l>>8) | HLL_SPARSE_XZERO_BIT; \
385387
*((p)+1) = (_l&0xff); \
386388
} while(0)
389+
#define HLL_ALPHA_INF 0.721347520444481703680 /* constant for 0.5/ln(2) */
387390

388391
/* ========================= HyperLogLog algorithm ========================= */
389392

@@ -401,11 +404,11 @@ uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) {
401404
uint64_t k;
402405

403406
#if (BYTE_ORDER == LITTLE_ENDIAN)
404-
#ifdef USE_ALIGNED_ACCESS
405-
memcpy(&k,data,sizeof(uint64_t));
406-
#else
407+
#ifdef USE_ALIGNED_ACCESS
408+
memcpy(&k,data,sizeof(uint64_t));
409+
#else
407410
k = *((uint64_t*)data);
408-
#endif
411+
#endif
409412
#else
410413
k = (uint64_t) data[0];
411414
k |= (uint64_t) data[1] << 8;
@@ -451,7 +454,7 @@ int hllPatLen(unsigned char *ele, size_t elesize, long *regp) {
451454

452455
/* Count the number of zeroes starting from bit HLL_REGISTERS
453456
* (that is a power of two corresponding to the first bit we don't use
454-
* as index). The max run can be 64-P+1 bits.
457+
* as index). The max run can be 64-P+1 = Q+1 bits.
455458
*
456459
* Note that the final "1" ending the sequence of zeroes must be
457460
* included in the count, so if we find "001" the count is 3, and
@@ -462,8 +465,10 @@ int hllPatLen(unsigned char *ele, size_t elesize, long *regp) {
462465
* there are high probabilities to find a 1 after a few iterations. */
463466
hash = MurmurHash64A(ele,elesize,0xadc83b19ULL);
464467
index = hash & HLL_P_MASK; /* Register index. */
465-
hash |= ((uint64_t)1<<63); /* Make sure the loop terminates. */
466-
bit = HLL_REGISTERS; /* First bit not used to address the register. */
468+
hash >>= HLL_P; /* Remove bits used to address the register. */
469+
hash |= ((uint64_t)1<<HLL_Q); /* Make sure the loop terminates
470+
and count will be <= Q+1. */
471+
bit = 1;
467472
count = 1; /* Initialized to 1 since we count the "00000...1" pattern. */
468473
while((hash & bit) == 0) {
469474
count++;
@@ -510,13 +515,9 @@ int hllDenseAdd(uint8_t *registers, unsigned char *ele, size_t elesize) {
510515
return hllDenseSet(registers,index,count);
511516
}
512517

513-
/* Compute SUM(2^-reg) in the dense representation.
514-
* PE is an array with a pre-computer table of values 2^-reg indexed by reg.
515-
* As a side effect the integer pointed by 'ezp' is set to the number
516-
* of zero registers. */
517-
double hllDenseSum(uint8_t *registers, double *PE, int *ezp) {
518-
double E = 0;
519-
int j, ez = 0;
518+
/* Compute the register histogram in the dense representation. */
519+
void hllDenseRegHisto(uint8_t *registers, int* reghisto) {
520+
int j;
520521

521522
/* Redis default is to use 16384 registers 6 bits each. The code works
522523
* with other values by modifying the defines, but for our target value
@@ -527,47 +528,49 @@ double hllDenseSum(uint8_t *registers, double *PE, int *ezp) {
527528
r10, r11, r12, r13, r14, r15;
528529
for (j = 0; j < 1024; j++) {
529530
/* Handle 16 registers per iteration. */
530-
r0 = r[0] & 63; if (r0 == 0) ez++;
531-
r1 = (r[0] >> 6 | r[1] << 2) & 63; if (r1 == 0) ez++;
532-
r2 = (r[1] >> 4 | r[2] << 4) & 63; if (r2 == 0) ez++;
533-
r3 = (r[2] >> 2) & 63; if (r3 == 0) ez++;
534-
r4 = r[3] & 63; if (r4 == 0) ez++;
535-
r5 = (r[3] >> 6 | r[4] << 2) & 63; if (r5 == 0) ez++;
536-
r6 = (r[4] >> 4 | r[5] << 4) & 63; if (r6 == 0) ez++;
537-
r7 = (r[5] >> 2) & 63; if (r7 == 0) ez++;
538-
r8 = r[6] & 63; if (r8 == 0) ez++;
539-
r9 = (r[6] >> 6 | r[7] << 2) & 63; if (r9 == 0) ez++;
540-
r10 = (r[7] >> 4 | r[8] << 4) & 63; if (r10 == 0) ez++;
541-
r11 = (r[8] >> 2) & 63; if (r11 == 0) ez++;
542-
r12 = r[9] & 63; if (r12 == 0) ez++;
543-
r13 = (r[9] >> 6 | r[10] << 2) & 63; if (r13 == 0) ez++;
544-
r14 = (r[10] >> 4 | r[11] << 4) & 63; if (r14 == 0) ez++;
545-
r15 = (r[11] >> 2) & 63; if (r15 == 0) ez++;
546-
547-
/* Additional parens will allow the compiler to optimize the
548-
* code more with a loss of precision that is not very relevant
549-
* here (floating point math is not commutative!). */
550-
E += (PE[r0] + PE[r1]) + (PE[r2] + PE[r3]) + (PE[r4] + PE[r5]) +
551-
(PE[r6] + PE[r7]) + (PE[r8] + PE[r9]) + (PE[r10] + PE[r11]) +
552-
(PE[r12] + PE[r13]) + (PE[r14] + PE[r15]);
531+
r0 = r[0] & 63;
532+
r1 = (r[0] >> 6 | r[1] << 2) & 63;
533+
r2 = (r[1] >> 4 | r[2] << 4) & 63;
534+
r3 = (r[2] >> 2) & 63;
535+
r4 = r[3] & 63;
536+
r5 = (r[3] >> 6 | r[4] << 2) & 63;
537+
r6 = (r[4] >> 4 | r[5] << 4) & 63;
538+
r7 = (r[5] >> 2) & 63;
539+
r8 = r[6] & 63;
540+
r9 = (r[6] >> 6 | r[7] << 2) & 63;
541+
r10 = (r[7] >> 4 | r[8] << 4) & 63;
542+
r11 = (r[8] >> 2) & 63;
543+
r12 = r[9] & 63;
544+
r13 = (r[9] >> 6 | r[10] << 2) & 63;
545+
r14 = (r[10] >> 4 | r[11] << 4) & 63;
546+
r15 = (r[11] >> 2) & 63;
547+
548+
reghisto[r0]++;
549+
reghisto[r1]++;
550+
reghisto[r2]++;
551+
reghisto[r3]++;
552+
reghisto[r4]++;
553+
reghisto[r5]++;
554+
reghisto[r6]++;
555+
reghisto[r7]++;
556+
reghisto[r8]++;
557+
reghisto[r9]++;
558+
reghisto[r10]++;
559+
reghisto[r11]++;
560+
reghisto[r12]++;
561+
reghisto[r13]++;
562+
reghisto[r14]++;
563+
reghisto[r15]++;
564+
553565
r += 12;
554566
}
555567
} else {
556-
for (j = 0; j < HLL_REGISTERS; j++) {
568+
for(j = 0; j < HLL_REGISTERS; j++) {
557569
unsigned long reg;
558-
559570
HLL_DENSE_GET_REGISTER(reg,registers,j);
560-
if (reg == 0) {
561-
ez++;
562-
/* Increment E at the end of the loop. */
563-
} else {
564-
E += PE[reg]; /* Precomputed 2^(-reg[j]). */
565-
}
571+
reghisto[reg]++;
566572
}
567-
E += ez; /* Add 2^0 'ez' times. */
568573
}
569-
*ezp = ez;
570-
return E;
571574
}
572575

573576
/* ================== Sparse representation implementation ================= */
@@ -903,76 +906,96 @@ int hllSparseAdd(robj *o, unsigned char *ele, size_t elesize) {
903906
return hllSparseSet(o,index,count);
904907
}
905908

906-
/* Compute SUM(2^-reg) in the sparse representation.
907-
* PE is an array with a pre-computer table of values 2^-reg indexed by reg.
908-
* As a side effect the integer pointed by 'ezp' is set to the number
909-
* of zero registers. */
910-
double hllSparseSum(uint8_t *sparse, int sparselen, double *PE, int *ezp, int *invalid) {
911-
double E = 0;
912-
int ez = 0, idx = 0, runlen, regval;
909+
/* Compute the register histogram in the sparse representation. */
910+
void hllSparseRegHisto(uint8_t *sparse, int sparselen, int *invalid, int* reghisto) {
911+
int idx = 0, runlen, regval;
913912
uint8_t *end = sparse+sparselen, *p = sparse;
914913

915914
while(p < end) {
916915
if (HLL_SPARSE_IS_ZERO(p)) {
917916
runlen = HLL_SPARSE_ZERO_LEN(p);
918917
idx += runlen;
919-
ez += runlen;
920-
/* Increment E at the end of the loop. */
918+
reghisto[0] += runlen;
921919
p++;
922920
} else if (HLL_SPARSE_IS_XZERO(p)) {
923921
runlen = HLL_SPARSE_XZERO_LEN(p);
924922
idx += runlen;
925-
ez += runlen;
926-
/* Increment E at the end of the loop. */
923+
reghisto[0] += runlen;
927924
p += 2;
928925
} else {
929926
runlen = HLL_SPARSE_VAL_LEN(p);
930927
regval = HLL_SPARSE_VAL_VALUE(p);
931928
idx += runlen;
932-
E += PE[regval]*runlen;
929+
reghisto[regval] += runlen;
933930
p++;
934931
}
935932
}
936933
if (idx != HLL_REGISTERS && invalid) *invalid = 1;
937-
E += ez; /* Add 2^0 'ez' times. */
938-
*ezp = ez;
939-
return E;
940934
}
941935

942936
/* ========================= HyperLogLog Count ==============================
943937
* This is the core of the algorithm where the approximated count is computed.
944-
* The function uses the lower level hllDenseSum() and hllSparseSum() functions
945-
* as helpers to compute the SUM(2^-reg) part of the computation, which is
946-
* representation-specific, while all the rest is common. */
947-
948-
/* Implements the SUM operation for uint8_t data type which is only used
949-
* internally as speedup for PFCOUNT with multiple keys. */
950-
double hllRawSum(uint8_t *registers, double *PE, int *ezp) {
951-
double E = 0;
952-
int j, ez = 0;
938+
* The function uses the lower level hllDenseRegHisto() and hllSparseRegHisto()
939+
* functions as helpers to compute histogram of register values part of the
940+
* computation, which is representation-specific, while all the rest is common. */
941+
942+
/* Implements the register histogram calculation for uint8_t data type
943+
* which is only used internally as speedup for PFCOUNT with multiple keys. */
944+
void hllRawRegHisto(uint8_t *registers, int* reghisto) {
953945
uint64_t *word = (uint64_t*) registers;
954946
uint8_t *bytes;
947+
int j;
955948

956949
for (j = 0; j < HLL_REGISTERS/8; j++) {
957950
if (*word == 0) {
958-
ez += 8;
951+
reghisto[0] += 8;
959952
} else {
960953
bytes = (uint8_t*) word;
961-
if (bytes[0]) E += PE[bytes[0]]; else ez++;
962-
if (bytes[1]) E += PE[bytes[1]]; else ez++;
963-
if (bytes[2]) E += PE[bytes[2]]; else ez++;
964-
if (bytes[3]) E += PE[bytes[3]]; else ez++;
965-
if (bytes[4]) E += PE[bytes[4]]; else ez++;
966-
if (bytes[5]) E += PE[bytes[5]]; else ez++;
967-
if (bytes[6]) E += PE[bytes[6]]; else ez++;
968-
if (bytes[7]) E += PE[bytes[7]]; else ez++;
954+
reghisto[bytes[0]]++;
955+
reghisto[bytes[1]]++;
956+
reghisto[bytes[2]]++;
957+
reghisto[bytes[3]]++;
958+
reghisto[bytes[4]]++;
959+
reghisto[bytes[5]]++;
960+
reghisto[bytes[6]]++;
961+
reghisto[bytes[7]]++;
969962
}
970963
word++;
971964
}
972-
E += ez; /* 2^(-reg[j]) is 1 when m is 0, add it 'ez' times for every
973-
zero register in the HLL. */
974-
*ezp = ez;
975-
return E;
965+
}
966+
967+
/* Helper function sigma as defined in
968+
* "New cardinality estimation algorithms for HyperLogLog sketches"
969+
* Otmar Ertl, arXiv:1702.01284 */
970+
double hllSigma(double x) {
971+
if (x == 1.) return INFINITY;
972+
double zPrime;
973+
double y = 1;
974+
double z = x;
975+
do {
976+
x *= x;
977+
zPrime = z;
978+
z += x * y;
979+
y += y;
980+
} while(zPrime != z);
981+
return z;
982+
}
983+
984+
/* Helper function tau as defined in
985+
* "New cardinality estimation algorithms for HyperLogLog sketches"
986+
* Otmar Ertl, arXiv:1702.01284 */
987+
double hllTau(double x) {
988+
if (x == 0. || x == 1.) return 0.;
989+
double zPrime;
990+
double y = 1.0;
991+
double z = 1 - x;
992+
do {
993+
x = sqrt(x);
994+
zPrime = z;
995+
y *= 0.5;
996+
z -= pow(1 - x, 2)*y;
997+
} while(zPrime != z);
998+
return z / 3;
976999
}
9771000

9781001
/* Return the approximated cardinality of the set based on the harmonic
@@ -988,49 +1011,33 @@ double hllRawSum(uint8_t *registers, double *PE, int *ezp) {
9881011
* keys (no need to work with 6-bit integers encoding). */
9891012
uint64_t hllCount(struct hllhdr *hdr, int *invalid) {
9901013
double m = HLL_REGISTERS;
991-
double E, alpha = 0.7213/(1+1.079/m);
992-
int j, ez; /* Number of registers equal to 0. */
993-
994-
/* We precompute 2^(-reg[j]) in a small table in order to
995-
* speedup the computation of SUM(2^-register[0..i]). */
996-
static int initialized = 0;
997-
static double PE[64];
998-
if (!initialized) {
999-
PE[0] = 1; /* 2^(-reg[j]) is 1 when m is 0. */
1000-
for (j = 1; j < 64; j++) {
1001-
/* 2^(-reg[j]) is the same as 1/2^reg[j]. */
1002-
PE[j] = 1.0/(1ULL << j);
1003-
}
1004-
initialized = 1;
1005-
}
1014+
double E;
1015+
int j;
1016+
int reghisto[HLL_Q+2] = {0};
10061017

1007-
/* Compute SUM(2^-register[0..i]). */
1018+
/* Compute register histogram */
10081019
if (hdr->encoding == HLL_DENSE) {
1009-
E = hllDenseSum(hdr->registers,PE,&ez);
1020+
hllDenseRegHisto(hdr->registers,reghisto);
10101021
} else if (hdr->encoding == HLL_SPARSE) {
1011-
E = hllSparseSum(hdr->registers,
1012-
sdslen((sds)hdr)-HLL_HDR_SIZE,PE,&ez,invalid);
1022+
hllSparseRegHisto(hdr->registers,
1023+
sdslen((sds)hdr)-HLL_HDR_SIZE,invalid,reghisto);
10131024
} else if (hdr->encoding == HLL_RAW) {
1014-
E = hllRawSum(hdr->registers,PE,&ez);
1025+
hllRawRegHisto(hdr->registers,reghisto);
10151026
} else {
10161027
serverPanic("Unknown HyperLogLog encoding in hllCount()");
10171028
}
10181029

1019-
/* Apply loglog-beta to the raw estimate. See:
1020-
* "LogLog-Beta and More: A New Algorithm for Cardinality Estimation
1021-
* Based on LogLog Counting" Jason Qin, Denys Kim, Yumei Tung
1022-
* arXiv:1612.02284 */
1023-
double zl = log(ez + 1);
1024-
double beta = -0.370393911*ez +
1025-
0.070471823*zl +
1026-
0.17393686*pow(zl,2) +
1027-
0.16339839*pow(zl,3) +
1028-
-0.09237745*pow(zl,4) +
1029-
0.03738027*pow(zl,5) +
1030-
-0.005384159*pow(zl,6) +
1031-
0.00042419*pow(zl,7);
1032-
1033-
E = llroundl(alpha*m*(m-ez)*(1/(E+beta)));
1030+
/* Estimate cardinality form register histogram. See:
1031+
* "New cardinality estimation algorithms for HyperLogLog sketches"
1032+
* Otmar Ertl, arXiv:1702.01284 */
1033+
double z = m * hllTau((m-reghisto[HLL_Q+1])/(double)m);
1034+
for (j = HLL_Q; j >= 1; --j) {
1035+
z += reghisto[j];
1036+
z *= 0.5;
1037+
}
1038+
z += m * hllSigma(reghisto[0]/(double)m);
1039+
E = llroundl(HLL_ALPHA_INF*m*m/z);
1040+
10341041
return (uint64_t) E;
10351042
}
10361043

0 commit comments

Comments
 (0)