Deflake hashtable random fairness test (#1618)

Fixes intermittent failures of the hashtable random fairness unit test when running with the `--accurate` flag. Example of a failed run: https://github.com/valkey-io/valkey/actions/runs/12969591890/job/36173815884#step:10:105 The test case picks a random element out of 400, repeated 1M times, and then checks that 60% of the elements are picked within 3 standard deviations of the number of times they're expected to be picked. In this test run (with `--accurate`), the expected number is 2500 and the standard deviation is 50, which is only 2% of the expected value. This makes the check too strict and makes the test flaky. As an alternative, we allow 80% of the elements to be picked within 10% of the expected number. With this alternative condition, we can also raise the check for the non-edge case from 60% to 80% of the elements to be within 3 standard deviations. (With fewer repetitions, 3 standard deviations is greater than 10% of the expected value, so this new condition only affects the `--accurate` test run.) Additional change: Set a random seed for the hash function in the test suite. Until now, we only seeded the random number generator. Signed-off-by: Viktor Söderqvist <viktor.soderqvist@est.tech>
2025-01-27 10:13:46 +01:00 · 2025-01-27 10:13:46 +01:00 · a18fcdb371
commit a18fcdb371
parent 66577573f2
1 changed files with 20 additions and 2 deletions
--- a/src/unit/test_hashtable.c
+++ b/src/unit/test_hashtable.c
@ -21,6 +21,9 @@ static void randomSeed(void) {
    getRandomBytes((void *)&seed, sizeof(seed));
    init_genrand64(seed);
    srandom((unsigned)seed);
+    uint8_t hashseed[16];
+    getRandomBytes(hashseed, sizeof(hashseed));
+    hashtableSetHashFunctionSeed(hashseed);
 }

 /* An entry holding a string key and a string value in one allocation. */
@ -749,7 +752,7 @@ int test_random_entry(int argc, char **argv, int flags) {
    /* With large n, the distribution approaches a normal distribution and we
     * can use p68 = within 1 std dev, p95 = within 2 std dev, p99.7 = within 3
     * std dev. */
-    long p68 = 0, p95 = 0, p99 = 0, p4dev = 0, p5dev = 0;
+    long p68 = 0, p95 = 0, p99 = 0, p4dev = 0, p5dev = 0, p10percent = 0;
    for (size_t j = 0; j < count; j++) {
        double dev = expected - times_picked[j];
        p68 += (dev >= -std_dev && dev <= std_dev);
@ -757,7 +760,9 @@ int test_random_entry(int argc, char **argv, int flags) {
        p99 += (dev >= -std_dev * 3 && dev <= std_dev * 3);
        p4dev += (dev >= -std_dev * 4 && dev <= std_dev * 4);
        p5dev += (dev >= -std_dev * 5 && dev <= std_dev * 5);
+        p10percent += (dev >= -0.1 * expected && dev <= 0.1 * expected);
    }
+
    printf("Random entry fairness test\n");
    printf("  Pick one of %zu entries, %ld times.\n", count, num_rounds);
    printf("  Expecting each entry to be picked %.2lf times, std dev %.3lf.\n", expected, std_dev);
@ -766,12 +771,25 @@ int test_random_entry(int argc, char **argv, int flags) {
    printf("  Within 3 std dev (p99) = %.2lf%%\n", 100 * p99 / m);
    printf("  Within 4 std dev       = %.2lf%%\n", 100 * p4dev / m);
    printf("  Within 5 std dev       = %.2lf%%\n", 100 * p5dev / m);
+    printf("  Within 10%% dev         = %.2lf%%\n", 100 * p10percent / m);

    /* Conclusion? The number of trials (n) relative to the probabilities (p and
     * 1 − p) must be sufficiently large (n * p ≥ 5 and n * (1 − p) ≥ 5) to
     * approximate a binomial distribution with a normal distribution. */
    if (n / m >= 5 && n * (1 - 1 / m) >= 5) {
-        TEST_ASSERT_MESSAGE("Too unfair randomness", 100 * p99 / m >= 60.0);
+        /* Check that 80% of the elements are picked within 3 std deviations of
+         * the expected number. This is a low bar, since typically 99% of
+         * the elements are within this range.
+         *
+         * There is an edge case. When n is very large and m is very small, the
+         * std dev of a binomial distribution is very small, which becomes too
+         * strict for our bucket layout and makes the test flaky. For example
+         * with m = 400 and n = 1M, we get an expected value of 2500 and a std
+         * dev of 50, which is just 2% of the expected value. We lower the bar
+         * for this case and accept that 80% of elements are just within 10% of
+         * the expected value. */
+        TEST_ASSERT_MESSAGE("Too unfair randomness",
+                            100 * p99 / m >= 80.0 || 100 * p10percent / m >= 80.0);
    } else {
        printf("To uncertain numbers to draw any conclusions about fairness.\n");
    }