Skip to content

Commit

Permalink
turn testEncodings into an assert
Browse files (browse the repository at this point in the history)
  • Loading branch information
jbellis committed Oct 1, 2023
1 parent 05fc5e3 commit 31ac3f3
Showing 1 changed file with 11 additions and 2 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -33,6 +33,7 @@
import java.util.stream.IntStream;

import static java.lang.Math.abs;
import static java.lang.Math.log;
import static org.junit.jupiter.api.Assertions.assertEquals;

@ThreadLeakScope(ThreadLeakScope.Scope.NONE)
Expand Down Expand Up @@ -82,13 +83,21 @@ private void testEncodings(int dimension, int codebooks) {
delta += abs(f.similarityTo(j) - vsf.compare(q, vectors.get(j)));
}
}
System.out.printf("delta for %s is %s for dimension %d and %d codebooks%n", vsf, delta, dimension, codebooks);
// https://chat.openai.com/share/7ced3fc8-275a-4134-978c-c822275c3e1f
// is there a better way to check for within-expected bounds?
var expectedDelta = vsf == VectorSimilarityFunction.EUCLIDEAN
? 96.98 * log(3.26 + dimension) / log(1.92 + codebooks) - 112.15
: 152.69 * log(3.76 + dimension) / log(1.95 + codebooks) - 180.86;
// expected is accurate to within about 10% *on average*. experimentally 25% is not quite enough
// to avoid false positives, so we pad by 40%
assert delta <= 1.4 * expectedDelta : String.format("%s > %s for %s with %d dimensions and %d codebooks", delta, expectedDelta, vsf, dimension, codebooks);
}
}

@Test
public void testEncodings() {
for (int i = 1; i <= 4; i++) {
// start with i=2 (dimension 4) b/c dimension 2 is an outlier for our error prediction
for (int i = 2; i <= 8; i++) {
for (int M = 1; M <= i; M++) {
testEncodings(2 * i, M);
}
Expand Down

0 comments on commit 31ac3f3

Please sign in to comment.