reduce objects created with bloom filter operations

rhryciuk · Jun 24, 2012 · 6e7764a · 6e7764a
1 parent 2fb867b
commit 6e7764a
Show file tree

Hide file tree

Showing 3 changed files with 18 additions and 72 deletions.
diff --git a/src/main/java/org/elasticsearch/common/bloom/BloomFilter.java b/src/main/java/org/elasticsearch/common/bloom/BloomFilter.java
@@ -19,8 +19,6 @@
 
 package org.elasticsearch.common.bloom;
 
-import java.nio.ByteBuffer;
-
 /**
  *
  */
@@ -31,20 +29,11 @@ public interface BloomFilter {
         public void add(byte[] key, int offset, int length) {
         }
 
-        @Override
-        public void add(ByteBuffer key) {
-        }
-
         @Override
         public boolean isPresent(byte[] key, int offset, int length) {
             return true;
         }
 
-        @Override
-        public boolean isPresent(ByteBuffer key) {
-            return true;
-        }
-
         @Override
         public long sizeInBytes() {
             return 0;
@@ -56,20 +45,11 @@ public long sizeInBytes() {
         public void add(byte[] key, int offset, int length) {
         }
 
-        @Override
-        public void add(ByteBuffer key) {
-        }
-
         @Override
         public boolean isPresent(byte[] key, int offset, int length) {
             return false;
         }
 
-        @Override
-        public boolean isPresent(ByteBuffer key) {
-            return false;
-        }
-
         @Override
         public long sizeInBytes() {
             return 0;
@@ -78,11 +58,7 @@ public long sizeInBytes() {
 
     void add(byte[] key, int offset, int length);
 
-    void add(ByteBuffer key);
-
     boolean isPresent(byte[] key, int offset, int length);
 
-    boolean isPresent(ByteBuffer key);
-
     long sizeInBytes();
 }
diff --git a/src/main/java/org/elasticsearch/common/bloom/ObsBloomFilter.java b/src/main/java/org/elasticsearch/common/bloom/ObsBloomFilter.java
@@ -22,8 +22,6 @@
 import org.apache.lucene.util.OpenBitSet;
 import org.elasticsearch.common.RamUsage;
 
-import java.nio.ByteBuffer;
-
 public class ObsBloomFilter implements BloomFilter {
 
     private final int hashCount;
@@ -51,29 +49,10 @@ private long buckets() {
         return size;
     }
 
-    private long[] getHashBuckets(ByteBuffer key) {
-        return getHashBuckets(key, hashCount, buckets());
-    }
-
     private long[] getHashBuckets(byte[] key, int offset, int length) {
         return getHashBuckets(key, offset, length, hashCount, buckets());
     }
 
-    // Murmur is faster than an SHA-based approach and provides as-good collision
-    // resistance.  The combinatorial generation approach described in
-    // http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf
-    // does prove to work in actual tests, and is obviously faster
-    // than performing further iterations of murmur.
-    static long[] getHashBuckets(ByteBuffer b, int hashCount, long max) {
-        long[] result = new long[hashCount];
-        long hash1 = MurmurHash.hash64(b, b.position(), b.remaining(), 0L);
-        long hash2 = MurmurHash.hash64(b, b.position(), b.remaining(), hash1);
-        for (int i = 0; i < hashCount; ++i) {
-            result[i] = Math.abs((hash1 + (long) i * hash2) % max);
-        }
-        return result;
-    }
-
     // Murmur is faster than an SHA-based approach and provides as-good collision
     // resistance.  The combinatorial generation approach described in
     // http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf
@@ -91,29 +70,22 @@ static long[] getHashBuckets(byte[] b, int offset, int length, int hashCount, lo
 
     @Override
     public void add(byte[] key, int offset, int length) {
-        for (long bucketIndex : getHashBuckets(key, offset, length)) {
-            bitset.fastSet(bucketIndex);
-        }
-    }
-
-    public void add(ByteBuffer key) {
-        for (long bucketIndex : getHashBuckets(key)) {
+        // inline the hash buckets so we don't have to create the long[] each time...
+        long hash1 = MurmurHash.hash64(key, offset, length, 0L);
+        long hash2 = MurmurHash.hash64(key, offset, length, hash1);
+        for (int i = 0; i < hashCount; ++i) {
+            long bucketIndex = Math.abs((hash1 + (long) i * hash2) % size);
             bitset.fastSet(bucketIndex);
         }
     }
 
     @Override
     public boolean isPresent(byte[] key, int offset, int length) {
-        for (long bucketIndex : getHashBuckets(key, offset, length)) {
-            if (!bitset.fastGet(bucketIndex)) {
-                return false;
-            }
-        }
-        return true;
-    }
-
-    public boolean isPresent(ByteBuffer key) {
-        for (long bucketIndex : getHashBuckets(key)) {
+        // inline the hash buckets so we don't have to create the long[] each time...
+        long hash1 = MurmurHash.hash64(key, offset, length, 0L);
+        long hash2 = MurmurHash.hash64(key, offset, length, hash1);
+        for (int i = 0; i < hashCount; ++i) {
+            long bucketIndex = Math.abs((hash1 + (long) i * hash2) % size);
             if (!bitset.fastGet(bucketIndex)) {
                 return false;
             }

diff --git a/src/test/java/org/elasticsearch/test/unit/common/bloom/BoomFilterTests.java b/src/test/java/org/elasticsearch/test/unit/common/bloom/BoomFilterTests.java
@@ -24,8 +24,6 @@
 import org.elasticsearch.common.bloom.BloomFilterFactory;
 import org.testng.annotations.Test;
 
-import java.nio.ByteBuffer;
-
 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.hamcrest.Matchers.equalTo;
 
@@ -38,15 +36,15 @@ public class BoomFilterTests {
     @Test
     public void testSimpleOps() {
         BloomFilter filter = BloomFilterFactory.getFilter(10, 15);
-        filter.add(wrap("1"));
-        assertThat(filter.isPresent(wrap("1")), equalTo(true));
-        assertThat(filter.isPresent(wrap("2")), equalTo(false));
-        filter.add(wrap("2"));
-        assertThat(filter.isPresent(wrap("1")), equalTo(true));
-        assertThat(filter.isPresent(wrap("2")), equalTo(true));
+        filter.add(wrap("1"), 0, wrap("1").length);
+        assertThat(filter.isPresent(wrap("1"), 0, wrap("1").length), equalTo(true));
+        assertThat(filter.isPresent(wrap("2"), 0, wrap("2").length), equalTo(false));
+        filter.add(wrap("2"), 0, wrap("2").length);
+        assertThat(filter.isPresent(wrap("1"), 0, wrap("1").length), equalTo(true));
+        assertThat(filter.isPresent(wrap("2"), 0, wrap("2").length), equalTo(true));
     }
 
-    private ByteBuffer wrap(String key) {
-        return ByteBuffer.wrap(key.getBytes(Charsets.UTF_8));
+    private byte[] wrap(String key) {
+        return key.getBytes(Charsets.UTF_8);
     }
 }