[hbase] Use a PageFilter in HBase scans.

Other bindings limit the number of results retrieved from the server. The HBase bindings just close the scanner once they have received the desired number of records. Adding a PageFilter matches the behavior of other bindings, and may improve performance.
supler · Sep 24, 2015 · 744e859 · 744e859
1 parent 4aede92
commit 744e859
Show file tree

Hide file tree

Showing 2 changed files with 11 additions and 9 deletions.
diff --git a/hbase098/src/main/java/com/yahoo/ycsb/db/HBaseClient.java b/hbase098/src/main/java/com/yahoo/ycsb/db/HBaseClient.java
@@ -17,32 +17,25 @@
 
 package com.yahoo.ycsb.db;
 
-
 import com.yahoo.ycsb.DBException;
 import com.yahoo.ycsb.ByteIterator;
 import com.yahoo.ycsb.ByteArrayByteIterator;
+import com.yahoo.ycsb.measurements.Measurements;
 
 import java.io.IOException;
 import java.util.*;
-//import java.util.HashMap;
-//import java.util.Properties;
-//import java.util.Set;
-//import java.util.Vector;
 
-import com.yahoo.ycsb.measurements.Measurements;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.client.HTable;
-//import org.apache.hadoop.hbase.client.Scanner;
 import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Delete;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.ResultScanner;
-//import org.apache.hadoop.hbase.io.Cell;
-//import org.apache.hadoop.hbase.io.RowResult;
+import org.apache.hadoop.hbase.filter.PageFilter;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 
@@ -246,6 +239,7 @@ public int scan(String table, String startkey, int recordcount, Set<String> fiel
         //HBase has no record limit.  Here, assume recordcount is small enough to bring back in one call.
         //We get back recordcount records
         s.setCaching(recordcount);
+        s.setFilter(new PageFilter(recordcount));
 
         //add specified fields or else all fields
         if (fields == null)
@@ -284,6 +278,9 @@ public int scan(String table, String startkey, int recordcount, Set<String> fiel
                 //add rowResult to result vector
                 result.add(rowResult);
                 numResults++;
+
+                // PageFilter does not guarantee that the number of results is <= pageSize, so this
+                // break is required.
                 if (numResults >= recordcount) //if hit recordcount, bail out
                 {
                     break;

diff --git a/hbase10/src/main/java/com/yahoo/ycsb/db/HBaseClient10.java b/hbase10/src/main/java/com/yahoo/ycsb/db/HBaseClient10.java
@@ -42,6 +42,7 @@
 import org.apache.hadoop.hbase.client.ResultScanner;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.filter.PageFilter;
 import org.apache.hadoop.hbase.util.Bytes;
 
 import java.io.IOException;
@@ -291,6 +292,7 @@ public int scan(String table, String startkey, int recordcount, Set<String> fiel
         //HBase has no record limit.  Here, assume recordcount is small enough to bring back in one call.
         //We get back recordcount records
         s.setCaching(recordcount);
+        s.setFilter(new PageFilter(recordcount));
 
         //add specified fields or else all fields
         if (fields == null)
@@ -332,6 +334,9 @@ public int scan(String table, String startkey, int recordcount, Set<String> fiel
                 //add rowResult to result vector
                 result.add(rowResult);
                 numResults++;
+
+                // PageFilter does not guarantee that the number of results is <= pageSize, so this
+                // break is required.
                 if (numResults >= recordcount) //if hit recordcount, bail out
                 {
                     break;