Skip to content

Commit

Permalink
Faster StoreView & test
Browse files Browse the repository at this point in the history
No TreeSet; just collect all matching Keys & sort
  • Loading branch information
cliffclick committed May 19, 2014
1 parent 1c76300 commit b678759
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 44 deletions.
2 changes: 1 addition & 1 deletion prj.el
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
'(jde-run-option-debug nil)
'(jde-run-option-vm-args (quote ("-XX:+PrintGC")))
'(jde-compile-option-directory "./target/classes")
'(jde-run-option-application-args (quote ("-beta" "-mainClass" "org.junit.runner.JUnitCore" "water.fvec.ParserTest2")))
'(jde-run-option-application-args (quote ("-beta" "-mainClass" "org.junit.runner.JUnitCore" "water.KVSpeedTest")))
'(jde-debugger (quote ("JDEbug")))
'(jde-compile-option-source (quote ("1.6")))
'(jde-compile-option-classpath (quote ("./target/classes" "./lib/javassist.jar" "./lib/hadoop/cdh4/hadoop-common.jar" "./lib/hadoop/cdh4/hadoop-auth.jar" "./lib/hadoop/cdh4/slf4j-api-1.6.1.jar" "./lib/hadoop/cdh4/slf4j-nop-1.6.1.jar" "./lib/hadoop/cdh4/hadoop-hdfs.jar" "./lib/hadoop/cdh4/protobuf-java-2.4.0a.jar" "./lib/apache/commons-codec-1.4.jar" "./lib/apache/commons-configuration-1.6.jar" "./lib/apache/commons-lang-2.4.jar" "./lib/apache/commons-logging-1.1.1.jar" "./lib/apache/httpclient-4.1.1.jar" "./lib/apache/httpcore-4.1.jar" "./lib/junit/junit-4.11.jar" "./lib/apache/guava-12.0.1.jar" "./lib/gson/gson-2.2.2.jar" "./lib/poi/poi-3.8-20120326.jar" "./lib/poi/poi-ooxml-3.8-20120326.jar" "./lib/poi/poi-ooxml-schemas-3.8-20120326.jar" "./lib/poi/dom4j-1.6.1.jar" "./lib/Jama/Jama.jar" "./lib/s3/aws-java-sdk-1.3.27.jar" "./lib/log4j/log4j-1.2.15.jar" "./lib/joda/joda-time-2.3.jar")))
Expand Down
72 changes: 31 additions & 41 deletions src/main/java/water/api/StoreView.java
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@

package water.api;

import com.google.gson.*;
import java.util.*;

import water.*;
import water.fvec.Frame;
import water.fvec.Vec;

import com.google.gson.JsonArray;
import com.google.gson.JsonObject;

public class StoreView extends Request {

public static final int MAX_VIEW = 1000000;
Expand All @@ -23,52 +19,37 @@ public class StoreView extends Request {
// get the offset index
final int offset = _offset.value();
final int view = _view.value();
// write the response
final H2O cloud = H2O.CLOUD; // Current eldest Cloud
Key[] keys = new Key[view]; // Limit size of what we'll display on this page
int len = 0;
int off = 0;
String filter = _filter.value();
// Gather some keys that pass all filters
// - Sort all the keys for pretty display and reliable ordering
Set<Key> keySet = new TreeSet(H2O.globalKeySet(null));
int kcnt = keySet.size();
ArrayList<Key> akeys = new ArrayList<Key>();
int allkeys = 0; // compute all viewable keys
for( Key key : keySet ) {
kcnt--;
if( filter != null && // Have a filter?
key.toString().indexOf(filter) == -1 )
continue; // Ignore this filtered-out key
if( !key.user_allowed() ) // Also filter out for user-keys
continue;
if( H2O.get(key) == null ) continue; // Ignore misses
if( off >= offset) { // Skip first _offset keys
if (len<view) {
keys[len++] = key; // Capture the key
if( len == view && kcnt > 0) { // Last key for the view
// List is full; stop
result.addProperty(Constants.MORE,true);
}
}
} else off++;
allkeys++;
}
if (off<offset)
return Response.error("Not enough keys - request offset is " + off + " but K/V contains "+len+" keys.");
for( Key key : H2O.globalKeySet(null) )
if( key.user_allowed() && // Filter out for user-keys
(filter == null || // Have a filter?
key.toString().contains(filter)) && // Pass filter
H2O.get(key) != null ) // Ignore misses
akeys.add(key);

// Sort all the keys for pretty display and reliable ordering
Key[] keys = akeys.toArray(new Key[akeys.size()]);
Arrays.sort(keys);
if( keys.length<offset )
return Response.error("Not enough keys - request offset is " + offset + " but K/V contains "+keys.length+" keys.");
if( keys.length > offset+view )
result.addProperty(Constants.MORE,true);

// Now build the result JSON with all available keys
final H2O cloud = H2O.CLOUD; // Current eldest Cloud
JsonArray ary = new JsonArray();
for( int i=0; i<len; i++ ) {
if( i >= len ) break;
int len = Math.min(keys.length,offset+view);
for( int i=offset; i<len; i++ ) {
Value val = DKV.get(keys[i]);
if( val != null ) {
JsonObject jo = formatKeyRow(cloud,keys[i],val);
ary.add(jo);
}
if( val != null )
ary.add(formatKeyRow(cloud,keys[i],val));
}

result.add(KEYS,ary);
result.addProperty(NUM_KEYS, len);
result.addProperty(NUM_KEYS, len-offset);
result.addProperty(CLOUD_NAME, H2O.NAME);
result.addProperty(NODE_NAME, H2O.SELF.toString());
Response r = Response.done(result);
Expand All @@ -94,6 +75,15 @@ static private String noNaN( double d ) {
return (Double.isNaN(d) || Double.isInfinite(d)) ? "" : Double.toString(d);
}

// Used by tests
public String setAndServe(String offset) {
_offset.reset(); _offset.check(null,offset);
_view .reset(); _view .check(null,"20");
_filter.reset();
return new Gson().toJson(serve()._response);
}


private JsonObject formatKeyRow(H2O cloud, Key key, Value val) {
JsonObject result = new JsonObject();
result.addProperty(KEY, key.toString());
Expand Down
56 changes: 54 additions & 2 deletions src/test/java/water/KVSpeedTest.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package water;

import com.google.gson.*;
import org.junit.*;

// Weeny speed-test harness. Not intended for use with any real testing.
Expand All @@ -24,8 +25,7 @@ public class KVSpeedTest extends TestUtil {
System.out.println("(put+get+remove+get)/sec="+(now-start)+"ms / "+ITERS+
" = "+((double)(now-start)/ITERS));
}
for( int i=0; i<keys.length; i++ )
DKV.remove(keys[i]);
for( Key key : keys ) DKV.remove(key);
}


Expand Down Expand Up @@ -53,4 +53,56 @@ public void impl_testKeys(Key[]keys,Value[]vals) {
@Test @Ignore public void dummy_test() {
/* this is just a dummy test to avoid JUnit complains about missing test */
}


// Inject a million system keys and a dozen user keys around the cluster.
// Verify that StoreView and TypeAhead remain fast.
@Test public void fastGlobalKeySearch() {
final long t_start = System.currentTimeMillis();
final int NUMKEYS=100; // fast test for junits
//final int NUMKEYS=1000000; // a million keys
new DoKeys(true ,NUMKEYS,15).invokeOnAllNodes();
final long t_make = System.currentTimeMillis();

// Skip 1st 10 keys of a StoreView. Return the default of 20 more
// user-mode keys.
String json = new water.api.StoreView().setAndServe("10");
//System.out.println(json);
final long t_view = System.currentTimeMillis();

new DoKeys(false,NUMKEYS,15).invokeOnAllNodes();
final long t_remove = System.currentTimeMillis();
//System.out.print("Make: "+((t_make -t_start)*1.0/NUMKEYS)+"\n"+
// "View: "+((t_view -t_make ) )+"ms"+"\n"+
// "Remv: "+((t_remove-t_view )*1.0/NUMKEYS)+"\n"
// );
}

// Bulk inject keys on the local node without any network traffic
private static class DoKeys extends DRemoteTask<DoKeys> {
private final boolean _insert;
private final int _sysnkeys, _usernkeys;
DoKeys( boolean insert, int sysnkeys, int usernkeys ) { _insert=insert; _sysnkeys = sysnkeys; _usernkeys = usernkeys;}
@Override public void lcompute() {
long l=0;
for( int i=0; i<_sysnkeys+_usernkeys; i++ ) {
byte[] kb = new byte[2+4+8];
kb[0] = i<_sysnkeys ? Key.BUILT_IN_KEY : (byte)'_'; // System Key vs User Key
kb[1] = 0; // No replicas
kb[2] = 'A'; kb[3] = 'B'; kb[4] = 'C'; kb[5] = 'D';
while( true ) {
UDP.set8(kb,6,l++);
Key k = Key.make(kb);
if( k.home() ) {
if( _insert ) DKV.put(k,new Value(k,kb),_fs);
else DKV.remove(k,_fs);
break;
}
}
}
tryComplete();
}
@Override public void reduce( DoKeys ignore ) { }
}

}
1 change: 1 addition & 0 deletions src/test/java/water/TestUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ class TimerStatement extends Statement {
if( leaked_keys > 0 ) {
for( Key k : H2O.localKeySet() ) {
Value value = DKV.get(k);
if( value==null ) { leaked_keys--; continue; }
Object o = value.type() != TypeMap.PRIM_B ? value.get() : "byte[]";
// Ok to leak VectorGroups
if( o instanceof Vec.VectorGroup )
Expand Down

0 comments on commit b678759

Please sign in to comment.