From 641ddfbbfee930b5d11fd2f360177472cff56bd4 Mon Sep 17 00:00:00 2001 From: Cliff Click Date: Thu, 3 Apr 2014 09:20:50 -0700 Subject: [PATCH] Better caching, more slice smarts Do DeepSlice rollups BEFORE the MR job. Better handling printing Frames with no names. --- src/main/java/water/fvec/Frame.java | 20 ++++++++++++++------ src/main/java/water/fvec/Vec.java | 26 +++++++++++++------------- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/src/main/java/water/fvec/Frame.java b/src/main/java/water/fvec/Frame.java index 2030464297..812084b902 100644 --- a/src/main/java/water/fvec/Frame.java +++ b/src/main/java/water/fvec/Frame.java @@ -158,8 +158,9 @@ public Frame add( String name, Vec vec ) { /** Appends an entire Frame */ public Frame add( Frame fr, String names[] ) { assert _vecs.length==0 || anyVec().group().equals(fr.anyVec().group()) : "Adding a vector from different vector group. Current frame contains "+Arrays.toString(_names)+ " vectors. New frame contains "+Arrays.toString(fr.names()) + " vectors."; - for( String name : names ) - if( find(name) != -1 ) throw new IllegalArgumentException("Duplicate name '"+name+"' in Frame"); + if( _names != null && fr._names != null ) + for( String name : names ) + if( find(name) != -1 ) throw new IllegalArgumentException("Duplicate name '"+name+"' in Frame"); final int len0= _names!=null ? _names.length : 0; final int len1= names!=null ? names.length : 0; final int len = len0+len1; @@ -660,12 +661,12 @@ else if (ocols instanceof Frame) { // Do Da Slice // orows is either a long[] or a Vec if (orows == null) - return new DeepSlice((long[])orows,c2).doAll(c2.length,this).outputFrame(names(c2),domains(c2)); + return new DeepSlice((long[])orows,c2,vecs()).doAll(c2.length,this).outputFrame(names(c2),domains(c2)); else if (orows instanceof long[]) { final long CHK_ROWS=1000000; long[] rows = (long[])orows; if( rows.length==0 || rows[0] < 0 ) - return new DeepSlice(rows,c2).doAll(c2.length, this).outputFrame(names(c2), domains(c2)); + return new DeepSlice(rows,c2,vecs()).doAll(c2.length, this).outputFrame(names(c2), domains(c2)); // Vec'ize the index array Futures fs = new Futures(); AppendableVec av = new AppendableVec("rownames"); @@ -745,8 +746,15 @@ private static class Slice extends MRTask2 { private static class DeepSlice extends MRTask2 { final int _cols[]; final long _rows[]; + final byte _isInt[]; boolean _ex = true; - DeepSlice( long rows[], int cols[]) { _cols=cols; _rows=rows;} + DeepSlice( long rows[], int cols[], Vec vecs[] ) { + _cols=cols; + _rows=rows; + _isInt = new byte[cols.length]; + for( int i=0; i { for( int i=0; i<_cols.length; i++ ) { Chunk oc = chks[_cols[i]]; NewChunk nc = nchks[ i ]; - if( oc._vec.isInt() ) { // Slice on integer columns + if( _isInt[_cols[i]] == 1 ) { // Slice on integer columns for( int j=rlo; j= 0 ) { // KV store has a better answer if( vthis == this ) return this; _min = vthis._min; _max = vthis._max; @@ -534,24 +535,23 @@ public Chunk chunkForChunkIdx(int cidx) { return c; } /** The Chunk for a row#. Warning: this loads the data locally! */ - public final Chunk chunkForRow(long i) { - return chunkForChunkIdx(elem2ChunkIdx(i)); - } + private Chunk chunkForRow_impl(long i) { return chunkForChunkIdx(elem2ChunkIdx(i)); } // Cache of last Chunk accessed via at/set api transient Chunk _cache; - private Chunk c(long i) { + /** The Chunk for a row#. Warning: this loads the data locally! */ + public final Chunk chunkForRow(long i) { Chunk c = _cache; - return (c != null && c._chk2==null && c._start <= i && i < c._start+c._len) ? c : (_cache = chunkForRow(i)); + return (c != null && c._chk2==null && c._start <= i && i < c._start+c._len) ? c : (_cache = chunkForRow_impl(i)); } /** Fetch element the slow way, as a long. Floating point values are * silently rounded to an integer. Throws if the value is missing. */ - public final long at8( long i ) { return c(i).at8(i); } + public final long at8( long i ) { return chunkForRow(i).at8(i); } /** Fetch element the slow way, as a double. Missing values are * returned as Double.NaN instead of throwing. */ - public final double at( long i ) { return c(i).at(i); } + public final double at( long i ) { return chunkForRow(i).at(i); } /** Fetch the missing-status the slow way. */ - public final boolean isNA(long row){ return c(row).isNA(row); } + public final boolean isNA(long row){ return chunkForRow(row).isNA(row); } /** Write element the slow way, as a long. There is no way to write a @@ -561,18 +561,18 @@ private Chunk c(long i) { * common compatible data representation. * * */ - public final long set( long i, long l) {return c(i).set(i,l);} + public final long set( long i, long l) {return chunkForRow(i).set(i,l);} /** Write element the slow way, as a double. Double.NaN will be treated as * a set of a missing element. * */ - public final double set( long i, double d) {return c(i).set(i,d);} + public final double set( long i, double d) {return chunkForRow(i).set(i,d);} /** Write element the slow way, as a float. Float.NaN will be treated as * a set of a missing element. * */ - public final float set( long i, float f) {return c(i).set(i,f);} + public final float set( long i, float f) {return chunkForRow(i).set(i,f);} /** Set the element as missing the slow way. */ - public final boolean setNA( long i ) { return c(i).setNA(i);} + public final boolean setNA( long i ) { return chunkForRow(i).setNA(i);} /** Pretty print the Vec: [#elems, min/mean/max]{chunks,...} */ @Override public String toString() {