Skip to content

Commit

Permalink
Parse Time
Browse files Browse the repository at this point in the history
OK — still only a tiny subset of time formats parses.
Includes a notion of how time was parsed & "timeness" in Vecs.
Parse looks for a consistent time parse and, failing that, NAs out the column.
Inspect2 will display time with the same pattern used to parse time.
Adds in the JODA library, used for time shenanigans, plus some test
files.
  • Loading branch information
cliffclick committed Feb 22, 2014
1 parent 93e5c6e commit 843ef46
Show file tree
Hide file tree
Showing 12 changed files with 252 additions and 201 deletions.
2 changes: 1 addition & 1 deletion build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ JAR_ROOT=lib

# additional dependencies, relative to this file, but all dependencies should be
# inside the JAR_ROOT tree so that they are packed to the jar file properly
DEPENDENCIES="${JAR_ROOT}/jama/*${SEP}${JAR_ROOT}/apache/*${SEP}${JAR_ROOT}/junit/*${SEP}${JAR_ROOT}/gson/*${SEP}${JAR_ROOT}/javassist.jar${SEP}${JAR_ROOT}/poi/*${SEP}${JAR_ROOT}/s3/*${SEP}${JAR_ROOT}/jets3t/*${SEP}${JAR_ROOT}/log4j/*${SEP}${JAR_ROOT}/mockito/*${SEP}${JAR_ROOT}/jogamp/*"
DEPENDENCIES="${JAR_ROOT}/jama/*${SEP}${JAR_ROOT}/apache/*${SEP}${JAR_ROOT}/junit/*${SEP}${JAR_ROOT}/gson/*${SEP}${JAR_ROOT}/javassist.jar${SEP}${JAR_ROOT}/poi/*${SEP}${JAR_ROOT}/s3/*${SEP}${JAR_ROOT}/jets3t/*${SEP}${JAR_ROOT}/log4j/*${SEP}${JAR_ROOT}/joda/*${SEP}${JAR_ROOT}/mockito/*${SEP}${JAR_ROOT}/jogamp/*"

DEFAULT_HADOOP_VERSION="cdh3"
OUTDIR="target"
Expand Down
Binary file added lib/joda/joda-time-2.3.jar
Binary file not shown.
6 changes: 3 additions & 3 deletions prj.el
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@
(jde-set-variables
'(jde-javadoc-gen-destination-directory "./doc")
'(jde-run-working-directory "$DESK/h2o")
'(jde-run-option-classpath (quote ("./target/classes" "./lib/javassist.jar" "./lib/hadoop/cdh4/hadoop-common.jar" "./lib/hadoop/cdh4/hadoop-auth.jar" "./lib/hadoop/cdh4/slf4j-api-1.6.1.jar" "./lib/hadoop/cdh4/slf4j-nop-1.6.1.jar" "./lib/hadoop/cdh4/hadoop-hdfs.jar" "./lib/hadoop/cdh4/protobuf-java-2.4.0a.jar" "./lib/apache/commons-codec-1.4.jar" "./lib/apache/commons-configuration-1.6.jar" "./lib/apache/commons-lang-2.4.jar" "./lib/apache/commons-logging-1.1.1.jar" "./lib/apache/httpclient-4.1.1.jar" "./lib/apache/httpcore-4.1.jar" "./lib/junit/junit-4.11.jar" "./lib/apache/guava-12.0.1.jar" "./lib/gson/gson-2.2.2.jar" "./lib/poi/poi-3.8-20120326.jar" "./lib/poi/poi-ooxml-3.8-20120326.jar" "./lib/poi/poi-ooxml-schemas-3.8-20120326.jar" "./lib/poi/dom4j-1.6.1.jar" "./lib/Jama/Jama.jar" "./lib/s3/aws-java-sdk-1.3.27.jar" "./lib/log4j/log4j-1.2.15.jar")))
'(jde-run-option-classpath (quote ("./target/classes" "./lib/javassist.jar" "./lib/hadoop/cdh4/hadoop-common.jar" "./lib/hadoop/cdh4/hadoop-auth.jar" "./lib/hadoop/cdh4/slf4j-api-1.6.1.jar" "./lib/hadoop/cdh4/slf4j-nop-1.6.1.jar" "./lib/hadoop/cdh4/hadoop-hdfs.jar" "./lib/hadoop/cdh4/protobuf-java-2.4.0a.jar" "./lib/apache/commons-codec-1.4.jar" "./lib/apache/commons-configuration-1.6.jar" "./lib/apache/commons-lang-2.4.jar" "./lib/apache/commons-logging-1.1.1.jar" "./lib/apache/httpclient-4.1.1.jar" "./lib/apache/httpcore-4.1.jar" "./lib/junit/junit-4.11.jar" "./lib/apache/guava-12.0.1.jar" "./lib/gson/gson-2.2.2.jar" "./lib/poi/poi-3.8-20120326.jar" "./lib/poi/poi-ooxml-3.8-20120326.jar" "./lib/poi/poi-ooxml-schemas-3.8-20120326.jar" "./lib/poi/dom4j-1.6.1.jar" "./lib/Jama/Jama.jar" "./lib/s3/aws-java-sdk-1.3.27.jar" "./lib/log4j/log4j-1.2.15.jar" "./lib/joda/joda-time-2.3.jar")))
'(jde-run-executable-args nil)
'(jde-run-option-debug nil)
'(jde-run-option-vm-args (quote ("-XX:+PrintGC")))
'(jde-compile-option-directory "./target/classes")
'(jde-run-option-application-args (quote ("-beta" "-mainClass" "org.junit.runner.JUnitCore" "water.exec.DdplyTest")))
'(jde-run-option-application-args (quote ("-beta" "-mainClass" "org.junit.runner.JUnitCore" "water.fvec.ParseTime")))
'(jde-debugger (quote ("JDEbug")))
'(jde-compile-option-source (quote ("1.6")))
'(jde-compile-option-classpath (quote ("./target/classes" "./lib/javassist.jar" "./lib/hadoop/cdh4/hadoop-common.jar" "./lib/hadoop/cdh4/hadoop-auth.jar" "./lib/hadoop/cdh4/slf4j-api-1.6.1.jar" "./lib/hadoop/cdh4/slf4j-nop-1.6.1.jar" "./lib/hadoop/cdh4/hadoop-hdfs.jar" "./lib/hadoop/cdh4/protobuf-java-2.4.0a.jar" "./lib/apache/commons-codec-1.4.jar" "./lib/apache/commons-configuration-1.6.jar" "./lib/apache/commons-lang-2.4.jar" "./lib/apache/commons-logging-1.1.1.jar" "./lib/apache/httpclient-4.1.1.jar" "./lib/apache/httpcore-4.1.jar" "./lib/junit/junit-4.11.jar" "./lib/apache/guava-12.0.1.jar" "./lib/gson/gson-2.2.2.jar" "./lib/poi/poi-3.8-20120326.jar" "./lib/poi/poi-ooxml-3.8-20120326.jar" "./lib/poi/poi-ooxml-schemas-3.8-20120326.jar" "./lib/poi/dom4j-1.6.1.jar" "./lib/Jama/Jama.jar" "./lib/s3/aws-java-sdk-1.3.27.jar" "./lib/log4j/log4j-1.2.15.jar")))
'(jde-compile-option-classpath (quote ("./target/classes" "./lib/javassist.jar" "./lib/hadoop/cdh4/hadoop-common.jar" "./lib/hadoop/cdh4/hadoop-auth.jar" "./lib/hadoop/cdh4/slf4j-api-1.6.1.jar" "./lib/hadoop/cdh4/slf4j-nop-1.6.1.jar" "./lib/hadoop/cdh4/hadoop-hdfs.jar" "./lib/hadoop/cdh4/protobuf-java-2.4.0a.jar" "./lib/apache/commons-codec-1.4.jar" "./lib/apache/commons-configuration-1.6.jar" "./lib/apache/commons-lang-2.4.jar" "./lib/apache/commons-logging-1.1.1.jar" "./lib/apache/httpclient-4.1.1.jar" "./lib/apache/httpcore-4.1.jar" "./lib/junit/junit-4.11.jar" "./lib/apache/guava-12.0.1.jar" "./lib/gson/gson-2.2.2.jar" "./lib/poi/poi-3.8-20120326.jar" "./lib/poi/poi-ooxml-3.8-20120326.jar" "./lib/poi/poi-ooxml-schemas-3.8-20120326.jar" "./lib/poi/dom4j-1.6.1.jar" "./lib/Jama/Jama.jar" "./lib/s3/aws-java-sdk-1.3.27.jar" "./lib/log4j/log4j-1.2.15.jar" "./lib/joda/joda-time-2.3.jar")))
'(jde-db-option-classpath (quote ("$DESK/Dropbox/Sris and Cliff/H2O/classes")))
'(jde-run-option-enable-assertions "Everywhere")
'(jde-compile-option-sourcepath (quote ("./src")))
Expand Down
20 changes: 20 additions & 0 deletions smalldata/test/test_time.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
user,sku,category,query,click_time,query_time
000000df17cd56a5df4a94074e133c9d4739fae3,2125233,abcat0101001,"Televisiones Panasonic 50 pulgadas","2011-09-01 23:44:52.533","2011-09-01 23:43:59.752"
000001928162247ffaf63185cd8b2a244c78e7c6,2009324,abcat0101001,Sharp,"2011-09-05 12:25:37.42","2011-09-05 12:25:01.187"
000017f79c2b5da56721f22f9fdd726b13daf8e8,1517163,pcmcat193100050014,nook,"2011-08-24 12:56:58.91","2011-08-24 12:55:13.012"
000017f79c2b5da56721f22f9fdd726b13daf8e8,2877125,abcat0101001,rca,"2011-10-25 07:18:14.722","2011-10-25 07:16:51.759"
000017f79c2b5da56721f22f9fdd726b13daf8e8,2877134,abcat0101005,rca,"2011-10-25 07:19:51.697","2011-10-25 07:16:51.759"
00001ff8394a2d9bd7adffb1547180bf5bbfc4e5,2416092,pcmcat143200050016,"Flat screen tvs","2011-09-07 15:54:47.956","2011-09-07 15:53:24.353"
000027e2f86b55c901882a9d47ac7feec964825d,3108172,pcmcat247400050001,macbook,"2011-09-25 11:07:02.603","2011-09-25 11:05:26.996"
0000323223cd657f7e063ddb83f32b332d46e920,2264036,pcmcat171900050028,"Blue tooth headphones","2011-09-23 12:40:20.871","2011-09-23 12:40:14.845"
00003cb3f85244c652f22c1daf11aed35d5ab7f6,8280834,abcat0107004,"Tv antenna","2011-08-29 13:37:32.903","2011-08-29 13:33:23.249"
0000404374364b75c80d384c2a8c1d379237a2d9,2740208,pcmcat186100050006,"memory card","2011-10-25 22:55:58.683","2011-10-25 22:54:45.926"
0000517733c9766b4eeef84f2eb8720b3af3233f,2584273,pcmcat138100050040,"AC power cord","2011-09-11 12:48:44.139","2011-09-11 12:46:18.466"
00006d8bb4bcc66f1676237629968277fb7a8dc5,1230537,pcmcat201900050009,"Zagg iPhone","2011-10-18 17:21:33.919","2011-10-18 17:21:26.57"
00007557d9b11f2f1a99792317963d30174171ba,3168067,cat02713,"Watch The Throne","2011-09-04 10:55:20.427","2011-09-04 10:55:10.874"
0000776d7bf35b984ca8e3671327a7ac1d07a86c,7997055,pcmcat224000050003,"Remote control extender","2011-10-28 16:26:29.203","2011-10-28 16:26:20.358"
000086ad8ef4acaa7b1624b47d981fc2f7bc8af9,2009041,pcmcat233600050006,Camcorder,"2011-10-10 00:35:26.858","2011-10-10 00:34:30.708"
0000870469b85f38ceba4b1add61419eb8da9dc5,1988047,abcat0707001,3ds,"2011-09-23 22:14:08.965","2011-09-23 22:13:37.43"
0000aed826178a7929389c0c3d68bd0124c3dec1,1686079,abcat0410020,hoya,"2011-09-10 12:04:53.645","2011-09-10 12:04:30.805"
0000bf4a5fc3c0fc9faaef83b1048927962ef6ab,3770439,pcmcat144700050004,"wireless headphones","2011-10-23 11:44:35.074","2011-10-23 11:43:27.11"
0000bf4a5fc3c0fc9faaef83b1048927962ef6ab,2602403,pcmcat144700050004,"wireless headphones","2011-10-23 11:45:24.416","2011-10-23 11:43:27.11"
28 changes: 19 additions & 9 deletions src/main/java/water/api/Inspect2.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
import hex.gbm.GBM;
import hex.glm.GLM2;
import hex.nn.NN;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import water.*;
import water.api.Inspect2.ColSummary.ColType;
import water.fvec.*;
Expand All @@ -29,12 +32,12 @@ public class Inspect2 extends Request2 {

// An internal JSON-output-only class
static class ColSummary extends Iced {
public static enum ColType { Enum, Int, Real };
public static enum ColType { Enum, Int, Real, Time };
static final int API_WEAVER=1; // This file has auto-gen'd doc & json fields
static public DocGen.FieldDoc[] DOC_FIELDS; // Initialized from Auto-Gen code.
public ColSummary( String name, Vec vec ) {
this.name = name;
this.type = vec.isEnum() ? ColType.Enum : vec.isInt() ? ColType.Int : ColType.Real;
this.type = vec.isEnum() ? ColType.Enum : vec.isInt() ? (vec.isTime() ? ColType.Time : ColType.Int) : ColType.Real;
this.min = vec.isEnum() ? Double.NaN : vec.min();
this.max = vec.isEnum() ? Double.NaN : vec.max();
this.mean = vec.isEnum() ? Double.NaN : vec.mean();
Expand Down Expand Up @@ -88,7 +91,7 @@ public static Response redirect(Request req, String src_key) {
naCnt += cols[i].naCnt;
enumCol |= cols[i].type == ColType.Enum;
}

Vec svecs[] = src_key.vecs();

DocGen.HTML.title(sb,skey.toString());
DocGen.HTML.section(sb,""+String.format("%,d",numCols)+" columns, "+String.format("%,d",numRows)+" rows, "+
Expand Down Expand Up @@ -142,19 +145,19 @@ public static Response redirect(Request req, String src_key) {
sb.append("<tr class='warning'>");
sb.append("<td>").append("Min").append("</td>");
for( int i=0; i<cols.length; i++ )
sb.append("<td>").append(cols[i].type==ColType.Enum ? NA : x1(src_key.vecs()[i],-1,cols[i].min)).append("</td>");
sb.append("<td>").append(cols[i].type==ColType.Enum ? NA : x1(svecs[i],-1,cols[i].min)).append("</td>");
sb.append("</tr>");

sb.append("<tr class='warning'>");
sb.append("<td>").append("Max").append("</td>");
for( int i=0; i<cols.length; i++ )
sb.append("<td>").append(cols[i].type==ColType.Enum ? NA :x1(src_key.vecs()[i],-1,cols[i].max)).append("</td>");
sb.append("<td>").append(cols[i].type==ColType.Enum ? NA : x1(svecs[i],-1,cols[i].max)).append("</td>");
sb.append("</tr>");

sb.append("<tr class='warning'>");
sb.append("<td>").append("Mean").append("</td>");
for( int i=0; i<cols.length; i++ )
sb.append("<td>").append(cols[i].type==ColType.Enum ? NA : String.format("%5.3g",cols[i].mean)).append("</td>");
sb.append("<td>").append(cols[i].type==ColType.Enum ? NA : x1(svecs[i],-1,cols[i].mean)).append("</td>");
sb.append("</tr>");

// Cardinality row is shown only if dataset contains enum-column
Expand All @@ -180,7 +183,7 @@ public static Response redirect(Request req, String src_key) {
// An extra row holding vec's compressed bytesize
sb.append("<td>").append("Size").append("</td>");
for( int i=0; i<cols.length; i++ )
sb.append("<td>").append(PrettyPrint.bytes(src_key.vecs()[i].byteSize())).append("</td>");
sb.append("<td>").append(PrettyPrint.bytes(svecs[i].byteSize())).append("</td>");
sb.append("</tr>");

// All Vecs within a frame are compatible, so just read the
Expand All @@ -194,7 +197,7 @@ public static Response redirect(Request req, String src_key) {
.append(", ").append(c0.chunk2StartElem(j)).append("</td>");
for( int i=0; i<cols.length; i++ ) {
// Report chunk-type (compression scheme)
String clazz = src_key.vecs()[i].elem2BV(j).getClass().getSimpleName();
String clazz = svecs[i].elem2BV(j).getClass().getSimpleName();
String trim = clazz.replaceAll("Chunk","");
sb.append("<td>").append(trim).append("</td>");
}
Expand All @@ -208,7 +211,7 @@ public static Response redirect(Request req, String src_key) {
sb.append("<tr id='row_"+String.valueOf(offset+j)+"'>"); // Row header
sb.append("<td>").append(offset+j).append("</td>");
for( int i=0; i<cols.length; i++ ) // Columns w/in row
sb.append("<td>").append(x0(src_key.vecs()[i],offset+j)).append("</td>");
sb.append("<td>").append(x0(svecs[i],offset+j)).append("</td>");
sb.append("</tr>");
}
}
Expand All @@ -227,6 +230,13 @@ public static String x1( Vec v, long row, double d ) {
if( (row >= 0 && v.isNA(row)) || Double.isNaN(d) )
return "-"; // Display of missing elements
if( v.isEnum() ) return row >= 0 ? v.domain(v.at8(row)) : Long.toString((long)d);
if( v.isTime() ) {
String tpat = v.timeParse();
DateTime dt = new DateTime(row >= 0 ? v.at8(row) : (long)d);
DateTimeFormatter fmt = DateTimeFormat.forPattern(tpat);
String str = fmt.print(dt);
return str;
}
if( v.isInt() ) return Long.toString(row >= 0 ? v.at8(row) : (long)d);
Chunk c = v.elem2BV(0);
Class Cc = c.getClass();
Expand Down
51 changes: 37 additions & 14 deletions src/main/java/water/fvec/AppendableVec.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import water.*;
import water.fvec.Vec;
import water.util.Utils;
import java.util.Arrays;

/**
Expand All @@ -17,12 +18,14 @@
*/
public class AppendableVec extends Vec {
long _espc[];
public static final byte NUMBER = 3;
public static final byte ENUM = 2;
public static final byte NA = 1;
public static final byte ENUM = 2;
public static final byte NUMBER = 4;
public static final byte TIME = 8;
byte [] _chunkTypes;
long _missingCnt;
long _naCnt;
long _strCnt;
final long _timCnt[] = new long[Utils.TIME_PARSE.length];
long _totalCnt;

public AppendableVec( String keyName ) {
Expand All @@ -47,15 +50,16 @@ synchronized void closeChunk( NewChunk chk) {
}
_espc[cidx] = chk._len2;
_chunkTypes[cidx] = chk.type();
_missingCnt += chk._naCnt;
_naCnt += chk._naCnt;
_strCnt += chk._strCnt;
for( int i=0; i<_timCnt.length; i++ ) _timCnt[i] += chk._timCnt[i];
_totalCnt += chk._len2;
}

// What kind of data did we find? NA's? Strings-only? Floats or Ints?
// Heuristic for column typing: returns true only when the column contains at
// least one string and every row is either a string or an NA — i.e. no
// numeric values at all — in which case the column becomes an Enum column.
// NOTE(review): this span is a unified-diff fragment; both the pre-change
// line (old field name _missingCnt) and the post-change line (renamed field
// _naCnt) appear below. Only the _naCnt line exists in the new revision.
boolean shouldBeEnum() {
// TODO: we declare column to be string/enum only if it does not have ANY numbers in it.
if( _strCnt > 0 && (_strCnt + _missingCnt) == _totalCnt ) return true;
if( _strCnt > 0 && (_strCnt + _naCnt) == _totalCnt ) return true;
return false;
}

Expand All @@ -79,8 +83,9 @@ public void reduce( AppendableVec nv ) {
_espc[i] = e1[i];
_chunkTypes[i] |= t1[i];
}
_missingCnt += nv._missingCnt;
_naCnt += nv._naCnt;
_strCnt += nv._strCnt;
Utils.add(_timCnt,nv._timCnt);
_totalCnt += nv._totalCnt;
}

Expand All @@ -92,17 +97,34 @@ public Vec close(Futures fs) {
int nchunk = _espc.length;
while( nchunk > 0 && _espc[nchunk-1] == 0 ) nchunk--;
DKV.remove(chunkKey(nchunk)); // remove potential trailing key
boolean hasNumber = false, hasEnum = false;
for(int i = 0; i < nchunk; ++i)
if(_chunkTypes[i] == NUMBER){
hasNumber = true;
} else if(_chunkTypes[i] == ENUM)
hasEnum = true;
if(hasNumber && hasEnum){ // number wins, we need to go through the enum chunks and declare them all NAs (chunk is considered enum iff it has only enums + possibly some nas)
boolean hasNumber = false, hasEnum = false, hasTime=false;
for( int i = 0; i < nchunk; ++i ) {
if( (_chunkTypes[i] & TIME ) != 0 ) { hasNumber = true; hasTime=true; }
if( (_chunkTypes[i] & NUMBER) != 0 ) hasNumber = true;
if( (_chunkTypes[i] & ENUM ) != 0 ) hasEnum = true;
}
// number wins, we need to go through the enum chunks and declare them all
// NAs (chunk is considered enum iff it has only enums + possibly some nas)
if( hasNumber && hasEnum ) {
for(int i = 0; i < nchunk; ++i)
if(_chunkTypes[i] == ENUM)
DKV.put(chunkKey(i), new C0DChunk(Double.NaN, (int)_espc[i]),fs);
}
// Make sure time is consistent
int t = -1;
if( hasTime ) {
// Find common time parse, and all zeros - or inconsistent time parse
for( int i=0; i<_timCnt.length; i++ )
if( _timCnt[i] != 0 )
if( t== -1 ) t=i; // common time parse
else t = -2; // inconsistent parse
if( t < 0 ) // blow off time parse
for(int i = 0; i < nchunk; ++i)
if(_chunkTypes[i] == TIME)
DKV.put(chunkKey(i), new C0DChunk(Double.NaN, (int)_espc[i]),fs);

}
assert t<0 || _domain == null;

// Compute elems-per-chunk.
// Roll-up elem counts, so espc[i] is the starting element# of chunk i.
Expand All @@ -118,7 +140,8 @@ public Vec close(Futures fs) {
// Replacement plain Vec for AppendableVec.
Vec vec = new Vec(_key, espc);
vec._domain = _domain;
DKV.put(_key,vec,fs); // Inject the header
vec._time = (byte)t; // Time parse, if any
DKV.put(_key,vec,fs); // Inject the header
return vec;
}

Expand Down
15 changes: 10 additions & 5 deletions src/main/java/water/fvec/NewChunk.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ public class NewChunk extends Chunk {
int _naCnt=-1; // Count of NA's appended
int _strCnt; // Count of Enum's appended
int _nzCnt; // Count of non-zero's appended
final int _timCnt[] = new int[water.util.Utils.TIME_PARSE.length]; // Count of successful time parses

public NewChunk( Vec vec, int cidx ) { _vec = vec; _cidx = cidx; }

Expand Down Expand Up @@ -65,11 +66,14 @@ public byte type() {
_nzCnt=nzs; _strCnt=ss; _naCnt=nas;
}
// Now run heuristic for type
if(_naCnt == _len2)
if(_naCnt == _len2) // All NAs ==> NA Chunk
return AppendableVec.NA;
if(_strCnt > 0 && _strCnt + _naCnt == _len2)
return AppendableVec.ENUM;
return AppendableVec.NUMBER;
return AppendableVec.ENUM; // All are Strings+NAs ==> Enum Chunk
// Larger of time & numbers
int timCnt=0; for( int t : _timCnt ) timCnt+=t;
int nums = _len2-_naCnt-timCnt;
return timCnt >= nums ? AppendableVec.TIME : AppendableVec.NUMBER;
}
protected final boolean isNA(int idx) {
return (_ds == null) ? (_ls[idx] == 0 && _xs[idx] == Integer.MIN_VALUE) : Double.isNaN(_ds[idx]);
Expand Down Expand Up @@ -185,11 +189,12 @@ Chunk compress() {
byte mode = type();
if( mode==AppendableVec.NA ) // ALL NAs, nothing to do
return new C0DChunk(Double.NaN,_len);
boolean rerun=false;
for( int i=0; i<_len; i++ )
if( mode==AppendableVec.ENUM && !isEnum(i) ||
mode==AppendableVec.NUMBER && isEnum(i) )
setNA_impl(i);
_naCnt = -1; type(); // Re-run rollups after dropping all numbers/enums
{ setNA_impl(i); rerun = true; } // Smack any mismatched string/numbers
if( rerun ) { _naCnt = -1; type(); } // Re-run rollups after dropping all numbers/enums

// If the data was set8 as doubles, we do a quick check to see if it's
// plain longs. If not, we give up and use doubles.
Expand Down
11 changes: 8 additions & 3 deletions src/main/java/water/fvec/ParseDataset2.java
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,7 @@ public FVecDataOut reduce(StreamDataOut sdout){
}
// Count of lines parsed so far; used by the (commented-out) extra-column
// diagnostic in addNumCol/addStrCol to report where a bad row occurred.
protected long linenum(){return _nLines;}
// Append one numeric value (mantissa `number`, decimal exponent `exp`) to
// column `colIdx`'s NewChunk. Values for columns beyond the expected count
// _nCols are silently dropped (the diagnostic print is commented out).
// NOTE(review): this span is a unified-diff fragment; the two `if` lines
// below are the pre- and post-change (whitespace-only) versions of the same
// statement. Only the second exists in the new revision.
@Override public void addNumCol(int colIdx, long number, int exp) {
if(colIdx < _nCols)_nvs[_col = colIdx].addNum(number,exp);
if( colIdx < _nCols ) _nvs[_col = colIdx].addNum(number,exp);
// else System.err.println("Additional column ("+ _nvs.length + " < " + colIdx + ":" + number + "," + exp + ") on line " + linenum());
}

Expand All @@ -647,8 +647,13 @@ public FVecDataOut reduce(StreamDataOut sdout){
_ctypes[colIdx] = TCOL;
if(_ctypes[colIdx] == TCOL){
long l = Utils.attemptTimeParse(str);
if(l > 0)addNumCol(colIdx, l, 0);
else addInvalidCol(colIdx);
if( l == Long.MIN_VALUE ) addInvalidCol(colIdx);
else {
int time_pat = Utils.decodePat(l); // Get time pattern
l = Utils.decodeTime(l); // Get time
addNumCol(colIdx, l, 0); // Record time in msec
_nvs[_col]._timCnt[time_pat]++; // Count histo of time parse patterns
}
} else if(!_enums[_col = colIdx].isKilled()) {
// store enum id into exponent, so that it will be interpreted as NA if compressing as numcol.
int id = _enums[colIdx].addKey(str);
Expand Down
Loading

0 comments on commit 843ef46

Please sign in to comment.