Skip to content

Commit

Permalink
Fix parsing test/test_quote.csv
Browse files Browse the repository at this point in the history
During parser-guessing, fields with an initial quoted section, followed
by more text (still in the same field) will not directly cause a
bailout.  Also, when the parser-setup guess fails, have the parser
politely throw an IllegalArgumentException (for a zero-column CSV) instead
of failing an assertion.
  • Loading branch information
cliffclick committed Nov 27, 2013
1 parent 4593884 commit 62ad646
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 6 deletions.
7 changes: 7 additions & 0 deletions smalldata/test/test_quote.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
0E0eE1e,0bE1a01,ebaaEee,eE0100b,ebEee01,aE0E1EE,0eaa1Ea,10E0Ee0,aE1e0eb,ba0Ebae,0
a0a0aeE,aee11Eb,beeaabb,a1bEeE1,E1aE0e0,EE1b0e1,abb1aea,E0ebE1a,1ae1baa,ebe1eba,0
1e0abe',bE1aaeE,eea1aE',E0bbe1b,'1'0eE',a0bebbb,1abbE1e,E1a0ee0,0'0aeaa,a'EE0ea,1
eeeb1E',1aE'eEa,1b'1eeE,'01e0aE,Eeb0a01,a1a1abb,b0b'eEb,b100''e,Ee0aeea,ee'Ee0b,0
Ebe1'ea,01ee''e,'1'b0e1,e01Eeba,0'aeaeb,b1eb0E0,E0'bea',b1a'E1e,E0'0eEa,EeEEEbe,0
E0''0ee,eea'0bb,1E0E0'a,E'bebe0,0e1ba10,'eEaaEa,000EeEa,0E01'Ee,''1bEEE,E0'aab',0
011ba1',Eba0b'a,Eb1eabe,11'00b1,E'e1E0',0bE00EE,ebbbb1E,ebebae',e'a0'ee,aa11E'b,0
3 changes: 1 addition & 2 deletions src/main/java/water/fvec/ParseDataset2.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,10 @@
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.Map.Entry;
import java.util.zip.*;

import jsr166y.CountedCompleter;
import water.*;
import water.H2O.H2OCallback;
import water.H2O.H2OCountedCompleter;
import water.fvec.Vec.VectorGroup;
import water.nbhm.NonBlockingHashMap;
Expand All @@ -35,6 +33,7 @@ public static Frame parse(Key okey, Key [] keys, CustomParser.ParserSetup global
byte [] bits = v.elem2BV(0).getBytes();
Compression cpr = Utils.guessCompressionMethod(bits);
globalSetup = ParseDataset.guessSetup(Utils.unzipBytes(bits,cpr), globalSetup,true)._setup;
if( globalSetup._ncols == 0 ) throw new java.lang.IllegalArgumentException(globalSetup.toString());
return forkParseDataset(okey, keys, globalSetup, delete_on_done).get();
}
// Same parse, as a backgroundable Job
Expand Down
1 change: 0 additions & 1 deletion src/main/java/water/parser/CsvParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -551,7 +551,6 @@ private static String[] determineTokens(String from, byte separator) {
continue;
}
quotes = 0;
break;
} else if ((quotes == 0) && ((c == separator) || (c == CHAR_CR) || (c == CHAR_LF))) {
break;
} else {
Expand Down
8 changes: 5 additions & 3 deletions src/test/java/water/TestUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -364,9 +364,11 @@ public static Frame parseFrame(Key okey, File file) {
if(okey == null)
okey = Key.make(file.getName());
Key fkey = NFSFileVec.make(file);
Frame fr = ParseDataset2.parse(okey, new Key[] { fkey });
UKV.remove(fkey);
return fr;
try {
return ParseDataset2.parse(okey, new Key[] { fkey });
} finally {
UKV.remove(fkey);
}
}

public static Frame frame(String[] names, double[]... rows) {
Expand Down
11 changes: 11 additions & 0 deletions src/test/java/water/fvec/ParserTest2.java
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,17 @@ String[] getDataForSeparator(char placeholder, char sep, String[] data) {
// Key r1 = Key.make("single_quotes_test");
// ParseDataset2.parse(r1, new Key[]{k},gSetup);
// }
/**
 * Parses {@code smalldata/test/test_quote.csv} into a frame keyed by
 * {@code q.hex} and checks the parsed dimensions: 11 columns and 7 rows.
 *
 * <p>The key is removed from {@code UKV} in a {@code finally} block so the
 * cleanup runs whether the parse and assertions succeed or throw.
 */
@Test public void testSingleQuotes() {
  Key k = Key.make("q.hex");
  try {
    Frame fr = TestUtil.parseFrame(k, "smalldata/test/test_quote.csv");
    // JUnit's contract is assertEquals(expected, actual); keeping that order
    // makes a failure message report the expected and actual values correctly.
    Assert.assertEquals(11, fr.numCols());
    Assert.assertEquals(7, fr.numRows());
  } finally {
    UKV.remove(k);
  }
}

@Test public void testSVMLight() {
String[] data = new String[] {
"1 2:.2 5:.5 9:.9\n",
Expand Down

0 comments on commit 62ad646

Please sign in to comment.