Skip to content

Commit

Permalink
Modified parser to handle multiple keys (value arrays).
Browse files Browse the repository at this point in the history
All keys are parsed into a single value array. Basically, each file is parsed separately in a loop, with an extra step to put the information together after pass 1 (to compute the global compression scheme) and to set the correct output chunks in pass 2.

Also added tests and a utility method, Value.isBitIdentical(Value).
  • Loading branch information
tomasnykodym committed Mar 26, 2013
1 parent 81474ae commit c0bd274
Show file tree
Hide file tree
Showing 22 changed files with 3,746 additions and 67 deletions.
2 changes: 2 additions & 0 deletions py/junit.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,15 @@ def testAll(self):
'water.AtomicTest',
'water.AutoSerialTest',
'water.KVTest',
'water.BitCmpTest',
'water.KeyToString',
'water.api.RStringTest',
'water.exec.ExprTest',
'water.parser.DatasetCornerCasesTest',
'water.parser.ParseProgressTest',
'water.parser.ParserTest',
'water.parser.RReaderTest',
'water.parser.ParseFolderTest'
])

rc = ps.wait(None)
Expand Down
1 change: 1 addition & 0 deletions py/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def test_G_Slower_JUNIT(self):
'water.ConcurrentKeyTest',
'hex.MinorityClassTest',
'water.exec.RBigDataTest',
'water.parser.ParseFolderTestBig'
])
rc = ps.wait(None)
out = file(stdout).read()
Expand Down
11 changes: 11 additions & 0 deletions smalldata/parse_folder_test/prostate_0.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"ID","CAPSULE","AGE","RACE","DPROS","DCAPS","PSA","VOL","GLEASON"
1,0,65,"R2",2,1,1.4,0,6
2,0,72,"R2",3,2,6.7,0,7
3,0,70,"R2",1,2,4.9,0,6
4,0,76,"R3",2,1,51.2,20,7
5,0,69,"R2",1,1,12.3,55.9,6
6,1,71,"R2",3,2,3.3,0,8
7,0,68,"R3",4,2,31.9,0,7
8,0,61,"R3",4,2,66.7,27.2,7
9,0,69,"R2",1,1,3.9,24,7
10,0,68,"R3",1,2,13,0,6
10 changes: 10 additions & 0 deletions smalldata/parse_folder_test/prostate_1.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
11,1,68,"R3",4,2,4,0,7
12,1,72,"R2",2,2,21.2,0,7
13,1,72,"R2",4,2,22.7,0,9
14,1,65,"R2",4,2,39,0,7
15,0,75,"R2",1,1,7.5,0,5
16,0,73,"R2",2,1,2.6,0,5
17,0,75,"R3",1,1,2.5,0,5
18,0,70,"R2",2,1,2.6,11.8,5
19,0,54,"R2",1,2,2.8,0,6
20,1,67,"R3",3,2,8.6,25.5,7
10 changes: 10 additions & 0 deletions smalldata/parse_folder_test/prostate_2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
21,1,58,"R2",2,1,3.1,0,7
22,1,70,"R1",4,1,67.1,0,7
23,1,74,"R2",3,1,12.7,27.5,7
24,0,73,"R2",1,1,12.3,47.3,7
25,1,77,"R2",1,1,61.1,58,7
26,0,77,"R2",1,1,8.8,0,5
27,0,67,"R2",2,1,2.8,25.6,7
28,0,73,"R2",3,1,2.9,14.1,5
29,0,64,"R2",3,1,5.6,34.1,5
30,0,58,"R2",4,1,5.6,22.8,6
50 changes: 50 additions & 0 deletions smalldata/parse_folder_test/prostate_3.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
31,1,54,"R2",3,1,8.4,18.3,6
32,0,72,"R2",2,1,6.5,22.5,7
33,0,77,"R2",4,1,11,0,8
34,1,60,"R2",3,2,9.5,0,7
35,0,65,"R2",2,1,11.1,17.7,6
36,0,71,"R2",2,1,7.3,20,7
37,0,54,"R2",2,1,1,0,6
38,1,78,"R2",1,2,27.2,0,8
39,1,63,"R2",2,1,35.1,18.7,7
40,1,73,"R2",3,1,4.5,26.4,7
41,1,66,"R2",3,1,7.9,20.8,7
42,0,71,"R2",1,1,2,0,6
43,1,71,"R2",2,1,7.5,0,6
44,0,72,"R2",2,1,5.3,0,7
45,1,65,"R3",3,1,83.7,32,9
46,1,75,"R1",3,2,33,15.2,8
47,0,69,"R3",2,1,6.7,59.8,6
48,0,68,"R2",2,1,12.3,16.3,8
49,0,70,"R2",2,1,0.4,17.1,5
50,0,67,"R2",2,1,5.8,14.5,6
51,0,76,"R2",1,1,7.6,3.7,6
52,1,72,"R2",4,2,124,38.6,8
53,0,66,"R2",3,1,8.8,39.9,6
54,1,61,"R2",3,1,67.6,0,7
55,0,70,"R2",3,1,13.9,13,7
56,1,57,"R2",3,1,7.4,18.3,7
57,0,74,"R2",2,1,23.1,5.9,7
58,1,70,"R2",3,1,19.3,0,7
59,0,60,"R2",3,1,3,16.5,6
60,0,58,"R2",3,1,3.7,29.9,6
61,0,59,"R2",2,1,0.7,96,5
62,1,71,"R2",2,1,6,31,6
63,0,76,"R2",2,1,9.5,14.4,7
64,0,66,"R2",3,1,2.6,0,7
65,1,59,"R2",4,1,30.7,0,7
66,1,65,"R2",2,1,10.8,0,7
67,0,54,"R2",2,1,10.5,0,6
68,0,78,"R2",1,1,6.5,0,7
69,0,65,"R2",2,1,1.3,6.8,5
70,1,68,"R3",3,1,9.6,32,6
71,0,68,"R2",2,1,0.3,0,6
72,0,71,"R2",3,1,8.3,17.5,5
73,0,60,"R2",3,1,3.2,0,7
74,0,65,"R2",3,1,6.9,23.3,5
75,0,68,"R2",1,1,11,0,7
76,0,54,"R3",1,1,64.3,0,7
77,0,73,"R2",3,1,1.6,17.1,6
78,1,62,"R2",2,1,1.9,0,6
79,0,60,"R2",2,1,7.9,0,5
80,1,66,"R2",1,1,25.7,39.1,9
50 changes: 50 additions & 0 deletions smalldata/parse_folder_test/prostate_4.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
81,0,76,"R2",3,1,4.9,0,6
82,0,62,"R2",1,1,22.1,0,7
83,0,74,"R2",2,1,31.5,0,7
84,0,75,"R2",2,1,11,35,7
85,0,75,"R2",1,1,9.9,15.4,7
86,1,75,"R2",3,1,3.7,0,6
87,1,68,"R2",4,2,51.3,47,9
88,1,71,"R2",2,2,89,24,8
89,0,68,"R2",4,1,17.1,35,9
90,0,70,"R2",3,1,12.3,10.3,7
91,0,68,"R2",1,1,4.4,39,6
92,0,66,"R2",1,1,8,0,5
93,1,70,"R2",3,1,15,0,7
94,1,65,"R2",3,2,35.8,29,9
95,0,71,"R2",1,1,13.4,44.2,7
96,1,75,"R2",1,1,16,18.7,7
97,0,70,"R2",3,1,11.2,0,7
98,0,58,"R2",1,1,7,0,6
99,0,64,"R2",1,1,29.1,0,6
100,1,66,"R2",3,1,9.5,28.1,7
101,0,64,"R2",2,1,6.1,0,6
102,1,72,"R2",3,1,6.3,34,7
103,0,62,"R2",1,1,2.8,44,6
104,0,75,"R2",3,1,25.7,87.6,5
105,1,68,"R2",2,1,5.7,0,7
106,0,56,"R2",3,2,2.7,37,6
107,1,69,"R2",1,1,6,0,7
108,0,67,"R2",3,1,40.4,0,7
109,0,66,"R2",3,1,13.2,23.6,6
110,1,69,"R2",3,1,15.2,0,7
111,0,74,"R2",1,1,8.8,0,7
112,1,79,"R2",3,1,7.8,0,6
113,1,65,"R2",3,1,6.9,4.6,7
114,0,71,"R2",2,1,17.2,65.6,8
115,1,57,"R2",3,2,64,0,8
116,1,47,"R2",1,1,28,0,9
117,1,66,"R3",4,1,45.3,0,6
118,0,64,"R2",2,1,32.8,22.6,6
119,0,74,"R2",2,1,3.2,44.8,6
120,0,56,"R2",2,1,5.6,20.2,8
121,1,77,"R2",1,1,16.4,0,7
122,0,66,"R2",1,1,6.8,54.5,5
123,0,67,"R3",4,1,25.2,21.7,7
124,1,70,"R2",2,1,12.5,0,9
125,1,70,"R2",4,1,3.6,21.7,7
126,0,65,"R2",2,1,11,16,6
127,0,63,"R2",2,1,8.1,20.3,6
128,0,59,"R2",2,1,2.3,0,5
129,0,75,"R2",2,1,1.4,0,6
130,0,70,"R2",1,1,6.7,29.4,6
50 changes: 50 additions & 0 deletions smalldata/parse_folder_test/prostate_5.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
131,0,73,"R2",1,1,1,0,5
132,0,72,"R2",2,1,4.5,29.9,8
133,0,67,"R2",2,1,6.2,0,5
134,1,60,"R2",4,1,4.6,26.3,7
135,0,66,"R2",1,1,8.9,43.5,7
136,0,63,"R2",2,1,8.2,17.8,6
137,0,68,"R2",2,1,5,0,6
138,1,66,"R2",4,1,91.9,0,8
139,1,69,"R2",4,2,94,0,7
140,0,67,"R2",1,1,13.4,0,7
141,0,72,"R2",1,1,9.1,0,6
142,1,56,"R3",2,1,41.9,0,8
143,0,74,"R2",2,1,21.6,52,7
144,1,66,"R2",4,2,37.7,0,8
145,0,61,"R2",1,1,9.4,34.6,6
146,0,62,"R3",1,1,9.1,33.1,6
147,1,68,"R2",4,2,55.6,0,8
148,0,63,"R2",3,1,3.6,38,6
149,0,55,"R2",4,1,6.2,29.2,6
150,0,70,"R2",2,1,6.3,0,7
151,0,65,"R3",2,1,4.6,0,5
152,1,66,"R2",1,1,4.9,16.1,6
153,0,68,"R2",1,1,2.6,12.8,6
154,1,55,"R2",2,1,16.3,21.2,7
155,1,55,"R2",2,1,4.39,0,6
156,0,76,"R2",2,1,8.9,87.3,5
157,1,58,"R2",2,1,21.2,26.8,7
158,1,75,"R2",2,1,7.9,0,7
159,1,70,"R2",1,1,44.4,0,6
160,1,59,"R2",2,1,16.1,23,7
161,1,63,"R3",2,1,26,21.3,7
162,1,62,"R2",3,1,8.8,0,6
163,1,58,"R2",2,1,20.6,0,7
164,1,62,"R2",3,1,26.7,0,6
165,0,69,"R2",3,1,3.5,0,5
166,1,62,"R2",2,2,14.8,22.2,7
167,1,70,"R2",2,1,11.9,35.5,6
168,0,76,"R2",4,1,5.7,40,6
169,1,64,"R2",4,1,2.4,0,6
170,1,73,"R2",2,1,42.8,0,8
171,1,74,"R2",3,1,7,0,6
172,1,71,"R2",3,1,3.3,0,6
173,1,60,"R2",4,1,7.3,0,7
174,1,62,"R2",2,1,17.2,0,7
175,0,71,"R2",2,1,3.8,19,6
176,0,67,"R2",3,1,5.7,15.4,6
177,1,68,"R2",4,1,31.6,18,7
178,0,69,"R2",1,1,5.4,37.3,6
179,0,67,"R2",1,1,15,35.1,7
180,1,70,"R2",2,1,22,0,8
50 changes: 50 additions & 0 deletions smalldata/parse_folder_test/prostate_6.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
181,1,68,"R2",3,2,23.4,0,7
182,1,70,"R2",1,1,51.9,20.1,8
183,1,70,"R2",1,1,20.4,35,7
184,1,60,"R2",2,1,18.7,23.4,7
185,1,61,"R2",3,1,12.7,33.4,7
186,1,57,"R2",3,1,20.1,30.3,8
187,1,68,"R2",1,1,85.4,10,7
188,0,75,"R3",2,1,9.3,23.2,6
189,1,69,"R2",3,2,8,31.2,6
190,0,64,"R2",4,1,7.5,11.4,6
191,0,62,"R2",3,1,5,0,6
192,0,61,"R2",4,1,61.6,21.2,6
193,0,59,"R2",1,1,8.5,38.3,5
194,0,71,"R2",1,1,10,15.09,6
195,1,72,"R2",2,1,12.7,0,6
196,1,73,"R2",3,1,12.3,30.1,8
197,0,66,"R3",4,1,11,0,5
198,1,69,"R2",3,1,17.7,21,5
199,0,69,"R2",2,1,3.9,0,6
200,0,70,"R2",3,1,5,21.6,7
201,0,57,"R2",1,1,10.2,0,6
202,1,68,"R2",3,1,19.2,0,8
203,1,61,"R2",2,1,2.9,0,6
204,0,71,"R2",2,1,1.7,0,6
205,0,62,"R2",2,1,9,0,6
206,1,76,"R2",4,2,11.7,28.5,7
207,0,65,"R2",1,1,8,54,6
208,0,53,"R2",2,1,9.9,27,5
209,0,65,"R2",2,1,14.7,15,7
210,0,57,"R2",3,1,1.5,0,5
211,0,62,"R2",2,1,13.7,33.4,5
212,1,68,"R2",2,1,8.6,0,8
213,1,66,"R2",4,1,45.8,0,8
214,0,50,"R2",3,1,1.8,8.7,5
215,0,67,"R2",2,1,8,20.7,7
216,0,56,"R2",1,1,5,29.6,6
217,1,69,"R2",4,1,53.9,0,6
218,1,68,"R2",4,2,18.8,27.3,9
219,0,68,"R2",3,1,2.4,20.8,6
220,0,74,"R2",2,1,1.2,21.6,6
221,1,51,"R2",2,1,7.4,0,6
222,1,69,"R2",3,1,38,32.8,7
223,0,65,"R2",1,1,9.4,38.4,5
224,1,58,"R2",3,1,3.1,0,7
225,0,71,"R2",1,1,1.29,0,7
226,0,68,"R2",2,1,12.7,0,7
227,0,56,"R2",1,1,7.3,15.7,6
228,0,59,"R2",4,1,3.8,13.8,6
229,0,67,"R2",1,1,10.2,0,7
230,1,54,"R2",3,1,7,29,6
50 changes: 50 additions & 0 deletions smalldata/parse_folder_test/prostate_7.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
231,1,69,"R3",3,1,24.1,24.5,7
232,1,54,"R2",3,1,9.4,36.8,6
233,1,68,"R2",3,2,25,20,7
234,1,59,"R3",4,1,126,20.3,8
235,1,53,"R3",2,1,16.8,16.9,7
236,1,68,"R2",2,1,8.2,38.2,6
237,0,73,"R2",2,1,3.1,21.9,6
238,0,66,"R2",2,1,11,36.6,6
239,0,71,"R2",4,1,4.9,15.2,6
240,0,69,"R3",1,1,15.6,17.9,6
241,1,68,"R2",1,1,6.7,16.7,6
242,1,74,"R2",3,1,8.7,20.4,6
243,1,58,"R3",4,2,20.7,0,8
244,0,67,"R2",1,1,2.1,0,6
245,0,70,"R2",1,1,8,0,7
246,1,70,"R3",4,1,13.2,25.6,7
247,0,66,"R2",1,1,2.3,0,5
248,1,57,"R3",1,1,63.3,73.4,7
249,0,62,"R2",1,1,13.7,0,5
250,0,50,"R2",3,1,13.2,0,7
251,0,61,"R2",2,2,8.4,0,6
252,0,43,"R1",2,1,3.8,21,6
253,1,74,"R2",2,1,17.7,0,7
254,0,78,"R2",2,1,5.9,32.5,6
255,0,65,"R2",2,1,22,60,7
256,1,57,"R2",2,2,15.1,0,9
257,0,71,"R2",4,2,25.1,14.9,7
258,0,70,"R2",1,1,24.8,16.1,6
259,1,55,"R2",2,1,6.7,0,6
260,0,71,"R2",1,1,11.3,32,6
261,0,63,"R2",2,1,17.5,31.2,5
262,1,62,"R2",2,1,8.3,0,6
263,1,71,"R2",2,1,4.8,0,7
264,0,59,"R2",2,1,1.7,17.2,6
265,1,64,"R2",3,1,7.5,13.7,7
266,0,67,"R2",3,1,20,17.5,5
267,1,65,"R2",3,1,3.4,14.3,7
268,0,62,"R2",2,1,10.6,42.4,5
269,1,65,"R2",3,1,18,19.3,7
270,1,71,"R2",4,1,18.7,21,5
271,0,61,"R2",1,1,5.4,27.5,6
272,0,59,"R2",1,1,4.6,29.6,5
273,1,60,"R2",2,1,28.5,0,9
274,1,59,"R2",1,1,11.2,0,6
275,1,70,"R2",1,1,8.9,24,6
276,1,70,"R2",2,1,1.7,33.8,6
277,0,59,"R2",2,1,1.4,0,6
278,1,70,"R2",2,1,8.4,21.8,5
279,0,54,"R2",3,1,8.6,0,5
280,0,64,"R2",2,1,10.9,24.8,6
50 changes: 50 additions & 0 deletions smalldata/parse_folder_test/prostate_8.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
281,0,62,"R2",3,1,20.1,0,7
282,0,65,"R2",1,1,7.3,0,0
283,0,66,"R3",2,1,8,0,5
284,0,71,"R2",1,1,2,0,5
285,0,61,"R2",2,1,13.1,0,7
286,1,58,"R2",3,1,5.4,55,6
287,1,76,"R2",2,1,4.1,0,8
288,1,53,"R2",3,1,6.5,0,7
289,0,65,"R2",1,1,6.1,28.9,7
290,0,69,"R2",1,1,6.2,26.3,5
291,0,64,"R2",3,1,3.8,19.1,5
292,1,61,"R2",1,1,6.8,24.6,5
293,0,62,"R2",2,1,9.5,26.6,6
294,1,63,"R2",4,1,20.4,0,7
295,0,61,"R2",3,1,4.8,0,6
296,1,63,"R2",4,1,9.8,17.5,8
297,1,73,"R2",4,2,7.9,0,8
298,0,72,"R2",1,1,22.1,0,5
299,0,52,"R2",2,1,5.5,0,5
300,0,73,"R2",1,1,17.4,14.2,5
301,0,70,"R2",4,1,10,73.8,5
302,0,70,"R2",2,1,5.8,20,6
303,1,67,"R2",4,2,135,0,7
304,0,64,"R2",2,1,8.5,0,7
305,0,51,"R2",2,1,13,0,6
306,0,64,"R2",1,1,7.2,10.9,6
307,0,68,"R2",3,1,11.8,0,5
308,1,66,"R2",3,1,4.8,8.8,7
309,0,69,"R2",1,1,14.3,67.1,4
310,0,67,"R2",3,1,18.1,0,8
311,0,78,"R2",1,1,5.2,29.1,5
312,1,75,"R2",4,1,9.9,16.3,5
313,1,59,"R2",3,2,12.9,0,6
314,0,64,"R2",1,1,22,0,5
315,1,74,"R2",3,1,9,41.8,7
316,1,73,"R2",4,1,14,0,7
317,1,57,"R3",3,1,7.8,38.9,7
318,0,71,"R2",1,1,4.8,14,7
319,1,66,"R2",2,1,58.6,0,7
320,0,64,"R2",2,1,2,0,6
321,0,64,"R2",3,1,8.7,17.2,7
322,1,62,"R2",4,1,4.6,0,6
323,0,63,"R2",3,1,0.7,18.6,5
324,1,64,"R2",4,1,24.1,0,6
325,0,70,"R2",1,1,5.3,73.7,5
326,0,69,"R2",1,1,6.3,0,5
327,1,75,"R2",1,1,4.8,26.3,7
328,0,52,"R2",2,1,2.2,11.5,6
329,0,62,"R2",2,1,7.4,0,6
330,0,71,"R2",2,1,4.6,48.7,5
Loading

0 comments on commit c0bd274

Please sign in to comment.