Skip to content

Commit 6f83067

Browse files
committed
Moved RebalanceDataset correctly to water/main/java instead of /water/test/...
Added RebalanceDatasetTest testing basic rebalance dataset functionality (just rebalance prostate.hex (380 lines) into 300 chunks and check values of all rows.
1 parent 3a8040e commit 6f83067

File tree

2 files changed

+44
-5
lines changed

2 files changed

+44
-5
lines changed

src/test/java/water/fvec/RebalanceDataSet.java src/main/java/water/fvec/RebalanceDataSet.java

+3-5
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,12 @@ public class RebalanceDataSet extends H2O.H2OCountedCompleter {
2222
Frame _out;
2323
final Key _jobKey;
2424

25-
public RebalanceDataSet(H2O.H2OCountedCompleter cmp, Job job, Frame srcFrame, Key dstKey, int nchunks){
25+
public RebalanceDataSet(Frame srcFrame, Key dstKey, int nchunks) { this(srcFrame, dstKey,nchunks,null,null);}
26+
public RebalanceDataSet(Frame srcFrame, Key dstKey, int nchunks, H2O.H2OCountedCompleter cmp, Key jobKey){
2627
super(cmp);
2728
_in = srcFrame;
2829
_nchunks = nchunks;
29-
_jobKey = job == null?null:job.self();
30+
_jobKey = jobKey;
3031
_okey = dstKey;
3132
}
3233

@@ -84,17 +85,14 @@ public static class RebalanceTask extends MRTask2<RebalanceTask> {
8485
int rem = chk._len;
8586
while(rem > 0 && dst._len2 < chk._len){
8687
Chunk srcRaw = _srcVec.chunkForRow(chk._start+dst._len2);
87-
if(srcRaw == null)System.out.println("missing chunk for row " + chk._start+dst._len2);
8888
NewChunk src = new NewChunk((srcRaw));
8989
src = srcRaw.inflate_impl(src);
9090
assert src._len2 == srcRaw._len;
9191
int srcFrom = (int)(chk._start+dst._len2 - src._start);
92-
boolean sparse = false;
9392
// check if the result is sparse (not exact since we only take subset of src in general)
9493
if((src.sparse() && dst.sparse()) || (src._len + dst._len < NewChunk.MIN_SPARSE_RATIO*(src._len2 + dst._len2))){
9594
src.set_sparse(src._len);
9695
dst.set_sparse(dst._len);
97-
sparse = true;
9896
}
9997
final int srcTo = srcFrom + rem;
10098
int off = srcFrom-1;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package water.fvec;
2+
3+
import org.junit.Test;
4+
import water.H2O;
5+
import water.Key;
6+
import water.TestUtil;
7+
import water.UKV;
8+
9+
import java.io.File;
10+
import static org.junit.Assert.assertEquals;
11+
import static org.junit.Assert.assertTrue;
12+
13+
/**
14+
* Created by tomasnykodym on 4/1/14.
15+
*/
16+
public class RebalanceDatasetTest extends TestUtil {
17+
@Test public void testProstate(){
18+
Key hex = Key.make("p.hex");
19+
Key rebalancedKey = Key.make("rebalanced");
20+
Key raw = NFSFileVec.make(new File("smalldata/logreg/prostate.csv"));
21+
try{
22+
ParseDataset2.parse(hex, new Key[]{raw});
23+
Frame fr = UKV.get(hex);
24+
RebalanceDataSet rb = new RebalanceDataSet(fr,rebalancedKey,300);
25+
H2O.submitTask(rb);
26+
rb.join();
27+
Frame rebalanced = UKV.get(rebalancedKey);
28+
assertEquals(rebalanced.numRows(),fr.numRows());
29+
assertEquals(rebalanced.anyVec()._espc.length,301);
30+
for(long l = 0; l < fr.numRows(); ++l)
31+
for(int i = 0; i < fr.numCols(); ++i)
32+
assertEquals(fr.vecs()[i].at(l),rebalanced.vecs()[i].at(l),1e-8);
33+
} finally{
34+
Frame fr = UKV.get(hex);
35+
fr.delete();
36+
fr = UKV.get(rebalancedKey);
37+
fr.delete();
38+
}
39+
checkLeakedKeys();
40+
}
41+
}

0 commit comments

Comments
 (0)