Skip to content

Commit da6b0e2

Browse files
committed
Added some print outs which will hopefully provide some useful info next time the mysterious Frame.split/rebalance exception hits.
1 parent 841ce57 commit da6b0e2

File tree

5 files changed

+137
-86
lines changed

5 files changed

+137
-86
lines changed

src/main/java/water/fvec/CXIChunk.java

+9-2
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
* Sparse chunk.
1313
*/
1414
public class CXIChunk extends Chunk {
15+
protected transient int _sparseLen; // Number of elements in this chunk
1516
protected transient int _valsz; // byte size of stored value
1617
protected transient int _valsz_log; //
1718
protected transient int _ridsz; // byte size of stored (chunk-relative) row nums
@@ -29,17 +30,18 @@ protected CXIChunk(int len, int nzs, int valsz, byte [] buf){
2930
assert valsz == 0 || (1 << log) == valsz;
3031
_valsz = valsz;
3132
_valsz_log = log;
32-
3333
_ridsz = (len >= 65535)?4:2;
3434
UDP.set4(buf,0,len);
3535
byte b = (byte) _ridsz;
3636
buf[4] = b;
3737
buf[5] = (byte) _valsz;
3838
_mem = buf;
39+
_sparseLen = (_mem.length - OFF) / (_valsz + _ridsz);
40+
assert (_mem.length - OFF) % (_valsz + _ridsz) == 0:"unexpected mem.length in sparse chunk: mem.length = " + (_mem.length - OFF) + "val_sz = " + _valsz + ", rowId_sz = " + _ridsz;
3941
}
4042

4143
@Override public final boolean isSparse() {return true;}
42-
@Override public final int sparseLen(){return (_mem.length - OFF) / (_valsz + _ridsz);}
44+
@Override public final int sparseLen(){return _sparseLen;}
4345
@Override public final int nonzeros(int [] arr){
4446
int len = sparseLen();
4547
int off = OFF;
@@ -77,6 +79,9 @@ protected CXIChunk(int len, int nzs, int valsz, byte [] buf){
7779

7880
@Override boolean hasFloat () { return false; }
7981

82+
@Override public String toString(){
83+
return getClass().getSimpleName() + "( start = " + _start + ", len = " + _len + " sparseLen = " + _sparseLen + " valSz = " + _valsz + " rIdSz = " + _ridsz + ")";
84+
}
8085
@Override NewChunk inflate_impl(NewChunk nc) {
8186
final int slen = sparseLen();
8287
nc.set_sparseLen(slen);
@@ -156,6 +161,8 @@ protected final int findOffset(int idx) {
156161
_len = UDP.get4(_mem,0);
157162
_ridsz = _mem[4];
158163
_valsz = _mem[5];
164+
_sparseLen = (_mem.length - OFF) / (_valsz + _ridsz);
165+
assert (_mem.length - OFF) % (_valsz + _ridsz) == 0:"unexpected mem.length in sparse chunk: mem.length = " + (_mem.length - OFF) + "val_sz = " + _valsz + ", rowId_sz = " + _ridsz;
159166
int x = _valsz;
160167
int log = 0;
161168
while(x > 1){

src/main/java/water/fvec/Chunk.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,9 @@ protected int pformat_len0( double scale, int lg ) {
244244

245245

246246
@Override public Chunk clone() { return (Chunk)super.clone(); }
247-
@Override public String toString() { return getClass().getSimpleName(); }
247+
@Override public String toString() {
248+
return getClass().getSimpleName() + "(start = " + _start + ", len = " + _len + ")";
249+
}
248250

249251
public long byteSize() {
250252
long s= _mem == null ? 0 : _mem.length;

src/main/java/water/fvec/ChunkSplitter.java

+30-25
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import java.util.Iterator;
44

55
import water.Futures;
6+
import water.util.Log;
67

78
/** Helper to provide access to package
89
* hidden methods and attributes.
@@ -16,32 +17,36 @@ public static NewChunk resetLen(NewChunk nc) {
1617

1718
/** Extract portion of given chunk into given output chunk. */
1819
public static void extractChunkPart(Chunk ic, Chunk oc, int startRow, int nrows, Futures fs) {
19-
NewChunk dst = new NewChunk(oc);
20-
dst._len = dst._sparseLen = 0;
21-
NewChunk src = new NewChunk(ic);
22-
src = ic.inflate_impl(src);
23-
assert src._len == ic._len;
24-
// Iterate over values skip all 0
25-
int remain = nrows;
26-
Iterator<NewChunk.Value> it = src.values(startRow,startRow+nrows);
27-
int off = startRow-1;
28-
while(it.hasNext()) {
29-
NewChunk.Value v = it.next();
30-
final int rid = v.rowId0();
31-
assert rid < startRow+nrows;
32-
int add = rid - off; // number of values to add
33-
off = rid;
34-
dst.addZeros(add-1); // append (add-1) zeros
35-
v.add2Chunk(dst); // followed by a value
36-
remain -= add;
37-
assert remain >= 0;
38-
}
39-
// Handle case when last added value is followed by zeros till startRow+nrows
40-
dst.addZeros(remain);
41-
42-
assert dst._len == oc._len : "NewChunk.dst.len = " + dst._len + ", oc._len = " + oc._len;
43-
dst.close(dst.cidx(),fs);
20+
try {
21+
NewChunk dst = new NewChunk(oc);
22+
dst._len = dst._sparseLen = 0;
23+
NewChunk src = new NewChunk(ic);
24+
src = ic.inflate_impl(src);
25+
assert src._len == ic._len;
26+
// Iterate over values skip all 0
27+
int remain = nrows;
28+
Iterator<NewChunk.Value> it = src.values(startRow, startRow + nrows);
29+
int off = startRow - 1;
30+
while (it.hasNext()) {
31+
NewChunk.Value v = it.next();
32+
final int rid = v.rowId0();
33+
assert rid < startRow + nrows;
34+
int add = rid - off; // number of values to add
35+
off = rid;
36+
dst.addZeros(add - 1); // append (add-1) zeros
37+
v.add2Chunk(dst); // followed by a value
38+
remain -= add;
39+
assert remain >= 0;
40+
}
41+
// Handle case when last added value is followed by zeros till startRow+nrows
42+
dst.addZeros(remain);
4443

44+
assert dst._len == oc._len : "NewChunk.dst.len = " + dst._len + ", oc._len = " + oc._len;
45+
dst.close(dst.cidx(), fs);
46+
} catch(RuntimeException t){
47+
// Log the input/output chunks and the requested row range before rethrowing, so the failing split can be identified.
// Message prefix is "got exception", matching the sibling diagnostics in NewChunk.values and RebalanceTask.rebalanceChunk.
Log.err("got exception in chunkSplitter, ic = " + ic + ", oc = " + oc + " startRow = " + startRow + " nrows = " + nrows);
48+
throw t;
49+
}
4550
return ;
4651
}
4752
}

src/main/java/water/fvec/NewChunk.java

+56-25
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import java.util.*;
44

55
import water.*;
6+
import water.util.Log;
67

78
// An uncompressed chunk of data, supporting an append operation
89
public class NewChunk extends Chunk {
@@ -77,31 +78,59 @@ public void add2Chunk(NewChunk c){
7778
}
7879

7980
public Iterator<Value> values(int fromIdx, int toIdx){
80-
final int lId, gId;
81-
final int to = Math.min(toIdx,_len);
82-
83-
if(sparse()){
84-
int x = Arrays.binarySearch(_id,0,_sparseLen,fromIdx);
85-
if(x < 0) x = -x -1;
86-
lId = x;
87-
gId = x == _sparseLen?_len:_id[x];
88-
} else
89-
lId = gId = fromIdx;
90-
final Value v = new Value(lId,gId);
91-
final Value next = new Value(lId,gId);
92-
return new Iterator<Value>(){
93-
@Override public final boolean hasNext(){return next._gId < to;}
94-
@Override public final Value next(){
95-
if(!hasNext())throw new NoSuchElementException();
96-
v._gId = next._gId; v._lId = next._lId;
97-
next._lId++;
98-
if(sparse()) next._gId = next._lId < _sparseLen?_id[next._lId]:_len;
99-
else next._gId++;
100-
return v;
101-
}
102-
@Override
103-
public void remove() {throw new UnsupportedOperationException();}
104-
};
81+
try {
82+
final int lId, gId;
83+
final int to = Math.min(toIdx, _len);
84+
85+
if (sparse()) {
86+
int x = Arrays.binarySearch(_id, 0, _sparseLen, fromIdx);
87+
if (x < 0) x = -x - 1;
88+
lId = x;
89+
gId = x == _sparseLen ? _len : _id[x];
90+
} else
91+
lId = gId = fromIdx;
92+
final Value v = new Value(lId, gId);
93+
final Value next = new Value(lId, gId);
94+
return new Iterator<Value>() {
95+
@Override
96+
public final boolean hasNext() {
97+
return next._gId < to;
98+
}
99+
100+
@Override
101+
public final Value next() {
102+
if (!hasNext()) throw new NoSuchElementException();
103+
v._gId = next._gId;
104+
v._lId = next._lId;
105+
next._lId++;
106+
if (sparse()) next._gId = next._lId < _sparseLen ? _id[next._lId] : _len;
107+
else next._gId++;
108+
return v;
109+
}
110+
111+
@Override
112+
public void remove() {
113+
throw new UnsupportedOperationException();
114+
}
115+
};
116+
}catch(RuntimeException t){
117+
try {
118+
StringBuilder sb = new StringBuilder("NewChunk: got exception during values() call, _len = " + _len + " _sparseLen = " + _sparseLen + " _isSparse = " + isSparse() + ", isDouble = " + (_ds != null) + "\n");
119+
// print first 10 elems
120+
for (int i = 0; i < Math.min(len(),10); ++i)
121+
sb.append(i + ": rowId = " + (_id == null ? i : _id[i]) + ", value = " + (_ds == null ? (_ds[i]) : (_ls[i] + " e" + _xs[i])) + "\n");
122+
// print last 10
123+
if(len() > 10) {
124+
sb.append("...");
125+
for(int i = Math.max(10,len()-10); i < len(); ++i)
126+
sb.append(i + ": rowId = " + (_id == null ? i : _id[i]) + ", value = " + (_ds == null ? (_ds[i]) : (_ls[i] + " e" + _xs[i])) + "\n");
127+
}
128+
Log.err(sb.toString());
129+
} catch(Throwable tt){
130+
Log.err(tt);
131+
} // just in case there is a bug in my printout, don't mask original exception!
132+
throw t;
133+
}
105134
}
106135

107136

@@ -467,6 +496,8 @@ protected void cancel_sparse(){
467496

468497
Chunk compress() {
469498
Chunk res = compress2();
499+
assert _len == res.len();
500+
assert !sparse() || !res.isSparse() || sparseLen() == res.sparseLen();
470501
// force everything to null after compress to free up the memory
471502
_id = null;
472503
_xs = null;

src/main/java/water/fvec/RebalanceDataSet.java

+39-33
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import water.H2O;
55
import water.Key;
66
import water.MRTask2;
7+
import water.util.Log;
78

89
import java.util.Arrays;
910
import java.util.Iterator;
@@ -98,41 +99,46 @@ public static class RebalanceTask extends MRTask2<RebalanceTask> {
9899
@Override public boolean logVerbose() { return false; }
99100

100101
private void rebalanceChunk(Vec srcVec, Chunk chk){
101-
NewChunk dst = new NewChunk(chk);
102-
dst._len = dst._sparseLen = 0;
103-
int rem = chk._len;
104-
while(rem > 0 && dst._len < chk._len){
105-
Chunk srcRaw = srcVec.chunkForRow(chk._start+dst._len);
106-
NewChunk src = new NewChunk((srcRaw));
107-
src = srcRaw.inflate_impl(src);
108-
assert src._len == srcRaw._len;
109-
int srcFrom = (int)(chk._start+dst._len - src._start);
110-
// check if the result is sparse (not exact since we only take subset of src in general)
111-
if((src.sparse() && dst.sparse()) || (src._len + dst._len < NewChunk.MIN_SPARSE_RATIO*(src._len + dst._len))){
112-
src.set_sparse(src._sparseLen);
113-
dst.set_sparse(dst._sparseLen);
102+
try {
103+
NewChunk dst = new NewChunk(chk);
104+
dst._len = dst._sparseLen = 0;
105+
int rem = chk._len;
106+
while (rem > 0 && dst._len < chk._len) {
107+
Chunk srcRaw = srcVec.chunkForRow(chk._start + dst._len);
108+
NewChunk src = new NewChunk((srcRaw));
109+
src = srcRaw.inflate_impl(src);
110+
assert src._len == srcRaw._len;
111+
int srcFrom = (int) (chk._start + dst._len - src._start);
112+
// check if the result is sparse (not exact since we only take subset of src in general)
113+
if ((src.sparse() && dst.sparse()) || (src._len + dst._len < NewChunk.MIN_SPARSE_RATIO * (src._len + dst._len))) {
114+
src.set_sparse(src._sparseLen);
115+
dst.set_sparse(dst._sparseLen);
116+
}
117+
final int srcTo = srcFrom + rem;
118+
int off = srcFrom - 1;
119+
Iterator<NewChunk.Value> it = src.values(Math.max(0, srcFrom), srcTo);
120+
while (it.hasNext()) {
121+
NewChunk.Value v = it.next();
122+
final int rid = v.rowId0();
123+
assert rid < srcTo;
124+
int add = rid - off;
125+
off = rid;
126+
dst.addZeros(add - 1);
127+
v.add2Chunk(dst);
128+
rem -= add;
129+
assert rem >= 0;
130+
}
131+
int trailingZeros = Math.min(rem, src._len - off - 1);
132+
dst.addZeros(trailingZeros);
133+
rem -= trailingZeros;
114134
}
115-
final int srcTo = srcFrom + rem;
116-
int off = srcFrom-1;
117-
Iterator<NewChunk.Value> it = src.values(Math.max(0,srcFrom),srcTo);
118-
while(it.hasNext()){
119-
NewChunk.Value v = it.next();
120-
final int rid = v.rowId0();
121-
assert rid < srcTo;
122-
int add = rid - off;
123-
off = rid;
124-
dst.addZeros(add-1);
125-
v.add2Chunk(dst);
126-
rem -= add;
127-
assert rem >= 0;
128-
}
129-
int trailingZeros = Math.min(rem,src._len - off -1);
130-
dst.addZeros(trailingZeros);
131-
rem -= trailingZeros;
135+
assert rem == 0 : "rem = " + rem;
136+
assert dst._len == chk._len : "len2 = " + dst._len + ", _len = " + chk._len;
137+
dst.close(dst.cidx(), _fs);
138+
} catch(RuntimeException t){
139+
Log.err("got exception while rebalancing chunk " + chk);
140+
throw t;
132141
}
133-
assert rem == 0:"rem = " + rem;
134-
assert dst._len == chk._len:"len2 = " + dst._len + ", _len = " + chk._len;
135-
dst.close(dst.cidx(),_fs);
136142
}
137143
@Override public void map(Chunk [] chks){
138144
for(int i = 0; i < chks.length; ++i)

0 commit comments

Comments
 (0)