Skip to content

Commit 87a5201

Browse files
committed
Fix POJO generator
- add missing semicolons in the POJO generator - make the file name and class name consistent - do not generate the NAMES field if it is too long: right now each string in the NAMES field occupies 4 bytes in the static initializer of the class, so the limit is ~65536/(2*2*4), since all strings from the NAMES array and from ColInfo must be loaded in the static initializer, and the static initializer contributes to the size of the resulting class - do not generate ColInfo if it is not necessary.
1 parent 3663a6b commit 87a5201

File tree

4 files changed

+21
-10
lines changed

4 files changed

+21
-10
lines changed

src/main/java/hex/gbm/GBM.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -143,8 +143,8 @@ private GBMModel(GBMModel prior, Key[][] treeKeys, double[] errs, ConfusionMatri
143143
if(family == Family.bernoulli) {
144144
bodyCtxSB.i().p("// Compute Probabilities for Bernoulli 0-1 classifier").nl();
145145
bodyCtxSB.i().p("double fx = preds[1] + "+initialPrediction+";").nl();
146-
bodyCtxSB.i().p("preds[2] = 1.0f/(float)(1.0f+Math.exp(-fx))").nl();
147-
bodyCtxSB.i().p("preds[1] = 1.0f-preds[2]").nl();
146+
bodyCtxSB.i().p("preds[2] = 1.0f/(float)(1.0f+Math.exp(-fx));").nl();
147+
bodyCtxSB.i().p("preds[1] = 1.0f-preds[2];").nl();
148148
}
149149
else if (isClassifier()) {
150150
bodyCtxSB.i().p("// Compute Probabilities for classifier (scale via http://www.hongliangjie.com/2011/01/07/logsum/)").nl();

src/main/java/water/Model.java

+8-2
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,13 @@ protected SB toJavaSuper( SB sb ) {
656656

657657
return sb;
658658
}
659-
private SB toJavaNAMES( SB sb ) { return JCodeGen.toStaticVar(sb, "NAMES", _names, "Names of columns used by model."); }
659+
private SB toJavaNAMES( SB sb ) {
660+
//
661+
int limit = ((1<<16) /* Max size of class */ - 4 * 500 /* Free space for loading any static stuff */ ) / (4*2*2); // Static initialized needs 4 instructions to load String from constant pool + load ColInfo
662+
return _names.length < limit ?
663+
JCodeGen.toStaticVar(sb, "NAMES", _names, "Names of columns used by model.") :
664+
JCodeGen.toStaticVar(sb, "NAMES", JCodeGen.EMPTY_SA, "Names of columns used by model. WARNING: It is too large to be generated!");
665+
}
660666
protected SB toJavaNCLASSES( SB sb ) { return isClassifier() ? JCodeGen.toStaticVar(sb, "NCLASSES", nclasses(), "Number of output classes included in training data response column.") : sb; }
661667
private SB toJavaDOMAINS( SB sb, SB fileContextSB ) {
662668
sb.nl();
@@ -667,7 +673,7 @@ private SB toJavaDOMAINS( SB sb, SB fileContextSB ) {
667673
String[] dom = _domains[i];
668674
String colInfoClazz = "ColInfo_"+i;
669675
sb.i(1).p("/* ").p(_names[i]).p(" */ ");
670-
sb.p(colInfoClazz).p(".VALUES");
676+
if (dom != null) sb.p(colInfoClazz).p(".VALUES"); else sb.p("null");
671677
if (i!=_domains.length-1) sb.p(',');
672678
sb.nl();
673679
fileContextSB.i().p("// The class representing column ").p(_names[i]).nl();

src/main/java/water/api/SaveModel.java

+9-6
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
package water.api;
22

33
import java.io.*;
4+
45
import static water.util.FSUtils.isHdfs;
56
import static water.util.FSUtils.isS3N;
67

78
import java.io.File;
89
import java.io.IOException;
910

1011
import hex.glm.GLMModel;
12+
1113
import org.apache.hadoop.fs.FileSystem;
1214
import org.apache.hadoop.fs.Path;
1315

@@ -16,6 +18,7 @@
1618
import water.serial.Model2FileBinarySerializer;
1719
import water.serial.Model2HDFSBinarySerializer;
1820
import water.util.FSUtils;
21+
import water.util.JCodeGen;
1922

2023
public class SaveModel extends Func {
2124
static final int API_WEAVER = 1;
@@ -48,7 +51,7 @@ private void saveToLocalFS() {
4851
// Create folder
4952
parentDir.mkdirs();
5053
// Save parent model
51-
new Model2FileBinarySerializer().save(model, new File(parentDir, model._key.toString()));
54+
new Model2FileBinarySerializer().save(model, new File(parentDir, JCodeGen.toJavaId(model._key.toString())));
5255
// Write to model_names
5356
File model_names = new File(parentDir, "model_names");
5457
FileOutputStream is = new FileOutputStream(model_names);
@@ -61,8 +64,8 @@ private void saveToLocalFS() {
6164
Model[] models = getCrossValModels(model);
6265
System.out.println(models);
6366
for (Model m : models) {
64-
new Model2FileBinarySerializer().save(m, new File(parentDir, m._key.toString()));
65-
br.write(m._key.toString());
67+
new Model2FileBinarySerializer().save(m, new File(parentDir, JCodeGen.toJavaId(m._key.toString())));
68+
br.write(JCodeGen.toJavaId(m._key.toString()));
6669
br.newLine();
6770
}
6871
}
@@ -80,7 +83,7 @@ private void saveToHdfs() {
8083
if (force && fs.exists(parentDir)) fs.delete(parentDir);
8184
fs.mkdirs(parentDir);
8285
// Save parent model
83-
new Model2HDFSBinarySerializer(fs, force).save(model, new Path(parentDir, model._key.toString()));
86+
new Model2HDFSBinarySerializer(fs, force).save(model, new Path(parentDir, JCodeGen.toJavaId(model._key.toString())));
8487
// Save parent model key to model_names file
8588
Path model_names = new Path(parentDir, "model_names");
8689
BufferedWriter br = new BufferedWriter(new OutputStreamWriter(fs.create(model_names,true)));
@@ -89,8 +92,8 @@ private void saveToHdfs() {
8992
if (save_cv) {
9093
Model[] models = getCrossValModels(model);
9194
for (Model m : models ) {
92-
new Model2HDFSBinarySerializer(fs, force).save(m, new Path(parentDir, m._key.toString()));
93-
br.write(m._key.toString());
95+
new Model2HDFSBinarySerializer(fs, force).save(m, new Path(parentDir, JCodeGen.toJavaId(m._key.toString())));
96+
br.write(JCodeGen.toJavaId(m._key.toString()));
9497
br.newLine();
9598
}
9699
}

src/main/java/water/util/JCodeGen.java

+2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
public class JCodeGen {
77

8+
public static final String[] EMPTY_SA = new String[] {} ;
9+
810
/** Generates data sample as a dedicated class with static <code>double[][]</code> member. */
911
public static SB toClass(SB sb, String classSig, String varname, Frame f, int nrows, String comment) {
1012
sb.p(classSig).p(" {").nl().ii(1);

0 commit comments

Comments
 (0)