Skip to content

Commit 745e4ad

Browse files
committed
Fix model save/load on multi-node clusters (remove the old Frame object from the model after reload); only work with Frame keys (train + validation).
1 parent 446539e commit 745e4ad

File tree

2 files changed

+16
-7
lines changed

2 files changed

+16
-7
lines changed

src/main/java/hex/deeplearning/DeepLearning.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -774,9 +774,9 @@ private void buildModel() {
774774
ignored_cols = previous.model_info().get_params().ignored_cols;
775775
Log.warn("Automatically re-using ignored_cols from the checkpointed model.");
776776
}
777-
if ((validation == null) == (previous.model_info().get_params().validation != null)
778-
|| (validation != null && validation._key != null && previous.model_info().get_params().validation._key != null
779-
&& !Arrays.equals(validation._key._kb, previous.model_info().get_params().validation._key._kb))) {
777+
if ((validation == null) == (previous._validationKey != null)
778+
|| (validation != null && validation._key != null && previous._validationKey != null
779+
&& !Arrays.equals(validation._key._kb, previous._validationKey._kb))) {
780780
throw new IllegalArgumentException("validation must be the same as for the checkpointed model.");
781781
}
782782
if (classification != previous.model_info().get_params().classification) {

src/main/java/hex/deeplearning/DeepLearningModel.java

+13-4
Original file line numberDiff line numberDiff line change
@@ -1250,6 +1250,16 @@ public double calcOutlierThreshold(Vec mse, double quantile) {
12501250
return qp.result;
12511251
}
12521252

1253+
@Override public ModelAutobufferSerializer getModelSerializer() {
1254+
// Return a serializer which knows how to serialize keys
1255+
return new ModelAutobufferSerializer() {
1256+
@Override protected AutoBuffer postLoad(Model m, AutoBuffer ab) {
1257+
Job.hygiene(((DeepLearningModel)m).get_params());
1258+
return ab;
1259+
}
1260+
};
1261+
}
1262+
12531263
public boolean generateHTML(String title, StringBuilder sb) {
12541264
if (_key == null) {
12551265
DocGen.HTML.title(sb, "No model yet");
@@ -1271,15 +1281,14 @@ public boolean generateHTML(String title, StringBuilder sb) {
12711281
(get_params().validation != null && DKV.get(get_params().validation._key) == null)) (Job.hygiene(get_params())).toHTML(sb);
12721282
else job().toHTML(sb);
12731283

1274-
final Key val_key = get_params().validation != null ? get_params().validation._key : null;
12751284
sb.append("<div class='alert'>Actions: "
12761285
+ (jobKey != null && UKV.get(jobKey) != null && Job.isRunning(jobKey) ? "<i class=\"icon-stop\"></i>" + Cancel.link(jobKey, "Stop training") + ", " : "")
12771286
+ Inspect2.link("Inspect training data (" + _dataKey + ")", _dataKey) + ", "
1278-
+ (val_key != null ? (Inspect2.link("Inspect validation data (" + val_key + ")", val_key) + ", ") : "")
1287+
+ (_validationKey != null ? (Inspect2.link("Inspect validation data (" + _validationKey + ")", _validationKey) + ", ") : "")
12791288
+ water.api.Predict.link(_key, "Score on dataset") + ", "
1280-
+ DeepLearning.link(_dataKey, "Compute new model", null, responseName(), val_key)
1289+
+ DeepLearning.link(_dataKey, "Compute new model", null, responseName(), _validationKey)
12811290
+ (actual_best_model_key != null && UKV.get(actual_best_model_key) != null && actual_best_model_key != _key ? ", " + DeepLearningModelView.link("Go to best model", actual_best_model_key) : "")
1282-
+ (jobKey == null || ((jobKey != null && UKV.get(jobKey) == null)) || (jobKey != null && UKV.get(jobKey) != null && Job.isEnded(jobKey)) ? ", <i class=\"icon-play\"></i>" + DeepLearning.link(_dataKey, "Continue training this model", _key, responseName(), val_key) : "") + ", "
1291+
+ (jobKey == null || ((jobKey != null && UKV.get(jobKey) == null)) || (jobKey != null && UKV.get(jobKey) != null && Job.isEnded(jobKey)) ? ", <i class=\"icon-play\"></i>" + DeepLearning.link(_dataKey, "Continue training this model", _key, responseName(), _validationKey) : "") + ", "
12831292
+ UIUtils.qlink(SaveModel.class, "model", _key, "Save model") + ", "
12841293
+ "</div>");
12851294

0 commit comments

Comments
 (0)