From b45d4a5add7398e74bbedf6c27cba8148f2a44f5 Mon Sep 17 00:00:00 2001
From: Arno Candel
Date: Sun, 12 Jan 2014 21:46:55 -0800
Subject: [PATCH] Minor cleanup.

---
 .../java/samples/NeuralNetMnistDrednet.java | 16 +++----
 src/main/java/hex/Layer.java                | 47 +------------------
 src/main/java/hex/NeuralNet.java            | 19 +++++---
 3 files changed, 21 insertions(+), 61 deletions(-)

diff --git a/h2o-samples/src/main/java/samples/NeuralNetMnistDrednet.java b/h2o-samples/src/main/java/samples/NeuralNetMnistDrednet.java
index 1c473ff50b..4d9835673a 100644
--- a/h2o-samples/src/main/java/samples/NeuralNetMnistDrednet.java
+++ b/h2o-samples/src/main/java/samples/NeuralNetMnistDrednet.java
@@ -22,13 +22,13 @@ public static void main(String[] args) throws Exception {
   @Override protected Layer[] build(Vec[] data, Vec labels, VecsInput inputStats, VecSoftmax outputStats) {
     Layer[] ls = new Layer[5];
     ls[0] = new VecsInput(data, inputStats, 0.2);
-    ls[1] = new Layer.RectifierDropout(102);
-    ls[2] = new Layer.RectifierDropout(102);
-    ls[3] = new Layer.RectifierDropout(204);
+    ls[1] = new Layer.RectifierDropout(1024);
+    ls[2] = new Layer.RectifierDropout(1024);
+    ls[3] = new Layer.RectifierDropout(2048);
     ls[4] = new VecSoftmax(labels, outputStats, NeuralNet.NeuralNetParams.Loss.CrossEntropy);

     NeuralNet.NeuralNetParams p = new NeuralNet.NeuralNetParams();
-    p.rate = 0.1f;
+    p.rate = 0.01f;
     p.rate_annealing = 1e-6f;
     p.epochs = 1000;
     p.activation = NeuralNet.NeuralNetParams.Activation.RectifierWithDropout;
@@ -54,10 +54,10 @@ public static void main(String[] args) throws Exception {
   @Override protected void startTraining(Layer[] ls) {
     // Initial training on one thread to increase stability
     // If the net still produces NaNs, reduce learning rate
     //TODO: Automate this
-//    System.out.println("Initial single-threaded training");
-//    _trainer = new Trainer.Direct(ls, 0.1, self());
-//    _trainer.start();
-//    _trainer.join();
+    System.out.println("Initial single-threaded training");
+    _trainer = new Trainer.Direct(ls, 0.1, self());
+    _trainer.start();
+    _trainer.join();

     System.out.println("Main training");
diff --git a/src/main/java/hex/Layer.java b/src/main/java/hex/Layer.java
index 85b2b383cd..a1c88e98cc 100644
--- a/src/main/java/hex/Layer.java
+++ b/src/main/java/hex/Layer.java
@@ -210,24 +210,9 @@ final void bprop(int u, float g, float r, float m) {
     double r2 = 0;
     for( int i = 0; i < _previous._a.length; i++ ) {
       int w = u * _previous._a.length + i;
-
-//      if (Float.isInfinite(g * _w[w])) {
-//        System.out.println("g * w is inf: g = " + g + " w = " + _w[w]);
-//        //System.exit(0);
-//      }
       if( _previous._e != null )
         _previous._e[i] += g * _w[w];
       float d = g * _previous._a[i] - _w[w] * l2 - Math.signum(_w[w]) * l1;
-//      if (Math.abs(d) > 1e10) {
-//        System.out.println("d is getting large: d = " + d + " because g = " + g + " and a = " + _previous._a[i]);
-//        //System.exit(0);
-//      }
-
-//      if (Float.isInfinite(d)) {
-//        System.out.println("d is inf: g = " + g + " a = " + _previous._a[i]);
-//        //System.exit(0);
-//      }
-

       // TODO finish per-weight acceleration, doesn't help for now
 //      if( _wp != null && d != 0 ) {
@@ -251,19 +236,8 @@
         _wm[w] = d = _wm[w] + d;
       }
       _w[w] += r * d;
-//      if (Math.abs(_w[w]) > 1e10) {
-//        System.out.println("w is getting large: w = " + _w[w] + " because r = " + r + " and d = " + d);
-//        //System.exit(0);
-//      }
       r2 += _w[w] * _w[w];
     }
-//    if (r2 > 1e20) {
-//      System.out.println("r2 is getting large: r2 = " + r2);
-//    }
-//    if (Double.isInfinite(r2)) {
System.out.println("r2 is inf."); -// //System.exit(0); -// } if( r2 > max_w2) { // C.f. Improving neural networks by preventing co-adaptation of feature detectors double scale = Math.sqrt(max_w2) / Math.sqrt(r2); for( int i = 0; i < _previous._a.length; i++ ) { @@ -945,20 +919,9 @@ public Rectifier(int units) { for( int o = 0; o < _a.length; o++ ) { _a[o] = 0; if( !training || dropout == null || dropout.unit_active(o) ) { - for( int i = 0; i < _previous._a.length; i++ ) { + for( int i = 0; i < _previous._a.length; i++ ) _a[o] += _w[o * _previous._a.length + i] * _previous._a[i]; -// if (Math.abs(_a[o]) > 1e10) { -// System.out.println("a is getting large: previous a = " + _previous._a[i] + " weight = " + _w[o * _previous._a.length + i] ); -// //System.exit(0); -// } - } _a[o] += _b[o]; -// if (_a[o] > 1) { -// _a[o] = 1; -// } -// if (Float.isInfinite(_a[o])) { -// _a[o] = 1e10f; -// } if( _a[o] < 0 ) _a[o] = 0; else if( !training && dropout != null ) @@ -975,14 +938,6 @@ else if( !training && dropout != null ) //(d/dx)(max(0,x)) = 1 if x > 0, otherwise 0 if( _a[u] > 0 ) { // don't use >= final float g = _e[u]; // * 1.0 (from derivative of rectifier) -// if (Float.isNaN(g)) { -// System.out.println("e is NaN"); -// //System.exit(0); -// } -// if (Float.isInfinite(g)) { -// System.out.println("e is inf"); -// //System.exit(0); -// } bprop(u, g, r, m); } // otherwise g = _e[u] * 0.0 = 0 and we don't allow other contributions by (and to) weights and momenta diff --git a/src/main/java/hex/NeuralNet.java b/src/main/java/hex/NeuralNet.java index 0a9d39039b..3cf2d37457 100644 --- a/src/main/java/hex/NeuralNet.java +++ b/src/main/java/hex/NeuralNet.java @@ -100,9 +100,11 @@ protected void registered(RequestServer.API_VERSION ver) { ) { arg.disable("Only with Uniform or Normal initial weight distributions", inputArgs); } - if( arg._name.equals("mode") && (H2O.CLOUD._memary.length > 1) ) { - mode = NeuralNetParams.ExecutionMode.MapReduce_Hogwild; - arg.disable("Using MapReduce since cluster size > 1.", inputArgs); + if( arg._name.equals("mode") ) { + if (H2O.CLOUD._memary.length > 1) { + + arg.disable("Using MapReduce since cluster size > 1.", inputArgs); + } } } @@ -340,7 +342,6 @@ void startTrain() { //validate continuously while(!cancelled() && running) { eval(valid, validResp); - Thread.sleep(2000); } // remove validation data @@ -582,7 +583,7 @@ public static class NeuralNetModel extends Model { @API(help = "RMS error") public double[] rms_error; - public boolean unstable; + public boolean unstable = false; NeuralNetModel(Key selfKey, Key dataKey, Frame fr, Layer[] ls, NeuralNetParams p) { super(selfKey, dataKey, fr); @@ -590,7 +591,7 @@ public static class NeuralNetModel extends Model { layers = ls; weights = new float[ls.length][]; biases = new float[ls.length][]; - for( int y = 1; y < layers.length-1; y++ ) { + for( int y = 1; y < layers.length; y++ ) { weights[y] = layers[y]._w; biases[y] = layers[y]._b; } @@ -730,7 +731,10 @@ public static class NeuralNetProgress extends Progress2 { validation_errors = model.validation_errors; class_names = model.classNames(); confusion_matrix = model.confusion_matrix; - if (model.unstable && job != null) job.cancel(); + if (model.unstable && job != null) { + System.out.println("Aborting job due to instability. 
+          System.out.println("Aborting job due to instability. Try a smaller learning rate and/or single-node mode.");
+          job.cancel();
+        }
       }
       return super.serve();
     }
@@ -908,6 +912,7 @@ public NeuralNetScore() {
     @Override public boolean toHTML(StringBuilder sb) {
       DocGen.HTML.section(sb, "Classification error: " + String.format("%5.2f %%", 100 * classification_error));
       DocGen.HTML.section(sb, "Mean square error: " + mean_square_error);
+      DocGen.HTML.section(sb, "Mean cross entropy: " + cross_entropy);
       confusion(sb, "Confusion Matrix", response.domain(), confusion_matrix);
       return true;
     }
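
Note (editor's illustration, not part of the patch): the "if( r2 > max_w2 )" branch kept as context in the Layer.java hunk above applies the max-norm weight constraint referenced by the retained comment ("Improving neural networks by preventing co-adaptation of feature detectors"). A minimal standalone sketch of that rescaling, using hypothetical names rather than H2O's fields:

    // Cap the squared L2 norm of one unit's incoming weights at maxW2 by rescaling
    // the whole row, mirroring the r2 > max_w2 branch of Layer.bprop.
    static void capIncomingWeights(float[] row, double maxW2) {
      double r2 = 0;
      for( float w : row )
        r2 += w * w;
      if( r2 > maxW2 ) {
        double scale = Math.sqrt(maxW2) / Math.sqrt(r2);
        for( int i = 0; i < row.length; i++ )
          row[i] *= scale;
      }
    }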