From b45d4a5add7398e74bbedf6c27cba8148f2a44f5 Mon Sep 17 00:00:00 2001
From: Arno Candel
Date: Sun, 12 Jan 2014 21:46:55 -0800
Subject: [PATCH] Minor cleanup.

---
 .../java/samples/NeuralNetMnistDrednet.java | 16 +++----
 src/main/java/hex/Layer.java                | 47 +------------------
 src/main/java/hex/NeuralNet.java            | 19 +++++---
 3 files changed, 21 insertions(+), 61 deletions(-)

diff --git a/h2o-samples/src/main/java/samples/NeuralNetMnistDrednet.java b/h2o-samples/src/main/java/samples/NeuralNetMnistDrednet.java
index 1c473ff50b..4d9835673a 100644
--- a/h2o-samples/src/main/java/samples/NeuralNetMnistDrednet.java
+++ b/h2o-samples/src/main/java/samples/NeuralNetMnistDrednet.java
@@ -22,13 +22,13 @@ public static void main(String[] args) throws Exception {
   @Override protected Layer[] build(Vec[] data, Vec labels, VecsInput inputStats, VecSoftmax outputStats) {
     Layer[] ls = new Layer[5];
     ls[0] = new VecsInput(data, inputStats, 0.2);
-    ls[1] = new Layer.RectifierDropout(102);
-    ls[2] = new Layer.RectifierDropout(102);
-    ls[3] = new Layer.RectifierDropout(204);
+    ls[1] = new Layer.RectifierDropout(1024);
+    ls[2] = new Layer.RectifierDropout(1024);
+    ls[3] = new Layer.RectifierDropout(2048);
     ls[4] = new VecSoftmax(labels, outputStats, NeuralNet.NeuralNetParams.Loss.CrossEntropy);

     NeuralNet.NeuralNetParams p = new NeuralNet.NeuralNetParams();
-    p.rate = 0.1f;
+    p.rate = 0.01f;
     p.rate_annealing = 1e-6f;
     p.epochs = 1000;
     p.activation = NeuralNet.NeuralNetParams.Activation.RectifierWithDropout;
@@ -54,10 +54,10 @@ public static void main(String[] args) throws Exception {
   @Override protected void startTraining(Layer[] ls) {
     // Initial training on one thread to increase stability
     // If the net still produces NaNs, reduce learning rate
     //TODO: Automate this
-//    System.out.println("Initial single-threaded training");
-//    _trainer = new Trainer.Direct(ls, 0.1, self());
-//    _trainer.start();
-//    _trainer.join();
+    System.out.println("Initial single-threaded training");
+    _trainer = new Trainer.Direct(ls, 0.1, self());
+    _trainer.start();
+    _trainer.join();

     System.out.println("Main training");
diff --git a/src/main/java/hex/Layer.java b/src/main/java/hex/Layer.java
index 85b2b383cd..a1c88e98cc 100644
--- a/src/main/java/hex/Layer.java
+++ b/src/main/java/hex/Layer.java
@@ -210,24 +210,9 @@ final void bprop(int u, float g, float r, float m) {
     double r2 = 0;
     for( int i = 0; i < _previous._a.length; i++ ) {
       int w = u * _previous._a.length + i;
-
-//      if (Float.isInfinite(g * _w[w])) {
-//        System.out.println("g * w is inf: g = " + g + " w = " + _w[w]);
-//        //System.exit(0);
-//      }
       if( _previous._e != null )
         _previous._e[i] += g * _w[w];
       float d = g * _previous._a[i] - _w[w] * l2 - Math.signum(_w[w]) * l1;
-//      if (Math.abs(d) > 1e10) {
-//        System.out.println("d is getting large: d = " + d + " because g = " + g + " and a = " + _previous._a[i]);
-//        //System.exit(0);
-//      }
-
-//      if (Float.isInfinite(d)) {
-//        System.out.println("d is inf: g = " + g + " a = " + _previous._a[i]);
-//        //System.exit(0);
-//      }
-

       // TODO finish per-weight acceleration, doesn't help for now
 //      if( _wp != null && d != 0 ) {
@@ -251,19 +236,8 @@
         _wm[w] = d = _wm[w] + d;
       }
       _w[w] += r * d;
-//      if (Math.abs(_w[w]) > 1e10) {
-//        System.out.println("w is getting large: w = " + _w[w] + " because r = " + r + " and d = " + d);
-//        //System.exit(0);
-//      }
       r2 += _w[w] * _w[w];
     }
-//    if (r2 > 1e20) {
-//      System.out.println("r2 is getting large: r2 = " + r2);
-//    }
-//    if (Double.isInfinite(r2)) {
System.out.println("r2 is inf."); -// //System.exit(0); -// } if( r2 > max_w2) { // C.f. Improving neural networks by preventing co-adaptation of feature detectors double scale = Math.sqrt(max_w2) / Math.sqrt(r2); for( int i = 0; i < _previous._a.length; i++ ) { @@ -945,20 +919,9 @@ public Rectifier(int units) { for( int o = 0; o < _a.length; o++ ) { _a[o] = 0; if( !training || dropout == null || dropout.unit_active(o) ) { - for( int i = 0; i < _previous._a.length; i++ ) { + for( int i = 0; i < _previous._a.length; i++ ) _a[o] += _w[o * _previous._a.length + i] * _previous._a[i]; -// if (Math.abs(_a[o]) > 1e10) { -// System.out.println("a is getting large: previous a = " + _previous._a[i] + " weight = " + _w[o * _previous._a.length + i] ); -// //System.exit(0); -// } - } _a[o] += _b[o]; -// if (_a[o] > 1) { -// _a[o] = 1; -// } -// if (Float.isInfinite(_a[o])) { -// _a[o] = 1e10f; -// } if( _a[o] < 0 ) _a[o] = 0; else if( !training && dropout != null ) @@ -975,14 +938,6 @@ else if( !training && dropout != null ) //(d/dx)(max(0,x)) = 1 if x > 0, otherwise 0 if( _a[u] > 0 ) { // don't use >= final float g = _e[u]; // * 1.0 (from derivative of rectifier) -// if (Float.isNaN(g)) { -// System.out.println("e is NaN"); -// //System.exit(0); -// } -// if (Float.isInfinite(g)) { -// System.out.println("e is inf"); -// //System.exit(0); -// } bprop(u, g, r, m); } // otherwise g = _e[u] * 0.0 = 0 and we don't allow other contributions by (and to) weights and momenta diff --git a/src/main/java/hex/NeuralNet.java b/src/main/java/hex/NeuralNet.java index 0a9d39039b..3cf2d37457 100644 --- a/src/main/java/hex/NeuralNet.java +++ b/src/main/java/hex/NeuralNet.java @@ -100,9 +100,11 @@ protected void registered(RequestServer.API_VERSION ver) { ) { arg.disable("Only with Uniform or Normal initial weight distributions", inputArgs); } - if( arg._name.equals("mode") && (H2O.CLOUD._memary.length > 1) ) { - mode = NeuralNetParams.ExecutionMode.MapReduce_Hogwild; - arg.disable("Using MapReduce since cluster size > 1.", inputArgs); + if( arg._name.equals("mode") ) { + if (H2O.CLOUD._memary.length > 1) { + + arg.disable("Using MapReduce since cluster size > 1.", inputArgs); + } } } @@ -340,7 +342,6 @@ void startTrain() { //validate continuously while(!cancelled() && running) { eval(valid, validResp); - Thread.sleep(2000); } // remove validation data @@ -582,7 +583,7 @@ public static class NeuralNetModel extends Model { @API(help = "RMS error") public double[] rms_error; - public boolean unstable; + public boolean unstable = false; NeuralNetModel(Key selfKey, Key dataKey, Frame fr, Layer[] ls, NeuralNetParams p) { super(selfKey, dataKey, fr); @@ -590,7 +591,7 @@ public static class NeuralNetModel extends Model { layers = ls; weights = new float[ls.length][]; biases = new float[ls.length][]; - for( int y = 1; y < layers.length-1; y++ ) { + for( int y = 1; y < layers.length; y++ ) { weights[y] = layers[y]._w; biases[y] = layers[y]._b; } @@ -730,7 +731,10 @@ public static class NeuralNetProgress extends Progress2 { validation_errors = model.validation_errors; class_names = model.classNames(); confusion_matrix = model.confusion_matrix; - if (model.unstable && job != null) job.cancel(); + if (model.unstable && job != null) { + System.out.println("Aborting job due to instability. 
+          System.out.println("Aborting job due to instability. Try a smaller learning rate and/or single-node mode.");
+          job.cancel();
+        }
       }
       return super.serve();
     }
@@ -908,6 +912,7 @@ public NeuralNetScore() {
     @Override public boolean toHTML(StringBuilder sb) {
       DocGen.HTML.section(sb, "Classification error: " + String.format("%5.2f %%", 100 * classification_error));
       DocGen.HTML.section(sb, "Mean square error: " + mean_square_error);
+      DocGen.HTML.section(sb, "Mean cross entropy: " + cross_entropy);
       confusion(sb, "Confusion Matrix", response.domain(), confusion_matrix);
       return true;
     }
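
Note (editor's illustration, not part of the patch): the "if( r2 > max_w2 )" branch kept as context in the Layer.java hunk above applies the max-norm weight constraint referenced by the retained comment ("Improving neural networks by preventing co-adaptation of feature detectors"). A minimal standalone sketch of that rescaling, using hypothetical names rather than H2O's fields:

    // Cap the squared L2 norm of one unit's incoming weights at maxW2 by rescaling
    // the whole row, mirroring the r2 > max_w2 branch of Layer.bprop.
    static void capIncomingWeights(float[] row, double maxW2) {
      double r2 = 0;
      for( float w : row )
        r2 += w * w;
      if( r2 > maxW2 ) {
        double scale = Math.sqrt(maxW2) / Math.sqrt(r2);
        for( int i = 0; i < row.length; i++ )
          row[i] *= scale;
      }
    }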