Commit b69743e

Rename mini_batch to train_samples_per_iteration.

arnocandel committed Apr 7, 2014
1 parent 13fd381 commit b69743e

Showing 8 changed files with 33 additions and 33 deletions.
12 changes: 6 additions & 6 deletions h2o-docs/source/datascience/deeplearning.rst
@@ -119,14 +119,14 @@ greatly.

The number of passes over the training dataset to be carried out.

-**mini batch**
+**train samples per iteration**

The number of training data rows to be processed per iteration. Note that
independent of this parameter, each row is used immediately to update the model
-with (online) stochastic gradient descent. The mini batch size controls the
+with (online) stochastic gradient descent. This parameter controls the
synchronization period between nodes in a distributed environment and the
frequency at which scoring and model cancellation can happen. For example, if
-mini-batch is set to 10,000 on H2O running on 4 nodes, then each node will
+it is set to 10,000 on H2O running on 4 nodes, then each node will
process 2,500 rows per iteration, sampling randomly from their local data.
Then, model averaging between the nodes takes place, and scoring can happen
(dependent on scoring interval and duty factor). Special values are 0 for
@@ -238,7 +238,7 @@ greatly.
**score interval**

The minimum time (in seconds) to elapse between model scoring. The actual
-interval is determined by the size of mini batch and the scoring duty cycle.
+interval is determined by the number of training samples per iteration and the scoring duty cycle.

**score training samples**

@@ -323,9 +323,9 @@ greatly.
**shuffle training data**

Enable shuffling of training data (on each node). This option is
-recommended if training data is replicated on N nodes, and the mini batch size
+recommended if training data is replicated on N nodes, and the number of training samples per iteration
is close to N times the dataset size, where all nodes train with (almost) all
-the data. It is automatically enabled if the mini batch is set to -1 (or to N
+the data. It is automatically enabled if the number of training samples per iteration is set to -1 (or to N
times the dataset size or larger).
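
To tie these two options together, here is a minimal, hypothetical Java sketch of configuring the renamed parameter. The field names (epochs, train_samples_per_iteration, shuffle_training_data) come from the diff below; the surrounding class and method are assumed for illustration only, and creating/running the job is elided.

import hex.deeplearning.DeepLearning;

public class TrainSamplesPerIterationSketch {
  static void configure(DeepLearning p) {
    p.epochs = 10;
    // 10,000 rows per iteration across the cloud: on 4 nodes, each node
    // processes 2,500 rows between model-averaging synchronizations.
    p.train_samples_per_iteration = 10000L;
    // Special values: 0 trains one epoch per iteration; -1 uses all
    // available data (and auto-enables shuffling when data is replicated).
    p.shuffle_training_data = false;
  }
}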

Interpreting the Model
@@ -66,7 +66,7 @@ public static void main(String[] args) throws Exception {
p.response = trainf.lastVec();
p.ignored_cols = null;
p.classification_stop = -1;
-p.mini_batch = -1;
+p.train_samples_per_iteration = -1;
p.score_interval = 30;

p.fast_mode = true; //to match old NeuralNet behavior
42 changes: 21 additions & 21 deletions src/main/java/hex/deeplearning/DeepLearning.java
@@ -43,8 +43,8 @@ public class DeepLearning extends Job.ValidatedJob {
@API(help = "How many times the dataset should be iterated (streamed), can be fractional", filter = Default.class, dmin = 1e-3, json = true)
public double epochs = 10;

@API(help = "Number of training samples between multi-node synchronization and scoring, can be > #rows if replicate_training_data is enabled (0: one epoch, -1: all available data)", filter = Default.class, lmin = -1, json = true)
public long mini_batch = 10000l;
@API(help = "Number of training samples (globally) per MapReduce iteration. Special values are 0: one epoch, -1: all available data (e.g., replicated training data)", filter = Default.class, lmin = -1, json = true)
public long train_samples_per_iteration = 10000l;

@API(help = "Seed for random numbers (affects sampling) - Note: only reproducible when running single threaded", filter = Default.class, json = true)
public long seed = new Random().nextLong();
@@ -168,7 +168,7 @@ public class DeepLearning extends Job.ValidatedJob {
@API(help = "Run on a single node for fine-tuning of model parameters", filter = Default.class, json = true)
public boolean single_node_mode = false;

@API(help = "Enable shuffling of training data (recommended if training data is replicated and mini_batch is close to #nodes x #rows)", filter = Default.class, json = true)
@API(help = "Enable shuffling of training data (recommended if training data is replicated and train_samples_per_iteration is close to #nodes x #rows)", filter = Default.class, json = true)
public boolean shuffle_training_data = false;

public enum ClassSamplingMethod {
@@ -232,7 +232,7 @@ public enum Loss {
"seed",
"epochs",
"score_interval",
"mini_batch",
"train_samples_per_iteration",
"score_duty_cycle",
"classification_stop",
"regression_stop",
@@ -609,14 +609,14 @@ public final DeepLearningModel trainModel(DeepLearningModel model) {
Log.info("Number of chunks of the validation data: " + validScoreFrame.anyVec().nChunks());
}

-// Set mini_batch size (cannot be done earlier since this depends on whether stratified sampling is done)
-mp.mini_batch = computeMiniBatchSize(mp.mini_batch, train.numRows(), mp.replicate_training_data, mp.single_node_mode);
+// Set train_samples_per_iteration size (cannot be done earlier since this depends on whether stratified sampling is done)
+mp.train_samples_per_iteration = computeTrainSamplesPerIteration(mp.train_samples_per_iteration, train.numRows(), mp.replicate_training_data, mp.single_node_mode);
// Determine whether shuffling is enforced
-if(mp.replicate_training_data && (mp.mini_batch == train.numRows()*H2O.CLOUD.size()) && !mp.shuffle_training_data && H2O.CLOUD.size() > 1) {
+if(mp.replicate_training_data && (mp.train_samples_per_iteration == train.numRows()*H2O.CLOUD.size()) && !mp.shuffle_training_data && H2O.CLOUD.size() > 1) {
Log.warn("Enabling training data shuffling, because all nodes train on the full dataset (replicated training data)");
mp.shuffle_training_data = true;
}
-final float rowUsageFraction = computeRowUsageFraction(train.numRows(), mp.mini_batch, mp.replicate_training_data);
+final float rowUsageFraction = computeRowUsageFraction(train.numRows(), mp.train_samples_per_iteration, mp.replicate_training_data);

if (!mp.quiet_mode) Log.info("Initial model:\n" + model.model_info());
Log.info("Starting to train the Deep Learning model.");
@@ -697,32 +697,32 @@ private Frame reBalance(final Frame fr, boolean local) {
}

/**
-* Compute the actual mini_batch size from the user-given parameter
-* @param mini_batch user-given mini_batch size
+* Compute the actual train_samples_per_iteration size from the user-given parameter
+* @param train_samples_per_iteration user-given train_samples_per_iteration size
* @param numRows number of training rows
* @param replicate_training_data whether or not the training data is replicated on each node
* @param single_node_mode whether or not the single node mode is enabled
* @return The total number of training rows to be processed per iteration (summed over all nodes)
*/
-private static long computeMiniBatchSize(long mini_batch, final long numRows, final boolean replicate_training_data, final boolean single_node_mode) {
-assert(mini_batch == 0 || mini_batch == -1 || mini_batch >= 1);
-if (mini_batch == 0 || (!replicate_training_data && (mini_batch == -1 || mini_batch > numRows)) || (replicate_training_data && single_node_mode))
-Log.info("Setting mini_batch (" + mini_batch + ") to one epoch: #rows (" + (mini_batch=numRows) + ").");
-else if (mini_batch == -1 || mini_batch > H2O.CLOUD.size()*numRows)
-Log.info("Setting mini_batch (" + mini_batch + ") to the largest possible number: #nodes x #rows (" + (mini_batch=H2O.CLOUD.size()*numRows) + ").");
-assert(mini_batch != 0 && mini_batch != -1 && mini_batch >= 1);
-return mini_batch;
+private static long computeTrainSamplesPerIteration(long train_samples_per_iteration, final long numRows, final boolean replicate_training_data, final boolean single_node_mode) {
+assert(train_samples_per_iteration == 0 || train_samples_per_iteration == -1 || train_samples_per_iteration >= 1);
+if (train_samples_per_iteration == 0 || (!replicate_training_data && (train_samples_per_iteration == -1 || train_samples_per_iteration > numRows)) || (replicate_training_data && single_node_mode))
+Log.info("Setting train_samples_per_iteration (" + train_samples_per_iteration + ") to one epoch: #rows (" + (train_samples_per_iteration=numRows) + ").");
+else if (train_samples_per_iteration == -1 || train_samples_per_iteration > H2O.CLOUD.size()*numRows)
+Log.info("Setting train_samples_per_iteration (" + train_samples_per_iteration + ") to the largest possible number: #nodes x #rows (" + (train_samples_per_iteration=H2O.CLOUD.size()*numRows) + ").");
+assert(train_samples_per_iteration != 0 && train_samples_per_iteration != -1 && train_samples_per_iteration >= 1);
+return train_samples_per_iteration;
}
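
A quick worked example of the resolution logic above, with hypothetical inputs (the method is private, so these calls are illustrative only):

// Assume numRows = 1,000,000 and H2O.CLOUD.size() == 4.
computeTrainSamplesPerIteration( 0, 1000000, false, false); // -> 1,000,000 (one epoch)
computeTrainSamplesPerIteration(-1, 1000000, false, false); // -> 1,000,000 (capped at #rows)
computeTrainSamplesPerIteration(-1, 1000000, true,  false); // -> 4,000,000 (#nodes x #rows)
computeTrainSamplesPerIteration(-1, 1000000, true,  true);  // -> 1,000,000 (single-node mode)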

/**
* Compute the fraction of rows that need to be used for training during one iteration
* @param numRows number of training rows
-* @param mini_batch number of training rows to be processed per iteration
+* @param train_samples_per_iteration number of training rows to be processed per iteration
* @param replicate_training_data whether or not the training data is replicated on each node
* @return fraction of rows to be used for training during one iteration
*/
-private static float computeRowUsageFraction(final long numRows, long mini_batch, boolean replicate_training_data) {
-float rowUsageFraction = (float)mini_batch / numRows;
+private static float computeRowUsageFraction(final long numRows, long train_samples_per_iteration, boolean replicate_training_data) {
+float rowUsageFraction = (float)train_samples_per_iteration / numRows;
if (replicate_training_data) rowUsageFraction /= H2O.CLOUD.size();
assert(rowUsageFraction > 0 && rowUsageFraction <= 1.);
return rowUsageFraction;
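
For concreteness, a hypothetical worked example of the fraction computed above:

// numRows = 40,000; train_samples_per_iteration = 10,000; 4-node cloud.
// Without replication: 10,000 / 40,000     = 0.25  of each node's local rows per iteration.
// With replication:    10,000 / 40,000 / 4 = 0.0625, since every node holds a full copy.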
2 changes: 1 addition & 1 deletion src/main/java/hex/deeplearning/DeepLearningModel.java
@@ -978,7 +978,7 @@ else if (i < neurons.length-1) {
DocGen.HTML.paragraph(sb, "Epochs: " + String.format("%.3f", epoch_counter) + " / " + String.format("%.3f", model_info.parameters.epochs));
int cores = 0; for (H2ONode n : H2O.CLOUD._memary) cores += n._heartbeat._num_cpus;
DocGen.HTML.paragraph(sb, "Number of compute nodes: " + (model_info.get_params().single_node_mode ? ("1 (" + H2O.NUMCPUS + " threads)") : (H2O.CLOUD.size() + " (" + cores + " threads)")));
DocGen.HTML.paragraph(sb, "Mini-batch size: " + String.format("%,d", model_info.parameters.mini_batch));
DocGen.HTML.paragraph(sb, "Training samples per iteration: " + String.format("%,d", model_info.parameters.train_samples_per_iteration));
final boolean isEnded = Job.isEnded(model_info().job().self());
final long time_so_far = isEnded ? run_time : run_time + System.currentTimeMillis() - _timeLastScoreEnter;
if (time_so_far > 0) {
2 changes: 1 addition & 1 deletion src/test/java/hex/DeepLearningIrisTest.java
@@ -161,7 +161,7 @@ else if (Math.abs(a - b) <= abseps) {
// p.fast_mode = true; //to be the same as old NeuralNet code
p.nesterov_accelerated_gradient = false; //to be the same as reference
// p.nesterov_accelerated_gradient = true; //to be the same as old NeuralNet code
-p.mini_batch = 0; //sync once per period
+p.train_samples_per_iteration = 0; //sync once per period
p.ignore_const_cols = false;
p.shuffle_training_data = false;
p.classification_stop = -1; //don't stop early -> need to compare against reference, which doesn't stop either
2 changes: 1 addition & 1 deletion src/test/java/hex/DeepLearningProstateTest.java
@@ -92,7 +92,7 @@ public class DeepLearningProstateTest extends TestUtil {
p.destination_key = dest;
p.seed = seed;
p.validation = valid;
-p.mini_batch = 0;
+p.train_samples_per_iteration = 0;
p.force_load_balance = load_balance;
p.replicate_training_data = replicate;
p.shuffle_training_data = shuffle;
2 changes: 1 addition & 1 deletion src/test/java/hex/DeepLearningSpiralsTest.java
@@ -48,7 +48,7 @@ public class DeepLearningSpiralsTest extends TestUtil {
p.validation = null;
p.score_interval = 10;
p.ignored_cols = null;
-p.mini_batch = 0; //sync once per period
+p.train_samples_per_iteration = 0; //sync once per period
p.quiet_mode = true;
p.fast_mode = true;
p.ignore_const_cols = true;
2 changes: 1 addition & 1 deletion src/test/java/hex/DeepLearningVsNeuralNet.java
@@ -179,7 +179,7 @@ else if (Math.abs(a - b) <= abseps) {
p.validation = null;
p.quiet_mode = true;
p.fast_mode = fast_mode;
-p.mini_batch = 0; //sync once per period
+p.train_samples_per_iteration = 0; //sync once per period
p.ignore_const_cols = false; //same as old NeuralNet code
p.shuffle_training_data = false; //same as old NeuralNet code
p.nesterov_accelerated_gradient = true; //same as old NeuralNet code
