Commit b69743e

Rename mini_batch to train_samples_per_iteration.

arnocandel committed Apr 7, 2014
1 parent 13fd381 commit b69743e

Showing 8 changed files with 33 additions and 33 deletions.
12 changes: 6 additions & 6 deletions h2o-docs/source/datascience/deeplearning.rst
@@ -119,14 +119,14 @@ greatly.

The number of passes over the training dataset to be carried out.

-**mini batch**
+**train samples per iteration**

The number of training data rows to be processed per iteration. Note that
independent of this parameter, each row is used immediately to update the model
-with (online) stochastic gradient descent. The mini batch size controls the
+with (online) stochastic gradient descent. This parameter controls the
synchronization period between nodes in a distributed environment and the
frequency at which scoring and model cancellation can happen. For example, if
-mini-batch is set to 10,000 on H2O running on 4 nodes, then each node will
+it is set to 10,000 on H2O running on 4 nodes, then each node will
process 2,500 rows per iteration, sampling randomly from their local data.
Then, model averaging between the nodes takes place, and scoring can happen
(dependent on scoring interval and duty factor). Special values are 0 for
@@ -238,7 +238,7 @@ greatly.
**score interval**

The minimum time (in seconds) to elapse between model scoring. The actual
-interval is determined by the size of mini batch and the scoring duty cycle.
+interval is determined by the number of training samples per iteration and the scoring duty cycle.

**score training samples**

@@ -323,9 +323,9 @@ greatly.
**shuffle training data**

Enable shuffling of training data (on each node). This option is
-recommended if training data is replicated on N nodes, and the mini batch size
+recommended if training data is replicated on N nodes, and the number of training samples per iteration
is close to N times the dataset size, where all nodes train with (almost) all
-the data. It is automatically enabled if the mini batch is set to -1 (or to N
+the data. It is automatically enabled if the number of training samples per iteration is set to -1 (or to N
times the dataset size or larger).
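
To tie these two options together, here is a minimal, hypothetical Java sketch of configuring the renamed parameter. The field names (epochs, train_samples_per_iteration, shuffle_training_data) come from the diff below; the surrounding class and method are assumed for illustration only, and creating/running the job is elided.

import hex.deeplearning.DeepLearning;

public class TrainSamplesPerIterationSketch {
  static void configure(DeepLearning p) {
    p.epochs = 10;
    // 10,000 rows per iteration across the cloud: on 4 nodes, each node
    // processes 2,500 rows between model-averaging synchronizations.
    p.train_samples_per_iteration = 10000L;
    // Special values: 0 trains one epoch per iteration; -1 uses all
    // available data (and auto-enables shuffling when data is replicated).
    p.shuffle_training_data = false;
  }
}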

Interpreting the Model
@@ -66,7 +66,7 @@ public static void main(String[] args) throws Exception {
p.response = trainf.lastVec();
p.ignored_cols = null;
p.classification_stop = -1;
-p.mini_batch = -1;
+p.train_samples_per_iteration = -1;
p.score_interval = 30;

p.fast_mode = true; //to match old NeuralNet behavior
42 changes: 21 additions & 21 deletions src/main/java/hex/deeplearning/DeepLearning.java
@@ -43,8 +43,8 @@ public class DeepLearning extends Job.ValidatedJob {
@API(help = "How many times the dataset should be iterated (streamed), can be fractional", filter = Default.class, dmin = 1e-3, json = true)
public double epochs = 10;

@API(help = "Number of training samples between multi-node synchronization and scoring, can be > #rows if replicate_training_data is enabled (0: one epoch, -1: all available data)", filter = Default.class, lmin = -1, json = true)
public long mini_batch = 10000l;
@API(help = "Number of training samples (globally) per MapReduce iteration. Special values are 0: one epoch, -1: all available data (e.g., replicated training data)", filter = Default.class, lmin = -1, json = true)
public long train_samples_per_iteration = 10000l;

@API(help = "Seed for random numbers (affects sampling) - Note: only reproducible when running single threaded", filter = Default.class, json = true)
public long seed = new Random().nextLong();
@@ -168,7 +168,7 @@ public class DeepLearning extends Job.ValidatedJob {
@API(help = "Run on a single node for fine-tuning of model parameters", filter = Default.class, json = true)
public boolean single_node_mode = false;

@API(help = "Enable shuffling of training data (recommended if training data is replicated and mini_batch is close to #nodes x #rows)", filter = Default.class, json = true)
@API(help = "Enable shuffling of training data (recommended if training data is replicated and train_samples_per_iteration is close to #nodes x #rows)", filter = Default.class, json = true)
public boolean shuffle_training_data = false;

public enum ClassSamplingMethod {
@@ -232,7 +232,7 @@ public enum Loss {
"seed",
"epochs",
"score_interval",
"mini_batch",
"train_samples_per_iteration",
"score_duty_cycle",
"classification_stop",
"regression_stop",
@@ -609,14 +609,14 @@ public final DeepLearningModel trainModel(DeepLearningModel model) {
Log.info("Number of chunks of the validation data: " + validScoreFrame.anyVec().nChunks());
}

-// Set mini_batch size (cannot be done earlier since this depends on whether stratified sampling is done)
-mp.mini_batch = computeMiniBatchSize(mp.mini_batch, train.numRows(), mp.replicate_training_data, mp.single_node_mode);
+// Set train_samples_per_iteration size (cannot be done earlier since this depends on whether stratified sampling is done)
+mp.train_samples_per_iteration = computeTrainSamplesPerIteration(mp.train_samples_per_iteration, train.numRows(), mp.replicate_training_data, mp.single_node_mode);
// Determine whether shuffling is enforced
-if(mp.replicate_training_data && (mp.mini_batch == train.numRows()*H2O.CLOUD.size()) && !mp.shuffle_training_data && H2O.CLOUD.size() > 1) {
+if(mp.replicate_training_data && (mp.train_samples_per_iteration == train.numRows()*H2O.CLOUD.size()) && !mp.shuffle_training_data && H2O.CLOUD.size() > 1) {
Log.warn("Enabling training data shuffling, because all nodes train on the full dataset (replicated training data)");
mp.shuffle_training_data = true;
}
-final float rowUsageFraction = computeRowUsageFraction(train.numRows(), mp.mini_batch, mp.replicate_training_data);
+final float rowUsageFraction = computeRowUsageFraction(train.numRows(), mp.train_samples_per_iteration, mp.replicate_training_data);

if (!mp.quiet_mode) Log.info("Initial model:\n" + model.model_info());
Log.info("Starting to train the Deep Learning model.");
@@ -697,32 +697,32 @@ private Frame reBalance(final Frame fr, boolean local) {
}

/**
-* Compute the actual mini_batch size from the user-given parameter
-* @param mini_batch user-given mini_batch size
+* Compute the actual train_samples_per_iteration size from the user-given parameter
+* @param train_samples_per_iteration user-given train_samples_per_iteration size
* @param numRows number of training rows
* @param replicate_training_data whether or not the training data is replicated on each node
* @param single_node_mode whether or not the single node mode is enabled
* @return The total number of training rows to be processed per iteration (summed over all nodes)
*/
-private static long computeMiniBatchSize(long mini_batch, final long numRows, final boolean replicate_training_data, final boolean single_node_mode) {
-assert(mini_batch == 0 || mini_batch == -1 || mini_batch >= 1);
-if (mini_batch == 0 || (!replicate_training_data && (mini_batch == -1 || mini_batch > numRows)) || (replicate_training_data && single_node_mode))
-Log.info("Setting mini_batch (" + mini_batch + ") to one epoch: #rows (" + (mini_batch=numRows) + ").");
-else if (mini_batch == -1 || mini_batch > H2O.CLOUD.size()*numRows)
-Log.info("Setting mini_batch (" + mini_batch + ") to the largest possible number: #nodes x #rows (" + (mini_batch=H2O.CLOUD.size()*numRows) + ").");
-assert(mini_batch != 0 && mini_batch != -1 && mini_batch >= 1);
-return mini_batch;
+private static long computeTrainSamplesPerIteration(long train_samples_per_iteration, final long numRows, final boolean replicate_training_data, final boolean single_node_mode) {
+assert(train_samples_per_iteration == 0 || train_samples_per_iteration == -1 || train_samples_per_iteration >= 1);
+if (train_samples_per_iteration == 0 || (!replicate_training_data && (train_samples_per_iteration == -1 || train_samples_per_iteration > numRows)) || (replicate_training_data && single_node_mode))
+Log.info("Setting train_samples_per_iteration (" + train_samples_per_iteration + ") to one epoch: #rows (" + (train_samples_per_iteration=numRows) + ").");
+else if (train_samples_per_iteration == -1 || train_samples_per_iteration > H2O.CLOUD.size()*numRows)
+Log.info("Setting train_samples_per_iteration (" + train_samples_per_iteration + ") to the largest possible number: #nodes x #rows (" + (train_samples_per_iteration=H2O.CLOUD.size()*numRows) + ").");
+assert(train_samples_per_iteration != 0 && train_samples_per_iteration != -1 && train_samples_per_iteration >= 1);
+return train_samples_per_iteration;
}
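
A quick worked example of the resolution logic above, with hypothetical inputs (the method is private, so these calls are illustrative only):

// Assume numRows = 1,000,000 and H2O.CLOUD.size() == 4.
computeTrainSamplesPerIteration( 0, 1000000, false, false); // -> 1,000,000 (one epoch)
computeTrainSamplesPerIteration(-1, 1000000, false, false); // -> 1,000,000 (capped at #rows)
computeTrainSamplesPerIteration(-1, 1000000, true,  false); // -> 4,000,000 (#nodes x #rows)
computeTrainSamplesPerIteration(-1, 1000000, true,  true);  // -> 1,000,000 (single-node mode)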

/**
* Compute the fraction of rows that need to be used for training during one iteration
* @param numRows number of training rows
-* @param mini_batch number of training rows to be processed per iteration
+* @param train_samples_per_iteration number of training rows to be processed per iteration
* @param replicate_training_data whether or not the training data is replicated on each node
* @return fraction of rows to be used for training during one iteration
*/
-private static float computeRowUsageFraction(final long numRows, long mini_batch, boolean replicate_training_data) {
-float rowUsageFraction = (float)mini_batch / numRows;
+private static float computeRowUsageFraction(final long numRows, long train_samples_per_iteration, boolean replicate_training_data) {
+float rowUsageFraction = (float)train_samples_per_iteration / numRows;
if (replicate_training_data) rowUsageFraction /= H2O.CLOUD.size();
assert(rowUsageFraction > 0 && rowUsageFraction <= 1.);
return rowUsageFraction;
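
For concreteness, a hypothetical worked example of the fraction computed above:

// numRows = 40,000; train_samples_per_iteration = 10,000; 4-node cloud.
// Without replication: 10,000 / 40,000     = 0.25  of each node's local rows per iteration.
// With replication:    10,000 / 40,000 / 4 = 0.0625, since every node holds a full copy.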
2 changes: 1 addition & 1 deletion src/main/java/hex/deeplearning/DeepLearningModel.java
@@ -978,7 +978,7 @@ else if (i < neurons.length-1) {
DocGen.HTML.paragraph(sb, "Epochs: " + String.format("%.3f", epoch_counter) + " / " + String.format("%.3f", model_info.parameters.epochs));
int cores = 0; for (H2ONode n : H2O.CLOUD._memary) cores += n._heartbeat._num_cpus;
DocGen.HTML.paragraph(sb, "Number of compute nodes: " + (model_info.get_params().single_node_mode ? ("1 (" + H2O.NUMCPUS + " threads)") : (H2O.CLOUD.size() + " (" + cores + " threads)")));
DocGen.HTML.paragraph(sb, "Mini-batch size: " + String.format("%,d", model_info.parameters.mini_batch));
DocGen.HTML.paragraph(sb, "Training samples per iteration: " + String.format("%,d", model_info.parameters.train_samples_per_iteration));
final boolean isEnded = Job.isEnded(model_info().job().self());
final long time_so_far = isEnded ? run_time : run_time + System.currentTimeMillis() - _timeLastScoreEnter;
if (time_so_far > 0) {
2 changes: 1 addition & 1 deletion src/test/java/hex/DeepLearningIrisTest.java
@@ -161,7 +161,7 @@ else if (Math.abs(a - b) <= abseps) {
// p.fast_mode = true; //to be the same as old NeuralNet code
p.nesterov_accelerated_gradient = false; //to be the same as reference
// p.nesterov_accelerated_gradient = true; //to be the same as old NeuralNet code
-p.mini_batch = 0; //sync once per period
+p.train_samples_per_iteration = 0; //sync once per period
p.ignore_const_cols = false;
p.shuffle_training_data = false;
p.classification_stop = -1; //don't stop early -> need to compare against reference, which doesn't stop either
2 changes: 1 addition & 1 deletion src/test/java/hex/DeepLearningProstateTest.java
@@ -92,7 +92,7 @@ public class DeepLearningProstateTest extends TestUtil {
p.destination_key = dest;
p.seed = seed;
p.validation = valid;
-p.mini_batch = 0;
+p.train_samples_per_iteration = 0;
p.force_load_balance = load_balance;
p.replicate_training_data = replicate;
p.shuffle_training_data = shuffle;
2 changes: 1 addition & 1 deletion src/test/java/hex/DeepLearningSpiralsTest.java
@@ -48,7 +48,7 @@ public class DeepLearningSpiralsTest extends TestUtil {
p.validation = null;
p.score_interval = 10;
p.ignored_cols = null;
-p.mini_batch = 0; //sync once per period
+p.train_samples_per_iteration = 0; //sync once per period
p.quiet_mode = true;
p.fast_mode = true;
p.ignore_const_cols = true;
2 changes: 1 addition & 1 deletion src/test/java/hex/DeepLearningVsNeuralNet.java
@@ -179,7 +179,7 @@ else if (Math.abs(a - b) <= abseps) {
p.validation = null;
p.quiet_mode = true;
p.fast_mode = fast_mode;
-p.mini_batch = 0; //sync once per period
+p.train_samples_per_iteration = 0; //sync once per period
p.ignore_const_cols = false; //same as old NeuralNet code
p.shuffle_training_data = false; //same as old NeuralNet code
p.nesterov_accelerated_gradient = true; //same as old NeuralNet code
