
Commit

Use thread-local PRNG.
arnocandel committed Jan 10, 2014
1 parent 35316a2 commit 292874d
Showing 2 changed files with 40 additions and 56 deletions.
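Background for the diff below: the commit drops the shared static Random _rand in Layer.java and routes every random draw through java.util.concurrent.ThreadLocalRandom, so each training thread uses its own generator. A likely consequence, visible in the NeuralNet.java half of the diff, is that the user-facing seed parameter is removed along with it. The following standalone Java sketch is my own illustration of the before/after pattern, not code from the repository; only ThreadLocalRandom.current() is taken from the diff, the class and method names around it are assumptions.

import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;

class ThreadLocalRngSketch {
    // Before: one Random instance shared by every training thread -- a contention point,
    // and a single stream that threads interleave unpredictably under Hogwild-style updates.
    static final Random SHARED = new Random(); // time-based seed

    // After: the pattern this commit adopts -- ask for the calling thread's own generator.
    static Random getRNG() {
        return ThreadLocalRandom.current();
    }

    public static void main(String[] args) throws InterruptedException {
        Runnable work = new Runnable() {
            @Override public void run() {
                byte[] bits = new byte[8];
                getRNG().nextBytes(bits);        // e.g. dropout masks, as in Layer.fprop
                float f = getRNG().nextFloat();  // e.g. per-feature input dropout test
                System.out.println(Thread.currentThread().getName() + ": " + f);
            }
        };
        Thread a = new Thread(work, "worker-a");
        Thread b = new Thread(work, "worker-b");
        a.start(); b.start();
        a.join(); b.join();
    }
}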
73 changes: 35 additions & 38 deletions src/main/java/hex/Layer.java
@@ -60,9 +60,6 @@ public enum Loss {
   @API(help = "Fast mode (minor approximation)")
   public boolean fast_mode;
 
-  //public static Random _rand = new MersenneTwisterRNG(MersenneTwisterRNG.SEEDS);
-  public static Random _rand = new Random(); //time-based
-
   // Weights, biases, activity, error
   // TODO hold transients only for current two layers
   // TODO extract transients & code in separate one-shot trees to avoid cloning
@@ -92,11 +89,13 @@ static abstract class Training {
 
   transient Training _training;
 
+  private static Random getRNG() { return java.util.concurrent.ThreadLocalRandom.current(); }
+
   public final void init(Layer[] ls, int index) {
-    init(ls, index, true, 0, _rand);
+    init(ls, index, true, 0);
   }
 
-  public void init(Layer[] ls, int index, boolean weights, long step, Random rand) {
+  public void init(Layer[] ls, int index, boolean weights, long step) {
     _a = new float[units];
     if (!(this instanceof Output)) {
       _e = new float[units];
@@ -128,12 +127,12 @@ void randomize(Random rng, float prefactor) {
     if (initial_weight_distribution == InitialWeightDistribution.UniformAdaptive) {
       final float range = prefactor * (float)Math.sqrt(6. / (_previous.units + units));
       for( int i = 0; i < _w.length; i++ )
-        _w[i] = (float) rand(rng, -range, range);
+        _w[i] = rand(rng, -range, range);
     }
     else {
       if (initial_weight_distribution == InitialWeightDistribution.Uniform) {
         for (int i = 0; i < _w.length; i++) {
-          _w[i] = (float) rand(rng, -initial_weight_scale, initial_weight_scale);
+          _w[i] = rand(rng, (float)-initial_weight_scale, (float)initial_weight_scale);
         }
       } else if (initial_weight_distribution == InitialWeightDistribution.Normal) {
         for (int i = 0; i < _w.length; i++) {
@@ -233,7 +232,7 @@ public static abstract class Input extends Layer {
     @ParamsSearch.Ignore
     protected long _pos, _len;
 
-    @Override public void init(Layer[] ls, int index, boolean weights, long step, Random rand) {
+    @Override public void init(Layer[] ls, int index, boolean weights, long step) {
       _a = new float[units];
     }
 
@@ -469,10 +468,10 @@ protected final long pos() {
   public static abstract class Softmax extends Output {
     protected abstract int target();
 
-    @Override public void init(Layer[] ls, int index, boolean weights, long step, Random rand) {
-      super.init(ls, index, weights, step, rand);
+    @Override public void init(Layer[] ls, int index, boolean weights, long step) {
+      super.init(ls, index, weights, step);
       if( weights ) {
-        randomize(rand, 4.0f);
+        randomize(getRNG(), 4.0f);
       }
     }
 
@@ -501,9 +500,8 @@ public static abstract class Softmax extends Output {
       float r = rate(processed) * (1 - m);
       int label = target();
       for( int u = 0; u < _a.length; u++ ) {
-        //output unit u should be 1.0 if u is the class label for the training point
         final float targetval = (u == label ? 1 : 0);
-        float g = targetval - _a[u]; //error
+        float g = targetval - _a[u];
         if (loss == Loss.CrossEntropy) {
           //nothing else needed
         } else if (loss == Loss.MeanSquare) {
@@ -700,10 +698,10 @@ public Tanh(int units) {
       this.units = units;
     }
 
-    @Override public void init(Layer[] ls, int index, boolean weights, long step, Random rand) {
-      super.init(ls, index, weights, step, rand);
+    @Override public void init(Layer[] ls, int index, boolean weights, long step) {
+      super.init(ls, index, weights, step);
       if( weights ) {
-        randomize(rand, 1.0f);
+        randomize(getRNG(), 1.0f);
       }
     }
 
@@ -754,10 +752,10 @@ public TanhDropout(int units) {
       return super.clone();
     }
 
-    @Override public void init(Layer[] ls, int index, boolean weights, long step, Random rand) {
-      super.init(ls, index, weights, step, rand);
+    @Override public void init(Layer[] ls, int index, boolean weights, long step) {
+      super.init(ls, index, weights, step);
       if( weights ) {
-        randomize(rand, 1.0f);
+        randomize(getRNG(), 1.0f);
       }
     }
 
@@ -773,12 +771,12 @@ public TanhDropout(int units) {
       if( _bits == null ) {
         _bits = new byte[(units + 7) / 8];
       }
-      _rand.nextBytes(_bits);
+      getRNG().nextBytes(_bits);
       // input dropout: set some input layer feature values to 0
       if (_previous.isInput() && training) {
         final double rate = ((Input)_previous)._dropout_rate;
         for( int i = 0; i < _previous._a.length; i++ ) {
-          if (_rand.nextFloat() < rate) _previous._a[i] = 0;
+          if (getRNG().nextFloat() < rate) _previous._a[i] = 0;
         }
       }
 
@@ -835,8 +833,8 @@ public TanhPrime(int units) {
       this.units = units;
     }
 
-    @Override public void init(Layer[] ls, int index, boolean weights, long step, Random rand) {
-      super.init(ls, index, weights, step, rand);
+    @Override public void init(Layer[] ls, int index, boolean weights, long step) {
+      super.init(ls, index, weights, step);
       // Auto encoder has it's own bias vector
       _b = new float[units];
     }
@@ -858,8 +856,7 @@ public TanhPrime(int units) {
       for( int o = 0; o < _a.length; o++ ) {
         assert _previous._previous.units == units;
         float e = _previous._previous._a[o] - _a[o];
-
-        float g = e;
+        float g = e; // * (1 - _a[o]) * _a[o]; // Square error
         for( int i = 0; i < _previous._a.length; i++ ) {
           int w = i * _a.length + o;
           if( _previous._e != null )
@@ -881,10 +878,10 @@ public Maxout(int units) {
       this.units = units;
     }
 
-    @Override public void init(Layer[] ls, int index, boolean weights, long step, Random rand) {
-      super.init(ls, index, weights, step, rand);
+    @Override public void init(Layer[] ls, int index, boolean weights, long step) {
+      super.init(ls, index, weights, step);
       if( weights ) {
-        randomize(rand, 4.0f);
+        randomize(getRNG(), 4.0f);
         for( int i = 0; i < _b.length; i++ )
           _b[i] = 1;
       }
@@ -894,7 +891,7 @@ public Maxout(int units) {
       if( _bits == null ) {
         _bits = new byte[units / 8 + 1];
       }
-      _rand.nextBytes(_bits);
+      getRNG().nextBytes(_bits);
       float max = 0;
       for( int o = 0; o < _a.length; o++ ) {
         _a[o] = 0;
@@ -937,10 +934,10 @@ public Rectifier(int units) {
      this.units = units;
     }
 
-    @Override public void init(Layer[] ls, int index, boolean weights, long step, Random rand) {
-      super.init(ls, index, weights, step, rand);
+    @Override public void init(Layer[] ls, int index, boolean weights, long step) {
+      super.init(ls, index, weights, step);
       if( weights ) {
-        randomize(rand, 1.0f);
+        randomize(getRNG(), 1.0f);
         for( int i = 0; i < _b.length; i++ )
           _b[i] = 1;
       }
@@ -1016,13 +1013,13 @@ public RectifierDropout(int units) {
       if( _bits == null ) {
         _bits = new byte[(units + 7) / 8];
       }
-      _rand.nextBytes(_bits);
+      getRNG().nextBytes(_bits);
 
       // input dropout: set some input layer feature values to 0
       if (_previous.isInput() && training) {
         final double rate = ((Input)_previous)._dropout_rate;
         for( int i = 0; i < _previous._a.length; i++ ) {
-          if (_rand.nextFloat() < rate) _previous._a[i] = 0;
+          if (getRNG().nextFloat() < rate) _previous._a[i] = 0;
         }
       }
 
@@ -1054,8 +1051,8 @@ public RectifierPrime(int units) {
       this.units = units;
     }
 
-    @Override public void init(Layer[] ls, int index, boolean weights, long step, Random rand) {
-      super.init(ls, index, weights, step, rand);
+    @Override public void init(Layer[] ls, int index, boolean weights, long step) {
+      super.init(ls, index, weights, step);
       // Auto encoder has it's own bias vector
       _b = new float[units];
       for( int i = 0; i < _b.length; i++ )
@@ -1122,8 +1119,8 @@ public static void shareWeights(Layer[] src, Layer[] dst) {
       shareWeights(src[y], dst[y]);
   }
 
-  private static double rand(Random rand, double min, double max) {
-    return min + rand.nextDouble() * (max - min);
+  private static float rand(Random rand, float min, float max) {
+    return min + rand.nextFloat() * (max - min);
   }
 
   @Override public AutoBuffer writeJSON(AutoBuffer bb) {
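A side note on the randomize() hunk above: the UniformAdaptive branch draws each weight uniformly from [-range, range] with range = prefactor * sqrt(6 / (fan_in + fan_out)), now through the float-typed rand() helper shown at the end of the file. Below is a minimal standalone sketch of that initialization; the class and method names are illustrative, not taken from Layer.java.

import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;

class UniformAdaptiveInitSketch {
    // Same shape as the new float-typed Layer.rand(Random, float, float) helper.
    static float rand(Random rand, float min, float max) {
        return min + rand.nextFloat() * (max - min);
    }

    // Uniform-adaptive init: the range shrinks as fanIn + fanOut grows.
    static float[] initWeights(int fanIn, int fanOut, float prefactor) {
        Random rng = ThreadLocalRandom.current();
        final float range = prefactor * (float) Math.sqrt(6. / (fanIn + fanOut));
        float[] w = new float[fanIn * fanOut];
        for (int i = 0; i < w.length; i++)
            w[i] = rand(rng, -range, range);
        return w;
    }

    public static void main(String[] args) {
        float[] w = initWeights(784, 1024, 1.0f); // hypothetical input -> hidden layer sizes
        System.out.println("w[0] = " + w[0]);
    }
}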
23 changes: 5 additions & 18 deletions src/main/java/hex/NeuralNet.java
@@ -30,13 +30,13 @@ public enum ExecutionMode {
   }
 
   @API(help = "Execution Mode", filter = Default.class)
-  public ExecutionMode mode = ExecutionMode.Threaded_Hogwild;
+  public ExecutionMode mode = ExecutionMode.MapReduce_Hogwild;
 
   @API(help = "Activation function", filter = Default.class)
-  public Activation activation = Activation.Tanh;
+  public Activation activation = Activation.RectifierWithDropout;
 
   @API(help = "Input layer dropout ratio", filter = Default.class, dmin = 0, dmax = 1)
-  public float input_dropout_ratio = 0;
+  public float input_dropout_ratio = 0.2f;
 
   @API(help = "Hidden layer sizes, e.g. 1000, 1000. Grid search: (100, 100), (200, 200)", filter = Default.class)
   public int[] hidden = new int[] { 1024, 1024, 2048 };
@@ -82,10 +82,6 @@ public enum ExecutionMode {
   @API(help = "How many times the dataset should be iterated", filter = Default.class, dmin = 0)
   public double epochs = 100;
 
-  @API(help = "Seed for the random number generator", filter = Default.class)
-  //public static long seed = new Random().nextLong();
-  public static long seed = 0; //TODO: Revert to RNG
-
   @Override
   protected void registered(RequestServer.API_VERSION ver) {
     super.registered(ver);
@@ -232,7 +228,6 @@ void startTrain() {
     model.l2 = l2;
     model.loss = loss;
     model.fast_mode = fast_mode;
-    model.seed = seed;
 
     UKV.put(destination_key, model);
 
@@ -336,7 +331,6 @@ private long eval(Vec[] valid, Vec validResp) {
     model.l2 = l2;
     model.loss = loss;
     model.fast_mode = fast_mode;
-    model.seed = seed;
     UKV.put(model._selfKey, model);
     return e.training_samples;
   }
@@ -396,7 +390,7 @@ private static Errors eval(Layer[] ls, Input input, Output output, long n, long[
       clones[y] = ls[y].clone();
     clones[clones.length - 1] = output;
     for( int y = 0; y < clones.length; y++ )
-      clones[y].init(clones, y, false, 0, null);
+      clones[y].init(clones, y, false, 0);
     Layer.shareWeights(ls, clones);
     return eval(clones, n, cm);
   }
@@ -543,9 +537,6 @@ public static class NeuralNetModel extends Model {
     @API(help = "Fast mode (minor approximation)")
     public boolean fast_mode;
 
-    @API(help = "Seed for the random number generator")
-    public long seed;
-
     @API(help = "Layers")
     public Layer[] layers;
 
@@ -586,7 +577,7 @@ public static class NeuralNetModel extends Model {
       for( int y = 0; y < clones.length; y++ ) {
         clones[y]._w = weights[y];
         clones[y]._b = biases[y];
-        clones[y].init(clones, y, false, 0, null);
+        clones[y].init(clones, y, false, 0);
       }
       ((Input) clones[0])._pos = rowInChunk;
       for (Layer clone : clones) clone.fprop(false);
@@ -664,9 +655,6 @@ public static class NeuralNetProgress extends Progress2 {
     @API(help = "Fast mode (minor approximation)")
     public boolean fast_mode;
 
-    @API(help = "Seed for the random number generator")
-    public long seed;
-
     @API(help = "How many times the dataset should be iterated")
     public double epochs;
 
@@ -706,7 +694,6 @@ public static class NeuralNetProgress extends Progress2 {
         loss = job.loss;
         fast_mode = job.fast_mode;
         epochs = job.epochs;
-        seed = NeuralNet.seed;
       }
       NeuralNetModel model = UKV.get(destination_key);
       if( model != null ) {
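For reference, the dropout-related defaults changed above (activation = RectifierWithDropout, input_dropout_ratio = 0.2) feed the input-dropout step already present in the dropout layers' fprop in Layer.java: during training, each input activation is zeroed with probability equal to the dropout rate, drawn from the per-thread generator. A rough standalone sketch under those assumptions (the helper names here are mine, not from the repository):

import java.util.concurrent.ThreadLocalRandom;

class InputDropoutSketch {
    // Zero each input feature with probability `rate` while training; leave it untouched at test time.
    static void inputDropout(float[] activations, double rate, boolean training) {
        if (!training) return;
        for (int i = 0; i < activations.length; i++) {
            if (ThreadLocalRandom.current().nextFloat() < rate)
                activations[i] = 0;
        }
    }

    public static void main(String[] args) {
        float[] a = { 0.5f, 1.2f, -0.3f, 0.8f };
        inputDropout(a, 0.2, true); // 0.2 matches the new default input dropout ratio
        for (float v : a) System.out.print(v + " ");
        System.out.println();
    }
}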
