
Commit 3080af5

acvogel authored and Stanford NLP committed

Renamed SGDMinimizer to InefficientSGDMinimizer, and StochasticInPlaceMinimizer to SGDMinimizer. Updated all classes that use these. Usage of InefficientSGDMinimizer is currently left in place for backward compatibility.

1 parent a11499c  commit 3080af5

9 files changed: +472 −1095 lines changed
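The effect of the rename on call sites, as a minimal before/after sketch (the numeric arguments are placeholder values; the constructor signatures are the ones exercised in the diffs below):

```java
import edu.stanford.nlp.optimization.*;

// Before this commit:
//   Minimizer<DiffFunction> sgd     = new SGDMinimizer<DiffFunction>(0.1, 15);
//   Minimizer<DiffFunction> inPlace = new StochasticInPlaceMinimizer<DiffFunction>(1.0, 50, 1000);

// After this commit:
Minimizer<DiffFunction> sgd     = new InefficientSGDMinimizer<DiffFunction>(0.1, 15); // gain, batch size
Minimizer<DiffFunction> inPlace = new SGDMinimizer<DiffFunction>(1.0, 50, 1000);      // sigma, SGD passes, tune sample size
```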

src/edu/stanford/nlp/classify/LinearClassifierFactory.java  +3 −3
@@ -503,7 +503,7 @@ public void useStochasticGradientDescent(){
   public void useStochasticGradientDescent(final double gainSGD, final int stochasticBatchSize){
     this.minimizerCreator = new Factory<Minimizer<DiffFunction>>() {
       public Minimizer<DiffFunction> create() {
-        return new SGDMinimizer<DiffFunction>(gainSGD,stochasticBatchSize);
+        return new InefficientSGDMinimizer<DiffFunction>(gainSGD,stochasticBatchSize);
       }
     };
   }
@@ -515,15 +515,15 @@ public void useInPlaceStochasticGradientDescent() {
   public void useInPlaceStochasticGradientDescent(final int SGDPasses, final int tuneSampleSize, final double sigma) {
     this.minimizerCreator = new Factory<Minimizer<DiffFunction>>() {
       public Minimizer<DiffFunction> create() {
-        return new StochasticInPlaceMinimizer<DiffFunction>(sigma, SGDPasses, tuneSampleSize);
+        return new SGDMinimizer<DiffFunction>(sigma, SGDPasses, tuneSampleSize);
       }
     };
   }
 
   public void useHybridMinimizerWithInPlaceSGD(final int SGDPasses, final int tuneSampleSize, final double sigma) {
     this.minimizerCreator = new Factory<Minimizer<DiffFunction>>() {
       public Minimizer<DiffFunction> create() {
-        Minimizer<DiffFunction> firstMinimizer = new StochasticInPlaceMinimizer<DiffFunction>(sigma, SGDPasses, tuneSampleSize);
+        Minimizer<DiffFunction> firstMinimizer = new SGDMinimizer<DiffFunction>(sigma, SGDPasses, tuneSampleSize);
         Minimizer<DiffFunction> secondMinimizer = new QNMinimizer(mem);
         return new HybridMinimizer(firstMinimizer, secondMinimizer, SGDPasses);
       }
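Callers of LinearClassifierFactory are unaffected at the API level: the factory methods keep their signatures and only the minimizer class they instantiate changes. A hedged usage sketch (the numeric arguments are illustrative, not recommended settings; note that each call replaces the factory's minimizer creator, so only the last one takes effect):

```java
LinearClassifierFactory<String, String> factory = new LinearClassifierFactory<String, String>();
factory.useStochasticGradientDescent(0.1, 15);              // gain, batch size -> InefficientSGDMinimizer
factory.useInPlaceStochasticGradientDescent(50, 1000, 1.0); // passes, tune sample size, sigma -> SGDMinimizer
factory.useHybridMinimizerWithInPlaceSGD(50, 1000, 1.0);    // SGD warm start, then QNMinimizer refinement
```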

src/edu/stanford/nlp/ie/AbstractSequenceClassifier.java  +2 −8
@@ -87,8 +87,8 @@ public abstract class AbstractSequenceClassifier<IN extends CoreMap> implements
   private CoreTokenFactory<IN> tokenFactory;
   public int windowSize;
   // different threads can add or query knownLCWords at the same time,
-  // so we need a concurrent data structure. created in reinit()
-  protected Set<String> knownLCWords = null;
+  // so we need a concurrent data structure
+  protected Set<String> knownLCWords = Collections.newSetFromMap(new ConcurrentHashMap<String,Boolean>());
 
   private DocumentReaderAndWriter<IN> defaultReaderAndWriter;
   public DocumentReaderAndWriter<IN> defaultReaderAndWriter() {
@@ -165,12 +165,6 @@ protected final void reinit() {
     } else {
       plainTextReaderAndWriter = makePlainTextReaderAndWriter();
     }
-
-    if (!flags.useKnownLCWords) {
-      knownLCWords = Collections.emptySet();
-    } else if (knownLCWords == null || knownLCWords.size() == 0) {
-      knownLCWords = Collections.newSetFromMap(new ConcurrentHashMap<String,Boolean>());
-    }
   }
 
   /**
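The new field initializer uses the standard JDK idiom for a concurrent Set (there is no ConcurrentHashSet class): a Set view backed by a ConcurrentHashMap, now created eagerly instead of lazily in reinit(). A self-contained sketch of the idiom:

```java
import java.util.Collections;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

public class ConcurrentSetDemo {
  // Thread-safe for concurrent add/contains, which is what
  // AbstractSequenceClassifier needs for knownLCWords.
  private static final Set<String> knownLCWords =
      Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());

  public static void main(String[] args) {
    knownLCWords.add("the");
    System.out.println(knownLCWords.contains("the")); // prints: true
  }
}
```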

src/edu/stanford/nlp/ie/crf/CRFClassifier.java  +3 −3
@@ -2563,8 +2563,8 @@ public Minimizer getMinimizer(int featurePruneIteration, Evaluator[] evaluators)
         ((QNMinimizer) minimizer).useOWLQN(flags.useOWLQN, flags.priorLambda);
       }
     } else if (flags.useInPlaceSGD) {
-      StochasticInPlaceMinimizer<DiffFunction> sgdMinimizer =
-          new StochasticInPlaceMinimizer<DiffFunction>(flags.sigma, flags.SGDPasses, flags.tuneSampleSize, flags.stochasticBatchSize);
+      SGDMinimizer<DiffFunction> sgdMinimizer =
+          new SGDMinimizer<DiffFunction>(flags.sigma, flags.SGDPasses, flags.tuneSampleSize, flags.stochasticBatchSize);
       if (flags.useSGDtoQN) {
         QNMinimizer qnMinimizer;
         int QNmem;
@@ -2598,7 +2598,7 @@ public Minimizer getMinimizer(int featurePruneIteration, Evaluator[] evaluators)
       minimizer = new SMDMinimizer(flags.initialGain, flags.stochasticBatchSize, flags.stochasticMethod,
           flags.SGDPasses);
     } else if (flags.useSGD) {
-      minimizer = new SGDMinimizer(flags.initialGain, flags.stochasticBatchSize);
+      minimizer = new InefficientSGDMinimizer(flags.initialGain, flags.stochasticBatchSize);
     } else if (flags.useScaledSGD) {
       minimizer = new ScaledSGDMinimizer(flags.initialGain, flags.stochasticBatchSize, flags.SGDPasses,
           flags.scaledSGDMethod);
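Which branch of getMinimizer runs is controlled by SeqClassifierFlags. Assuming the usual CoreNLP convention that flag fields can be set as properties of the same name (an assumption; the property names are not shown in this diff), selecting the in-place SGD branch above would look roughly like this:

```java
import java.util.Properties;
import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.ling.CoreLabel;

Properties props = new Properties();
props.setProperty("useInPlaceSGD", "true");  // take the SGDMinimizer branch above
props.setProperty("SGDPasses", "50");        // illustrative values, not tuned
props.setProperty("tuneSampleSize", "1000");
props.setProperty("sigma", "1.0");
CRFClassifier<CoreLabel> crf = new CRFClassifier<CoreLabel>(props);
```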
src/edu/stanford/nlp/optimization/InefficientSGDMinimizer.java  +154 −0

@@ -0,0 +1,154 @@
+package edu.stanford.nlp.optimization;
+
+import edu.stanford.nlp.util.Pair;
+
+/**
+ * Stochastic Gradient Descent Minimizer.
+ *
+ * The basic way to use the minimizer is with a null constructor, then
+ * the simple minimize method:
+ * <p/>
+ * <p><code>Minimizer smd = new InefficientSGDMinimizer();</code>
+ * <br><code>DiffFunction df = new SomeDiffFunction(); // Note that it must be an instance of AbstractStochasticCachingDiffFunction</code>
+ * <br><code>double tol = 1e-4;</code>
+ * <br><code>double[] initial = getInitialGuess();</code>
+ * <br><code>int maxIterations = someSafeNumber;</code>
+ * <br><code>double[] minimum = smd.minimize(df,tol,initial,maxIterations);</code>
+ * <p/>
+ * Constructing with a null constructor will use the default values of
+ * <p>
+ * <br><code>batchSize = 15;</code>
+ * <br><code>initialGain = 0.1;</code>
+ * <p/>
+ * <br> NOTE: This class was previously called SGDMinimizer. SGDMinimizer now refers to what was previously called StochasticInPlaceMinimizer. New projects should use that class.
+ * <p/>
+ *
+ * @author <a href="mailto:[email protected]">Alex Kleeman</a>
+ * @version 1.0
+ * @since 1.0
+ */
+public class InefficientSGDMinimizer<T extends Function> extends StochasticMinimizer<T> {
+
+  @Override
+  public void shutUp() {
+    this.quiet = true;
+  }
+
+  public void setBatchSize(int batchSize) {
+    bSize = batchSize;
+  }
+
+  public InefficientSGDMinimizer() {
+  }
+
+  public InefficientSGDMinimizer(double SGDGain, int batchSize) {
+    this(SGDGain, batchSize, 50);
+  }
+
+  public InefficientSGDMinimizer(double SGDGain, int batchSize, int passes) {
+    this(SGDGain, batchSize, passes, Long.MAX_VALUE, false);
+  }
+
+  public InefficientSGDMinimizer(double SGDGain, int batchSize, int passes, boolean outputToFile) {
+    this(SGDGain, batchSize, passes, Long.MAX_VALUE, outputToFile);
+  }
+
+  public InefficientSGDMinimizer(double SGDGain, int batchSize, int passes, long maxTime) {
+    this(SGDGain, batchSize, passes, maxTime, false);
+  }
+
+  public InefficientSGDMinimizer(double SGDGain, int batchSize, int passes, long maxTime, boolean outputToFile) {
+    bSize = batchSize;
+    gain = SGDGain;
+    this.numPasses = passes;
+    this.outputIterationsToFile = outputToFile;
+    this.maxTime = maxTime;
+  }
+
+  @Override
+  protected String getName() {
+    int g = (int) (gain * 1000); // parenthesized so the gain, not its int cast, is scaled
+    return "SGD" + bSize + "_g" + g;
+  }
+
+  public Pair<Integer,Double> tune(Function function, double[] initial, long msPerTest, double gainLow, double gainHigh) {
+    this.quiet = true;
+    gain = tuneGain(function, initial, msPerTest, gainLow, gainHigh);
+    bSize = tuneBatch(function, initial, msPerTest, 1);
+    return new Pair<Integer,Double>(bSize, gain);
+  }
+
+  @Override
+  public Pair<Integer,Double> tune(Function function, double[] initial, long msPerTest) {
+    return this.tune(function, initial, msPerTest, 1e-7, 1.0);
+  }
+
+  @Override
+  protected void takeStep(AbstractStochasticCachingDiffFunction dfunction) {
+    for (int i = 0; i < x.length; i++) {
+      newX[i] = x[i] - gain * gainSchedule(k, 5 * numBatches) * grad[i];
+    }
+  }
+
+  public static void main(String[] args) {
+    // Optimizes the test function (0.5 sum(x_i^2 * var_i)) ^ PI,
+    // where var is a vector of nonnegative numbers and the
+    // dimensionality is variable.
+    final int dim = 500000;
+    final double maxVar = 5;
+    final double[] var = new double[dim];
+    double[] init = new double[dim];
+
+    for (int i = 0; i < dim; i++) {
+      init[i] = ((i + 1) / (double) dim - 0.5); // init[i] = (Math.random() - 0.5);
+      var[i] = maxVar * (i + 1) / dim;
+    }
+
+    final double[] grads = new double[dim];
+
+    final DiffFunction f = new DiffFunction() {
+      @Override
+      public double[] derivativeAt(double[] x) {
+        double val = Math.PI * valuePow(x, Math.PI - 1);
+        for (int i = 0; i < dim; i++) {
+          grads[i] = x[i] * var[i] * val;
+        }
+        return grads;
+      }
+
+      @Override
+      public double valueAt(double[] x) {
+        return 1.0 + valuePow(x, Math.PI);
+      }
+
+      private double valuePow(double[] x, double pow) {
+        double val = 0.0;
+        for (int i = 0; i < dim; i++) {
+          val += x[i] * x[i] * var[i];
+        }
+        return Math.pow(val * 0.5, pow);
+      }
+
+      @Override
+      public int domainDimension() {
+        return dim;
+      }
+    };
+
+    InefficientSGDMinimizer<DiffFunction> min = new InefficientSGDMinimizer<DiffFunction>();
+    min.minimize(f, 1.0E-4, init);
+  }
+
+}
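Besides the minimize recipe in the Javadoc, the tune methods above search for a workable gain and batch size under a per-test time budget and store the results on the minimizer. A hedged sketch of that flow, reusing the df and initial from the Javadoc example (the DiffFunction must be an AbstractStochasticCachingDiffFunction, per the class Javadoc; the time budget is illustrative):

```java
InefficientSGDMinimizer<DiffFunction> min = new InefficientSGDMinimizer<DiffFunction>();
// Tries gains in [1e-7, 1.0], then batch sizes, spending ~5 s per trial run.
Pair<Integer, Double> batchAndGain = min.tune(df, initial, 5000);
// tune() also sets bSize and gain on the minimizer itself,
// so a subsequent minimize() uses the tuned settings.
double[] minimum = min.minimize(df, 1e-4, initial);
```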
