|
| 1 | +package edu.stanford.nlp.optimization; |
| 2 | + |
| 3 | +import edu.stanford.nlp.util.Pair; |
| 4 | + |
| 5 | +/** |
| 6 | + * Stochastic Gradient Descent Minimizer. |
| 7 | + * |
| 8 | + * |
| 9 | + * The basic way to use the minimizer is with a null constructor, then |
| 10 | + * the simple minimize method: |
| 11 | + * <p/> |
| 12 | + * <p><code>Minimizer smd = new InefficientSGDMinimizer();</code> |
| 13 | + * <br><code>DiffFunction df = new SomeDiffFunction(); //Note that it must be an instance of AbstractStochasticCachingDiffFunction</code> |
| 14 | + * <br><code>double tol = 1e-4;</code> |
| 15 | + * <br><code>double[] initial = getInitialGuess();</code> |
| 16 | + * <br><code>int maxIterations = someSafeNumber;</code> |
| 17 | + * <br><code>double[] minimum = smd.minimize(df,tol,initial,maxIterations);</code> |
| 18 | + * <p/> |
| 19 | + * Constructing with a null constructor will use the default values of |
| 20 | + * <p> |
| 21 | + * <br><code>batchSize = 15;</code> |
| 22 | + * <br><code>initialGain = 0.1;</code> |
| 23 | + * <p/> |
| 24 | + * <br> NOTE: This class was previously called SGDMinimizer. SGDMinimizer is now what was StochasticInPlaceMinimizer. New projects should use that class. |
| 25 | + * <p/> |
| 26 | + * |
| 27 | + * @author <a href="mailto:[email protected]">Alex Kleeman</a> |
| 28 | + * @version 1.0 |
| 29 | + * @since 1.0 |
| 30 | + */ |
| 31 | +public class InefficientSGDMinimizer<T extends Function> extends StochasticMinimizer<T> { |
| 32 | + |
| 33 | + |
| 34 | + @Override |
| 35 | + public void shutUp() { |
| 36 | + this.quiet = true; |
| 37 | + } |
| 38 | + |
| 39 | + public void setBatchSize(int batchSize) { |
| 40 | + bSize = batchSize; |
| 41 | + } |
| 42 | + |
| 43 | + public InefficientSGDMinimizer() { |
| 44 | + } |
| 45 | + |
| 46 | + public InefficientSGDMinimizer(double SGDGain, int batchSize){ |
| 47 | + this(SGDGain,batchSize,50); |
| 48 | + } |
| 49 | + |
| 50 | + public InefficientSGDMinimizer(double SGDGain, int batchSize, int passes){ |
| 51 | + this(SGDGain,batchSize,passes,Long.MAX_VALUE,false); |
| 52 | + } |
| 53 | + |
| 54 | + public InefficientSGDMinimizer(double SGDGain, int batchSize, int passes, boolean outputToFile){ |
| 55 | + this(SGDGain, batchSize, passes, Long.MAX_VALUE ,outputToFile ); |
| 56 | + } |
| 57 | + |
| 58 | + public InefficientSGDMinimizer(double SGDGain, int batchSize, int passes, long maxTime){ |
| 59 | + this(SGDGain,batchSize,passes,maxTime,false); |
| 60 | + } |
| 61 | + |
| 62 | + public InefficientSGDMinimizer(double SGDGain, int batchSize, int passes, long maxTime, boolean outputToFile){ |
| 63 | + bSize = batchSize; |
| 64 | + gain = SGDGain; |
| 65 | + this.numPasses = passes; |
| 66 | + this.outputIterationsToFile = outputToFile; |
| 67 | + this.maxTime = maxTime; |
| 68 | + } |
| 69 | + |
| 70 | + |
| 71 | + @Override |
| 72 | + protected String getName(){ |
| 73 | + int g = (int) gain*1000; |
| 74 | + return "SGD" + bSize + "_g" + g; |
| 75 | + } |
| 76 | + |
| 77 | + |
| 78 | + public Pair <Integer,Double> tune(Function function, double[] initial,long msPerTest,double gainLow,double gainHigh){ |
| 79 | + this.quiet = true; |
| 80 | + gain = tuneGain(function, initial, msPerTest, gainLow,gainHigh); |
| 81 | + bSize = tuneBatch(function,initial,msPerTest,1); |
| 82 | + |
| 83 | + return new Pair<Integer,Double>(bSize, gain); |
| 84 | + } |
| 85 | + |
| 86 | + @Override |
| 87 | + public Pair<Integer,Double> tune(Function function,double[] initial, long msPerTest){ |
| 88 | + return this.tune(function, initial, msPerTest, 1e-7,1.0); |
| 89 | + |
| 90 | + } |
| 91 | + |
| 92 | + @Override |
| 93 | + protected void takeStep(AbstractStochasticCachingDiffFunction dfunction){ |
| 94 | + for(int i = 0; i < x.length; i++){ |
| 95 | + newX[i] = x[i] - gain*gainSchedule(k,5*numBatches)*grad[i]; |
| 96 | + } |
| 97 | + } |
| 98 | + |
| 99 | + |
| 100 | + |
| 101 | + |
| 102 | + |
| 103 | + |
| 104 | + public static void main(String[] args) { |
| 105 | + // optimizes test function using doubles and floats |
| 106 | + // test function is (0.5 sum(x_i^2 * var_i)) ^ PI |
| 107 | + // where var is a vector of random nonnegative numbers |
| 108 | + // dimensionality is variable. |
| 109 | + final int dim = 500000; |
| 110 | + final double maxVar = 5; |
| 111 | + final double[] var = new double[dim]; |
| 112 | + double[] init = new double[dim]; |
| 113 | + |
| 114 | + for (int i = 0; i < dim; i++) { |
| 115 | + init[i] = ((i + 1) / (double) dim - 0.5);//init[i] = (Math.random() - 0.5); |
| 116 | + var[i] = maxVar * (i + 1) / dim; |
| 117 | + } |
| 118 | + |
| 119 | + final double[] grads = new double[dim]; |
| 120 | + |
| 121 | + final DiffFunction f = new DiffFunction() { |
| 122 | + @Override |
| 123 | + public double[] derivativeAt(double[] x) { |
| 124 | + double val = Math.PI * valuePow(x, Math.PI - 1); |
| 125 | + for (int i = 0; i < dim; i++) { |
| 126 | + grads[i] = x[i] * var[i] * val; |
| 127 | + } |
| 128 | + return grads; |
| 129 | + } |
| 130 | + |
| 131 | + @Override |
| 132 | + public double valueAt(double[] x) { |
| 133 | + return 1.0 + valuePow(x, Math.PI); |
| 134 | + } |
| 135 | + |
| 136 | + private double valuePow(double[] x, double pow) { |
| 137 | + double val = 0.0; |
| 138 | + for (int i = 0; i < dim; i++) { |
| 139 | + val += x[i] * x[i] * var[i]; |
| 140 | + } |
| 141 | + return Math.pow(val * 0.5, pow); |
| 142 | + } |
| 143 | + |
| 144 | + @Override |
| 145 | + public int domainDimension() { |
| 146 | + return dim; |
| 147 | + } |
| 148 | + }; |
| 149 | + |
| 150 | + InefficientSGDMinimizer<DiffFunction> min = new InefficientSGDMinimizer<DiffFunction>(); |
| 151 | + min.minimize(f, 1.0E-4, init); |
| 152 | + } |
| 153 | + |
| 154 | +} |
0 commit comments