
Commit e87a53a

Vivek Srikumar authored and Stanford NLP committed
Merge branch 'bioprocess' of origin into bioprocess
1 parent 313e9ec commit e87a53a


138 files changed: +12631 −10320 lines changed

Large commits have some content hidden by default; some of the changed files are therefore not shown in full below.


CONTRIBUTING.md

+3-3
@@ -10,7 +10,7 @@ In order for us to continue to be able to dual-license Stanford CoreNLP, we need
 Therefore, we can accept contributions on any of the following terms:
 * If your contribution is a bug fix of 6 lines or less of new code, we will accept it on the basis that both you and us regard the contribution as de minimis, and not requiring further hassle.
 * You can declare that the contribution is in the public domain (in your commit message or pull request).
-* You can make your contribution available under a non-restrictive open source licensing, such as the Revised (or 3-clause) BSD license, with appropriate licensing information included with the submitted code.
-* You can sign and return to us a contributor license agreement, explicitly licensing us to be able to use the code. Contact us at: [email protected] .
+* You can make your contribution available under a non-restrictive open source license, such as the Revised (or 3-clause) BSD license, with appropriate licensing information included with the submitted code.
+* You can sign and return to us a contributor license agreement (CLA), explicitly licensing us to be able to use the code. You can find these agreements at http://nlp.stanford.edu/software/CLA/ . You can send them to us or contact us at: [email protected] .
 
-You should do development against our master branch. You should make sure that all unit tests still pass. (In general, you will not be able to run our integration tests, since they rely on resources in our filesystem.)
+You should do development against our master branch. The project's source code is in utf-8 character encoding. You should make sure that all unit tests still pass. (In general, you will not be able to run our integration tests, since they rely on resources in our filesystem.)
+20-20
@@ -1,20 +1,20 @@
-5 Iris-setosa Iris-setosa 0.995615365125735
-4.6 Iris-setosa Iris-setosa 0.9994804135630505
-5.1 Iris-setosa Iris-setosa 0.9937095680980086
-4.9 Iris-setosa Iris-setosa 0.9905109629700247
-5.4 Iris-setosa Iris-setosa 0.9982151488134486
-4.4 Iris-setosa Iris-setosa 0.9944214428148407
-5.3 Iris-setosa Iris-setosa 0.9984497925740373
-6.1 Iris-versicolor Iris-versicolor 0.8873152482428373
-6 Iris-versicolor Iris-versicolor 0.9424246013278404
-5.5 Iris-versicolor Iris-versicolor 0.9030026595536319
-6.5 Iris-versicolor Iris-versicolor 0.928816167001929
-6.8 Iris-versicolor Iris-versicolor 0.9569376555329442
-6.2 Iris-versicolor Iris-versicolor 0.9857141927233324
-6.7 Iris-virginica Iris-virginica 0.9698639532763317
-6.4 Iris-virginica Iris-virginica 0.8982390073296296
-5.7 Iris-virginica Iris-virginica 0.9920401400173403
-6.7 Iris-virginica Iris-virginica 0.968576539063806
-6.8 Iris-virginica Iris-virginica 0.9957320369272686
-7.7 Iris-virginica Iris-virginica 0.9900526044768513
-7.3 Iris-virginica Iris-virginica 0.9766204287594443
+5 Iris-setosa Iris-setosa 0.996 0.996
+4.6 Iris-setosa Iris-setosa 0.999 0.999
+5.1 Iris-setosa Iris-setosa 0.994 0.994
+4.9 Iris-setosa Iris-setosa 0.991 0.991
+5.4 Iris-setosa Iris-setosa 0.998 0.998
+4.4 Iris-setosa Iris-setosa 0.994 0.994
+5.3 Iris-setosa Iris-setosa 0.998 0.998
+6.1 Iris-versicolor Iris-versicolor 0.887 0.887
+6 Iris-versicolor Iris-versicolor 0.942 0.942
+5.5 Iris-versicolor Iris-versicolor 0.903 0.903
+6.5 Iris-versicolor Iris-versicolor 0.929 0.929
+6.8 Iris-versicolor Iris-versicolor 0.957 0.957
+6.2 Iris-versicolor Iris-versicolor 0.986 0.986
+6.7 Iris-virginica Iris-virginica 0.970 0.970
+6.4 Iris-virginica Iris-virginica 0.898 0.898
+5.7 Iris-virginica Iris-virginica 0.992 0.992
+6.7 Iris-virginica Iris-virginica 0.969 0.969
+6.8 Iris-virginica Iris-virginica 0.996 0.996
+7.7 Iris-virginica Iris-virginica 0.990 0.990
+7.3 Iris-virginica Iris-virginica 0.977 0.977
+20-20
@@ -1,20 +1,20 @@
-5 Iris-setosa Iris-setosa 0.9919247137755053
-4.6 Iris-setosa Iris-setosa 0.9988153870786971
-5.1 Iris-setosa Iris-setosa 0.9893228231715544
-4.9 Iris-setosa Iris-setosa 0.9835318845429561
-5.4 Iris-setosa Iris-setosa 0.9960427411240634
-4.4 Iris-setosa Iris-setosa 0.9910859075339642
-5.3 Iris-setosa Iris-setosa 0.9965862883009643
-6.1 Iris-versicolor Iris-versicolor 0.8468902641192759
-6 Iris-versicolor Iris-versicolor 0.9307517829994151
-5.5 Iris-versicolor Iris-versicolor 0.7982164305911292
-6.5 Iris-versicolor Iris-versicolor 0.873020490772672
-6.8 Iris-versicolor Iris-versicolor 0.9142958840729118
-6.2 Iris-versicolor Iris-versicolor 0.9691329948474605
-6.7 Iris-virginica Iris-virginica 0.9514065325627161
-6.4 Iris-virginica Iris-virginica 0.8326970803989662
-5.7 Iris-virginica Iris-virginica 0.9861478471561218
-6.7 Iris-virginica Iris-virginica 0.9281387678310443
-6.8 Iris-virginica Iris-virginica 0.9869791941203433
-7.7 Iris-virginica Iris-virginica 0.980694494307154
-7.3 Iris-virginica Iris-virginica 0.9555631398239129
+5 Iris-setosa Iris-setosa 0.992 0.992
+4.6 Iris-setosa Iris-setosa 0.999 0.999
+5.1 Iris-setosa Iris-setosa 0.989 0.989
+4.9 Iris-setosa Iris-setosa 0.984 0.984
+5.4 Iris-setosa Iris-setosa 0.996 0.996
+4.4 Iris-setosa Iris-setosa 0.991 0.991
+5.3 Iris-setosa Iris-setosa 0.997 0.997
+6.1 Iris-versicolor Iris-versicolor 0.847 0.847
+6 Iris-versicolor Iris-versicolor 0.931 0.931
+5.5 Iris-versicolor Iris-versicolor 0.798 0.798
+6.5 Iris-versicolor Iris-versicolor 0.873 0.873
+6.8 Iris-versicolor Iris-versicolor 0.914 0.914
+6.2 Iris-versicolor Iris-versicolor 0.969 0.969
+6.7 Iris-virginica Iris-virginica 0.951 0.951
+6.4 Iris-virginica Iris-virginica 0.833 0.833
+5.7 Iris-virginica Iris-virginica 0.986 0.986
+6.7 Iris-virginica Iris-virginica 0.928 0.928
+6.8 Iris-virginica Iris-virginica 0.987 0.987
+7.7 Iris-virginica Iris-virginica 0.981 0.981
+7.3 Iris-virginica Iris-virginica 0.956 0.956
+9-9
@@ -1,15 +1,15 @@
 CONLL EVAL SUMMARY (Before COREF)
-Identification of Mentions: Recall: (12407 / 14291) 86.81% Precision: (12407 / 34999) 35.44% F1: 50.34%
+Identification of Mentions: Recall: (12407 / 14291) 86.81% Precision: (12407 / 34999) 35.44% F1: 50.34%
 
 CONLL EVAL SUMMARY (After COREF)
-METRIC muc:Coreference: Recall: (6260 / 10539) 59.39% Precision: (6260 / 10027) 62.43% F1: 60.87%
-METRIC bcub:Coreference: Recall: (12379.37 / 18298) 67.65% Precision: (13598.84 / 18298) 74.31% F1: 70.83%
-METRIC ceafm:Coreference: Recall: (10894 / 18298) 59.53% Precision: (10894 / 18298) 59.53% F1: 59.53%
-METRIC ceafe:Coreference: Recall: (3811.5 / 7759) 49.12% Precision: (3811.5 / 8271) 46.08% F1: 47.55%
-METRIC blanc:Coreference links: Recall: (25257 / 54427) 46.4% Precision: (25257 / 40544) 62.29% F1: 53.18%
-Non-coreference links: Recall: (922975 / 938262) 98.37% Precision: (922975 / 952145) 96.93% F1: 97.64%
-BLANC: Recall: (0.72 / 1) 72.38% Precision: (0.8 / 1) 79.61% F1: 75.41%
+METRIC muc:Coreference: Recall: (6256 / 10539) 59.36% Precision: (6256 / 10078) 62.07% F1: 60.68%
+METRIC bcub:Coreference: Recall: (12462.33 / 18385) 67.78% Precision: (13629.92 / 18385) 74.13% F1: 70.81%
+METRIC ceafm:Coreference: Recall: (10928 / 18385) 59.43% Precision: (10928 / 18385) 59.43% F1: 59.43%
+METRIC ceafe:Coreference: Recall: (3832.95 / 7846) 48.85% Precision: (3832.95 / 8307) 46.14% F1: 47.45%
+METRIC blanc:Coreference links: Recall: (25245 / 54427) 46.38% Precision: (25245 / 40608) 62.16% F1: 53.12%
+Non-coreference links: Recall: (932068 / 947431) 98.37% Precision: (932068 / 961250) 96.96% F1: 97.66%
+BLANC: Recall: (0.72 / 1) 72.38% Precision: (0.8 / 1) 79.56% F1: 75.39%
 
-Final conll score ((muc+bcub+ceafe)/3) = 59.75
+Final conll score ((muc+bcub+ceafe)/3) = 59.65
 Final score (pairwise) Precision = 0.57
 done
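
As a quick consistency check of the formula in this expected output, the updated final CoNLL score is the mean of the updated muc, bcub, and ceafe F1 values: (60.68 + 70.81 + 47.45) / 3 ≈ 59.65, while the previous values give (60.87 + 70.83 + 47.55) / 3 = 59.75, matching the old line.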

test/src/edu/stanford/nlp/neural/EmbeddingTest.java → itest/src/edu/stanford/nlp/neural/EmbeddingITest.java

+4-5
@@ -13,11 +13,10 @@
  *
  */
 
-public class EmbeddingTest {
-  public static final String PREFIX = "projects/core/";
-  public static final String wordVectorFile = PREFIX + "data/edu/stanford/nlp/neural/wordVector.txt";
-  public static final String wordFile = PREFIX + "data/edu/stanford/nlp/neural/word.txt";
-  public static final String vectorFile = PREFIX + "data/edu/stanford/nlp/neural/vector.txt";
+public class EmbeddingITest {
+  public static final String wordVectorFile = "edu/stanford/nlp/neural/wordVector.txt";
+  public static final String wordFile = "edu/stanford/nlp/neural/word.txt";
+  public static final String vectorFile = "edu/stanford/nlp/neural/vector.txt";
 
   @Test
   public void testLoadFromOneFile() {
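
The renamed integration test changes its data-file constants from projects/core/data/... paths to the shorter relative paths edu/stanford/nlp/neural/.... As a minimal, hypothetical sketch of how such a relative path can be resolved with standard Java APIs, checking the working directory first and falling back to the classpath (this helper is illustrative only and not part of CoreNLP):

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

// Hypothetical helper for illustration; not the project's actual loader.
public class DataPathResolver {
  public static InputStream open(String relativePath) throws IOException {
    Path onDisk = Paths.get(relativePath);
    if (Files.exists(onDisk)) {
      // Found relative to the current working directory (e.g. a source checkout).
      return Files.newInputStream(onDisk);
    }
    // Otherwise fall back to the classpath.
    InputStream in = Thread.currentThread().getContextClassLoader()
        .getResourceAsStream(relativePath);
    if (in == null) {
      throw new IOException("Could not find data file: " + relativePath);
    }
    return in;
  }
}

For example, DataPathResolver.open(wordVectorFile) would locate edu/stanford/nlp/neural/wordVector.txt wherever the integration-test environment provides it.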

itest/src/edu/stanford/nlp/pipeline/DeterministicCorefAnnotatorITest.java

+2-1
@@ -12,12 +12,12 @@
 import edu.stanford.nlp.ling.CoreAnnotations;
 import edu.stanford.nlp.ling.CoreLabel;
 import edu.stanford.nlp.dcoref.CorefCoreAnnotations;
-import edu.stanford.nlp.dcoref.CorefCoreAnnotations;
 import edu.stanford.nlp.util.CoreMap;
 
 public class DeterministicCorefAnnotatorITest extends TestCase {
   private static AnnotationPipeline pipeline;
 
+  @Override
   public void setUp() throws Exception {
     synchronized(DeterministicCorefAnnotatorITest.class) {
       pipeline = new AnnotationPipeline();
@@ -131,4 +131,5 @@ public static void main(String[] args) throws Exception {
     DeterministicCorefAnnotatorITest itest = new DeterministicCorefAnnotatorITest();
     itest.testDeterministicCorefAnnotator();
   }
+
 }
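
Besides dropping a duplicated import, the substantive edit here is the @Override annotation on setUp(): the class extends JUnit 3's TestCase, whose setUp() runs before each test, and the annotation asks the compiler to confirm the method really overrides the superclass version. A small generic sketch of that pattern (illustrative only, with hypothetical names, not CoreNLP code):

import junit.framework.TestCase;

public class ExamplePipelineITest extends TestCase {
  private static Object pipeline;  // stands in for a shared AnnotationPipeline

  @Override
  public void setUp() throws Exception {
    // JUnit 3 invokes setUp() before every test method; @Override lets the
    // compiler catch a signature typo that would silently stop it overriding.
    synchronized (ExamplePipelineITest.class) {
      if (pipeline == null) {
        pipeline = new Object();  // hypothetical one-time expensive setup
      }
    }
  }

  public void testPipelineIsInitialized() {
    assertNotNull(pipeline);
  }
}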

scripts/makeSerialized.csh

+1-1
@@ -145,7 +145,7 @@ java -mx1500m edu.stanford.nlp.parser.lexparser.LexicalizedParser -evals "factDA
 # This now works
 ( echo "Running xinhuaFactored from serialized (check without specifying -tLPP) on $host -server" ; time java -server -mx1800m edu.stanford.nlp.parser.lexparser.LexicalizedParser -evals "factDA,tsv" -maxLength 40 -loadFromSerializedFile xinhuaFactored.ser.gz -test $ctb 001-025 ) >>& ./serializedParsers.log
 
-( echo "Running chinesePCFG (simplified for use in the RNN parser) on $host -server" ; time java -server -mx4g edu.stanford.nlp.parser.lexparser.LexicalizedParser -evals "factDA,tsv" -tLPP edu.stanford.nlp.parser.lexparser.ChineseTreebankParserParams -chineseFactored -PCFG -compactGrammar 0 -saveToSerializedFile chinesePCFG-simple.ser.gz -maxLength 40 -train $ctb7train -test $ctb7test ) >>& ./serializedParsers.log
+( echo "Running chinesePCFG (simplified for use in the RNN parser) on $host -server" ; time java -server -mx4g edu.stanford.nlp.parser.lexparser.LexicalizedParser -evals "factDA,tsv" -tLPP edu.stanford.nlp.parser.lexparser.ChineseTreebankParserParams -chineseFactored -PCFG -hMarkov 1 -nomarkNPconj -compactGrammar 0 -saveToSerializedFile chinesePCFG.simple.ser.gz -maxLength 40 -train $ctb7train -test $ctb7test ) >>& ./serializedParsers.log
 
 # German Factored binary from Negra (version 2)
 # $negra 3 is the dev set

scripts/pos-tagger/Makefile

+72
@@ -0,0 +1,72 @@
+# TODO: is there some way to make all of the targets use the same command?
+
+ARABIC_TEST = format=TREES,/u/nlp/data/lexparser/trees/Arabic/2-Unvoc-Test.utf8.txt
+
+CHINESE_TEST = format=TREES,/u/nlp/data/chinese/ctb7/test.mrg
+
+ENGLISH_TEST = /u/nlp/data/pos-tagger/english/test-wsj-22-24
+
+FRENCH_TEST = format=TREES,/u/nlp/data/lexparser/trees/French/FTB-Test.utf8.txt
+
+GERMAN_TEST = /u/nlp/data/pos-tagger/german/german-dev.txt
+
+.SECONDEXPANSION:
+
+all: arabic chinese english french german testing wsj
+.PHONY: all arabic chinese english french german testing wsj
+
+arabic: arabic.tagger arabic-train.tagger
+
+# we release an arabic model trained on everything, with a
+# corresponding model on train only for testing purposes
+arabic.tagger arabic-train.tagger: $$@.props
+	@echo Training $@
+	@echo Will test on $(ARABIC_TEST)
+	java -mx6g edu.stanford.nlp.tagger.maxent.MaxentTagger -props $@.props > $@.out 2>&1
+	java -mx6g edu.stanford.nlp.tagger.maxent.MaxentTagger -model $@ -testFile $(ARABIC_TEST) -verboseResults false >> $@.out 2>&1
+
+chinese: chinese-distsim.tagger chinese-nodistsim.tagger
+
+chinese-nodistsim.tagger chinese-distsim.tagger: $$@.props
+	@echo Training $@
+	@echo Will test on $(CHINESE_TEST)
+	java -mx6g edu.stanford.nlp.tagger.maxent.MaxentTagger -props $@.props > $@.out 2>&1
+	java -mx6g edu.stanford.nlp.tagger.maxent.MaxentTagger -model $@ -testFile $(CHINESE_TEST) -verboseResults false >> $@.out 2>&1
+
+english: english-bidirectional-distsim.tagger english-caseless-left3words-distsim.tagger english-left3words-distsim.tagger
+
+english-bidirectional-distsim.tagger english-caseless-left3words-distsim.tagger english-left3words-distsim.tagger: $$@.props
+	@echo Training $@
+	@echo Will test on $(ENGLISH_TEST)
+	java -mx6g edu.stanford.nlp.tagger.maxent.MaxentTagger -props $@.props > $@.out 2>&1
+	java -mx6g edu.stanford.nlp.tagger.maxent.MaxentTagger -model $@ -testFile $(ENGLISH_TEST) -verboseResults false >> $@.out 2>&1
+
+french: french.tagger
+
+french.tagger: $$@.props
+	@echo Training $@
+	@echo Will test on $(FRENCH_TEST)
+	java -mx6g edu.stanford.nlp.tagger.maxent.MaxentTagger -props $@.props > $@.out 2>&1
+	java -mx6g edu.stanford.nlp.tagger.maxent.MaxentTagger -model $@ -testFile $(FRENCH_TEST) -verboseResults false >> $@.out 2>&1
+
+german: german-dewac.tagger german-fast.tagger german-fast-caseless.tagger german-hgc.tagger
+
+german-dewac.tagger german-fast.tagger german-fast-caseless.tagger german-hgc.tagger: $$@.props
+	@echo Training $@
+	@echo Will test on $(GERMAN_TEST)
+	java -mx6g edu.stanford.nlp.tagger.maxent.MaxentTagger -props $@.props > $@.out 2>&1
+	java -mx6g edu.stanford.nlp.tagger.maxent.MaxentTagger -model $@ -testFile $(GERMAN_TEST) -verboseResults false >> $@.out 2>&1
+
+testing: testing.tagger
+
+testing.tagger:
+	@echo Training $@
+	java -mx6g edu.stanford.nlp.tagger.maxent.MaxentTagger -props $@.props > $@.out 2>&1
+
+wsj: wsj-0-18-bidirectional-distsim.tagger wsj-0-18-bidirectional-nodistsim.tagger wsj-0-18-caseless-left3words-distsim.tagger wsj-0-18-left3words-distsim.tagger wsj-0-18-left3words-nodistsim.tagger
+
+wsj-0-18-bidirectional-distsim.tagger wsj-0-18-bidirectional-nodistsim.tagger wsj-0-18-caseless-left3words-distsim.tagger wsj-0-18-left3words-distsim.tagger wsj-0-18-left3words-nodistsim.tagger: $$@.props
+	@echo Training $@
+	@echo Will test on $(ENGLISH_TEST)
+	java -mx6g edu.stanford.nlp.tagger.maxent.MaxentTagger -props $@.props > $@.out 2>&1
+	java -mx6g edu.stanford.nlp.tagger.maxent.MaxentTagger -model $@ -testFile $(ENGLISH_TEST) -verboseResults false >> $@.out 2>&1
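
A note on how this new Makefile wires up its targets: the .SECONDEXPANSION directive makes GNU Make expand prerequisites a second time, so the escaped $$@ in rules such as "arabic.tagger arabic-train.tagger: $$@.props" resolves to the target name, giving each .tagger target a dependency on its own .props file. Running, for example, "make english" would build english-bidirectional-distsim.tagger, english-caseless-left3words-distsim.tagger, and english-left3words-distsim.tagger, training each from its matching props file and then evaluating it on $(ENGLISH_TEST). This presumably assumes the corresponding .props files sit next to the Makefile and that the /u/nlp/... test sets are reachable, which is likely only true inside the Stanford NLP filesystem.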
