answeclass

kalyanp · Sep 9, 2014 · 4a6f72f · 4a6f72f
1 parent df2c5b0
commit 4a6f72f
Show file tree

Hide file tree

Showing 625 changed files with 23,259 additions and 103,345 deletions.
diff --git a/JavaNLP-core.eml b/JavaNLP-core.eml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<component inheritJdk="true">
+<component LANGUAGE_LEVEL="JDK_1_6" inheritJdk="true">
 	<exclude-output/>
 	<contentEntry url="file://$MODULE_DIR$">
 		<testFolder url="file://$MODULE_DIR$/test/src"/>

diff --git a/build.xml b/build.xml
@@ -15,8 +15,8 @@
   <property name="compile.debug"       value="true"/>
   <property name="compile.deprecation" value="false"/>
   <property name="compile.optimize"    value="true"/>
-  <property name="compile.source"      value="1.8" />
-  <property name="compile.target"      value="1.8" />
+  <property name="compile.source"      value="1.7" />
+  <property name="compile.target"      value="1.7" />
   <property name="compile.encoding"    value="utf-8" />
 
   <target name="classpath" description="Sets the classpath">
@@ -305,11 +305,7 @@
     <buildjsp webapp.path="edu/stanford/nlp/parser/webapp"
               webapp.war="parser.war"
               webapp.jar="javanlp-core.jar">
-      <webapp.lib>
-        <lib dir="/u/nlp/data/StanfordCoreNLPModels">
-          <include name="stanford-spanish-corenlp-models-current.jar"/>
-        </lib>
-      </webapp.lib>
+      <webapp.lib/> <!-- don't need anything! -->
       <webapp.data>
         <zipfileset prefix="WEB-INF/data"
                     file="/u/nlp/data/lexparser/englishPCFG.ser.gz"/>
@@ -321,8 +317,6 @@
                     file="/u/nlp/data/gale/segtool/stanford-seg/classifiers-2010/05202008-ctb6.processed-chris6.lex.gz"/>
         <zipfileset prefix="WEB-INF/data/chinesesegmenter"
                     dir="/u/nlp/data/gale/segtool/stanford-seg/releasedata"/>
-        <zipfileset prefix="WEB-INF/data"
-                    file="/u/nlp/data/lexparser/spanishPCFG.ser.gz"/>
         <zipfileset file="${data.path}/webapps/favicon.ico"/>
       </webapp.data>
     </buildjsp>
@@ -370,15 +364,17 @@
       </lib>
       <lib dir="${basedir}/lib">
         <include name="commons-lang3-3.1.jar"/>
-        <include name="xom-1.2.10.jar"/>
+        <include name="xom-1.2.8.jar"/>
+        <include name="xalan.jar"/>
+        <include name="serializer.jar"/>
+        <include name="xercesImpl.jar"/>
         <include name="xml-apis.jar"/>
         <include name="joda-time.jar"/>
         <include name="jollyday-0.4.7.jar"/>
       </lib>
       <!-- note for John: c:/Users/John Bauer/nlp/stanford-releases -->
       <lib dir="/u/nlp/data/StanfordCoreNLPModels">
         <include name="stanford-corenlp-models-current.jar"/>
-        <include name="stanford-chinese-corenlp-models-current.jar"/>
       </lib>
       <classes dir="${source.path}/edu/stanford/nlp/pipeline">
         <include name="StanfordCoreNLP.properties"/>

diff --git a/data/edu/stanford/nlp/patterns/surface/example.properties b/data/edu/stanford/nlp/patterns/surface/example.properties
@@ -1,4 +1,4 @@
-#### NOTE: for all flags and their description, see the javadoc. Important parameters (in our experience) that you should tune for your dataset are marked with ***
+#### NOTE: for all flags and their description, see the javadoc. Important parameters (in our experience) that you should tune for your dataset are marked with *** 
 
 #name for the saved files for the output of the system (useful for comparing results of different experiments with different variables etc
 identifier=useNERRestriction
@@ -30,7 +30,7 @@ file=${DIR}/presidents.txt
 #fileFormat=ser
 #file= ${DIR}/presidents_sents.ser
 
-#We are learning names of presidential candidates, places, and other names
+#We are learning names of presidential candidates, places, and other names 
 seedWordsFiles=NAME,${DIR}/names.txt;PLACE,${DIR}/places.txt;OTHER,${DIR}/otherpeople.txt
 
 #You can evaluate two ways; both presented here.
@@ -74,13 +74,13 @@ usePOS4Pattern = true
 #Ignore words {a, an, the} while matching the patterns to text (advisable true)
 useFillerWordsInPat = false
 
-#***Specific allowed tags' initials for the target phrase for each label while creating the patterns (if not specified, every tag is acceptable to create a pattern). Tag initials can be written as N or NN or J or N,J etc. E.g.: NAME,N,J;PLACE,N. If
+#***Specific allowed tags' initials for the target phrase for each label while creating the patterns (if not specified, every tag is acceptable to create a pattern). Tag initials can be written as N or NN or J or N,J etc. E.g.: NAME,N,J;PLACE,N. If 
 targetAllowedTagsInitialsStr=NAME,N;OTHER,N
 
 #You can save all possible patterns for all tokens in the flag allPatternsFile so you wouldn't need to calculate them everytime.
 computeAllPatterns = true
 
-#Save or read (if computeAllPatterns is false) from here
+#Save or read (if computeAllPatterns is false) from here 
 allPatternsFile= ${DIR}/${identifier}_allpatterns.ser
 
 #***maximum Num of allowed words in the target phrase
@@ -127,7 +127,7 @@ usePatternResultAsLabel=true
 #remove common stop words from phrases to get clean phrases (for example, "disease" instead of "some disease")
 removeStopWordsFromSelectedPhrases = true
 
-#Do not learn phrases that have any stop word
+#Do not learn phrases that have any stop word 
 removePhrasesWithStopWords = false
 
 
@@ -182,10 +182,10 @@ useAvgInsteadofMinPhraseScoring=true
 #only if wordClassClusterFile is provided
 usePhraseEvalWordClass=false
 
-#tf-idf scoring w.r.t to the domain
+#tf-idf scoring w.r.t to the domain 
 usePhraseEvalDomainNgram=false
 
-#use pattern weights in scoring phrases extracted by them, if usePhraseEvalPatWtByFreq is true. otherwise it's just a tfidf like score
+#use pattern weights in scoring phrases extracted by them, if usePhraseEvalPatWtByFreq is true. otherwise it's just a tfidf like score 
 usePatternWeights=true
 
 #basically patwt/log(freq), patwt = 1 if usePatternWeights is false
@@ -231,8 +231,8 @@ debug = 3
 #stop words file
 stopWordsPatternFiles=${DIR}/stopwords.txt
 
-englishWordsFiles=${stopWordsPatternFiles}
-commonWordsPatternFiles= ${stopWordsPatternFiles}
+englishWordsFiles=${stopWordsPatternFiles} 
+commonWordsPatternFiles= ${stopWordsPatternFiles} 
 #You can give some common words like this
 #commonWordsPatternFiles =${DIR}/lists/commonEngWords1k.txt
 

diff --git a/doc/classify/README.txt b/doc/classify/README.txt
@@ -1,4 +1,4 @@
-Stanford Classifier v3.4.1 - 2014-08-27
+Stanford Classifier v3.4 - 2014-06-16
 -------------------------------------------------
 
 Copyright (c) 2003-2012 The Board of Trustees of 
@@ -76,8 +76,6 @@ LICENSE
 CHANGES
 -------------------------
 
-2014-08-27    3.4.1     Update for compatibility 
-
 2014-06-16      3.4     Update for compatibility 
 
 2014-01-04    3.3.1     Bugfix release 

diff --git a/doc/corenlp/README.txt b/doc/corenlp/README.txt
@@ -42,8 +42,6 @@ LICENSE
 CHANGES
 ---------------------------------
 
-2014-08-27    3.4.1     Add Spanish models 
-
 2014-06-16      3.4     Add shift reduce parser 
 
 2014-01-04    3.3.1     Bugfix release

diff --git a/doc/corenlp/pom-full.xml b/doc/corenlp/pom-full.xml
@@ -2,7 +2,7 @@
   <modelVersion>4.0.0</modelVersion>
   <groupId>edu.stanford.nlp</groupId>
   <artifactId>stanford-corenlp</artifactId>
-  <version>3.4.1</version>
+  <version>3.4</version>
   <packaging>jar</packaging>
   <name>Stanford CoreNLP</name>
   <description>Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.</description>
@@ -14,8 +14,8 @@
     </license>
   </licenses>
   <scm>
-    <url>http://nlp.stanford.edu/software/stanford-corenlp-2014-08-27.zip</url>
-    <connection>http://nlp.stanford.edu/software/stanford-corenlp-2014-08-27.zip</connection>
+    <url>http://nlp.stanford.edu/software/stanford-corenlp-2014-06-16.zip</url>
+    <connection>http://nlp.stanford.edu/software/stanford-corenlp-2014-06-16.zip</connection>
   </scm>
   <developers>
     <developer>

diff --git a/doc/lexparser/README.txt b/doc/lexparser/README.txt
@@ -1,4 +1,4 @@
-Stanford Lexicalized Parser v3.4.1 - 2014-08-27
+Stanford Lexicalized Parser v3.4 - 2014-06-16
 -----------------------------------------------
 
 Copyright (c) 2002-2012 The Board of Trustees of The Leland Stanford Junior
@@ -206,8 +206,6 @@ LICENSE
 CHANGES
 ---------------------------------
 
-2014-08-27    3.4.1     Add Spanish models 
-
 2014-06-16      3.4     Shift-reduce parser 
 
 2014-01-04    3.3.1     Bugfix release, dependency improvements 

diff --git a/doc/lexparser/pom.xml b/doc/lexparser/pom.xml
@@ -2,7 +2,7 @@
   <modelVersion>4.0.0</modelVersion>
   <groupId>edu.stanford.nlp</groupId>
   <artifactId>stanford-parser</artifactId>
-  <version>3.4.1</version>
+  <version>3.4</version>
   <packaging>jar</packaging>
   <name>Stanford Parser</name>
   <description>Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.</description>
@@ -14,8 +14,8 @@
     </license>
   </licenses>
   <scm>
-    <url>http://nlp.stanford.edu/software/stanford-parser-2014-08-27.zip</url>
-    <connection>http://nlp.stanford.edu/software/stanford-parser-2014-08-27.zip</connection>
+    <url>http://nlp.stanford.edu/software/stanford-parser-2014-06-16.zip</url>
+    <connection>http://nlp.stanford.edu/software/stanford-parser-2014-06-16.zip</connection>
   </scm>
   <developers>
     <developer>

diff --git a/doc/ner/README.txt b/doc/ner/README.txt
@@ -1,4 +1,4 @@
-Stanford NER - v3.4.1 - 2014-08-27
+Stanford NER - v3.4 - 2014-06-16
 ----------------------------------------------
 
 This package provides a high-performance machine learning based named
@@ -165,8 +165,6 @@ PERSON	ORGANIZATION	LOCATION
 CHANGES
 --------------------
 
-2014-08-27    3.4.1     Add Spanish models 
-
 2014-06-16      3.4     Fix serialization bug 
 
 2014-01-04    3.3.1     Bugfix release
-Original file line number
+Diff line change
@@ Expand Up / @@ -42,8 +42,6 @@ LICENSE @@
     CHANGES
     ---------------------------------
-2014-08-27    3.4.1     Add Spanish models
 2014-06-16      3.4     Add shift reduce parser
 2014-01-04    3.3.1     Bugfix release
@@ Expand Down @@