Skip to content

Commit 5412765

Browse files
acvogel authored and Stanford NLP committed
Merge branch 'master' of jamie.stanford.edu:/u/nlp/git/javanlp
1 parent 3080af5 commit 5412765

File tree

4 files changed

+630
-4
lines changed

4 files changed

+630
-4
lines changed

src/edu/stanford/nlp/ie/AbstractSequenceClassifier.java

+8-2
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ public abstract class AbstractSequenceClassifier<IN extends CoreMap> implements
8787
private CoreTokenFactory<IN> tokenFactory;
8888
public int windowSize;
8989
// different threads can add or query knownLCWords at the same time,
90-
// so we need a concurrent data structure
91-
protected Set<String> knownLCWords = Collections.newSetFromMap(new ConcurrentHashMap<String,Boolean>());
90+
// so we need a concurrent data structure; it is created in reinit()
91+
protected Set<String> knownLCWords = null;
9292

9393
private DocumentReaderAndWriter<IN> defaultReaderAndWriter;
9494
public DocumentReaderAndWriter<IN> defaultReaderAndWriter() {
@@ -165,6 +165,12 @@ protected final void reinit() {
165165
} else {
166166
plainTextReaderAndWriter = makePlainTextReaderAndWriter();
167167
}
168+
169+
if (!flags.useKnownLCWords) {
170+
knownLCWords = Collections.emptySet();
171+
} else if (knownLCWords == null || knownLCWords.size() == 0) {
172+
knownLCWords = Collections.newSetFromMap(new ConcurrentHashMap<String,Boolean>());
173+
}
168174
}
169175

170176
/**

src/edu/stanford/nlp/sequences/ObjectBankWrapper.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ private void doBasicStuff(List<IN> doc) {
136136
if (flags.wordFunction != null) {
137137
word = flags.wordFunction.apply(word);
138138
}
139-
if (word.length() > 0) {
139+
if (flags.useKnownLCWords && word.length() > 0) {
140140
char ch = word.charAt(0);
141141
if (Character.isLowerCase(ch)) {
142142
knownLCWords.add(word);

src/edu/stanford/nlp/sequences/SeqClassifierFlags.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -996,6 +996,7 @@ public class SeqClassifierFlags implements Serializable {
996996
public boolean useHardGE = false;
997997
public boolean useCRFforUnsup = false;
998998
public boolean useGEforSup = false;
999+
public boolean useKnownLCWords = true;
9991000

10001001
// "ADD VARIABLES ABOVE HERE"
10011002

@@ -2478,7 +2479,8 @@ public void setProperties(Properties props, boolean printProps) {
24782479
useCRFforUnsup = Boolean.parseBoolean(val);
24792480
} else if (key.equalsIgnoreCase("useGEforSup")){
24802481
useGEforSup = Boolean.parseBoolean(val);
2481-
2482+
} else if (key.equalsIgnoreCase("useKnownLCWords")){
2483+
useKnownLCWords = Boolean.parseBoolean(val);
24822484
// ADD VALUE ABOVE HERE
24832485
} else if (key.length() > 0 && !key.equals("prop")) {
24842486
System.err.println("Unknown property: |" + key + '|');

0 commit comments

Comments
 (0)