Skip to content

Commit

Permalink
deal with pbs problems
Browse files: browse the repository at this point in the history
  • Loading branch information
J38 authored and Stanford NLP committed Nov 2, 2017
1 parent bdb301f commit b96959f
Show file tree
Hide file tree
Showing 5 changed files with 134 additions and 205 deletions.
139 changes: 54 additions & 85 deletions src/edu/stanford/nlp/coref/data/Mention.java
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ public Mention(int mentionID, int startIndex, int endIndex, List<CoreLabel> sent
// Mention is identified as being this speaker....
public SpeakerInfo speakerInfo;

private transient String spanString = null;
private transient String lowercaseNormalizedSpanString = null;
transient private String spanString = null;
transient private String lowercaseNormalizedSpanString = null;

public IntCounter<Integer> antecedentOrdering = new IntCounter<>();

Expand Down Expand Up @@ -303,17 +303,16 @@ private Gender getGender(Dictionaries dict, List<String> mStr) {
List<String> convertedStr = new ArrayList<>(2);
convertedStr.add(mStr.get(firstNameIdx));
convertedStr.add("!");
if (dict.genderNumber.containsKey(convertedStr)) return dict.genderNumber.get(convertedStr);
if(dict.genderNumber.containsKey(convertedStr)) return dict.genderNumber.get(convertedStr);

if (dict.genderNumber.containsKey(mStr.subList(firstNameIdx, firstNameIdx+1))) return dict.genderNumber.get(mStr.subList(firstNameIdx, firstNameIdx+1));
if(dict.genderNumber.containsKey(mStr.subList(firstNameIdx, firstNameIdx+1))) return dict.genderNumber.get(mStr.subList(firstNameIdx, firstNameIdx+1));
}

if (mStr.size() > 0 && dict.genderNumber.containsKey(mStr.subList(len-1, len))) return dict.genderNumber.get(mStr.subList(len-1, len));
if(mStr.size() > 0 && dict.genderNumber.containsKey(mStr.subList(len-1, len))) return dict.genderNumber.get(mStr.subList(len-1, len));
return null;
}

private void setDiscourse() {
// utter = headWord.get(CoreAnnotations.UtteranceAnnotation.class);
// utter = headWord.get(CoreAnnotations.UtteranceAnnotation.class);

Pair<IndexedWord, String> verbDependency = findDependentVerb(this);
String dep = verbDependency.second();
Expand All @@ -324,29 +323,20 @@ private void setDiscourse() {
isIndirectObject = false;
isPrepositionObject = false;

if (dep != null) {
switch(dep) {
case "nsubj":
case "csubj":
isSubject = true;
break;
case "dobj":
case "nsubjpass":
case "nsubj:pass":
isDirectObject = true;
break;
case "iobj":
isIndirectObject = true;
break;
default:
if (dep.startsWith("nmod")
&& !dep.equals("nmod:npmod")
&& !dep.equals("nmod:tmod")
&& !dep.equals("nmod:poss")
&& !dep.equals("nmod:agent")) {
isPrepositionObject = true;
}
}
if(dep==null) {
return;
} else if(dep.equals("nsubj") || dep.equals("csubj")) {
isSubject = true;
} else if(dep.equals("dobj") || dep.equals("nsubjpass")){
isDirectObject = true;
} else if(dep.equals("iobj")){
isIndirectObject = true;
} else if(dep.startsWith("nmod")
&& ! dep.equals("nmod:npmod")
&& ! dep.equals("nmod:tmod")
&& ! dep.equals("nmod:poss")
&& ! dep.equals("nmod:agent")) {
isPrepositionObject = true;
}
}

Expand Down Expand Up @@ -633,66 +623,45 @@ private void setAnimacy(Dictionaries dict) {
} else {
animacy = Animacy.UNKNOWN;
}
} else if (nerString.equals("PERSON") || nerString.startsWith("PER")) {
animacy = Animacy.ANIMATE;
} else if (nerString.equals("LOCATION")|| nerString.startsWith("LOC")) {
animacy = Animacy.INANIMATE;
} else if (nerString.equals("MONEY")) {
animacy = Animacy.INANIMATE;
} else if (nerString.equals("NUMBER")) {
animacy = Animacy.INANIMATE;
} else if (nerString.equals("PERCENT")) {
animacy = Animacy.INANIMATE;
} else if (nerString.equals("DATE")) {
animacy = Animacy.INANIMATE;
} else if (nerString.equals("TIME")) {
animacy = Animacy.INANIMATE;
} else if (nerString.equals("MISC")) {
animacy = Animacy.UNKNOWN;
} else if (nerString.startsWith("VEH")) {
animacy = Animacy.UNKNOWN;
} else if (nerString.startsWith("FAC")) {
animacy = Animacy.INANIMATE;
} else if (nerString.startsWith("GPE")) {
animacy = Animacy.INANIMATE;
} else if (nerString.startsWith("WEA")) {
animacy = Animacy.INANIMATE;
} else if (nerString.startsWith("ORG")) {
animacy = Animacy.INANIMATE;
} else {
switch(nerString) {
case "PERSON":
case "PER":
case "PERS":
animacy = Animacy.ANIMATE;
break;
case "LOCATION":
case "LOC":
animacy = Animacy.INANIMATE;
break;
case "MONEY":
animacy = Animacy.INANIMATE;
break;
case "NUMBER":
animacy = Animacy.INANIMATE;
break;
case "PERCENT":
animacy = Animacy.INANIMATE;
break;
case "DATE":
animacy = Animacy.INANIMATE;
break;
case "TIME":
animacy = Animacy.INANIMATE;
break;
case "MISC":
animacy = Animacy.UNKNOWN;
break;
case "VEH":
case "VEHICLE":
animacy = Animacy.UNKNOWN;
break;
case "FAC":
case "FACILITY":
animacy = Animacy.INANIMATE;
break;
case "GPE":
animacy = Animacy.INANIMATE;
break;
case "WEA":
case "WEAPON":
animacy = Animacy.INANIMATE;
break;
case "ORG":
case "ORGANIZATION":
animacy = Animacy.INANIMATE;
break;
default:
animacy = Animacy.UNKNOWN;
}
animacy = Animacy.UNKNOWN;
}
if(mentionType != MentionType.PRONOMINAL) {
// Better heuristics using DekangLin:
if (animacy == Animacy.UNKNOWN) {
if (dict.animateWords.contains(headString)) {
if(animacy == Animacy.UNKNOWN) {
if(dict.animateWords.contains(headString)) {
animacy = Animacy.ANIMATE;
} else if (dict.inanimateWords.contains(headString)) {
}
else if(dict.inanimateWords.contains(headString)) {
animacy = Animacy.INANIMATE;
}
}

}
}

Expand Down Expand Up @@ -1686,4 +1655,4 @@ public int hashCode() {
}


}
}
127 changes: 47 additions & 80 deletions src/edu/stanford/nlp/dcoref/Mention.java
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,6 @@ private Gender getGender(Dictionaries dict, List<String> mStr) {
if(mStr.size() > 0 && dict.genderNumber.containsKey(mStr.subList(len-1, len))) return dict.genderNumber.get(mStr.subList(len-1, len));
return null;
}

private void setDiscourse() {
utter = headWord.get(CoreAnnotations.UtteranceAnnotation.class);

Expand All @@ -321,33 +320,23 @@ private void setDiscourse() {
isIndirectObject = false;
isPrepositionObject = false;

if (dep != null) {
switch(dep) {
case "nsubj":
case "csubj":
isSubject = true;
break;
case "dobj":
case "nsubjpass":
case "nsubj:pass":
isDirectObject = true;
break;
case "iobj":
isIndirectObject = true;
break;
default:
if (dep.startsWith("nmod")
&& !dep.equals("nmod:npmod")
&& !dep.equals("nmod:tmod")
&& !dep.equals("nmod:poss")
&& !dep.equals("nmod:agent")) {
isPrepositionObject = true;
}
}
if(dep==null) {
return;
} else if(dep.equals("nsubj") || dep.equals("csubj")) {
isSubject = true;
} else if(dep.equals("dobj")){
isDirectObject = true;
} else if(dep.equals("iobj")){
isIndirectObject = true;
} else if(dep.startsWith("nmod")
&& ! dep.equals("nmod:npmod")
&& ! dep.equals("nmod:tmod")
&& ! dep.equals("nmod:poss")
&& ! dep.equals("nmod:agent")){
isPrepositionObject = true;
}
}


private void setPerson(Dictionaries dict) {
// only do for pronoun
if(!this.isPronominal()) person = Person.UNKNOWN;
Expand Down Expand Up @@ -625,65 +614,44 @@ private void setAnimacy(Dictionaries dict) {
} else {
animacy = Animacy.UNKNOWN;
}
} else if (nerString.equals("PERSON") || nerString.startsWith("PER")) {
animacy = Animacy.ANIMATE;
} else if (nerString.equals("LOCATION")|| nerString.startsWith("LOC")) {
animacy = Animacy.INANIMATE;
} else if (nerString.equals("MONEY")) {
animacy = Animacy.INANIMATE;
} else if (nerString.equals("NUMBER")) {
animacy = Animacy.INANIMATE;
} else if (nerString.equals("PERCENT")) {
animacy = Animacy.INANIMATE;
} else if (nerString.equals("DATE")) {
animacy = Animacy.INANIMATE;
} else if (nerString.equals("TIME")) {
animacy = Animacy.INANIMATE;
} else if (nerString.equals("MISC")) {
animacy = Animacy.UNKNOWN;
} else if (nerString.startsWith("VEH")) {
animacy = Animacy.UNKNOWN;
} else if (nerString.startsWith("FAC")) {
animacy = Animacy.INANIMATE;
} else if (nerString.startsWith("GPE")) {
animacy = Animacy.INANIMATE;
} else if (nerString.startsWith("WEA")) {
animacy = Animacy.INANIMATE;
} else if (nerString.startsWith("ORG")) {
animacy = Animacy.INANIMATE;
} else {
switch(nerString) {
case "PERSON":
case "PER":
case "PERS":
animacy = Animacy.ANIMATE;
break;
case "LOCATION":
case "LOC":
animacy = Animacy.INANIMATE;
break;
case "MONEY":
animacy = Animacy.INANIMATE;
break;
case "NUMBER":
animacy = Animacy.INANIMATE;
break;
case "PERCENT":
animacy = Animacy.INANIMATE;
break;
case "DATE":
animacy = Animacy.INANIMATE;
break;
case "TIME":
animacy = Animacy.INANIMATE;
break;
case "MISC":
animacy = Animacy.UNKNOWN;
break;
case "VEH":
case "VEHICLE":
animacy = Animacy.UNKNOWN;
break;
case "FAC":
case "FACILITY":
animacy = Animacy.INANIMATE;
break;
case "GPE":
animacy = Animacy.INANIMATE;
break;
case "WEA":
case "WEAPON":
animacy = Animacy.INANIMATE;
break;
case "ORG":
case "ORGANIZATION":
animacy = Animacy.INANIMATE;
break;
default:
animacy = Animacy.UNKNOWN;
}
if (Constants.USE_ANIMACY_LIST) {
animacy = Animacy.UNKNOWN;
}
if(mentionType != MentionType.PRONOMINAL) {
if(Constants.USE_ANIMACY_LIST){
// Better heuristics using DekangLin:
if(animacy == Animacy.UNKNOWN) {
if (dict.animateWords.contains(headString)) {
if(animacy == Animacy.UNKNOWN) {
if(dict.animateWords.contains(headString)) {
animacy = Animacy.ANIMATE;
SieveCoreferenceSystem.logger.finest("Assigned Dekang Lin animacy:\tANIMATE:\t" + headString);
}
else if (dict.inanimateWords.contains(headString)) {
else if(dict.inanimateWords.contains(headString)) {
animacy = Animacy.INANIMATE;
SieveCoreferenceSystem.logger.finest("Assigned Dekang Lin animacy:\tINANIMATE:\t" + headString);
}
Expand All @@ -692,7 +660,6 @@ else if (dict.inanimateWords.contains(headString)) {
}
}


private static final String [] commonNESuffixes = {
"Corp", "Co", "Inc", "Ltd"
};
Expand Down
18 changes: 7 additions & 11 deletions src/edu/stanford/nlp/ie/ChineseQuantifiableEntityNormalizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -430,17 +430,13 @@ private static String normalizedMoneyString(String s, String nextWord) {
boolean notMatched = true;
// We check multiCharCurrencyWords first
for (String currencyWord : multiCharCurrencyWords.keySet()) {
if (notMatched && StringUtils.find(s, currencyWord)) {
switch(currencyWord) {
case "美分" :
multiplier = 0.01;
break;
case "先令" :
multiplier = 0.05;
break;
case "便士" :
multiplier = 1.0/240;
break;
if(notMatched && StringUtils.find(s, currencyWord)) {
if(currencyWord.equals("美分")) {
multiplier = 0.01;
} else if(currencyWord.equals("先令")) {
multiplier = 0.05;
} else if(currencyWord.equals("便士")) {
multiplier = 1.0/240;
}
s = s.replaceAll(currencyWord, "");
currencySign = multiCharCurrencyWords.get(currencyWord);
Expand Down
Loading

0 comments on commit b96959f

Please sign in to comment.