Skip to content

Commit

Permalink
Merge pull request #25 from stevenbedrick/subsumedTermOption
Browse files Browse the repository at this point in the history
Added configuration flag to override default behavior regarding subsumed terms
  • Loading branch information
willjrogers authored May 4, 2023
2 parents 93a9e2c + b5297d5 commit 9b06071
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 17 deletions.
49 changes: 41 additions & 8 deletions src/main/java/gov/nih/nlm/nls/metamap/lite/EntityLookup4.java
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@ public class EntityLookup4 implements EntityLookup {
NegationDetector negationDetector;
boolean addPartOfSpeechTagsFlag =
Boolean.parseBoolean(System.getProperty("metamaplite.enable.postagging","true"));

// Whether to remove subsumed entities, i.e. entities whose text is entirely
// overlapped by another entity (for example, "blood sugar" within "blood sugar level").
// Most of the time we want to remove them; in some high-recall scenarios we don't.
boolean shouldRemoveSubsumedEntities = true;

Properties properties;

/** Part of speech tags used for term lookup, can be set using
Expand Down Expand Up @@ -201,6 +207,11 @@ public EntityLookup4(Properties properties)
}
this.uaMap = UserDefinedAcronym.udasToUA(this.udaMap);
}

if (properties.containsKey("metamaplite.removeSubsumedEntities")) {
this.shouldRemoveSubsumedEntities = Boolean.parseBoolean(properties.getProperty("metamaplite.removeSubsumedEntities"));
}

}

/**
Expand Down Expand Up @@ -634,7 +645,14 @@ public List<Entity> processText(String docid,
entitySet1.add(entity);
}
}
Set<Entity> entitySet = removeSubsumedEntities(entitySet1);

Set<Entity> entitySet;

if (this.shouldRemoveSubsumedEntities) {
entitySet = removeSubsumedEntities(entitySet1);
} else {
entitySet = entitySet1;
}

List<Entity> resultList = new ArrayList<Entity>(entitySet);
Collections.sort(resultList, entityComparator);
Expand Down Expand Up @@ -712,7 +730,14 @@ public List<Entity> processPassage(String docid, BioCPassage passage,
entitySet1.add(entity);
}
}
Set<Entity> entitySet = removeSubsumedEntities(entitySet1);


Set<Entity> entitySet;
if (this.shouldRemoveSubsumedEntities) {
entitySet = removeSubsumedEntities(entitySet1);
} else {
entitySet = entitySet1;
}

List<Entity> resultList = new ArrayList<Entity>(entitySet);
Collections.sort(resultList, entityComparator);
Expand Down Expand Up @@ -755,7 +780,13 @@ public List<Entity> processSentences(String docid, List<Sentence> sentenceList,
}
i++;
}
Set<Entity> entitySet = removeSubsumedEntities(entitySet0);
Set<Entity> entitySet;
if (this.shouldRemoveSubsumedEntities) {
entitySet = removeSubsumedEntities(entitySet0);
} else {
entitySet = entitySet0;
}

List<Entity> resultList = new ArrayList<Entity>(entitySet);
Collections.sort(resultList, entityComparator);
return resultList;
Expand All @@ -767,11 +798,13 @@ public Set<BioCAnnotation> generateBioCEntitySet(String docid,
String fieldid = "text";
try {
Set<BioCAnnotation> bioCEntityList = new HashSet<BioCAnnotation>();
Set<Entity> entitySet =
removeSubsumedEntities
(this.processSentenceTokenList(docid, fieldid, sentenceTokenList,
new HashSet<String>(),
new HashSet<String>()));
Set<Entity> entitySet = this.processSentenceTokenList(docid, fieldid, sentenceTokenList,
new HashSet<String>(),
new HashSet<String>());
if (this.shouldRemoveSubsumedEntities) {
entitySet = removeSubsumedEntities(entitySet);
}

for (Entity entity: entitySet) {
for (Ev ev: entity.getEvList()) {
BioCAnnotation entityAnnotation = new BioCAnnotation();
Expand Down
45 changes: 36 additions & 9 deletions src/main/java/gov/nih/nlm/nls/metamap/lite/EntityLookup5.java
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,11 @@ public void defaultAllowedPhraseTypes() {
this.allowedPhraseTypeSet.add("NP"); // just noun phrases for now.
}

// Whether to remove subsumed entities, i.e. entities whose text is entirely
// overlapped by another entity (for example, "blood sugar" within "blood sugar level").
// Most of the time we want to remove them; in some high-recall scenarios we don't.
boolean shouldRemoveSubsumedEntities = true;

public EntityLookup5(Properties properties)
throws IOException, FileNotFoundException
{
Expand Down Expand Up @@ -266,6 +271,11 @@ public EntityLookup5(Properties properties)
}
this.uaMap = UserDefinedAcronym.udasToUA(this.udaMap);
}

// Let the "metamaplite.removeSubsumedEntities" property, when present, override the default.
if (properties.containsKey("metamaplite.removeSubsumedEntities")) {
this.shouldRemoveSubsumedEntities = Boolean.parseBoolean(properties.getProperty("metamaplite.removeSubsumedEntities"));
}
}

/**
Expand Down Expand Up @@ -920,8 +930,13 @@ public List<Entity> processText(String docid,
}
}

// remove any entities subsumed by another entity
Set<Entity> entitySet1 = removeSubsumedEntities(entitySet0);
Set<Entity> entitySet1;
if (this.shouldRemoveSubsumedEntities) {
// remove any entities subsumed by another entity
entitySet1 = removeSubsumedEntities(entitySet0);
} else {
entitySet1 = entitySet0;
}
// filter entities by semantic type and source sets.
Set<Entity> entitySet = new HashSet<Entity>();
for (Entity entity: entitySet1) {
Expand Down Expand Up @@ -999,7 +1014,12 @@ public List<Entity> processPassage(String docid, BioCPassage passage,
}

// remove any entities subsumed by another entity, unless removal is disabled
Set<Entity> entitySet1 = removeSubsumedEntities(entitySet0);
Set<Entity> entitySet1;
if (this.shouldRemoveSubsumedEntities) {
entitySet1 = removeSubsumedEntities(entitySet0);
} else {
entitySet1 = entitySet0;
}
// filter entities by semantic type and source sets.
Set<Entity> entitySet = new HashSet<Entity>();
for (Entity entity: entitySet1) {
Expand Down Expand Up @@ -1047,7 +1067,12 @@ public List<Entity> processSentences(String docid, List<Sentence> sentenceList,
}
i++;
}
Set<Entity> entitySet = removeSubsumedEntities(entitySet0);
Set<Entity> entitySet;
if (this.shouldRemoveSubsumedEntities) {
entitySet = removeSubsumedEntities(entitySet0);
} else {
entitySet = entitySet0;
}
List<Entity> resultList = new ArrayList<Entity>(entitySet);
Collections.sort(resultList, entityComparator);
return resultList;
Expand All @@ -1059,11 +1084,13 @@ public Set<BioCAnnotation> generateBioCEntitySet(String docid,
String fieldid = "text";
try {
Set<BioCAnnotation> bioCEntityList = new HashSet<BioCAnnotation>();
Set<Entity> entitySet =
removeSubsumedEntities
(this.processSentenceTokenList(docid, fieldid, sentenceTokenList,
new HashSet<String>(),
new HashSet<String>()));
Set<Entity> entitySet = this.processSentenceTokenList(docid, fieldid, sentenceTokenList,
new HashSet<String>(),
new HashSet<String>());
if (this.shouldRemoveSubsumedEntities) {
entitySet = removeSubsumedEntities(entitySet);
}

for (Entity entity: entitySet) {
for (Ev ev: entity.getEvList()) {
BioCAnnotation entityAnnotation = new BioCAnnotation();
Expand Down
3 changes: 3 additions & 0 deletions src/main/java/gov/nih/nlm/nls/ner/MetaMapLite.java
Original file line number Diff line number Diff line change
Expand Up @@ -838,6 +838,9 @@ public static Properties getDefaultConfiguration() {
defaultConfiguration.setProperty("metamaplite.negation.detector",
"gov.nih.nlm.nls.metamap.lite.NegEx");
defaultConfiguration.setProperty("metamaplite.disable.chunker","true");

defaultConfiguration.setProperty("metamaplite.removeSubsumedEntities", "true");

return defaultConfiguration;
}

Expand Down

0 comments on commit 9b06071

Please sign in to comment.