Minor openie code cleanup.
manning authored and Stanford NLP committed Nov 5, 2017
1 parent 09d0edc commit 29098d5
Showing 2 changed files with 52 additions and 76 deletions.
92 changes: 39 additions & 53 deletions src/edu/stanford/nlp/ie/util/RelationTriple.java
@@ -22,6 +22,7 @@
*/
@SuppressWarnings("UnusedDeclaration")
public class RelationTriple implements Comparable<RelationTriple>, Iterable<CoreLabel> {

/** The subject (first argument) of this triple */
public final List<CoreLabel> subject;

@@ -142,7 +143,7 @@ public String subjectLink() {
* This method will additionally strip out punctuation as well.
*/
public String subjectLemmaGloss() {
return StringUtils.join(canonicalSubject.stream().filter(x -> !x.tag().matches("[\\.\\?,:;'\"!]")).map(x -> x.lemma() == null ? x.word() : x.lemma()), " ");
return StringUtils.join(canonicalSubject.stream().filter(x -> !x.tag().matches("[.?,:;'\"!]")).map(x -> x.lemma() == null ? x.word() : x.lemma()), " ");
}

/** The object of this relation triple, as a String */
@@ -165,7 +166,7 @@ public String objectLink() {
* This method will additionally strip out punctuation as well.
*/
public String objectLemmaGloss() {
return StringUtils.join(canonicalObject.stream().filter(x -> !x.tag().matches("[\\.\\?,:;'\"!]")).map(x -> x.lemma() == null ? x.word() : x.lemma()), " ");
return StringUtils.join(canonicalObject.stream().filter(x -> !x.tag().matches("[.?,:;'\"!]")).map(x -> x.lemma() == null ? x.word() : x.lemma()), " ");
}

/**
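
The pattern simplification above (and the identical one in relationLemmaGloss below) relies on '.' and '?' being literal inside a regex character class, so the shorter pattern accepts exactly the same tags. A quick standalone check of that equivalence (not part of the commit):

    import java.util.Arrays;

    public class CharClassCheck {
      public static void main(String[] args) {
        String before = "[\\.\\?,:;'\"!]";  // escaped form removed by this commit
        String after  = "[.?,:;'\"!]";      // simplified form used going forward
        for (String tag : Arrays.asList(".", "?", ",", ":", ";", "'", "\"", "!", "NN", "VBZ")) {
          // Prints the same pair of booleans for every input.
          System.out.println(tag + "\t" + tag.matches(before) + "\t" + tag.matches(after));
        }
      }
    }
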
@@ -196,7 +197,7 @@ public String relationLemmaGloss() {
String relationGloss = (
(prefixBe ? "be " : "")
+ StringUtils.join(relation.stream()
.filter(x -> x.tag() == null || (!x.tag().matches("[\\.\\?,:;'\"!]") && (x.lemma() == null || !x.lemma().matches("[\\.,;'\"\\?!]"))))
.filter(x -> x.tag() == null || (!x.tag().matches("[.?,:;'\"!]") && (x.lemma() == null || !x.lemma().matches("[.,;'\"?!]"))))
.map(x -> x.lemma() == null ? x.word() : x.lemma()),
" ")
.toLowerCase()
@@ -225,7 +226,7 @@ public String confidenceGloss() {
return new DecimalFormat("0.000").format(confidence);
}

protected Pair<Integer, Integer> getSpan(List<CoreLabel> tokens, Function<CoreLabel, Integer> toMin, Function<CoreLabel, Integer> toMax) {
private static Pair<Integer, Integer> getSpan(List<CoreLabel> tokens, Function<CoreLabel, Integer> toMin, Function<CoreLabel, Integer> toMax) {
int min = Integer.MAX_VALUE;
int max = Integer.MIN_VALUE;
for (CoreLabel token : tokens) {
@@ -243,19 +244,15 @@ public Pair<Integer, Integer> subjectTokenSpan() {
}

/**
* <p>
* Get a representative span for the relation expressed by this triple.
* </p>
*
* <p>
* This is a bit more complicated than the subject and object spans, as the relation
* span is occasionally discontinuous.
* If this is the case, this method returns the largest contiguous chunk.
* If the relation span is empty, return the object span.
* </p>
*/
public Pair<Integer, Integer> relationTokenSpan() {
if (relation.size() == 0) {
if (relation.isEmpty()) {
return objectTokenSpan();
} else if (relation.size() == 1) {
return Pair.makePair(relation.get(0).index() - 1, relation.get(0).index());
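
For the discontinuous case described in the Javadoc, "largest contiguous chunk" amounts to finding the longest run of consecutive token indices. A minimal sketch of that idea (an illustration only, assuming 1-based, already-sorted CoreLabel indices; the actual implementation is elided by the diff):

    // Example: relation token indices [3, 4, 7, 8, 9] -> {6, 9}, i.e. tokens 7..9,
    // using the same convention as the other spans (0-based begin, end-exclusive).
    static int[] largestContiguousSpan(int[] sortedIndices) {
      int bestStart = sortedIndices[0], bestLen = 1;
      int curStart  = sortedIndices[0], curLen  = 1;
      for (int i = 1; i < sortedIndices.length; i++) {
        if (sortedIndices[i] == sortedIndices[i - 1] + 1) {
          curLen++;                      // run continues
        } else {
          curStart = sortedIndices[i];   // run broken; start a new one
          curLen = 1;
        }
        if (curLen > bestLen) { bestStart = curStart; bestLen = curLen; }
      }
      return new int[]{ bestStart - 1, bestStart - 1 + bestLen };
    }
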
@@ -435,21 +432,24 @@ public int hashCode() {
// return result;
}

/** Print a human-readable description of this relation triple, as a tab-separated line */
/** Print a human-readable description of this relation triple, as a tab-separated line. */
@Override
public String toString() {
return "" + this.confidence + "\t" + subjectGloss() + "\t" + relationGloss() + "\t" + objectGloss();
return String.valueOf(this.confidence) + '\t' + subjectGloss() + '\t' + relationGloss() + '\t' + objectGloss();
}
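
As a concrete illustration of the tab-separated form (values invented for this example):

    // For a hypothetical triple (Obama; was born in; Hawaii) with confidence 1.0:
    // triple.toString()  ->  "1.0\tObama\twas born in\tHawaii"
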


/** Print in the format expected for https://www.cs.bgu.ac.il/~gabriels/emnlp2016.pdf, with equivalence classes. */
/** Print in the format expected by Gabriel Stanovsky and Ido Dagan, Creating a Large Benchmark for Open
* Information Extraction, EMNLP 2016. https://gabrielstanovsky.github.io/assets/papers/emnlp16a/paper.pdf ,
* with equivalence classes.
*/
public String toQaSrlString(CoreMap sentence) {
String equivalenceClass = subjectHead().index() + "." + relationHead().index() + "." + objectHead().index();
return equivalenceClass + "\t" +
subjectGloss().replace('\t', ' ') + "\t" +
relationGloss().replace('\t', ' ') + "\t" +
objectGloss().replace('\t', ' ') + "\t" +
confidence + "\t" +
String equivalenceClass = subjectHead().index() + "." + relationHead().index() + '.' + objectHead().index();
return equivalenceClass + '\t' +
subjectGloss().replace('\t', ' ') + '\t' +
relationGloss().replace('\t', ' ') + '\t' +
objectGloss().replace('\t', ' ') + '\t' +
confidence + '\t' +
StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class).stream().map(x -> x.word().replace('\t', ' ').replace(" ", "")), " ");
}

@@ -468,44 +468,38 @@ public String toReverbString(String docid, CoreMap sentence) {
relationIndex = relation.get(0).index() - 1;
relationIndexEnd = relation.get(relation.size() - 1).index();
}
if (!subject.isEmpty()) {
if ( ! subject.isEmpty()) {
if (sentIndex < 0) { sentIndex = subject.get(0).sentIndex(); }
subjIndex = subject.get(0).index() - 1;
subjIndexEnd = subject.get(subject.size() - 1).index();
}
if (!object.isEmpty()) {
if ( ! object.isEmpty()) {
if (sentIndex < 0) { sentIndex = subject.get(0).sentIndex(); }
objIndex = object.get(0).index() - 1;
objIndexEnd = object.get(object.size() - 1).index();
}
return (docid == null ? "no_doc_id" : docid) + "\t" +
sentIndex + "\t" +
subjectGloss().replace('\t', ' ') + "\t" +
relationGloss().replace('\t', ' ') + "\t" +
objectGloss().replace('\t', ' ') + "\t" +
subjIndex + "\t" +
subjIndexEnd+ "\t" +
relationIndex + "\t" +
relationIndexEnd + "\t" +
objIndex + "\t" +
objIndexEnd + "\t" +
confidenceGloss() + "\t" +
StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class).stream().map(x -> x.word().replace('\t', ' ').replace(" ", "")), " ") + "\t" +
StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class).stream().map(CoreLabel::tag), " ") + "\t" +
subjectLemmaGloss().replace('\t', ' ') + "\t" +
relationLemmaGloss().replace('\t', ' ') + "\t" +
return (docid == null ? "no_doc_id" : docid) + '\t' +
sentIndex + '\t' +
subjectGloss().replace('\t', ' ') + '\t' +
relationGloss().replace('\t', ' ') + '\t' +
objectGloss().replace('\t', ' ') + '\t' +
subjIndex + '\t' +
subjIndexEnd+ '\t' +
relationIndex + '\t' +
relationIndexEnd + '\t' +
objIndex + '\t' +
objIndexEnd + '\t' +
confidenceGloss() + '\t' +
StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class).stream().map(x -> x.word().replace('\t', ' ').replace(" ", "")), " ") + '\t' +
StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class).stream().map(CoreLabel::tag), " ") + '\t' +
subjectLemmaGloss().replace('\t', ' ') + '\t' +
relationLemmaGloss().replace('\t', ' ') + '\t' +
objectLemmaGloss().replace('\t', ' ');
}
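
Reading off the concatenation above, the ReVerb-style line is 17 tab-separated columns (the names below are shorthand for the variables and getters used in the method):

    // docid, sentIndex, subjGloss, relGloss, objGloss,
    // subjBegin, subjEnd, relBegin, relEnd, objBegin, objEnd,
    // confidence, tokens, posTags, subjLemma, relLemma, objLemma
    // (the begin/end offsets are 0-based begin, end-exclusive, as computed above)
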

@Override
public int compareTo(RelationTriple o) {
if (this.confidence < o.confidence) {
return -1;
} else if (this.confidence > o.confidence) {
return 1;
} else {
return 0;
}
return Double.compare(this.confidence, o.confidence);
}
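
Double.compare produces the same ordering as the removed three-way branch for ordinary confidence values, and additionally imposes a total order over NaN and -0.0. A tiny illustration (a fragment for jshell or a main method, not part of the commit):

    double a = 0.75, b = 0.90;                              // illustrative confidences
    int byBranch  = (a < b) ? -1 : (a > b) ? 1 : 0;         // the old logic
    int byCompare = Integer.signum(Double.compare(a, b));   // the new logic, sign-normalized
    System.out.println(byBranch == byCompare);              // true for non-NaN values
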

@SuppressWarnings("unchecked")
@@ -641,21 +635,13 @@ public WithLink(List<CoreLabel> subject, List<CoreLabel> canonicalSubject, List<
/** {@inheritDoc} */
@Override
public String subjectLink() {
if (subjectLink.isPresent()) {
return subjectLink.get();
} else {
return super.subjectLink();
}
return subjectLink.orElseGet(super::subjectLink);
}

/** {@inheritDoc} */
@Override
public String objectLink() {
if (objectLink.isPresent()) {
return objectLink.get();
} else {
return super.objectLink();
}
return objectLink.orElseGet(super::objectLink);
}
}
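
Optional.orElseGet takes a Supplier, so the superclass lookup runs only when the Optional is empty, which mirrors the removed if/else exactly. A small standalone illustration with hypothetical values:

    import java.util.Optional;

    public class OrElseGetDemo {
      static String fallbackLink() {
        System.out.println("fallback computed");   // printed only when the Optional is empty
        return "http://example.com/fallback";
      }

      public static void main(String[] args) {
        Optional<String> link = Optional.of("http://example.com/entity");
        System.out.println(link.orElseGet(OrElseGetDemo::fallbackLink));                     // supplier not invoked
        System.out.println(Optional.<String>empty().orElseGet(OrElseGetDemo::fallbackLink)); // supplier invoked
      }
    }
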

36 changes: 13 additions & 23 deletions src/edu/stanford/nlp/naturalli/OpenIE.java
@@ -43,25 +43,19 @@
* "Leveraging Linguistic Structure For Open Domain Information Extraction." Gabor Angeli, Melvin Johnson Premkumar, Christopher Manning. ACL 2015.
* </pre>
*
* <p>
* The paper can be found at <a href="http://nlp.stanford.edu/pubs/2015angeli-openie.pdf">http://nlp.stanford.edu/pubs/2015angeli-openie.pdf</a>.
* </p>
* <p>
*
* Documentation on the system can be found on
* <a href="https://nlp.stanford.edu/software/openie.html">the project homepage</a>,
* or the <a href="http://stanfordnlp.github.io/CoreNLP/openie.html">CoreNLP annotator documentation page</a>.
* The simplest invocation of the system would be something like:
* </p>
*
* <pre>
* java -mx1g -cp stanford-openie.jar:stanford-openie-models.jar edu.stanford.nlp.naturalli.OpenIE
* </pre>
*
* <p>
* Note that this class serves both as an entry point for the OpenIE system, but also as a CoreNLP annotator
* which can be plugged into the CoreNLP pipeline (or any other annotation pipeline).
* </p>
 * Note that this class serves both as an entry point for the OpenIE system and as a CoreNLP annotator
 * which can be plugged into the CoreNLP pipeline (or any other annotation pipeline).
*
* @see OpenIE#annotate(Annotation)
* @see OpenIE#main(String[])
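
To make the annotator route concrete, the usual calling pattern looks roughly like the sketch below. It follows standard CoreNLP usage plus the RelationTriple getters shown earlier in this commit; the annotator list and example sentence are assumptions, not part of the diff.

    import edu.stanford.nlp.ie.util.RelationTriple;
    import edu.stanford.nlp.ling.CoreAnnotations;
    import edu.stanford.nlp.naturalli.NaturalLogicAnnotations;
    import edu.stanford.nlp.pipeline.Annotation;
    import edu.stanford.nlp.pipeline.StanfordCoreNLP;
    import edu.stanford.nlp.util.CoreMap;

    import java.util.Collection;
    import java.util.Properties;

    public class OpenIEPipelineSketch {
      public static void main(String[] args) {
        Properties props = new Properties();
        // openie depends on the annotators listed before it.
        props.setProperty("annotators", "tokenize,ssplit,pos,lemma,depparse,natlog,openie");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        Annotation doc = new Annotation("Obama was born in Hawaii.");
        pipeline.annotate(doc);

        for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
          Collection<RelationTriple> triples =
              sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
          for (RelationTriple triple : triples) {
            System.out.println(triple.confidenceGloss() + "\t" + triple.subjectGloss()
                + "\t" + triple.relationGloss() + "\t" + triple.objectGloss());
          }
        }
      }
    }
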
@@ -173,15 +167,14 @@ public OpenIE() {

/**
 * Create a new OpenIE system, based on the given properties.
*
* @param props The properties to parametrize the system with.
*/
public OpenIE(Properties props) {
// Fill the properties
ArgumentParser.fillOptions(this, props);
Properties withoutOpenIEPrefix = new Properties();
Enumeration<Object> keys = props.keys();
while (keys.hasMoreElements()) {
String key = keys.nextElement().toString();
for (String key : props.stringPropertyNames()) {
withoutOpenIEPrefix.setProperty(key.replace("openie.", ""), props.getProperty(key));
}
ArgumentParser.fillOptions(this, withoutOpenIEPrefix);
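
In practice this means the constructor accepts option keys both with and without the "openie." prefix; for example (the specific option names are assumptions taken from the annotator documentation, not from this diff):

    // Fragment; assumes the imports already used in this file.
    Properties props = new Properties();
    props.setProperty("openie.triple.strict", "true");              // same effect as the un-prefixed "triple.strict"
    props.setProperty("openie.max_entailments_per_clause", "100");  // same effect as the un-prefixed key
    OpenIE extractor = new OpenIE(props);
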
@@ -443,13 +436,10 @@ private static SemanticGraph canonicalizeCoref(SemanticGraph parse, Map<CoreLabe
}

/**
* <p>
* Annotate a single sentence.
* </p>
* <p>
*
* This annotator will, in particular, set the {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.EntailedSentencesAnnotation}
* and {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.RelationTriplesAnnotation} annotations.
* </p>
*/
@SuppressWarnings("unchecked")
public void annotateSentence(CoreMap sentence, Map<CoreLabel, List<CoreLabel>> canonicalMentionMap) {
@@ -506,10 +496,8 @@ public void annotateSentence(CoreMap sentence, Map<CoreLabel, List<CoreLabel>> c
/**
* {@inheritDoc}
*
* <p>
* This annotator will, in particular, set the {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.EntailedSentencesAnnotation}
* and {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.RelationTriplesAnnotation} annotations.
* </p>
* This annotator will, in particular, set the {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.EntailedSentencesAnnotation}
* and {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.RelationTriplesAnnotation} annotations.
*/
@Override
public void annotate(Annotation annotation) {
@@ -630,7 +618,7 @@ public static String tripleToString(RelationTriple extraction, String docid, Cor
case REVERB:
return extraction.toReverbString(docid, sentence);
case OLLIE:
return extraction.confidenceGloss() + ": (" + extraction.subjectGloss() + "; " + extraction.relationGloss() + "; " + extraction.objectGloss() + ")";
return extraction.confidenceGloss() + ": (" + extraction.subjectGloss() + "; " + extraction.relationGloss() + "; " + extraction.objectGloss() + ')';
case DEFAULT:
return extraction.toString();
case QA_SRL:
@@ -643,14 +631,15 @@

/**
* Process a single file or line of standard in.
*
* @param pipeline The annotation pipeline to run the lines of the input through.
* @param docid The docid of the document we are extracting.
* @param document the document to annotate.
*/
@SuppressWarnings("SynchronizeOnNonFinalField")
private static void processDocument(AnnotationPipeline pipeline, String docid, String document) {
// Error checks
if (document.trim().equals("")) {
if (document.trim().isEmpty()) {
return;
}

@@ -775,7 +764,7 @@ public static void main(String[] args) throws IOException, InterruptedException
// This will prevent a nasty surprise 10 hours into a running job...
for (String file : filesToProcess) {
if (!new File(file).exists() || !new File(file).canRead()) {
log.error("Cannot read file (or file does not exist: '" + file + "'");
log.error("Cannot read file (or file does not exist: '" + file + '\'');
}
}
// Actually process the files.
@@ -806,4 +795,5 @@ public static void main(String[] args) throws IOException, InterruptedException
log.info("DONE processing files. " + exceptionCount.get() + " exceptions encountered.");
System.exit(exceptionCount.get());
}

}
