Minor openie code cleanup.
manning authored and Stanford NLP committed Nov 5, 2017
1 parent 09d0edc commit 29098d5
Showing 2 changed files with 52 additions and 76 deletions.
92 changes: 39 additions & 53 deletions src/edu/stanford/nlp/ie/util/RelationTriple.java
@@ -22,6 +22,7 @@
*/
@SuppressWarnings("UnusedDeclaration")
public class RelationTriple implements Comparable<RelationTriple>, Iterable<CoreLabel> {

/** The subject (first argument) of this triple */
public final List<CoreLabel> subject;

@@ -142,7 +143,7 @@ public String subjectLink() {
* This method will additionally strip out punctuation as well.
*/
public String subjectLemmaGloss() {
return StringUtils.join(canonicalSubject.stream().filter(x -> !x.tag().matches("[\\.\\?,:;'\"!]")).map(x -> x.lemma() == null ? x.word() : x.lemma()), " ");
return StringUtils.join(canonicalSubject.stream().filter(x -> !x.tag().matches("[.?,:;'\"!]")).map(x -> x.lemma() == null ? x.word() : x.lemma()), " ");
}

/** The object of this relation triple, as a String */
@@ -165,7 +166,7 @@ public String objectLink() {
* This method will additionally strip out punctuation as well.
*/
public String objectLemmaGloss() {
return StringUtils.join(canonicalObject.stream().filter(x -> !x.tag().matches("[\\.\\?,:;'\"!]")).map(x -> x.lemma() == null ? x.word() : x.lemma()), " ");
return StringUtils.join(canonicalObject.stream().filter(x -> !x.tag().matches("[.?,:;'\"!]")).map(x -> x.lemma() == null ? x.word() : x.lemma()), " ");
}

/**
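
The pattern simplification above (and the identical one in relationLemmaGloss below) relies on '.' and '?' being literal inside a regex character class, so the shorter pattern accepts exactly the same tags. A quick standalone check of that equivalence (not part of the commit):

    import java.util.Arrays;

    public class CharClassCheck {
      public static void main(String[] args) {
        String before = "[\\.\\?,:;'\"!]";  // escaped form removed by this commit
        String after  = "[.?,:;'\"!]";      // simplified form used going forward
        for (String tag : Arrays.asList(".", "?", ",", ":", ";", "'", "\"", "!", "NN", "VBZ")) {
          // Prints the same pair of booleans for every input.
          System.out.println(tag + "\t" + tag.matches(before) + "\t" + tag.matches(after));
        }
      }
    }
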
@@ -196,7 +197,7 @@ public String relationLemmaGloss() {
String relationGloss = (
(prefixBe ? "be " : "")
+ StringUtils.join(relation.stream()
.filter(x -> x.tag() == null || (!x.tag().matches("[\\.\\?,:;'\"!]") && (x.lemma() == null || !x.lemma().matches("[\\.,;'\"\\?!]"))))
.filter(x -> x.tag() == null || (!x.tag().matches("[.?,:;'\"!]") && (x.lemma() == null || !x.lemma().matches("[.,;'\"?!]"))))
.map(x -> x.lemma() == null ? x.word() : x.lemma()),
" ")
.toLowerCase()
@@ -225,7 +226,7 @@ public String confidenceGloss() {
return new DecimalFormat("0.000").format(confidence);
}

protected Pair<Integer, Integer> getSpan(List<CoreLabel> tokens, Function<CoreLabel, Integer> toMin, Function<CoreLabel, Integer> toMax) {
private static Pair<Integer, Integer> getSpan(List<CoreLabel> tokens, Function<CoreLabel, Integer> toMin, Function<CoreLabel, Integer> toMax) {
int min = Integer.MAX_VALUE;
int max = Integer.MIN_VALUE;
for (CoreLabel token : tokens) {
@@ -243,19 +244,15 @@ public Pair<Integer, Integer> subjectTokenSpan() {
}

/**
* <p>
* Get a representative span for the relation expressed by this triple.
* </p>
*
* <p>
* This is a bit more complicated than the subject and object spans, as the relation
* span is occasionally discontinuous.
* If this is the case, this method returns the largest contiguous chunk.
* If the relation span is empty, return the object span.
* </p>
*/
public Pair<Integer, Integer> relationTokenSpan() {
if (relation.size() == 0) {
if (relation.isEmpty()) {
return objectTokenSpan();
} else if (relation.size() == 1) {
return Pair.makePair(relation.get(0).index() - 1, relation.get(0).index());
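
For the discontinuous case described in the Javadoc, "largest contiguous chunk" amounts to finding the longest run of consecutive token indices. A minimal sketch of that idea (an illustration only, assuming 1-based, already-sorted CoreLabel indices; the actual implementation is elided by the diff):

    // Example: relation token indices [3, 4, 7, 8, 9] -> {6, 9}, i.e. tokens 7..9,
    // using the same convention as the other spans (0-based begin, end-exclusive).
    static int[] largestContiguousSpan(int[] sortedIndices) {
      int bestStart = sortedIndices[0], bestLen = 1;
      int curStart  = sortedIndices[0], curLen  = 1;
      for (int i = 1; i < sortedIndices.length; i++) {
        if (sortedIndices[i] == sortedIndices[i - 1] + 1) {
          curLen++;                      // run continues
        } else {
          curStart = sortedIndices[i];   // run broken; start a new one
          curLen = 1;
        }
        if (curLen > bestLen) { bestStart = curStart; bestLen = curLen; }
      }
      return new int[]{ bestStart - 1, bestStart - 1 + bestLen };
    }
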
@@ -435,21 +432,24 @@ public int hashCode() {
// return result;
}

/** Print a human-readable description of this relation triple, as a tab-separated line */
/** Print a human-readable description of this relation triple, as a tab-separated line. */
@Override
public String toString() {
return "" + this.confidence + "\t" + subjectGloss() + "\t" + relationGloss() + "\t" + objectGloss();
return String.valueOf(this.confidence) + '\t' + subjectGloss() + '\t' + relationGloss() + '\t' + objectGloss();
}
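
As a concrete illustration of the tab-separated form (values invented for this example):

    // For a hypothetical triple (Obama; was born in; Hawaii) with confidence 1.0:
    // triple.toString()  ->  "1.0\tObama\twas born in\tHawaii"
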


/** Print in the format expected for https://www.cs.bgu.ac.il/~gabriels/emnlp2016.pdf, with equivalence classes. */
/** Print in the format expected by Gabriel Stanovsky and Ido Dagan, Creating a Large Benchmark for Open
* Information Extraction, EMNLP 2016. https://gabrielstanovsky.github.io/assets/papers/emnlp16a/paper.pdf ,
* with equivalence classes.
*/
public String toQaSrlString(CoreMap sentence) {
String equivalenceClass = subjectHead().index() + "." + relationHead().index() + "." + objectHead().index();
return equivalenceClass + "\t" +
subjectGloss().replace('\t', ' ') + "\t" +
relationGloss().replace('\t', ' ') + "\t" +
objectGloss().replace('\t', ' ') + "\t" +
confidence + "\t" +
String equivalenceClass = subjectHead().index() + "." + relationHead().index() + '.' + objectHead().index();
return equivalenceClass + '\t' +
subjectGloss().replace('\t', ' ') + '\t' +
relationGloss().replace('\t', ' ') + '\t' +
objectGloss().replace('\t', ' ') + '\t' +
confidence + '\t' +
StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class).stream().map(x -> x.word().replace('\t', ' ').replace(" ", "")), " ");
}

@@ -468,44 +468,38 @@ public String toReverbString(String docid, CoreMap sentence) {
relationIndex = relation.get(0).index() - 1;
relationIndexEnd = relation.get(relation.size() - 1).index();
}
if (!subject.isEmpty()) {
if ( ! subject.isEmpty()) {
if (sentIndex < 0) { sentIndex = subject.get(0).sentIndex(); }
subjIndex = subject.get(0).index() - 1;
subjIndexEnd = subject.get(subject.size() - 1).index();
}
if (!object.isEmpty()) {
if ( ! object.isEmpty()) {
if (sentIndex < 0) { sentIndex = subject.get(0).sentIndex(); }
objIndex = object.get(0).index() - 1;
objIndexEnd = object.get(object.size() - 1).index();
}
return (docid == null ? "no_doc_id" : docid) + "\t" +
sentIndex + "\t" +
subjectGloss().replace('\t', ' ') + "\t" +
relationGloss().replace('\t', ' ') + "\t" +
objectGloss().replace('\t', ' ') + "\t" +
subjIndex + "\t" +
subjIndexEnd+ "\t" +
relationIndex + "\t" +
relationIndexEnd + "\t" +
objIndex + "\t" +
objIndexEnd + "\t" +
confidenceGloss() + "\t" +
StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class).stream().map(x -> x.word().replace('\t', ' ').replace(" ", "")), " ") + "\t" +
StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class).stream().map(CoreLabel::tag), " ") + "\t" +
subjectLemmaGloss().replace('\t', ' ') + "\t" +
relationLemmaGloss().replace('\t', ' ') + "\t" +
return (docid == null ? "no_doc_id" : docid) + '\t' +
sentIndex + '\t' +
subjectGloss().replace('\t', ' ') + '\t' +
relationGloss().replace('\t', ' ') + '\t' +
objectGloss().replace('\t', ' ') + '\t' +
subjIndex + '\t' +
subjIndexEnd+ '\t' +
relationIndex + '\t' +
relationIndexEnd + '\t' +
objIndex + '\t' +
objIndexEnd + '\t' +
confidenceGloss() + '\t' +
StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class).stream().map(x -> x.word().replace('\t', ' ').replace(" ", "")), " ") + '\t' +
StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class).stream().map(CoreLabel::tag), " ") + '\t' +
subjectLemmaGloss().replace('\t', ' ') + '\t' +
relationLemmaGloss().replace('\t', ' ') + '\t' +
objectLemmaGloss().replace('\t', ' ');
}
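
Reading off the concatenation above, the ReVerb-style line is 17 tab-separated columns (the names below are shorthand for the variables and getters used in the method):

    // docid, sentIndex, subjGloss, relGloss, objGloss,
    // subjBegin, subjEnd, relBegin, relEnd, objBegin, objEnd,
    // confidence, tokens, posTags, subjLemma, relLemma, objLemma
    // (the begin/end offsets are 0-based begin, end-exclusive, as computed above)
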

@Override
public int compareTo(RelationTriple o) {
if (this.confidence < o.confidence) {
return -1;
} else if (this.confidence > o.confidence) {
return 1;
} else {
return 0;
}
return Double.compare(this.confidence, o.confidence);
}
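
Double.compare produces the same ordering as the removed three-way branch for ordinary confidence values, and additionally imposes a total order over NaN and -0.0. A tiny illustration (a fragment for jshell or a main method, not part of the commit):

    double a = 0.75, b = 0.90;                              // illustrative confidences
    int byBranch  = (a < b) ? -1 : (a > b) ? 1 : 0;         // the old logic
    int byCompare = Integer.signum(Double.compare(a, b));   // the new logic, sign-normalized
    System.out.println(byBranch == byCompare);              // true for non-NaN values
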

@SuppressWarnings("unchecked")
@@ -641,21 +635,13 @@ public WithLink(List<CoreLabel> subject, List<CoreLabel> canonicalSubject, List<
/** {@inheritDoc} */
@Override
public String subjectLink() {
if (subjectLink.isPresent()) {
return subjectLink.get();
} else {
return super.subjectLink();
}
return subjectLink.orElseGet(super::subjectLink);
}

/** {@inheritDoc} */
@Override
public String objectLink() {
if (objectLink.isPresent()) {
return objectLink.get();
} else {
return super.objectLink();
}
return objectLink.orElseGet(super::objectLink);
}
}
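
Optional.orElseGet takes a Supplier, so the superclass lookup runs only when the Optional is empty, which mirrors the removed if/else exactly. A small standalone illustration with hypothetical values:

    import java.util.Optional;

    public class OrElseGetDemo {
      static String fallbackLink() {
        System.out.println("fallback computed");   // printed only when the Optional is empty
        return "http://example.com/fallback";
      }

      public static void main(String[] args) {
        Optional<String> link = Optional.of("http://example.com/entity");
        System.out.println(link.orElseGet(OrElseGetDemo::fallbackLink));                     // supplier not invoked
        System.out.println(Optional.<String>empty().orElseGet(OrElseGetDemo::fallbackLink)); // supplier invoked
      }
    }
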

36 changes: 13 additions & 23 deletions src/edu/stanford/nlp/naturalli/OpenIE.java
@@ -43,25 +43,19 @@
* "Leveraging Linguistic Structure For Open Domain Information Extraction." Gabor Angeli, Melvin Johnson Premkumar, Christopher Manning. ACL 2015.
* </pre>
*
* <p>
* The paper can be found at <a href="http://nlp.stanford.edu/pubs/2015angeli-openie.pdf">http://nlp.stanford.edu/pubs/2015angeli-openie.pdf</a>.
* </p>
* <p>
*
* Documentation on the system can be found on
* <a href="https://nlp.stanford.edu/software/openie.html">the project homepage</a>,
* or the <a href="http://stanfordnlp.github.io/CoreNLP/openie.html">CoreNLP annotator documentation page</a>.
* The simplest invocation of the system would be something like:
* </p>
*
* <pre>
* java -mx1g -cp stanford-openie.jar:stanford-openie-models.jar edu.stanford.nlp.naturalli.OpenIE
* </pre>
*
* <p>
* Note that this class serves both as an entry point for the OpenIE system, but also as a CoreNLP annotator
* which can be plugged into the CoreNLP pipeline (or any other annotation pipeline).
* </p>
 * Note that this class serves both as an entry point for the OpenIE system and as a CoreNLP annotator
 * which can be plugged into the CoreNLP pipeline (or any other annotation pipeline).
*
* @see OpenIE#annotate(Annotation)
* @see OpenIE#main(String[])
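
To make the annotator route concrete, the usual calling pattern looks roughly like the sketch below. It follows standard CoreNLP usage plus the RelationTriple getters shown earlier in this commit; the annotator list and example sentence are assumptions, not part of the diff.

    import edu.stanford.nlp.ie.util.RelationTriple;
    import edu.stanford.nlp.ling.CoreAnnotations;
    import edu.stanford.nlp.naturalli.NaturalLogicAnnotations;
    import edu.stanford.nlp.pipeline.Annotation;
    import edu.stanford.nlp.pipeline.StanfordCoreNLP;
    import edu.stanford.nlp.util.CoreMap;

    import java.util.Collection;
    import java.util.Properties;

    public class OpenIEPipelineSketch {
      public static void main(String[] args) {
        Properties props = new Properties();
        // openie depends on the annotators listed before it.
        props.setProperty("annotators", "tokenize,ssplit,pos,lemma,depparse,natlog,openie");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        Annotation doc = new Annotation("Obama was born in Hawaii.");
        pipeline.annotate(doc);

        for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
          Collection<RelationTriple> triples =
              sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
          for (RelationTriple triple : triples) {
            System.out.println(triple.confidenceGloss() + "\t" + triple.subjectGloss()
                + "\t" + triple.relationGloss() + "\t" + triple.objectGloss());
          }
        }
      }
    }
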
@@ -173,15 +167,14 @@ public OpenIE() {

/**
 * Create a new OpenIE system, based on the given properties.
*
* @param props The properties to parametrize the system with.
*/
public OpenIE(Properties props) {
// Fill the properties
ArgumentParser.fillOptions(this, props);
Properties withoutOpenIEPrefix = new Properties();
Enumeration<Object> keys = props.keys();
while (keys.hasMoreElements()) {
String key = keys.nextElement().toString();
for (String key : props.stringPropertyNames()) {
withoutOpenIEPrefix.setProperty(key.replace("openie.", ""), props.getProperty(key));
}
ArgumentParser.fillOptions(this, withoutOpenIEPrefix);
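
In practice this means the constructor accepts option keys both with and without the "openie." prefix; for example (the specific option names are assumptions taken from the annotator documentation, not from this diff):

    // Fragment; assumes the imports already used in this file.
    Properties props = new Properties();
    props.setProperty("openie.triple.strict", "true");              // same effect as the un-prefixed "triple.strict"
    props.setProperty("openie.max_entailments_per_clause", "100");  // same effect as the un-prefixed key
    OpenIE extractor = new OpenIE(props);
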
@@ -443,13 +436,10 @@ private static SemanticGraph canonicalizeCoref(SemanticGraph parse, Map<CoreLabe
}

/**
* <p>
* Annotate a single sentence.
* </p>
* <p>
*
* This annotator will, in particular, set the {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.EntailedSentencesAnnotation}
* and {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.RelationTriplesAnnotation} annotations.
* </p>
*/
@SuppressWarnings("unchecked")
public void annotateSentence(CoreMap sentence, Map<CoreLabel, List<CoreLabel>> canonicalMentionMap) {
@@ -506,10 +496,8 @@ public void annotateSentence(CoreMap sentence, Map<CoreLabel, List<CoreLabel>> c
/**
* {@inheritDoc}
*
* <p>
* This annotator will, in particular, set the {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.EntailedSentencesAnnotation}
* and {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.RelationTriplesAnnotation} annotations.
* </p>
* This annotator will, in particular, set the {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.EntailedSentencesAnnotation}
* and {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.RelationTriplesAnnotation} annotations.
*/
@Override
public void annotate(Annotation annotation) {
@@ -630,7 +618,7 @@ public static String tripleToString(RelationTriple extraction, String docid, Cor
case REVERB:
return extraction.toReverbString(docid, sentence);
case OLLIE:
return extraction.confidenceGloss() + ": (" + extraction.subjectGloss() + "; " + extraction.relationGloss() + "; " + extraction.objectGloss() + ")";
return extraction.confidenceGloss() + ": (" + extraction.subjectGloss() + "; " + extraction.relationGloss() + "; " + extraction.objectGloss() + ')';
case DEFAULT:
return extraction.toString();
case QA_SRL:
@@ -643,14 +631,15 @@

/**
* Process a single file or line of standard in.
*
* @param pipeline The annotation pipeline to run the lines of the input through.
* @param docid The docid of the document we are extracting.
* @param document the document to annotate.
*/
@SuppressWarnings("SynchronizeOnNonFinalField")
private static void processDocument(AnnotationPipeline pipeline, String docid, String document) {
// Error checks
if (document.trim().equals("")) {
if (document.trim().isEmpty()) {
return;
}

@@ -775,7 +764,7 @@ public static void main(String[] args) throws IOException, InterruptedException
// This will prevent a nasty surprise 10 hours into a running job...
for (String file : filesToProcess) {
if (!new File(file).exists() || !new File(file).canRead()) {
log.error("Cannot read file (or file does not exist: '" + file + "'");
log.error("Cannot read file (or file does not exist: '" + file + '\'');
}
}
// Actually process the files.
@@ -806,4 +795,5 @@ public static void main(String[] args) throws IOException, InterruptedException
log.info("DONE processing files. " + exceptionCount.get() + " exceptions encountered.");
System.exit(exceptionCount.get());
}

}
