Merge branch 'master' of origin

behnam · Nov 7, 2017 · 4bc848c · 4bc848c
1 parent ebe8629
commit 4bc848c
Show file tree

Hide file tree

Showing 22 changed files with 7,755 additions and 10,878 deletions.
diff --git a/src/edu/stanford/nlp/ie/util/RelationTriple.java b/src/edu/stanford/nlp/ie/util/RelationTriple.java
@@ -22,6 +22,7 @@
  */
 @SuppressWarnings("UnusedDeclaration")
 public class RelationTriple implements Comparable<RelationTriple>, Iterable<CoreLabel> {
+
   /** The subject (first argument) of this triple */
   public final List<CoreLabel> subject;
 
@@ -142,7 +143,7 @@ public String subjectLink() {
    * This method will additionally strip out punctuation as well.
    */
    public String subjectLemmaGloss() {
-    return StringUtils.join(canonicalSubject.stream().filter(x -> !x.tag().matches("[\\.\\?,:;'\"!]")).map(x -> x.lemma() == null ? x.word() : x.lemma()), " ");
+    return StringUtils.join(canonicalSubject.stream().filter(x -> x.tag() == null || !x.tag().matches("[.?,:;'\"!]")).map(x -> x.lemma() == null ? x.word() : x.lemma()), " ");  // untagged (null-tag) tokens are kept rather than throwing, as in relationLemmaGloss()
   }
 
   /** The object of this relation triple, as a String */
@@ -165,7 +166,7 @@ public String objectLink() {
    * This method will additionally strip out punctuation as well.
    */
   public String objectLemmaGloss() {
-    return StringUtils.join(canonicalObject.stream().filter(x -> !x.tag().matches("[\\.\\?,:;'\"!]")).map(x -> x.lemma() == null ? x.word() : x.lemma()), " ");
+    return StringUtils.join(canonicalObject.stream().filter(x -> x.tag() == null || !x.tag().matches("[.?,:;'\"!]")).map(x -> x.lemma() == null ? x.word() : x.lemma()), " ");  // untagged (null-tag) tokens are kept rather than throwing, as in relationLemmaGloss()
   }
 
   /**
@@ -196,7 +197,7 @@ public String relationLemmaGloss() {
     String relationGloss = (
         (prefixBe ? "be " : "")
         + StringUtils.join(relation.stream()
-            .filter(x -> x.tag() == null || (!x.tag().matches("[\\.\\?,:;'\"!]") && (x.lemma() == null || !x.lemma().matches("[\\.,;'\"\\?!]"))))
+            .filter(x -> x.tag() == null || (!x.tag().matches("[.?,:;'\"!]") && (x.lemma() == null || !x.lemma().matches("[.,;'\"?!]"))))
             .map(x -> x.lemma() == null ? x.word() : x.lemma()),
           " ")
           .toLowerCase()
@@ -225,7 +226,7 @@ public String confidenceGloss() {
     return new DecimalFormat("0.000").format(confidence);
   }
 
-  protected Pair<Integer, Integer> getSpan(List<CoreLabel> tokens, Function<CoreLabel, Integer> toMin, Function<CoreLabel, Integer> toMax) {
+  private static Pair<Integer, Integer> getSpan(List<CoreLabel> tokens, Function<CoreLabel, Integer> toMin, Function<CoreLabel, Integer> toMax) {
     int min = Integer.MAX_VALUE;
     int max = Integer.MIN_VALUE;
     for (CoreLabel token : tokens) {
@@ -243,19 +244,15 @@ public Pair<Integer, Integer> subjectTokenSpan() {
   }
 
   /**
-   * <p>
    *   Get a representative span for the relation expressed by this triple.
-   * </p>
    *
-   * <p>
    *   This is a bit more complicated than the subject and object spans, as the relation
    *   span is occasionally discontinuous.
    *   If this is the case, this method returns the largest contiguous chunk.
    *   If the relation span is empty, return the object span.
-   * </p>
    */
   public Pair<Integer, Integer> relationTokenSpan() {
-    if (relation.size() == 0) {
+    if (relation.isEmpty()) {
       return objectTokenSpan();
     } else if (relation.size() == 1) {
       return Pair.makePair(relation.get(0).index() - 1, relation.get(0).index());
@@ -435,21 +432,24 @@ public int hashCode() {
 //    return result;
   }
 
-  /** Print a human-readable description of this relation triple, as a tab-separated line */
+  /** Print a human-readable description of this relation triple, as a tab-separated line. */
   @Override
   public String toString() {
-    return "" + this.confidence + "\t" + subjectGloss() + "\t" + relationGloss() + "\t" + objectGloss();
+    return String.valueOf(this.confidence) + '\t' + subjectGloss() + '\t' + relationGloss() + '\t' + objectGloss();
   }
 
 
-  /** Print in the format expected for https://www.cs.bgu.ac.il/~gabriels/emnlp2016.pdf, with equivalence classes. */
+  /** Print in the format expected by Gabriel Stanovsky and Ido Dagan, Creating a Large Benchmark for Open
+   *  Information Extraction, EMNLP 2016. https://gabrielstanovsky.github.io/assets/papers/emnlp16a/paper.pdf ,
+   *  with equivalence classes.
+   */
   public String toQaSrlString(CoreMap sentence) {
-    String equivalenceClass = subjectHead().index() + "." + relationHead().index() + "." + objectHead().index();
-    return equivalenceClass + "\t" +
-        subjectGloss().replace('\t', ' ') + "\t" +
-        relationGloss().replace('\t', ' ') + "\t" +
-        objectGloss().replace('\t', ' ') + "\t" +
-        confidence + "\t" +
+    String equivalenceClass = subjectHead().index() + "." + relationHead().index() + '.' + objectHead().index();  // NOTE(review): the first separator is a String literal on purpose; with a char there, int + char would add numerically (assuming index() returns an int - confirm)
+    return equivalenceClass + '\t' +
+        subjectGloss().replace('\t', ' ') + '\t' +
+        relationGloss().replace('\t', ' ') + '\t' +
+        objectGloss().replace('\t', ' ') + '\t' +
+        confidence + '\t' +
         StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class).stream().map(x -> x.word().replace('\t', ' ').replace(" ", "")), " ");
   }
 
@@ -468,44 +468,38 @@ public String toReverbString(String docid, CoreMap sentence) {
       relationIndex = relation.get(0).index() - 1;
       relationIndexEnd = relation.get(relation.size() - 1).index();
     }
-    if (!subject.isEmpty()) {
+    if ( ! subject.isEmpty()) {
       if (sentIndex < 0) { sentIndex = subject.get(0).sentIndex(); }
       subjIndex = subject.get(0).index() - 1;
       subjIndexEnd = subject.get(subject.size() - 1).index();
     }
-    if (!object.isEmpty()) {
-      if (sentIndex < 0) { sentIndex = subject.get(0).sentIndex(); }
+    if ( ! object.isEmpty()) {
+      if (sentIndex < 0) { sentIndex = object.get(0).sentIndex(); }  // read from object: subject can be empty on this path, so subject.get(0) could throw
       objIndex = object.get(0).index() - 1;
       objIndexEnd = object.get(object.size() - 1).index();
     }
-    return (docid == null ? "no_doc_id" : docid) + "\t" +
-        sentIndex + "\t" +
-        subjectGloss().replace('\t', ' ') + "\t" +
-        relationGloss().replace('\t', ' ') + "\t" +
-        objectGloss().replace('\t', ' ') + "\t" +
-        subjIndex + "\t" +
-        subjIndexEnd+ "\t" +
-        relationIndex + "\t" +
-        relationIndexEnd + "\t" +
-        objIndex + "\t" +
-        objIndexEnd + "\t" +
-        confidenceGloss() + "\t" +
-        StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class).stream().map(x -> x.word().replace('\t', ' ').replace(" ", "")), " ") + "\t" +
-        StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class).stream().map(CoreLabel::tag), " ") + "\t" +
-        subjectLemmaGloss().replace('\t', ' ') + "\t" +
-        relationLemmaGloss().replace('\t', ' ') + "\t" +
+    return (docid == null ? "no_doc_id" : docid) + '\t' +
+        sentIndex + '\t' +
+        subjectGloss().replace('\t', ' ') + '\t' +
+        relationGloss().replace('\t', ' ') + '\t' +
+        objectGloss().replace('\t', ' ') + '\t' +
+        subjIndex + '\t' +
+        subjIndexEnd+ '\t' +
+        relationIndex + '\t' +
+        relationIndexEnd + '\t' +
+        objIndex + '\t' +
+        objIndexEnd + '\t' +
+        confidenceGloss() + '\t' +
+        StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class).stream().map(x -> x.word().replace('\t', ' ').replace(" ", "")), " ") + '\t' +
+        StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class).stream().map(CoreLabel::tag), " ") + '\t' +
+        subjectLemmaGloss().replace('\t', ' ') + '\t' +
+        relationLemmaGloss().replace('\t', ' ') + '\t' +
         objectLemmaGloss().replace('\t', ' ');
   }
 
   @Override
   public int compareTo(RelationTriple o) {
-    if (this.confidence < o.confidence) {
-      return -1;
-    } else if (this.confidence > o.confidence) {
-      return 1;
-    } else {
-      return 0;
-    }
+    return Double.compare(this.confidence, o.confidence);  // ascending by confidence; unlike the removed </> chain (0 whenever a NaN is involved), Double.compare orders NaN above every value and -0.0 below 0.0
   }
 
   @SuppressWarnings("unchecked")
@@ -641,21 +635,13 @@ public WithLink(List<CoreLabel> subject, List<CoreLabel> canonicalSubject, List<
     /** {@inheritDoc} */
     @Override
     public String subjectLink() {
-      if (subjectLink.isPresent()) {
-        return subjectLink.get();
-      } else {
-        return super.subjectLink();
-      }
+      return subjectLink.orElseGet(super::subjectLink);
     }
 
     /** {@inheritDoc} */
     @Override
     public String objectLink() {
-      if (objectLink.isPresent()) {
-        return objectLink.get();
-      } else {
-        return super.objectLink();
-      }
+      return objectLink.orElseGet(super::objectLink);
     }
   }
 

diff --git a/src/edu/stanford/nlp/ling/AbstractCoreLabel.java b/src/edu/stanford/nlp/ling/AbstractCoreLabel.java
@@ -16,8 +16,8 @@ public interface AbstractCoreLabel extends AbstractToken, Label, TypesafeMap {
    * @return "" if the key is not in the map or has the value {@code null}
    *     and the String value of the key otherwise
    */
-  public <KEY extends Key<String>> String getString(Class<KEY> key);
+  <KEY extends Key<String>> String getString(Class<KEY> key);
 
-  public <KEY extends Key<String>> String getString(Class<KEY> key, String def);
+  <KEY extends Key<String>> String getString(Class<KEY> key, String def);
 
 }
diff --git a/src/edu/stanford/nlp/ling/AnnotationLookup.java b/src/edu/stanford/nlp/ling/AnnotationLookup.java
@@ -79,7 +79,7 @@ private enum KeyLookup {
     STACKED_NER_KEY(CoreAnnotations.StackedNamedEntityTagAnnotation.class, "stackedNer"),
 
     // Thang Sep13: for Genia NER
-    HEAD_KEY(CoreAnnotations.HeadWordStringAnnotation.class, "head"),
+    HEADWORD_KEY(CoreAnnotations.HeadWordStringAnnotation.class, "headword"),
     GOVERNOR_KEY(CoreAnnotations.GovernorAnnotation.class, "governor"),
     GAZ_KEY(CoreAnnotations.GazAnnotation.class, "gaz"),
     ABBR_KEY(CoreAnnotations.AbbrAnnotation.class, "abbr"),
@@ -89,10 +89,12 @@ private enum KeyLookup {
 
     // Also have "pos" for PartOfTag (POS is also the TAG_KEY - "tag", but "pos" makes more sense)
     // Still keep "tag" for POS tag so we don't break anything
-    POS_TAG_KEY(CoreAnnotations.PartOfSpeechAnnotation.class, "pos");
+    POS_TAG_KEY(CoreAnnotations.PartOfSpeechAnnotation.class, "pos"),
+    DEPREL_KEY(CoreAnnotations.CoNLLDepTypeAnnotation.class, "deprel"),
+    HEADIDX_KEY(CoreAnnotations.CoNLLDepParentIndexAnnotation.class, "headidx");
 
 
-    private final Class<? extends CoreAnnotation<?>> coreKey; // todo [cdm 2016]: Make this private if can sort out typing
+    private final Class<? extends CoreAnnotation<?>> coreKey;
     private final String oldKey;
 
     <T> KeyLookup(Class<? extends CoreAnnotation<T>> coreKey, String oldKey) {
@@ -121,20 +123,6 @@ <T> KeyLookup(Class<? extends CoreAnnotation<T>> coreKey, String oldKey) {
   } // end enum KeyLookup
 
 
-  /*
-   * Returns a CoreAnnotation class key for the given old-style FeatureLabel
-   * key if one exists; null otherwise.
-   */
-  // no longer needed, simply return the Class object directly
-//  public static KeyLookup getCoreKey(String oldKey) {
-//    for (KeyLookup lookup : KeyLookup.values()) {
-//      if (lookup.oldKey.equals(oldKey)) {
-//        return lookup;
-//      }
-//    }
-//    return null;
-//  }
-
   /**
    * Returns a CoreAnnotation class key for the given string
    * key if one exists; null otherwise.

diff --git a/src/edu/stanford/nlp/ling/CoreAnnotation.java b/src/edu/stanford/nlp/ling/CoreAnnotation.java
@@ -8,18 +8,18 @@
  * parameterized by the type of the value associated with the annotation.
  * Subclasses of this class are the keys in the {@link CoreMap}, so they are
  * instantiated only by utility methods in {@link CoreAnnotations}.
- * 
+ *
  * @author dramage
  * @author rafferty
  */
-public interface CoreAnnotation<V>
-  extends TypesafeMap.Key<V> {
+public interface CoreAnnotation<V> extends TypesafeMap.Key<V> {
 
   /**
    * Returns the type associated with this annotation.  This method must
    * return the same class type as its value type parameter.  It feels like
    * one should be able to get away without this method, but because Java
    * erases the generic type signature, that info disappears at runtime.
    */
-  public Class<V> getType();
+  Class<V> getType();
+
 }
diff --git a/src/edu/stanford/nlp/ling/CoreAnnotations.java b/src/edu/stanford/nlp/ling/CoreAnnotations.java
@@ -464,7 +464,7 @@ public Class<String> getType() {
   }
 
   /**
-   * CoNLL dep parsing - the dependency type
+   * CoNLL dep parsing - the dependency type, such as SBJ or OBJ. This should be unified with CoNLLDepTypeAnnotation.
    */
   public static class CoNLLDepAnnotation implements CoreAnnotation<CoreMap> {
     @Override
@@ -495,7 +495,7 @@ public Class<Map<Integer,String>> getType() {
   }
 
   /**
-   * CoNLL dep parsing - the dependency type
+   * CoNLL dep parsing - the dependency type, such as SBJ or OBJ. This should be unified with CoNLLDepAnnotation.
    */
   public static class CoNLLDepTypeAnnotation implements CoreAnnotation<String> {
     @Override

diff --git a/src/edu/stanford/nlp/ling/tokensregex/SequenceMatcher.java b/src/edu/stanford/nlp/ling/tokensregex/SequenceMatcher.java
@@ -522,8 +522,8 @@ protected boolean findMatchStartBacktracking(int start, boolean matchAllTokens)
         if (Thread.interrupted()) {
           throw new RuntimeInterruptedException();
         }
-        boolean match = cStates.match(i);
-        if (cStates == null || cStates.size() == 0) {
+        cStates.match(i);
+        if (cStates.size() == 0) {
           break;
         }
         if (!matchAllTokens) {

diff --git a/src/edu/stanford/nlp/naturalli/NaturalLogicAnnotations.java b/src/edu/stanford/nlp/naturalli/NaturalLogicAnnotations.java
@@ -35,6 +35,18 @@ public Class<Polarity> getType() {
     }
   }
 
+  /**
+   * An annotation, similar to {@link PolarityAnnotation}, which just measures whether
+   * the polarity of a token is upwards, downwards, or flat.
+   * This annotation always has values either "up", "down", or "flat".
+   */
+  public static final class PolarityDirectionAnnotation implements CoreAnnotation<String> {
+    @Override
+    public Class<String> getType() {
+      return String.class;
+    }
+  }
+
   /**
    * The set of sentences which are entailed by the original sentence, according to Natural Logic semantics.
    */

diff --git a/src/edu/stanford/nlp/naturalli/NaturalLogicAnnotator.java b/src/edu/stanford/nlp/naturalli/NaturalLogicAnnotator.java
@@ -604,6 +604,20 @@ private static void annotatePolarity(CoreMap sentence) {
       // Set polarity
       token.set(PolarityAnnotation.class, polarity);
     }
+
+    // Set the PolarityDirectionAnnotation
+    for (CoreLabel token : tokens) {
+      Polarity polarity = token.get(PolarityAnnotation.class);
+      if (polarity != null) {
+        if (polarity.isUpwards()) {
+          token.set(PolarityDirectionAnnotation.class, "up");
+        } else if (polarity.isDownwards()) {
+          token.set(PolarityDirectionAnnotation.class, "down");
+        } else {
+          token.set(PolarityDirectionAnnotation.class, "flat");
+        }
+      }
+    }
   }
 
   /**