From fa9ece994fbdcd81462649b712a62fa130e7b9f0 Mon Sep 17 00:00:00 2001
From: Lee Hinman <lee@writequit.org>
Date: Mon, 21 Mar 2011 22:29:37 -0600
Subject: [PATCH] add a test from corpus that wasn't passing due to improper
 tokenization

---
 src/opennlp/nlp.clj       | 12 ++++++------
 test/opennlp/test/nlp.clj |  8 +++++++-
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/src/opennlp/nlp.clj b/src/opennlp/nlp.clj
index 5ea51c6..6aa4081 100644
--- a/src/opennlp/nlp.clj
+++ b/src/opennlp/nlp.clj
@@ -132,10 +132,10 @@
     (loop [ts tokens dt-ops detoken-ops]
       (let [op (first dt-ops)
             op2 (second dt-ops)]
-        ;;(println :op op)
-        ;;(println :op2 op)
-        ;;(println :ts (first ts))
-        ;;(println :sb (.toString sb))
+        ;; (println :op op)
+        ;; (println :op2 op2)
+        ;; (println :ts (first ts))
+        ;; (println :sb (.toString sb))
         (cond
          (or (= op2 nil)
              (= op2 Detokenizer$DetokenizationOperation/MERGE_TO_LEFT))
@@ -148,8 +148,8 @@
          (= op DetokenizationDictionary$Operation/RIGHT_LEFT_MATCHING)
          (if (contains? @token-set (first ts))
            (do
-             ;;(println :token-set @token-set)
-             ;;(println :ts (first ts))
+             ;; (println :token-set @token-set)
+             ;; (println :ts (first ts))
              (swap! token-set disj (first ts))
              (.append sb (first ts)))
            (do
diff --git a/test/opennlp/test/nlp.clj b/test/opennlp/test/nlp.clj
index 5a04741..696c452 100644
--- a/test/opennlp/test/nlp.clj
+++ b/test/opennlp/test/nlp.clj
@@ -53,7 +53,13 @@
   (is (= (detokenize (tokenize "Mary likes cows (Mary is a cow)."))
          "Mary likes cows (Mary is a cow)."))
   (is (= (detokenize (tokenize "Mary exclaimed \"I am a cow!\""))
-         "Mary exclaimed \"I am a cow!\"")))
+         "Mary exclaimed \"I am a cow!\""))
+  (is (= (detokenize ["I" "know" "what" "\"" "it" "\"" "means" "well" "enough"
+                      "," "when" "I" "find" "a" "thing" "," "said" "the" "Duck"
+                      ":" "its" "generally" "a" "frog" "or" "a" "worm" "."])
+         (str "I know what \"it\" means well enough, when"
+              " I find a thing, said the Duck: its"
+              " generally a frog or a worm."))))
 
 (deftest precondition-test
   (is (thrown? java.lang.AssertionError (get-sentences 1)))