Skip to content

Commit

Permalink
add a test from corpus that wasn't passing due to improper tokenization
Browse files Browse the repository at this point in the history
  • Loading branch information
dakrone committed Mar 22, 2011
1 parent f8dae01 commit fa9ece9
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 7 deletions.
12 changes: 6 additions & 6 deletions src/opennlp/nlp.clj
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,10 @@
(loop [ts tokens dt-ops detoken-ops]
(let [op (first dt-ops)
op2 (second dt-ops)]
;;(println :op op)
;;(println :op2 op)
;;(println :ts (first ts))
;;(println :sb (.toString sb))
;; (println :op op)
;; (println :op2 op)
;; (println :ts (first ts))
;; (println :sb (.toString sb))
(cond
(or (= op2 nil)
(= op2 Detokenizer$DetokenizationOperation/MERGE_TO_LEFT))
Expand All @@ -148,8 +148,8 @@
(= op DetokenizationDictionary$Operation/RIGHT_LEFT_MATCHING)
(if (contains? @token-set (first ts))
(do
;;(println :token-set @token-set)
;;(println :ts (first ts))
;; (println :token-set @token-set)
;; (println :ts (first ts))
(swap! token-set disj (first ts))
(.append sb (first ts)))
(do
Expand Down
8 changes: 7 additions & 1 deletion test/opennlp/test/nlp.clj
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,13 @@
(is (= (detokenize (tokenize "Mary likes cows (Mary is a cow)."))
"Mary likes cows (Mary is a cow)."))
(is (= (detokenize (tokenize "Mary exclaimed \"I am a cow!\""))
"Mary exclaimed \"I am a cow!\"")))
"Mary exclaimed \"I am a cow!\""))
(is (= (detokenize ["I" "know" "what" "\"" "it" "\"" "means" "well" "enough"
"," "when" "I" "find" "a" "thing" "," "said" "the" "Duck"
":" "its" "generally" "a" "frog" "or" "a" "worm" "."])
(str "I know what \"it\" means well enough, when"
" I find a thing, said the Duck: its"
" generally a frog or a worm."))))

(deftest precondition-test
(is (thrown? java.lang.AssertionError (get-sentences 1)))
Expand Down

0 comments on commit fa9ece9

Please sign in to comment.