Skip to content

Commit

Permalink
Merge remote-tracking branch 'alexott/master'
Browse files: browse the repository at this point in the history
  • Loading branch information
dakrone committed May 1, 2013
2 parents 2fb9e10 + c7c0999 commit 19970b8
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 10 deletions.
15 changes: 8 additions & 7 deletions src/opennlp/nlp.clj
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@

;; OpenNLP property for pos-tagging. Meant to be rebound before
;; calling the tagging creators
(def #^{:dynamic true} *beam-size* 3)
(def ^:dynamic *beam-size* 3)

;; Caching to use for pos-tagging
(def #^{:dynamic true} *cache-size* 1024)
(def ^:dynamic *cache-size* 1024)

(defn- opennlp-span-strings
"Takes a collection of spans and the data they refer to. Returns a list of
Expand Down Expand Up @@ -123,12 +123,13 @@ start and end positions of the span."
(fn name-finder
[tokens & contexts]
{:pre [(seq tokens)
(every? #(= (class %) String) tokens)]}
(every? string? tokens)]}
(let [finder (NameFinderME. model feature-generator beam)
matches (.find finder (into-array String tokens))
a-tokens (into-array String tokens)
matches (.find finder a-tokens)
probs (seq (.probs finder))]
(with-meta
(distinct (Span/spansToStrings matches (into-array String tokens)))
(distinct (Span/spansToStrings matches a-tokens))
{:probabilities probs
:spans (map to-native-span matches)}))))

Expand Down Expand Up @@ -215,7 +216,7 @@ start and end positions of the span."
(fn detokenizer
[tokens]
{:pre [(coll? tokens)
(every? #(= (class %) String) tokens)]}
(every? string? tokens)]}
(let [detoken (DictionaryDetokenizer. model)
ops (.detokenize detoken (into-array String tokens))]
(detokenize* tokens ops))))
Expand All @@ -225,7 +226,7 @@ start and end positions of the span."
(fn detokenizer
[tokens]
{:pre [(coll? tokens)
(every? #(= (class %) String) tokens)]}
(every? string? tokens)]}
(-> (DictionaryDetokenizer. model)
(TokenSample. (into-array String tokens))
(.getText))))
Expand Down
2 changes: 1 addition & 1 deletion src/opennlp/tools/train.clj
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

(defn write-model
"Write a model to disk"
[#^BaseModel model out-stream]
[^BaseModel model out-stream]
(with-open [out (output-stream out-stream)]
(.serialize model out)))

Expand Down
4 changes: 2 additions & 2 deletions src/opennlp/treebank.clj
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
(ns #^{:doc "Namespace containing tools pertaining to the treebank NLP tools.
(ns ^{:doc "Namespace containing tools pertaining to the treebank NLP tools.
This includes treebank chunking, parsing and linking (coref)."
:author "Lee Hinman"}
opennlp.treebank
Expand All @@ -14,7 +14,7 @@

;; Default advance percentage as defined by
;; AbstractBottomUpParser.defaultAdvancePercentage
(def #^{:dynamic true} *advance-percentage* 0.95)
(def ^:dynamic *advance-percentage* 0.95)

(defn- split-chunks
"Partition a sequence of treebank chunks by their phrases."
Expand Down

0 comments on commit 19970b8

Please sign in to comment.