From d3bd025834d800fc53565e61a4ad35d02cd7c3b9 Mon Sep 17 00:00:00 2001
From: Kevin Downey
Date: Sun, 24 Mar 2013 13:56:45 -0700
Subject: [PATCH] document sample literals

---
Reviewer note: the blob id on the src/opennlp/sample.clj index line
predates the review edits made to that file in this patch.

 src/data_readers.clj         |  1 +
 src/opennlp/sample.clj       | 65 ++++++++++++++++++++++++++++++++++++
 test/opennlp/test/sample.clj | 24 +++++++++++++
 3 files changed, 90 insertions(+)
 create mode 100644 src/data_readers.clj
 create mode 100644 src/opennlp/sample.clj
 create mode 100644 test/opennlp/test/sample.clj

diff --git a/src/data_readers.clj b/src/data_readers.clj
new file mode 100644
index 0000000..d249d56
--- /dev/null
+++ b/src/data_readers.clj
@@ -0,0 +1 @@
+{opennlp/sample opennlp.sample/read-document-sample}
diff --git a/src/opennlp/sample.clj b/src/opennlp/sample.clj
new file mode 100644
index 0000000..46c8a23
--- /dev/null
+++ b/src/opennlp/sample.clj
@@ -0,0 +1,65 @@
+(ns opennlp.sample
+  (:require [clojure.java.io :as io])
+  (:import (opennlp.tools.doccat DocumentSample)
+           (opennlp.tools.util ObjectStream)))
+
+(defn print-sample
+  "Writes `sample` to `w` as an `#opennlp/sample {:category ... :text [...]}`
+  tagged literal, the form `read-document-sample` accepts."
+  [^DocumentSample sample ^java.io.Writer w]
+  (.write w "#opennlp/sample {")
+  (.write w ":category ")
+  (binding [*out* w]
+    ;; pr rather than prn: prn would embed a newline inside the literal.
+    (pr (.getCategory sample)))
+  (.write w " :text ")
+  (binding [*out* w]
+    (pr (vec (.getText sample))))
+  (.write w "}"))
+
+;; Both printers share one representation so pr-str and print-dup output
+;; can be read back through the opennlp/sample data reader.
+(defmethod print-method DocumentSample
+  [sample w]
+  (print-sample sample w))
+
+(defmethod print-dup DocumentSample
+  [sample w]
+  (print-sample sample w))
+
+(defn read-document-sample
+  "Data reader for `#opennlp/sample`: builds a DocumentSample from a map
+  with a :category string and a :text sequence of strings."
+  [{:keys [category text]}]
+  (DocumentSample. category (into-array String text)))
+
+(defn clojure-document-sample-stream
+  "Returns an ObjectStream that yields each form read from `in` (anything
+  clojure.java.io/reader accepts). Forms are remembered as they are read,
+  so `reset` replays them from the start without re-reading `in`."
+  [in]
+  (let [rdr (java.io.PushbackReader. (io/reader in))
+        buf (atom [])   ; every form read so far, in order
+        pos (atom 0)]   ; index of the next form to hand out
+    (reify
+      ObjectStream
+      (read [_]
+        (if (= @pos (count @buf))
+          ;; Replay buffer exhausted: pull the next form from the reader.
+          ;; NOTE(review): clojure.core/read honours *read-eval*; bind it
+          ;; to false around this stream when `in` is untrusted.
+          (when-let [obj (read rdr false nil)]
+            (swap! buf conj obj)
+            (swap! pos inc)
+            obj)
+          (let [p @pos]
+            (swap! pos inc)
+            (nth @buf p))))
+      (close [_]
+        ;; Closing the PushbackReader closes whatever io/reader opened.
+        (.close rdr)
+        ;; `in` may be a path, File or URL, none of which can be closed.
+        (when (instance? java.io.Closeable in)
+          (.close ^java.io.Closeable in)))
+      (reset [_]
+        (reset! pos 0)))))
diff --git a/test/opennlp/test/sample.clj b/test/opennlp/test/sample.clj
new file mode 100644
index 0000000..814a909
--- /dev/null
+++ b/test/opennlp/test/sample.clj
@@ -0,0 +1,24 @@
+(ns opennlp.test.sample
+  (:require [clojure.test :refer :all]
+            [opennlp.sample :refer [clojure-document-sample-stream]])
+  (:import (opennlp.tools.doccat DocumentSample)))
+
+(deftest test-samples-round-trip
+  (let [d #opennlp/sample {:category "foo" :text ["bar"]}]
+    (is (= d (read-string (pr-str d))))))
+
+(deftest test-clojure-document-sample-stream
+  (let [d #opennlp/sample {:category "foo" :text ["bar"]}
+        x (java.io.ByteArrayInputStream.
+           (.getBytes
+            (with-out-str
+              (prn d)
+              (prn d))))
+        s (clojure-document-sample-stream x)]
+    (is (= (.read s) d))
+    (is (= (.read s) d))
+    (is (nil? (.read s)))
+    (.reset s)
+    (is (= (.read s) d))
+    (is (= (.read s) d))
+    (is (nil? (.read s)))))