forked from logseq/logseq
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Copied tests pass! - Still a number of TODOs left
- Loading branch information
1 parent
7d00b54
commit 3bc2479
Showing
5 changed files
with
254 additions
and
56 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
(ns ^:nbb-compatible logseq.graph-parser | ||
"Main ns for parsing graph from source files" | ||
(:require [datascript.core :as d] | ||
[logseq.graph-parser.extract :as extract] | ||
[logseq.graph-parser.util :as gp-util] | ||
[logseq.graph-parser.date-time-util :as date-time-util] | ||
[logseq.graph-parser.config :as gp-config] | ||
[frontend.db-schema :as db-schema] | ||
[frontend.db.default :as default-db] | ||
[clojure.set :as set])) | ||
|
||
(defn- db-set-file-content!
  "Adapted from frontend.db.model/db-set-file-content!.
  Transacts one entity map holding `path` and `content` to `db`, passing
  {:skip-refresh? true} as the transaction metadata."
  [db path content]
  (d/transact! db
               [{:file/path path
                 :file/content content}]
               {:skip-refresh? true}))
|
||
;; TODO: Reuse from frontend.config | ||
;; Format keywords used as the default :supported-formats extract option in
;; parse-file. Listed alphabetically; being a set, order carries no meaning.
(def supported-formats
  #{:adoc :asciidoc :bmp :c :clj :css :dat :edn
    :erl :ex :excalidraw :gif :html :ico :java :jpeg
    :jpg :js :json :markdown :md :ml :org :php
    :png :rb :rst :svg :ts :txt :webp :yml})
|
||
(defn parse-file
  "Parse file and save parsed data to the given db.

  `db` is a datascript connection (it is both dereferenced and transacted to),
  `file` is the file path and `content` is the file's text. Options:

  * :new? - when truthy (default true), the file entity is stamped with
    :file/created-at
  * :new-graph? - when truthy (default false), the final transaction is made
    with {:new-graph? true} as its metadata
  * :delete-blocks-fn - fn called with the first extracted page and the file
    path; its result is added to the tx data ahead of the new pages and
    blocks (default returns [])
  * :extract-options - options for extract/extract-blocks-pages. They are
    merged over the defaults, so a partial map keeps the default value of
    every key it omits

  Returns the result of the final d/transact! call."
  [db file content {:keys [new? delete-blocks-fn new-graph? extract-options]
                    :or {new? true
                         new-graph? false
                         delete-blocks-fn (constantly [])}}]
  (db-set-file-content! db file content)
  (let [format (gp-util/get-format file)
        ;; TODO: Reuse these options from state and config
        ;; Defaults are merged instead of being supplied through :or, because
        ;; :or only applies when the key is absent: a caller passing just some
        ;; of the keys (or nil) would otherwise lose the remaining defaults.
        extract-options' (merge {:block-pattern "-"
                                 :date-formatter "MMM do, yyyy"
                                 :supported-formats supported-formats}
                                extract-options)
        file-content [{:file/path file}]
        tx (if (contains? gp-config/mldoc-support-formats format)
             (let [[pages blocks]
                   (extract/extract-blocks-pages
                    file
                    content
                    ;; The db value is read after the file content was stored
                    (merge extract-options' {:db @db}))
                   delete-blocks (delete-blocks-fn (first pages) file)
                   block-ids (map (fn [block] {:block/uuid (:block/uuid block)}) blocks)
                   ;; Only refs of the shape [:block/uuid <uuid>] are kept
                   block-refs-ids (->> (mapcat :block/refs blocks)
                                       (filter (fn [ref] (and (vector? ref)
                                                              (= :block/uuid (first ref)))))
                                       (map (fn [ref] {:block/uuid (second ref)})))
                   ;; To prevent "unique constraint" on datascript
                   block-ids (set/union (set block-ids) (set block-refs-ids))
                   pages (extract/with-ref-pages pages blocks)
                   pages-index (map #(select-keys % [:block/name]) pages)]
               ;; does order matter?
               (concat file-content pages-index delete-blocks pages block-ids blocks))
             ;; Formats outside mldoc-support-formats only get the file entity
             file-content)
        tx (concat tx [(cond-> {:file/path file}
                         new?
                         ;; TODO: use file system timestamp?
                         (assoc :file/created-at (date-time-util/time-ms)))])]
    (d/transact! db (gp-util/remove-nils tx) (when new-graph? {:new-graph? true}))))
|
||
(defn init-db
  "Creates a datascript connection using db-schema/schema, transacts the
  schema version followed by default-db/built-in-pages, and returns the
  connection."
  []
  ;; TODO: Reuse code from frontend
  (doto (d/create-conn db-schema/schema)
    (d/transact! [{:schema/version db-schema/version}])
    (d/transact! default-db/built-in-pages)))
|
||
(defn parse
  "Parses every file in `files` into `db` by calling parse-file on each one,
  in order. Each file is a map with :file/path and :file/content keys.

  `options` is handed unchanged to every parse-file call and defaults to {},
  which is what the two-argument form uses. Returns nil."
  ([db files]
   (parse db files {}))
  ([db files options]
   (doseq [{:file/keys [path content]} files]
     (parse-file db path content options))))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
(ns logseq.graph-parser-test | ||
"TODO: Should I reuse repo-test or split it?" | ||
(:require [cljs.test :refer [deftest is testing]] | ||
[logseq.graph-parser :as graph-parser] | ||
[frontend.test.docs-graph-helper :as docs-graph-helper] | ||
[datascript.core :as d])) | ||
|
||
(defn- get-top-block-properties
  "Returns a map of property key -> number of entities carrying that key in
  their :block/properties, considering only entities that have no :block/name
  and keeping only keys found on at least 5 of them."
  [db]
  (let [rows (d/q '[:find (pull ?b [*])
                    :where
                    [?b :block/properties]
                    [(missing? $ ?b :block/name)]]
                  db)
        ;; Every entity contributes each of its property keys exactly once
        tally (frequencies (mapcat (comp keys :block/properties first) rows))]
    (into {}
          (filter (fn [[_ n]] (>= n 5)))
          tally)))
|
||
(defn- get-all-page-properties
  "Returns a map of property key -> number of entities carrying that key in
  their :block/properties, considering only entities that have a :block/name."
  [db]
  (let [rows (d/q '[:find (pull ?b [*])
                    :where
                    [?b :block/properties]
                    [?b :block/name]]
                  db)]
    ;; Every entity contributes each of its property keys exactly once
    (frequencies (mapcat (comp keys :block/properties first) rows))))
|
||
;; Integration test that parses a large graph (the docs graph) and checks the
;; resulting db
(deftest ^:integration parse-and-load-files-to-db
  (let [graph-dir "src/test/docs"
        _ (docs-graph-helper/clone-docs-repo-if-not-exists graph-dir)
        files (docs-graph-helper/build-graph-files graph-dir)
        conn (graph-parser/init-db)
        ;; Frontend call this replaces:
        ;; (repo-handler/parse-files-and-load-to-db! test-helper/test-db files {:re-render? false})
        _ (graph-parser/parse conn files)
        db @conn
        file-paths (map :file/path files)
        ;; First value of the first result row, e.g. for `(count ?b)` queries
        scalar (fn [query] (ffirst (d/q query db)))
        ;; Tally of the entities having `attr`, keyed by their value of `attr`
        tally-by (fn [attr]
                   (->> (d/q [:find '(pull ?b [*]) :where ['?b attr]] db)
                        (map (comp attr first))
                        frequencies))]

    ;; Counts assertions help check for no major regressions. These counts should
    ;; only increase over time as the docs graph rarely has deletions
    (testing "Counts"
      (is (= 206 (count files)) "Correct file count")
      (is (= 40888 (count (d/datoms db :eavt))) "Correct datoms count")

      (is (= 3597
             (scalar '[:find (count ?b)
                       :where [?b :block/path-refs ?bp] [?bp :block/name]]))
          "Correct referenced blocks count")
      (is (= 21
             (scalar '[:find (count ?b)
                       :where [?b :block/content ?content]
                       [(clojure.string/includes? ?content "+BEGIN_QUERY")]]))
          "Advanced query count"))

    (testing "Query based stats"
      (is (= (into #{} file-paths)
             (into #{}
                   (map #(get-in (first %) [:block/file :file/path]))
                   (d/q '[:find (pull ?b [* {:block/file [:file/path]}])
                          :where [?b :block/name] [?b :block/file]]
                        db)))
          "Journal and pages files on disk should equal ones in db")

      (is (= (count (filter #(re-find #"journals/" %) file-paths))
             (scalar '[:find (count ?b)
                       :where
                       [?b :block/journal? true]
                       [?b :block/name]
                       [?b :block/file]]))
          "Journal page count on disk equals count in db")

      (is (= {"CANCELED" 2 "DONE" 6 "LATER" 4 "NOW" 5}
             (tally-by :block/marker))
          "Task marker counts")

      (is (= {:markdown 3140 :org 460}
             (tally-by :block/format))
          "Block format counts")

      (is (= {:title 98 :id 98
              :updated-at 47 :created-at 47
              :collapsed 22
              :card-last-score 6 :card-repeats 6 :card-next-schedule 6
              :card-last-interval 6 :card-ease-factor 6 :card-last-reviewed 6
              :alias 6}
             (get-top-block-properties db))
          "Counts for top block properties")

      (is (= {:title 98
              :alias 6
              :tags 2 :permalink 2
              :name 1 :type 1 :related 1 :sample 1 :click 1 :id 1 :example 1}
             (get-all-page-properties db))
          "Counts for all page properties")

      (let [attrs [:block/scheduled :block/priority :block/deadline :block/collapsed?
                   :block/heading-level :block/repeated?]
            ;; Number of entities that have `attr` at all
            entity-count (fn [attr]
                           (scalar [:find (list 'count '?b) :where ['?b attr]]))]
        (is (= {:block/scheduled 2
                :block/priority 4
                :block/deadline 1
                :block/collapsed? 22
                :block/heading-level 57
                :block/repeated? 1}
               (zipmap attrs (map entity-count attrs)))
            "Counts for blocks with common block attributes"))

      (is (= #{"term" "setting" "book" "Templates" "Query" "Query/table" "page"}
             (into #{}
                   (map (comp :block/original-name first))
                   (d/q '[:find (pull ?n [*]) :where [?b :block/namespace ?n]] db)))
          "Has correct namespaces"))))