Skip to content

Commit

Permalink
deps: diff-merge
Browse files Browse the repository at this point in the history
dev: graph parser IoC hook

test: use test db for diff-merge tests

fix: ci lint

dev: refactoring post block-parsing process

feat: diff-merge 2 way merge integration

fix: key namespace of uuid in fix-duplicated-id

fix: duplicated uuid ci
  • Loading branch information
cnrpman authored and logseq-cldwalker committed Apr 17, 2023
1 parent b547ad8 commit 5aba871
Show file tree
Hide file tree
Showing 13 changed files with 596 additions and 41 deletions.
3 changes: 2 additions & 1 deletion deps/graph-parser/src/logseq/graph_parser.cljs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ Options available:
* :new? - Boolean which indicates if this file already exists. Default is true.
* :delete-blocks-fn - Optional fn which is called with the new page, file and existing block uuids
which may be referenced elsewhere.
which may be referenced elsewhere. Used to delete the existing blocks before saving the new ones.
Implemented in file-common-handler/validate-and-get-blocks-to-delete for IoC
* :skip-db-transact? - Boolean which skips transacting in order to batch transactions. Default is false
* :extract-options - Options map to pass to extract/extract"
([conn file content] (parse-file conn file content {}))
Expand Down
52 changes: 29 additions & 23 deletions deps/graph-parser/src/logseq/graph_parser/block.cljs
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@
refs (distinct (concat (:refs block) ref-blocks))]
(assoc block :refs refs)))

(defn- block-keywordize
(defn block-keywordize
[block]
(update-keys
block
Expand All @@ -381,6 +381,7 @@
(keyword "block" k)))))

(defn- sanity-blocks-data
"Clean up blocks data and add `block` ns to all keys"
[blocks]
(map (fn [block]
(if (map? block)
Expand All @@ -396,7 +397,7 @@
[:block/name (gp-util/page-name-sanity-lc tag)])) tags))
block))

(defn- get-block-content
(defn get-block-content
[utf8-content block format meta block-pattern]
(let [content (if-let [end-pos (:end_pos meta)]
(utf8/substring utf8-content
Expand Down Expand Up @@ -608,25 +609,38 @@
[block]
(println "Logseq will assign a new id for this block: " block)
(-> block
(assoc :uuid (d/squuid))
(update :properties dissoc :id)
(update :properties-text-values dissoc :id)
(update :properties-order #(vec (remove #{:id} %)))
(update :content (fn [c]
(assoc :block/uuid (d/squuid))
(update :block/properties dissoc :id)
(update :block/properties-text-values dissoc :id)
(update :block/properties-order #(vec (remove #{:id} %)))
(update :block/content (fn [c]
(let [replace-str (re-pattern
(str
"\n*\\s*"
(if (= :markdown (:format block))
(str "id" gp-property/colons " " (:uuid block))
(str (gp-property/colons-org "id") " " (:uuid block)))))]
(if (= :markdown (:block/format block))
(str "id" gp-property/colons " " (:block/uuid block))
(str (gp-property/colons-org "id") " " (:block/uuid block)))))]
(string/replace-first c replace-str ""))))))

(defn block-exists-in-another-page?
(defn block-exists-in-another-page?
  "Sanity check: is the block identified by `block-uuid` already stored in `db`
  under a page whose :block/name differs from `current-page-name`?
  Returns nil when `db` or `current-page-name` is missing, or when the block
  (or its page name) cannot be found; otherwise a boolean.
  Note from the original author: when a file is renamed externally, the old
  file is deleted and transacted beforehand."
  [db block-uuid current-page-name]
  (when (and db current-page-name)
    ;; Walk block -> page -> name, stopping at the first missing link.
    (let [stored-page-name (some-> (d/entity db [:block/uuid block-uuid])
                                   :block/page
                                   :block/name)]
      (when stored-page-name
        (not= current-page-name stored-page-name)))))

(defn fix-block-id-if-duplicated!
  "Returns `block`, passed through `fix-duplicate-id` when its :block/uuid is
  already taken, i.e. when either
    - the uuid was already seen during the current extraction
      (`*block-exists-in-extraction`, an atom holding the seen uuids), or
    - the uuid belongs to a block stored under another page in `db`.
  Side effect: the :block/uuid of the returned block is conj'ed onto
  `*block-exists-in-extraction`."
  [db page-name *block-exists-in-extraction block]
  (let [seen-ids @*block-exists-in-extraction
        block-uuid (:block/uuid block)
        duplicated? (or (seen-ids block-uuid)
                        (block-exists-in-another-page? db block-uuid page-name))
        checked-block (cond-> block
                        duplicated? fix-duplicate-id)]
    ;; Record the final uuid so later blocks in this extraction are checked against it.
    (swap! *block-exists-in-extraction conj (:block/uuid checked-block))
    checked-block))

(defn extract-blocks
"Extract headings from mldoc ast.
Args:
Expand All @@ -635,12 +649,10 @@
`with-id?`: If `with-id?` equals to true, all the referenced pages will have new db ids.
`format`: content's format, it could be either :markdown or :org-mode.
`options`: Options supported are :user-config, :block-pattern :supported-formats,
:extract-macros, :extracted-block-ids, :date-formatter, :page-name and :db"
[blocks content with-id? format {:keys [user-config db page-name extracted-block-ids] :as options}]
:extract-macros, :date-formatter, :page-name and :db"
[blocks content with-id? format {:keys [user-config] :as options}]
{:pre [(seq blocks) (string? content) (boolean? with-id?) (contains? #{:markdown :org} format)]}
(let [encoded-content (utf8/encode content)
*block-ids (or extracted-block-ids (atom #{}))
;; TODO: nbb doesn't support `Atom`
[blocks body pre-block-properties]
(loop [headings []
blocks (reverse blocks)
Expand All @@ -666,14 +678,8 @@

(heading-block? block)
(let [block' (construct-block block properties timestamps body encoded-content format pos-meta with-id? options)
block'' (assoc block' :macros (extract-macros-from-ast (cons block body)))
block-uuid (:uuid block'')
fixed-block (if (or (@*block-ids block-uuid)
(block-exists-in-another-page? db block-uuid page-name))
(fix-duplicate-id block'')
block'')]
(swap! *block-ids conj (:uuid fixed-block))
(recur (conj headings fixed-block) (rest blocks) {} {} []))
block'' (assoc block' :macros (extract-macros-from-ast (cons block body)))]
(recur (conj headings block'') (rest blocks) {} {} []))

:else
(recur headings (rest blocks) timestamps properties (conj body block))))
Expand Down
32 changes: 31 additions & 1 deletion deps/graph-parser/src/logseq/graph_parser/extract.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -127,14 +127,44 @@
(seq invalid-properties)
(assoc :block/invalid-properties invalid-properties))))

(defn- attach-block-ids-if-match
  "Overrides the :block/uuid of `blocks` with the ids in `block-ids`, pairing
  them by position.
  - `block-ids` is nil (not provided): returns `blocks` unchanged.
  - `block-ids` has the same count as `blocks`: returns a vector of blocks
    where every non-nil id replaces the block's :block/uuid (converted with
    `uuid`); blocks paired with a nil id are left as they are.
  - counts differ: logs an error and returns `blocks` unchanged."
  [block-ids blocks]
  (cond
    (not block-ids)
    blocks

    (= (count block-ids) (count blocks))
    (mapv (fn [block-id block]
            (if (some? block-id)
              (assoc block :block/uuid (uuid block-id))
              block))
          block-ids blocks)

    :else
    ;; Return `blocks` explicitly instead of relying on the logging call
    ;; evaluating to a falsy value.
    (do
      (log/error :gp-extract/attach-block-ids-not-match "attach-block-ids-if-match: block-ids provided, but doesn't match the number of blocks, ignoring")
      blocks)))

;; TODO: performance improvement
(defn- extract-pages-and-blocks
[format ast properties file content {:keys [date-formatter db filename-format] :as options}]
"uri-encoded? - if is true, apply URL decode on the file path
options -
:extracted-block-ids - An atom that contains all block ids that have been extracted in the current page (not yet saved to db)
:resolve-uuid-fn - Optional fn which is called to resolve uuids of each block. Enables diff-merge
(2 ways diff) based uuid resolution upon external editing.
returns a list of the uuids, given the receiving ast, or nil if not able to resolve.
Implemented in file-common-handler/diff-merge-uuids for IoC
Called in gp-extract/extract as AST is being parsed and properties are extracted there"
[format ast properties file content {:keys [date-formatter db filename-format extracted-block-ids resolve-uuid-fn]
:or {extracted-block-ids (atom #{})
resolve-uuid-fn (constantly nil)}
:as options}]
(try
(let [page (get-page-name file ast false filename-format)
[page page-name _journal-day] (gp-block/convert-page-if-journal page date-formatter)
options' (assoc options :page-name page-name)
;; In case of diff-merge (2way) triggered, use the uuids to override the ones extracted from the AST
override-uuids (resolve-uuid-fn format ast content options')
blocks (->> (gp-block/extract-blocks ast content false format options')
(attach-block-ids-if-match override-uuids)
(mapv #(gp-block/fix-block-id-if-duplicated! db page-name extracted-block-ids %))
(gp-block/with-parent-and-left {:block/name page-name})
(vec))
ref-pages (atom #{})
Expand Down
8 changes: 4 additions & 4 deletions deps/graph-parser/test/logseq/graph_parser/block_test.cljs
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@

(deftest test-fix-duplicate-id
(are [x y]
(let [result (gp-block/fix-duplicate-id x)]
(and (:uuid result)
(not= (:uuid x) (:uuid result))
(let [result (gp-block/fix-duplicate-id (gp-block/block-keywordize x))]
(and (:block/uuid result)
(not= (:uuid x) (:block/uuid result))
(= (select-keys result
[:properties :content :properties-text-values :properties-order]) y)))
[:block/properties :block/content :block/properties-text-values :block/properties-order]) (gp-block/block-keywordize y))))
{:properties {:id "63f199bc-c737-459f-983d-84acfcda14fe"}, :tags [], :format :markdown, :meta {:start_pos 51, :end_pos 101}, :macros [], :unordered true, :content "bar\nid:: 63f199bc-c737-459f-983d-84acfcda14fe", :properties-text-values {:id "63f199bc-c737-459f-983d-84acfcda14fe"}, :level 1, :uuid #uuid "63f199bc-c737-459f-983d-84acfcda14fe", :properties-order [:id]}
{:properties {},
:content "bar",
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@
"@excalidraw/excalidraw": "0.12.0",
"@hugotomazi/capacitor-navigation-bar": "^2.0.0",
"@logseq/capacitor-file-sync": "0.0.22",
"@logseq/diff-merge": "^0.0.1",
"@logseq/react-tweet-embed": "1.3.1-1",
"@sentry/react": "^6.18.2",
"@sentry/tracing": "^6.18.2",
Expand Down
29 changes: 29 additions & 0 deletions src/main/frontend/db/model.cljs
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@
db-utils/seq-flatten)))

(defn set-file-last-modified-at!
"Refresh file timestamps to DB"
[repo path last-modified-at]
(when (and repo path last-modified-at)
(when-let [conn (conn/get-db repo false)]
Expand Down Expand Up @@ -459,6 +460,34 @@ independent of format as format specific heading characters are stripped"
blocks-map (zipmap (map :db/id blocks) blocks)]
(keep blocks-map sorted-ids)))

;; Diverged from get-sorted-page-block-ids
(defn get-sorted-page-block-ids-and-levels
  "Walks the block tree of a page and reports each block with its depth.
  page-name: the page name (original name; it is sanitized here before lookup)
  Returns a vector of maps, in depth-first pre-order (a block is followed by
  its descendants before its next sibling), siblings ordered by `sort-by-left`:
    :id    - the :db/id of the block
    :level - depth of the block: 1 for direct children of the page,
             2 for their children, etc."
  [page-name]
  {:pre [(string? page-name)]}
  (let [sanitized-page (gp-util/page-name-sanity-lc page-name)
        page-id (:db/id (db-utils/entity [:block/name sanitized-page]))
        root (db-utils/entity page-id)
        ;; Sorted children of `parent`, each paired with the level they live at.
        children-at (fn [parent level]
                      (map (fn [child] [child level])
                           (sort-by-left (:block/_parent parent) parent)))]
    (loop [result []
           pending (children-at root 1)]
      (if-let [[[block level] & more] (seq pending)]
        ;; Children go in front of the remaining siblings => depth-first.
        (recur (conj result {:id (:db/id block) :level level})
               (concat (children-at block (inc level)) more))
        result))))

(defn has-children?
([block-id]
(has-children? (conn/get-db) block-id))
Expand Down
93 changes: 93 additions & 0 deletions src/main/frontend/fs/diff_merge.cljc
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
(ns frontend.fs.diff-merge
;; Disable clj linters since we don't support clj
#?(:clj {:clj-kondo/config {:linters {:unresolved-namespace {:level :off}
:unresolved-symbol {:level :off}}}})
(:require #?(:org.babashka/nbb ["@logseq/diff-merge$default" :refer [Merger Differ visualizeAsHTML attach_uuids]]
:default ["@logseq/diff-merge" :refer [Differ Merger visualizeAsHTML attach_uuids]])
[logseq.graph-parser.block :as gp-block]
[logseq.graph-parser.property :as gp-property]
[logseq.graph-parser.utf8 :as utf8]
[cljs-bean.core :as bean]
[frontend.db.utils :as db-utils]
[frontend.db.model :as db-model]))

;; (defn diff-merge
;; "N-ways diff & merge
;; Accept: blocks
;; https://github.com/logseq/diff-merge/blob/44546f2427f20bd417b898c8ba7b7d10a9254774/lib/mldoc.ts#L17-L22
;; https://github.com/logseq/diff-merge/blob/85ca7e9bf7740d3880ed97d535a4f782a963395d/lib/merge.ts#L40"
;; [base & branches]
;; ()
;; (let [merger (Merger.)]
;; (.mergeBlocks merger (bean/->js base) (bean/->js branches))))

(defn diff
  "2-way diff between two block lists.
  `base` and `income` are converted to JS values and handed to a fresh
  `Differ` (from @logseq/diff-merge) via `diff_logseqMode`; its result is
  returned as-is.
  Block shape accepted by the library:
  https://github.com/logseq/diff-merge/blob/44546f2427f20bd417b898c8ba7b7d10a9254774/lib/mldoc.ts#L17-L22"
  [base income]
  (.diff_logseqMode (Differ.)
                    (bean/->js base)
                    (bean/->js income)))

;; (defonce getHTML visualizeAsHTML)

;; Re-export of `attach_uuids` from @logseq/diff-merge under a local name.
(defonce attachUUID attach_uuids)

(defn db->diff-blocks
  "Builds the diff-merge input for a page from the blocks stored in the db.
  page-name: string, the page's original name
  Returns a seq of maps, one per block, in the order produced by
  `db-model/get-sorted-page-block-ids-and-levels`:
    :uuid  - the block's :block/uuid, as a string
    :body  - the block's :block/content
    :level - the level reported by the tree walk"
  [page-name]
  ;; :pre must be a vector of conditions. A bare list `(string? page-name)` is
  ;; read as the two conditions `string?` and `page-name`, so the type was
  ;; never actually checked.
  {:pre [(string? page-name)]}
  (let [walked (db-model/get-sorted-page-block-ids-and-levels page-name)
        ;; NOTE(review): assumes pull-many returns entities in the same order
        ;; as the ids it is given, so they line up with `levels` - confirm.
        blocks (db-utils/pull-many [:block/uuid :block/content :block/level] (map :id walked))
        levels (map :level walked)]
    (map (fn [block level]
           {:uuid (str (:block/uuid block)) ;; Force to be string
            :body (:block/content block)
            :level level})
         blocks levels)))

;; TODO Junyi: merge back to gp-block/extract-blocks
;; From back to first to ensure end_pos is correct
(defn ast->diff-blocks
  "Prepare the blocks for diff-merge.
  blocks:  ast of blocks, a seq of [node pos-meta] pairs
  content: the raw content the ast was parsed from
  format:  :markdown or :org
  options: :user-config and :block-pattern are used
  Returns a seq of {:body :level :uuid} maps in document order. The ast is
  walked from the last node to the first so that every node's end position can
  be taken from the start position of the heading that follows it. Properties
  nodes are attributed to the heading preceding them in the document; when
  properties remain with no heading before them, a level-1 entry spanning from
  position 0 is prepended for them."
  [blocks content format {:keys [user-config block-pattern]}]
  {:pre [(string? content) (contains? #{:markdown :org} format)]}
  (let [encoded-content (utf8/encode content)
        block-text (fn [node pos]
                     (gp-block/get-block-content encoded-content node format pos block-pattern))]
    (loop [collected []
           remaining (reverse blocks)
           pending-props {}
           end-pos (.-length encoded-content)]
      (if-let [[[node raw-pos] & more] (seq remaining)]
        ;; The end position recorded in the ast is replaced by the tracked one.
        (let [pos (assoc raw-pos :end_pos end-pos)]
          (cond
            (gp-block/heading-block? node)
            ;; This heading's start position is the end position of the node before it.
            (recur (conj collected {:body (block-text node pos)
                                    :level (:level (second node))
                                    :uuid (:id pending-props)})
                   more
                   {}
                   (:start_pos pos))

            (gp-property/properties-ast? node)
            ;; Not finished with the enclosing block yet: keep the end position
            ;; and accumulate (several properties nodes may be merged).
            (let [extracted (gp-block/extract-properties (second node) (assoc user-config :format format))]
              (recur collected more (merge pending-props (:properties extracted)) end-pos))

            :else
            (recur collected more pending-props end-pos)))
        (let [ordered (reverse collected)]
          (if (empty? pending-props)
            ordered
            ;; Leftover properties with no heading in front of them.
            ;; NOTE(review): no ast node is left at this point, so the block
            ;; handed to get-block-content is nil (as in the original code,
            ;; which destructured the first element of an empty seq) - confirm
            ;; this is intended.
            (cons {:body (block-text nil {:start_pos 0 :end_pos end-pos})
                   :level 1
                   :uuid (:id pending-props)}
                  ordered)))))))
4 changes: 3 additions & 1 deletion src/main/frontend/fs/watcher_handler.cljs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
;; all IPC paths must be normalized! (via gp-util/path-normalize)

(defn- set-missing-block-ids!
"For every referred block in the content, fix their block ids in files if missing."
[content]
(when (string? content)
(doseq [block-id (block-ref/get-all-block-ref-ids content)]
Expand All @@ -43,7 +44,8 @@
(p/catch #(js/console.error "❌ Bak Error: " path %))))

_ (file-handler/alter-file repo path content {:re-render-root? true
:from-disk? true})]
:from-disk? true
:fs/event :fs/local-file-change})]
(set-missing-block-ids! content)
(db/set-file-last-modified-at! repo path mtime)))

Expand Down
Loading

0 comments on commit 5aba871

Please sign in to comment.