diff --git a/denops/@ddc-filters/ngram/ngram.ts b/denops/@ddc-filters/ngram/ngram.ts
new file mode 100644
index 00000000..f54c4e40
--- /dev/null
+++ b/denops/@ddc-filters/ngram/ngram.ts
@@ -0,0 +1,144 @@
+/** A trie node; the path leading to it spells a substring of the needle. */
+export type Trie = {
+  /** Child nodes, keyed by the next UTF-16 code unit. */
+  next: Record<string, Trie>;
+  /** Offset in the needle at which this substring starts (-1 for the root). */
+  start: number;
+  /** Length of the substring spelled by this node (0 for the root). */
+  len: number;
+};
+
+/*
+
+Fuzzy matching and scoring based on n-grams.
+Taking "hoge" as the input, the suffixes
+- hoge
+- oge
+- ge
+- e
+are inserted into a trie. Matching a string of any length against that trie
+yields the parts of the string that are substrings of the input.
+- Apart from building the trie, the running time is probably about O(n)?
+
+*/
+
+/** A compiled input string, ready to be passed to `match`. */
+export type Needle = {
+  trie: Trie;
+  /** Length of the string the trie was built from. */
+  length: number;
+};
+
+/**
+ * Builds a trie containing every suffix of `input`.
+ *
+ * @param input The string to search for, e.g. what the user typed.
+ * @returns The trie together with the length of `input`.
+ */
+export function makeTrie(input: string): Needle {
+  const root: Trie = {
+    next: {},
+    start: -1,
+    len: 0,
+  };
+  for (let start = 0; start < input.length; start++) {
+    const pinput = input.slice(start);
+    let current = root;
+    for (let i = 0; i < pinput.length; i++) {
+      const c = pinput[i];
+      // When a substring occurs more than once, overwrite the node so that the
+      // last occurrence wins. For example, after narrowing with "hogepiyo",
+      // typing "piyo" again should bring the "piyo" candidates to the front.
+      current.next[c] = {
+        next: current.next[c]?.next ?? {},
+        start,
+        len: i + 1,
+      };
+      current = current.next[c];
+    }
+  }
+  return {
+    trie: root,
+    length: input.length,
+  };
+}
+
+/** A contiguous piece of the matched string that occurs in the needle. */
+export type MatchResult = {
+  /** Offset of the piece in the matched string. */
+  start: number;
+  /** Length of the piece. */
+  len: number;
+  /** The piece itself. */
+  text: string;
+  /** Offset in the needle recorded on the trie node that ended the piece. */
+  needleStart: number;
+};
+
+export type Result = {
+  matches: MatchResult[];
+  /** Sum, over the needle positions, of the longest match covering each. */
+  score: number;
+};
+
+export type MatchOptions = {
+  /** Matches shorter than this are dropped. Defaults to 1. */
+  minMatchLength?: number;
+};
+
+/**
+ * Collects the pieces of `input` that are substrings of the needle and scores
+ * how much of the needle they cover.
+ *
+ * @param input The candidate string to examine.
+ * @param needle The value returned by `makeTrie`.
+ * @param options See `MatchOptions`.
+ * @returns The matches in order of appearance and the resulting score.
+ */
+export function match(
+  input: string,
+  needle: Needle,
+  options: MatchOptions = {},
+): Result {
+  const matches: MatchResult[] = [];
+  const root = needle.trie;
+  let current = root;
+  const score: number[] = Array(needle.length).fill(0);
+  // `??` binds more loosely than `<=`, so resolve the default before comparing.
+  const minMatchLength = options.minMatchLength ?? 1;
+
+  // Run one step past the end so that a match reaching the end of the input is
+  // flushed by the loop itself instead of by a separate check afterwards.
+  for (let i = 0; i <= input.length; i++) {
+    const c = input[i];
+    if (current.next[c] == null) {
+      if (current.len !== 0) {
+        const len = current.len;
+        const start = i - len;
+        if (minMatchLength <= len) {
+          matches.push({
+            start,
+            len,
+            text: input.slice(start, start + len),
+            needleStart: current.start,
+          });
+          // Even for a similarity measure, favouring strings that merely repeat
+          // the same piece is undesirable, so keep a per-position maximum,
+          // somewhat like a score matrix.
+          for (let j = current.start; j < current.start + len; j++) {
+            score[j] = Math.max(score[j], len);
+          }
+        }
+      }
+      current = root;
+    }
+    if (current.next[c] != null) {
+      current = current.next[c];
+    }
+  }
+  return {
+    matches,
+    // The initial value keeps an empty needle from throwing.
+    score: score.reduce((a, b) => a + b, 0),
+  };
+}
diff --git a/denops/@ddc-filters/sorter_ngram.ts b/denops/@ddc-filters/sorter_ngram.ts
new file mode 100644
index 00000000..38c07699
--- /dev/null
+++ b/denops/@ddc-filters/sorter_ngram.ts
@@ -0,0 +1,82 @@
+import {
+  BaseFilter,
+  FilterArguments,
+} from "https://deno.land/x/ddc_vim@v4.1.0/base/filter.ts";
+import { Item } from "https://deno.land/x/ddc_vim@v4.1.0/types.ts";
+import { makeTrie, match } from "./ngram/ngram.ts";
+
+// Shared so that a new encoder is not created for every highlight.
+const encoder = new TextEncoder();
+
+/** Returns the number of bytes `input` occupies when encoded as UTF-8. */
+function byteLength(input: string): number {
+  return encoder.encode(input).length;
+}
+
+export type Params = {
+  /** Highlight group for matched pieces; "" turns highlighting off. */
+  highlightMatched: string;
+  /** Shortest input that is ranked and shortest match that is kept. */
+  minMatchLength: number;
+  /** Shortest match that gets highlighted. */
+  minMatchHighlightLength: number;
+};
+
+/** Sorts completion items by how much of the typed text they contain. */
+export class Filter extends BaseFilter<Params> {
+  /**
+   * Ranks `args.items` against `args.completeStr`, best match first.
+   *
+   * The items are returned unchanged while the typed text is shorter than
+   * `minMatchLength`. With `highlightMatched` set, a highlight is appended to
+   * each item for every match of at least `minMatchHighlightLength` characters.
+   */
+  filter(args: FilterArguments<Params>): Item[] {
+    const { highlightMatched, minMatchLength, minMatchHighlightLength } =
+      args.filterParams;
+    if (args.completeStr.length < minMatchLength) {
+      return args.items;
+    }
+    const ignoreCase = args.sourceOptions.ignoreCase;
+    const input = ignoreCase
+      ? args.completeStr.toLowerCase()
+      : args.completeStr;
+    const needle = makeTrie(input);
+    const ranked = args.items.map((item) => {
+      const word = ignoreCase ? item.word.toLowerCase() : item.word;
+      const result = match(word, needle, { minMatchLength });
+      return { item, result };
+    });
+
+    const hl_group = highlightMatched;
+    if (hl_group !== "") {
+      const name = "ddc-filter-sorter_ngram-" + hl_group;
+      for (const { item, result } of ranked) {
+        item.highlights ??= [];
+        for (const m of result.matches) {
+          if (minMatchHighlightLength <= m.len) {
+            item.highlights.push({
+              name,
+              type: "abbr",
+              hl_group,
+              col: 1 + byteLength(item.word.slice(0, m.start)),
+              width: byteLength(m.text),
+            });
+          }
+        }
+      }
+    }
+
+    return ranked.sort((a, b) => b.result.score - a.result.score)
+      .map((value) => value.item);
+  }
+
+  /** Default parameters: no highlighting, every match counts. */
+  params(): Params {
+    return {
+      highlightMatched: "",
+      minMatchLength: 1,
+      minMatchHighlightLength: 1,
+    };
+  }
+}
diff --git a/denops/@ddu-filters/ngram/ngram.ts b/denops/@ddu-filters/ngram/ngram.ts
new file mode 100644
index 00000000..f54c4e40
--- /dev/null
+++ b/denops/@ddu-filters/ngram/ngram.ts
@@ -0,0 +1,144 @@
+/** A trie node; the path leading to it spells a substring of the needle. */
+export type Trie = {
+  /** Child nodes, keyed by the next UTF-16 code unit. */
+  next: Record<string, Trie>;
+  /** Offset in the needle at which this substring starts (-1 for the root). */
+  start: number;
+  /** Length of the substring spelled by this node (0 for the root). */
+  len: number;
+};
+
+/*
+
+Fuzzy matching and scoring based on n-grams.
+Taking "hoge" as the input, the suffixes
+- hoge
+- oge
+- ge
+- e
+are inserted into a trie. Matching a string of any length against that trie
+yields the parts of the string that are substrings of the input.
+- Apart from building the trie, the running time is probably about O(n)?
+
+*/
+
+/** A compiled input string, ready to be passed to `match`. */
+export type Needle = {
+  trie: Trie;
+  /** Length of the string the trie was built from. */
+  length: number;
+};
+
+/**
+ * Builds a trie containing every suffix of `input`.
+ *
+ * @param input The string to search for, e.g. what the user typed.
+ * @returns The trie together with the length of `input`.
+ */
+export function makeTrie(input: string): Needle {
+  const root: Trie = {
+    next: {},
+    start: -1,
+    len: 0,
+  };
+  for (let start = 0; start < input.length; start++) {
+    const pinput = input.slice(start);
+    let current = root;
+    for (let i = 0; i < pinput.length; i++) {
+      const c = pinput[i];
+      // When a substring occurs more than once, overwrite the node so that the
+      // last occurrence wins. For example, after narrowing with "hogepiyo",
+      // typing "piyo" again should bring the "piyo" candidates to the front.
+      current.next[c] = {
+        next: current.next[c]?.next ?? {},
+        start,
+        len: i + 1,
+      };
+      current = current.next[c];
+    }
+  }
+  return {
+    trie: root,
+    length: input.length,
+  };
+}
+
+/** A contiguous piece of the matched string that occurs in the needle. */
+export type MatchResult = {
+  /** Offset of the piece in the matched string. */
+  start: number;
+  /** Length of the piece. */
+  len: number;
+  /** The piece itself. */
+  text: string;
+  /** Offset in the needle recorded on the trie node that ended the piece. */
+  needleStart: number;
+};
+
+export type Result = {
+  matches: MatchResult[];
+  /** Sum, over the needle positions, of the longest match covering each. */
+  score: number;
+};
+
+export type MatchOptions = {
+  /** Matches shorter than this are dropped. Defaults to 1. */
+  minMatchLength?: number;
+};
+
+/**
+ * Collects the pieces of `input` that are substrings of the needle and scores
+ * how much of the needle they cover.
+ *
+ * @param input The candidate string to examine.
+ * @param needle The value returned by `makeTrie`.
+ * @param options See `MatchOptions`.
+ * @returns The matches in order of appearance and the resulting score.
+ */
+export function match(
+  input: string,
+  needle: Needle,
+  options: MatchOptions = {},
+): Result {
+  const matches: MatchResult[] = [];
+  const root = needle.trie;
+  let current = root;
+  const score: number[] = Array(needle.length).fill(0);
+  // `??` binds more loosely than `<=`, so resolve the default before comparing.
+  const minMatchLength = options.minMatchLength ?? 1;
+
+  // Run one step past the end so that a match reaching the end of the input is
+  // flushed by the loop itself instead of by a separate check afterwards.
+  for (let i = 0; i <= input.length; i++) {
+    const c = input[i];
+    if (current.next[c] == null) {
+      if (current.len !== 0) {
+        const len = current.len;
+        const start = i - len;
+        if (minMatchLength <= len) {
+          matches.push({
+            start,
+            len,
+            text: input.slice(start, start + len),
+            needleStart: current.start,
+          });
+          // Even for a similarity measure, favouring strings that merely repeat
+          // the same piece is undesirable, so keep a per-position maximum,
+          // somewhat like a score matrix.
+          for (let j = current.start; j < current.start + len; j++) {
+            score[j] = Math.max(score[j], len);
+          }
+        }
+      }
+      current = root;
+    }
+    if (current.next[c] != null) {
+      current = current.next[c];
+    }
+  }
+  return {
+    matches,
+    // The initial value keeps an empty needle from throwing.
+    score: score.reduce((a, b) => a + b, 0),
+  };
+}
diff --git a/denops/@ddu-filters/sorter_ngram.ts b/denops/@ddu-filters/sorter_ngram.ts
new file mode 100644
index 00000000..91c1fe82
--- /dev/null
+++ b/denops/@ddu-filters/sorter_ngram.ts
@@ -0,0 +1,84 @@
+import {
+  BaseFilter,
+  FilterArguments,
+} from "https://deno.land/x/ddu_vim@v3.10.0/base/filter.ts";
+import { DduItem } from "https://deno.land/x/ddu_vim@v3.10.0/types.ts";
+import { makeTrie, match } from "./ngram/ngram.ts";
+
+// Shared so that a new encoder is not created for every highlight.
+const encoder = new TextEncoder();
+
+/** Returns the number of bytes `input` occupies when encoded as UTF-8. */
+function byteLength(input: string): number {
+  return encoder.encode(input).length;
+}
+
+export type Params = {
+  /** Highlight group for matched pieces; "" turns highlighting off. */
+  highlightMatched: string;
+  /** Shortest input that is ranked and shortest match that is kept. */
+  minMatchLength: number;
+  /** Shortest match that gets highlighted. */
+  minMatchHighlightLength: number;
+};
+
+/** Sorts ddu items by how much of the input their matcher key contains. */
+export class Filter extends BaseFilter<Params> {
+  /**
+   * Ranks `args.items` against `args.input`, best match first.
+   *
+   * The items are returned unchanged while the input is shorter than
+   * `minMatchLength`. With `highlightMatched` set, a highlight is appended to
+   * each item for every match of at least `minMatchHighlightLength` characters.
+   */
+  filter(args: FilterArguments<Params>): DduItem[] {
+    const { highlightMatched, minMatchLength, minMatchHighlightLength } =
+      args.filterParams;
+    if (args.input.length < minMatchLength) {
+      return args.items;
+    }
+    // With smartCase, an upper-case letter in the input makes the match
+    // case-sensitive again.
+    const ignoreCase = args.sourceOptions.ignoreCase &&
+      !(args.sourceOptions.smartCase && /[A-Z]/.test(args.input));
+    const input = ignoreCase ? args.input.toLowerCase() : args.input;
+    const needle = makeTrie(input);
+    const ranked = args.items.map((item) => {
+      const key = ignoreCase ? item.matcherKey.toLowerCase() : item.matcherKey;
+      const result = match(key, needle, { minMatchLength });
+      return { item, result };
+    });
+
+    const hl_group = highlightMatched;
+    if (hl_group !== "") {
+      const name = "ddu-filter-ngram-" + hl_group;
+      for (const { item, result } of ranked) {
+        item.highlights ??= [];
+        for (const m of result.matches) {
+          if (minMatchHighlightLength <= m.len) {
+            item.highlights.push({
+              name,
+              hl_group,
+              // NOTE(review): offsets in `m` refer to `matcherKey`, but the
+              // column is measured on `word`; confirm the two always agree.
+              col: 1 + byteLength(item.word.slice(0, m.start)),
+              width: byteLength(m.text),
+            });
+          }
+        }
+      }
+    }
+
+    return ranked.sort((a, b) => b.result.score - a.result.score)
+      .map((value) => value.item);
+  }
+
+  /** Default parameters: no highlighting, every match counts. */
+  params(): Params {
+    return {
+      highlightMatched: "",
+      minMatchLength: 1,
+      minMatchHighlightLength: 1,
+    };
+  }
+}