Skip to content

Commit

Permalink
とりあえず持ってきた
Browse files Browse the repository at this point in the history
  • Loading branch information
kuuote committed Feb 23, 2024
1 parent 5e02e79 commit c504f0e
Show file tree
Hide file tree
Showing 4 changed files with 357 additions and 0 deletions.
110 changes: 110 additions & 0 deletions denops/@ddc-filters/ngram/ngram.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/**
 * One node of the suffix trie produced by `makeTrie`.
 * The path from the root to a node spells a substring of the needle text.
 */
export type Trie = {
/** Child nodes, keyed by the next single character (one UTF-16 code unit). */
next: Record<string, Trie>;
/**
 * Offset in the needle text of the suffix that last wrote this node
 * (later suffixes overwrite earlier ones); -1 on the root.
 */
start: number;
/** Number of characters on the path from the root to this node; 0 on the root. */
len: number;
};

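/*
Fuzzy matching and scoring based on n-grams.
Given the input "hoge", it is decomposed into the suffixes
- hoge
- oge
- ge
- e
and a trie is built from them. Matching a string of any length against this trie yields match information for the parts of the input that occur in it.
- Excluding trie construction, the running time is probably around O(n)?
*/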

/** Search pattern prepared by `makeTrie` and consumed by `match`. */
export type Needle = {
/** Root of the trie holding every suffix of the needle text. */
trie: Trie;
/** Length of the needle text (`input.length` at construction time). */
length: number;
};

/**
 * Builds the needle used by `match`: a trie containing every suffix of
 * `input`, so that any substring of `input` is a path starting at the root.
 *
 * @param input Text the candidates will be compared against.
 * @returns The trie root together with the length of `input`.
 */
export function makeTrie(input: string): Needle {
  const root: Trie = {
    next: {},
    start: -1,
    len: 0,
  };
  const total = input.length;

  for (let offset = 0; offset < total; offset++) {
    let node = root;
    for (let pos = offset; pos < total; pos++) {
      const ch = input[pos];
      // When the same substring occurs more than once, the node is rewritten
      // so that the occurrence closest to the end of the input wins. E.g.
      // after narrowing with "hogepiyo", typing "piyo" again should bring
      // "piyo" matches to the front. Existing children are kept.
      const replacement: Trie = {
        next: node.next[ch]?.next ?? {},
        start: offset,
        len: pos - offset + 1,
      };
      node.next[ch] = replacement;
      node = replacement;
    }
  }

  return {
    trie: root,
    length: total,
  };
}

/** One contiguous run of the matched string that is also a substring of the needle. */
export type MatchResult = {
/** Offset of the run inside the string passed to `match`. */
start: number;
/** Length of the run. */
len: number;
/** The matched text, i.e. `input.slice(start, start + len)`. */
text: string;
/** Offset inside the needle text recorded on the trie node where the run ended. */
needleStart: number;
};

/** Outcome of `match` for a single candidate string. */
export type Result = {
/** Runs found in the candidate, in the order they were encountered. */
matches: MatchResult[];
/**
 * Sum over needle positions of the longest recorded run covering that
 * position; larger means more similar.
 */
score: number;
};

/** Options accepted by `match`. */
export type MatchOptions = {
/** Optional minimum run length consulted by `match` before a run is recorded. */
minMatchLength?: number;
};

/**
 * Scans `input` from left to right, greedily following the needle trie, and
 * reports every maximal run of `input` that is a substring of the needle.
 *
 * The score is not a plain sum of run lengths: for each needle position only
 * the longest run covering it counts, so candidates that merely repeat the
 * same fragment many times are not favoured.
 *
 * @param input Candidate string to compare against the needle.
 * @param needle Needle produced by `makeTrie`.
 * @param options `minMatchLength` (default 1): runs shorter than this are
 *   neither reported nor scored.
 * @returns The recorded runs and the similarity score (0 when nothing matched
 *   or when the needle is empty).
 */
export function match(
  input: string,
  needle: Needle,
  options: MatchOptions = {},
): Result {
  // Resolve the default first. Written inline as `a ?? 1 <= len`, the
  // comparison binds tighter than `??` and the threshold is never applied.
  const minMatchLength = options.minMatchLength ?? 1;
  const matches: MatchResult[] = [];
  const root = needle.trie;
  let current = root;
  // Longest recorded run covering each needle position.
  const score: number[] = Array(needle.length).fill(0);

  // Iterate one step past the end: `input[input.length]` is undefined, which
  // never has a trie edge, so the final pending run is flushed by the same
  // code path instead of a separate check after the loop.
  for (let i = 0; i <= input.length; i++) {
    const c = input[i];
    if (current.next[c] == null) {
      if (current.len !== 0) {
        const len = current.len;
        const start = i - len;
        if (minMatchLength <= len) {
          matches.push({
            start,
            len,
            text: input.slice(start, start + len),
            needleStart: current.start,
          });
          for (let j = current.start; j < current.start + len; j++) {
            score[j] = Math.max(score[j], len);
          }
        }
      }
      // Restart from the root; the current character may begin a new run.
      current = root;
    }
    if (current.next[c] != null) {
      current = current.next[c];
    }
  }
  return {
    matches,
    // The initial value keeps an empty needle (empty score array) from throwing.
    score: score.reduce((a, b) => a + b, 0),
  };
}
70 changes: 70 additions & 0 deletions denops/@ddc-filters/sorter_ngram.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import {
BaseFilter,
FilterArguments,
} from "https://deno.land/x/[email protected]/base/filter.ts";
import { Item } from "https://deno.land/x/[email protected]/types.ts";
import { makeTrie, match } from "./ngram/ngram.ts";

/** Returns how many bytes `input` occupies once encoded as UTF-8. */
function byteLength(input: string): number {
  const utf8 = new TextEncoder().encode(input);
  return utf8.byteLength;
}

/** User-configurable parameters of the sorter (defaults come from `Filter.params()`). */
export type Params = {
/** Highlight group applied to matched fragments; an empty string disables highlighting. */
highlightMatched: string;
/**
 * When the text being completed is shorter than this, items are returned
 * as-is; the value is also forwarded to `match` as its `minMatchLength` option.
 */
minMatchLength: number;
/** Only matches at least this long receive a highlight. */
minMatchHighlightLength: number;
};

/**
 * ddc sorter that ranks completion items by n-gram similarity to the text
 * being completed and can optionally highlight the matched fragments.
 */
export class Filter extends BaseFilter<Params> {
  /**
   * Orders `args.items` by descending n-gram score against `args.completeStr`.
   *
   * Items are returned untouched when the typed text is shorter than
   * `minMatchLength`. When `highlightMatched` is non-empty, highlight entries
   * are appended to each item's `highlights` array, i.e. the items are
   * mutated in place.
   */
  filter(args: FilterArguments<Params>): Item[] {
    const { completeStr, filterParams, items, sourceOptions } = args;
    if (completeStr.length < filterParams.minMatchLength) {
      return items;
    }

    // Both the typed text and each word are lower-cased when the source
    // options request case-insensitive matching.
    const fold = (text: string): string =>
      sourceOptions.ignoreCase ? text.toLowerCase() : text;

    const needle = makeTrie(fold(completeStr));
    const scored = items.map((item) => ({
      item,
      result: match(fold(item.word), needle, {
        minMatchLength: filterParams.minMatchLength,
      }),
    }));

    const group = filterParams.highlightMatched;
    if (group != "") {
      const name = "ddc-filter-sorter_ngram-" + group;
      for (const { item, result } of scored) {
        const highlights = (item.highlights ??= []);
        const longEnough = result.matches.filter(
          (m) => filterParams.minMatchHighlightLength <= m.len,
        );
        for (const m of longEnough) {
          highlights.push({
            name,
            type: "abbr",
            hl_group: group,
            // col is 1 + the UTF-8 byte length of the text preceding the match.
            // NOTE(review): offsets are computed on `item.word` while the
            // highlight type is "abbr" — confirm they line up when an item
            // carries a separate abbr.
            col: 1 + byteLength(item.word.slice(0, m.start)),
            width: byteLength(m.text),
          });
        }
      }
    }

    scored.sort((lhs, rhs) => rhs.result.score - lhs.result.score);
    return scored.map(({ item }) => item);
  }

  /** Default parameters: highlighting off, every match length accepted. */
  params(): Params {
    const defaults: Params = {
      highlightMatched: "",
      minMatchLength: 1,
      minMatchHighlightLength: 1,
    };
    return defaults;
  }
}
110 changes: 110 additions & 0 deletions denops/@ddu-filters/ngram/ngram.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/**
 * One node of the suffix trie produced by `makeTrie`.
 * The path from the root to a node spells a substring of the needle text.
 */
export type Trie = {
/** Child nodes, keyed by the next single character (one UTF-16 code unit). */
next: Record<string, Trie>;
/**
 * Offset in the needle text of the suffix that last wrote this node
 * (later suffixes overwrite earlier ones); -1 on the root.
 */
start: number;
/** Number of characters on the path from the root to this node; 0 on the root. */
len: number;
};

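/*
Fuzzy matching and scoring based on n-grams.
Given the input "hoge", it is decomposed into the suffixes
- hoge
- oge
- ge
- e
and a trie is built from them. Matching a string of any length against this trie yields match information for the parts of the input that occur in it.
- Excluding trie construction, the running time is probably around O(n)?
*/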

/** Search pattern prepared by `makeTrie` and consumed by `match`. */
export type Needle = {
/** Root of the trie holding every suffix of the needle text. */
trie: Trie;
/** Length of the needle text (`input.length` at construction time). */
length: number;
};

/**
 * Builds the needle used by `match`: a trie containing every suffix of
 * `input`, so that any substring of `input` is a path starting at the root.
 *
 * @param input Text the candidates will be compared against.
 * @returns The trie root together with the length of `input`.
 */
export function makeTrie(input: string): Needle {
  const root: Trie = {
    next: {},
    start: -1,
    len: 0,
  };
  const total = input.length;

  for (let offset = 0; offset < total; offset++) {
    let node = root;
    for (let pos = offset; pos < total; pos++) {
      const ch = input[pos];
      // When the same substring occurs more than once, the node is rewritten
      // so that the occurrence closest to the end of the input wins. E.g.
      // after narrowing with "hogepiyo", typing "piyo" again should bring
      // "piyo" matches to the front. Existing children are kept.
      const replacement: Trie = {
        next: node.next[ch]?.next ?? {},
        start: offset,
        len: pos - offset + 1,
      };
      node.next[ch] = replacement;
      node = replacement;
    }
  }

  return {
    trie: root,
    length: total,
  };
}

/** One contiguous run of the matched string that is also a substring of the needle. */
export type MatchResult = {
/** Offset of the run inside the string passed to `match`. */
start: number;
/** Length of the run. */
len: number;
/** The matched text, i.e. `input.slice(start, start + len)`. */
text: string;
/** Offset inside the needle text recorded on the trie node where the run ended. */
needleStart: number;
};

/** Outcome of `match` for a single candidate string. */
export type Result = {
/** Runs found in the candidate, in the order they were encountered. */
matches: MatchResult[];
/**
 * Sum over needle positions of the longest recorded run covering that
 * position; larger means more similar.
 */
score: number;
};

/** Options accepted by `match`. */
export type MatchOptions = {
/** Optional minimum run length consulted by `match` before a run is recorded. */
minMatchLength?: number;
};

/**
 * Scans `input` from left to right, greedily following the needle trie, and
 * reports every maximal run of `input` that is a substring of the needle.
 *
 * The score is not a plain sum of run lengths: for each needle position only
 * the longest run covering it counts, so candidates that merely repeat the
 * same fragment many times are not favoured.
 *
 * @param input Candidate string to compare against the needle.
 * @param needle Needle produced by `makeTrie`.
 * @param options `minMatchLength` (default 1): runs shorter than this are
 *   neither reported nor scored.
 * @returns The recorded runs and the similarity score (0 when nothing matched
 *   or when the needle is empty).
 */
export function match(
  input: string,
  needle: Needle,
  options: MatchOptions = {},
): Result {
  // Resolve the default first. Written inline as `a ?? 1 <= len`, the
  // comparison binds tighter than `??` and the threshold is never applied.
  const minMatchLength = options.minMatchLength ?? 1;
  const matches: MatchResult[] = [];
  const root = needle.trie;
  let current = root;
  // Longest recorded run covering each needle position.
  const score: number[] = Array(needle.length).fill(0);

  // Iterate one step past the end: `input[input.length]` is undefined, which
  // never has a trie edge, so the final pending run is flushed by the same
  // code path instead of a separate check after the loop.
  for (let i = 0; i <= input.length; i++) {
    const c = input[i];
    if (current.next[c] == null) {
      if (current.len !== 0) {
        const len = current.len;
        const start = i - len;
        if (minMatchLength <= len) {
          matches.push({
            start,
            len,
            text: input.slice(start, start + len),
            needleStart: current.start,
          });
          for (let j = current.start; j < current.start + len; j++) {
            score[j] = Math.max(score[j], len);
          }
        }
      }
      // Restart from the root; the current character may begin a new run.
      current = root;
    }
    if (current.next[c] != null) {
      current = current.next[c];
    }
  }
  return {
    matches,
    // The initial value keeps an empty needle (empty score array) from throwing.
    score: score.reduce((a, b) => a + b, 0),
  };
}
67 changes: 67 additions & 0 deletions denops/@ddu-filters/sorter_ngram.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import {
BaseFilter,
FilterArguments,
} from "https://deno.land/x/[email protected]/base/filter.ts";
import { DduItem } from "https://deno.land/x/[email protected]/types.ts";
import { makeTrie, match } from "./ngram/ngram.ts";

/** Returns how many bytes `input` occupies once encoded as UTF-8. */
function byteLength(input: string): number {
  const utf8 = new TextEncoder().encode(input);
  return utf8.byteLength;
}

/** User-configurable parameters of the sorter (defaults come from `Filter.params()`). */
export type Params = {
/** Highlight group applied to matched fragments; an empty string disables highlighting. */
highlightMatched: string;
/**
 * When the filter input is shorter than this, items are returned as-is;
 * the value is also forwarded to `match` as its `minMatchLength` option.
 */
minMatchLength: number;
/** Only matches at least this long receive a highlight. */
minMatchHighlightLength: number;
};

/**
 * ddu sorter that ranks items by n-gram similarity between the filter input
 * and each item's `matcherKey`, optionally highlighting matched fragments.
 */
export class Filter extends BaseFilter<Params> {
  /**
   * Orders `args.items` by descending n-gram score against `args.input`.
   *
   * Items are returned untouched when the input is shorter than
   * `minMatchLength`. When `highlightMatched` is non-empty, highlight entries
   * are appended to each item's `highlights` array, i.e. the items are
   * mutated in place.
   */
  filter(args: FilterArguments<Params>): DduItem[] {
    const { input, items, filterParams, sourceOptions } = args;
    if (input.length < filterParams.minMatchLength) {
      return items;
    }

    // Case is ignored only when `ignoreCase` is set and smart-case does not
    // veto it (smart-case vetoes when the input contains an ASCII capital).
    const smartCaseVeto = sourceOptions.smartCase && /[A-Z]/.test(input);
    const ignoreCase = sourceOptions.ignoreCase && !smartCaseVeto;
    const fold = (text: string): string =>
      ignoreCase ? text.toLowerCase() : text;

    const needle = makeTrie(fold(input));
    const scored = items.map((item) => ({
      item,
      result: match(fold(item.matcherKey), needle, {
        minMatchLength: filterParams.minMatchLength,
      }),
    }));

    const group = filterParams.highlightMatched;
    if (group != "") {
      const name = "ddu-filter-ngram-" + group;
      for (const { item, result } of scored) {
        const highlights = (item.highlights ??= []);
        const longEnough = result.matches.filter(
          (m) => filterParams.minMatchHighlightLength <= m.len,
        );
        for (const m of longEnough) {
          highlights.push({
            name,
            hl_group: group,
            // col is 1 + the UTF-8 byte length of the text preceding the match.
            // NOTE(review): match offsets come from `matcherKey` but the
            // prefix is sliced from `item.word` — confirm the two always agree.
            col: 1 + byteLength(item.word.slice(0, m.start)),
            width: byteLength(m.text),
          });
        }
      }
    }

    scored.sort((lhs, rhs) => rhs.result.score - lhs.result.score);
    return scored.map(({ item }) => item);
  }

  /** Default parameters: highlighting off, every match length accepted. */
  params(): Params {
    const defaults: Params = {
      highlightMatched: "",
      minMatchLength: 1,
      minMatchHighlightLength: 1,
    };
    return defaults;
  }
}

0 comments on commit c504f0e

Please sign in to comment.