-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
357 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
/**
 * One node of the suffix trie built by `makeTrie`.
 *
 * The root node has `start === -1` and `len === 0`; every other node
 * represents a substring of the needle.
 */
export type Trie = {
  /** Child nodes keyed by the next character (one UTF-16 code unit). */
  next: Record<string, Trie>;
  /** Offset in the needle at which this node's substring begins. */
  start: number;
  /** Length of this node's substring, i.e. the node's depth in the trie. */
  len: number;
};
|
||
/* | ||
ngramをベースとした曖昧マッチ&スコアリングプログラム | ||
入力をhogeとして | ||
- hoge | ||
- oge | ||
- ge | ||
- e | ||
のように分解してTrieを構築、任意長でこれと文字列をマッチすると入力の一部分からなるマッチ情報が手に入る | ||
- Trieの構築を除くと実行効率多分O(n)くらい? | ||
*/ | ||
|
||
/** A search pattern pre-processed by `makeTrie`, ready to be passed to `match`. */
export type Needle = {
  /** Root of the suffix trie built from the pattern. */
  trie: Trie;
  /** Length of the original pattern string. */
  length: number;
};
|
||
/**
 * Builds a suffix trie for `input` so that any substring of `input` can be
 * followed from the root.
 *
 * Each node records where in `input` its substring starts and how long it is.
 * When the same substring occurs more than once, the node is overwritten so
 * that the occurrence closest to the end of `input` wins.
 *
 * @param input The pattern string to index.
 * @returns The trie root together with the length of `input`.
 */
export function makeTrie(input: string): Needle {
  const root: Trie = {
    next: {},
    start: -1,
    len: 0,
  };
  for (let start = 0; start < input.length; start++) {
    let current = root;
    // Walk the suffix in place instead of allocating a slice per suffix.
    for (let i = start; i < input.length; i++) {
      const c = input[i];
      // On duplicates, overwrite so that the later occurrence takes priority.
      // For example, after narrowing down with "hogepiyo", typing "piyo" again
      // should be enough to bring the "piyo" part to the front.
      const node: Trie = {
        next: current.next[c]?.next ?? {},
        start,
        len: i - start + 1,
      };
      current.next[c] = node;
      current = node;
    }
  }
  return {
    trie: root,
    length: input.length,
  };
}
|
||
/** One contiguous run of the searched text that also occurs in the needle. */
export type MatchResult = {
  /** Offset in the searched text where the run begins. */
  start: number;
  /** Length of the run. */
  len: number;
  /** The matched slice of the searched text. */
  text: string;
  /** Offset in the needle of the occurrence recorded for this run. */
  needleStart: number;
};
|
||
/** Outcome of matching one text against a needle. */
export type Result = {
  /** Every accepted run, in the order it was found in the searched text. */
  matches: MatchResult[];
  /**
   * Similarity score: for each needle position, the length of the longest
   * accepted run covering it, summed over all positions.
   */
  score: number;
};
|
||
/** Tuning knobs accepted by `match`. */
export type MatchOptions = {
  /**
   * Minimum run length; runs shorter than this are meant to be discarded.
   * `match` falls back to 1 when this is omitted.
   */
  minMatchLength?: number;
};
|
||
/**
 * Scans `input` greedily for runs of characters that also occur in the needle
 * and scores how much of the needle those runs cover.
 *
 * The scan follows the needle's trie for as long as the next character has a
 * child; when it does not, the run collected so far is recorded (if it is long
 * enough) and the scan restarts from the trie root at the current character.
 *
 * @param input The text to search in.
 * @param needle The pre-processed pattern produced by `makeTrie`.
 * @param options `minMatchLength` is the shortest run to accept (default 1).
 * @returns The accepted runs and the resulting score; the score is 0 when
 *   nothing matched or the needle is empty.
 */
export function match(
  input: string,
  needle: Needle,
  options: MatchOptions = {},
): Result {
  // Resolve the default up front. Written inline as `a ?? 1 <= len`, the
  // comparison binds tighter than `??` and the threshold is never applied.
  const minMatchLength = options.minMatchLength ?? 1;
  const matches: MatchResult[] = [];
  const root = needle.trie;
  // Per needle position: length of the longest accepted run covering it.
  const coverage: number[] = new Array<number>(needle.length).fill(0);
  let current = root;

  // Iterate one step past the end so that a run reaching the end of `input`
  // is flushed by the same code path instead of a separate check afterwards.
  for (let i = 0; i <= input.length; i++) {
    const c = i < input.length ? input[i] : undefined;
    let child = c === undefined ? undefined : current.next[c];
    if (child == null) {
      const len = current.len;
      if (len !== 0 && minMatchLength <= len) {
        const start = i - len;
        matches.push({
          start,
          len,
          text: input.slice(start, start + len),
          needleStart: current.start,
        });
        // Plain similarity would favor texts that repeat the same fragment
        // many times, so keep only the best run length per needle position
        // (a score-matrix-like approach).
        for (let j = current.start; j < current.start + len; j++) {
          coverage[j] = Math.max(coverage[j], len);
        }
      }
      // Restart from the root; the current character may begin a new run.
      current = root;
      child = c === undefined ? undefined : root.next[c];
    }
    if (child != null) {
      current = child;
    }
  }
  return {
    matches,
    // The initial value keeps an empty needle from throwing.
    score: coverage.reduce((a, b) => a + b, 0),
  };
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
import { | ||
BaseFilter, | ||
FilterArguments, | ||
} from "https://deno.land/x/[email protected]/base/filter.ts"; | ||
import { Item } from "https://deno.land/x/[email protected]/types.ts"; | ||
import { makeTrie, match } from "./ngram/ngram.ts"; | ||
|
||
/** Returns the number of bytes `input` occupies when encoded as UTF-8. */
function byteLength(input: string): number {
  return new TextEncoder().encode(input).length;
}
|
||
/** User-configurable parameters of the filter. */
export type Params = {
  /** Highlight group applied to matched runs; an empty string disables highlighting. */
  highlightMatched: string;
  /**
   * Minimum run length handed to the matcher. Inputs shorter than this are
   * not filtered at all and the items are returned unchanged.
   */
  minMatchLength: number;
  /** Runs shorter than this are not highlighted. */
  minMatchHighlightLength: number;
};
|
||
/**
 * Filter that orders items by their n-gram similarity to the text being
 * completed and can attach highlights for the matched runs.
 */
export class Filter extends BaseFilter<Params> {
  /**
   * Scores every item's `word` against `completeStr` and returns the items
   * sorted by descending score.
   *
   * When `completeStr` is shorter than `minMatchLength` the items are returned
   * untouched. When `highlightMatched` is non-empty, a highlight entry is
   * pushed onto each item for every run of at least
   * `minMatchHighlightLength` characters.
   */
  filter(args: FilterArguments<Params>): Item[] {
    const { completeStr, filterParams, items, sourceOptions } = args;
    if (completeStr.length < filterParams.minMatchLength) {
      return items;
    }

    // Lower-case both sides when the source asks for case-insensitivity.
    const fold = (text: string): string =>
      sourceOptions.ignoreCase ? text.toLowerCase() : text;

    const needle = makeTrie(fold(completeStr));
    const ranked = items.map((item) => ({
      item,
      result: match(fold(item.word), needle, {
        minMatchLength: filterParams.minMatchLength,
      }),
    }));

    const hl_group = filterParams.highlightMatched;
    if (hl_group != "") {
      const name = "ddc-filter-sorter_ngram-" + hl_group;
      for (const { item, result } of ranked) {
        const highlights = (item.highlights ??= []);
        for (const m of result.matches) {
          if (filterParams.minMatchHighlightLength <= m.len) {
            // Columns and widths are expressed in bytes, columns being 1-based.
            const col = 1 + byteLength(item.word.slice(0, m.start));
            const width = byteLength(m.text);
            highlights.push({ name, type: "abbr", hl_group, col, width });
          }
        }
      }
    }

    ranked.sort((a, b) => b.result.score - a.result.score);
    return ranked.map(({ item }) => item);
  }

  /** Default parameters: highlighting off, minimum lengths of 1. */
  params(): Params {
    const defaults: Params = {
      highlightMatched: "",
      minMatchLength: 1,
      minMatchHighlightLength: 1,
    };
    return defaults;
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
/**
 * One node of the suffix trie built by `makeTrie`.
 *
 * The root node has `start === -1` and `len === 0`; every other node
 * represents a substring of the needle.
 */
export type Trie = {
  /** Child nodes keyed by the next character (one UTF-16 code unit). */
  next: Record<string, Trie>;
  /** Offset in the needle at which this node's substring begins. */
  start: number;
  /** Length of this node's substring, i.e. the node's depth in the trie. */
  len: number;
};
|
||
/* | ||
ngramをベースとした曖昧マッチ&スコアリングプログラム | ||
入力をhogeとして | ||
- hoge | ||
- oge | ||
- ge | ||
- e | ||
のように分解してTrieを構築、任意長でこれと文字列をマッチすると入力の一部分からなるマッチ情報が手に入る | ||
- Trieの構築を除くと実行効率多分O(n)くらい? | ||
*/ | ||
|
||
/** A search pattern pre-processed by `makeTrie`, ready to be passed to `match`. */
export type Needle = {
  /** Root of the suffix trie built from the pattern. */
  trie: Trie;
  /** Length of the original pattern string. */
  length: number;
};
|
||
/**
 * Builds a suffix trie for `input` so that any substring of `input` can be
 * followed from the root.
 *
 * Each node records where in `input` its substring starts and how long it is.
 * When the same substring occurs more than once, the node is overwritten so
 * that the occurrence closest to the end of `input` wins.
 *
 * @param input The pattern string to index.
 * @returns The trie root together with the length of `input`.
 */
export function makeTrie(input: string): Needle {
  const root: Trie = {
    next: {},
    start: -1,
    len: 0,
  };
  for (let start = 0; start < input.length; start++) {
    let current = root;
    // Walk the suffix in place instead of allocating a slice per suffix.
    for (let i = start; i < input.length; i++) {
      const c = input[i];
      // On duplicates, overwrite so that the later occurrence takes priority.
      // For example, after narrowing down with "hogepiyo", typing "piyo" again
      // should be enough to bring the "piyo" part to the front.
      const node: Trie = {
        next: current.next[c]?.next ?? {},
        start,
        len: i - start + 1,
      };
      current.next[c] = node;
      current = node;
    }
  }
  return {
    trie: root,
    length: input.length,
  };
}
|
||
/** One contiguous run of the searched text that also occurs in the needle. */
export type MatchResult = {
  /** Offset in the searched text where the run begins. */
  start: number;
  /** Length of the run. */
  len: number;
  /** The matched slice of the searched text. */
  text: string;
  /** Offset in the needle of the occurrence recorded for this run. */
  needleStart: number;
};
|
||
/** Outcome of matching one text against a needle. */
export type Result = {
  /** Every accepted run, in the order it was found in the searched text. */
  matches: MatchResult[];
  /**
   * Similarity score: for each needle position, the length of the longest
   * accepted run covering it, summed over all positions.
   */
  score: number;
};
|
||
/** Tuning knobs accepted by `match`. */
export type MatchOptions = {
  /**
   * Minimum run length; runs shorter than this are meant to be discarded.
   * `match` falls back to 1 when this is omitted.
   */
  minMatchLength?: number;
};
|
||
/**
 * Scans `input` greedily for runs of characters that also occur in the needle
 * and scores how much of the needle those runs cover.
 *
 * The scan follows the needle's trie for as long as the next character has a
 * child; when it does not, the run collected so far is recorded (if it is long
 * enough) and the scan restarts from the trie root at the current character.
 *
 * @param input The text to search in.
 * @param needle The pre-processed pattern produced by `makeTrie`.
 * @param options `minMatchLength` is the shortest run to accept (default 1).
 * @returns The accepted runs and the resulting score; the score is 0 when
 *   nothing matched or the needle is empty.
 */
export function match(
  input: string,
  needle: Needle,
  options: MatchOptions = {},
): Result {
  // Resolve the default up front. Written inline as `a ?? 1 <= len`, the
  // comparison binds tighter than `??` and the threshold is never applied.
  const minMatchLength = options.minMatchLength ?? 1;
  const matches: MatchResult[] = [];
  const root = needle.trie;
  // Per needle position: length of the longest accepted run covering it.
  const coverage: number[] = new Array<number>(needle.length).fill(0);
  let current = root;

  // Iterate one step past the end so that a run reaching the end of `input`
  // is flushed by the same code path instead of a separate check afterwards.
  for (let i = 0; i <= input.length; i++) {
    const c = i < input.length ? input[i] : undefined;
    let child = c === undefined ? undefined : current.next[c];
    if (child == null) {
      const len = current.len;
      if (len !== 0 && minMatchLength <= len) {
        const start = i - len;
        matches.push({
          start,
          len,
          text: input.slice(start, start + len),
          needleStart: current.start,
        });
        // Plain similarity would favor texts that repeat the same fragment
        // many times, so keep only the best run length per needle position
        // (a score-matrix-like approach).
        for (let j = current.start; j < current.start + len; j++) {
          coverage[j] = Math.max(coverage[j], len);
        }
      }
      // Restart from the root; the current character may begin a new run.
      current = root;
      child = c === undefined ? undefined : root.next[c];
    }
    if (child != null) {
      current = child;
    }
  }
  return {
    matches,
    // The initial value keeps an empty needle from throwing.
    score: coverage.reduce((a, b) => a + b, 0),
  };
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
import { | ||
BaseFilter, | ||
FilterArguments, | ||
} from "https://deno.land/x/[email protected]/base/filter.ts"; | ||
import { DduItem } from "https://deno.land/x/[email protected]/types.ts"; | ||
import { makeTrie, match } from "./ngram/ngram.ts"; | ||
|
||
/** Returns the number of bytes `input` occupies when encoded as UTF-8. */
function byteLength(input: string): number {
  return new TextEncoder().encode(input).length;
}
|
||
/** User-configurable parameters of the filter. */
export type Params = {
  /** Highlight group applied to matched runs; an empty string disables highlighting. */
  highlightMatched: string;
  /**
   * Minimum run length handed to the matcher. Inputs shorter than this are
   * not filtered at all and the items are returned unchanged.
   */
  minMatchLength: number;
  /** Runs shorter than this are not highlighted. */
  minMatchHighlightLength: number;
};
|
||
/**
 * Filter that orders items by their n-gram similarity to the current input
 * and can attach highlights for the matched runs.
 */
export class Filter extends BaseFilter<Params> {
  /**
   * Scores every item's `matcherKey` against `input` and returns the items
   * sorted by descending score.
   *
   * When `input` is shorter than `minMatchLength` the items are returned
   * untouched. Case is folded when `ignoreCase` is set, unless `smartCase` is
   * also set and the input contains an ASCII upper-case letter. When
   * `highlightMatched` is non-empty, a highlight entry is pushed onto each
   * item for every run of at least `minMatchHighlightLength` characters.
   */
  filter(args: FilterArguments<Params>): DduItem[] {
    const { filterParams, items, sourceOptions } = args;
    const rawInput = args.input;
    if (rawInput.length < filterParams.minMatchLength) {
      return items;
    }

    const ignoreCase = sourceOptions.ignoreCase &&
      !(sourceOptions.smartCase && /[A-Z]/.test(rawInput));
    const fold = (text: string): string =>
      ignoreCase ? text.toLowerCase() : text;

    const needle = makeTrie(fold(rawInput));
    const ranked = items.map((item) => ({
      item,
      result: match(fold(item.matcherKey), needle, {
        minMatchLength: filterParams.minMatchLength,
      }),
    }));

    const hl_group = filterParams.highlightMatched;
    if (hl_group != "") {
      const name = "ddu-filter-ngram-" + hl_group;
      for (const { item, result } of ranked) {
        const highlights = (item.highlights ??= []);
        for (const m of result.matches) {
          if (filterParams.minMatchHighlightLength <= m.len) {
            // Columns and widths are expressed in bytes, columns being 1-based.
            // NOTE(review): the offset is taken from `item.word`, while the
            // match was computed on `item.matcherKey` — confirm the two always
            // hold the same text, otherwise the highlight is misplaced.
            const col = 1 + byteLength(item.word.slice(0, m.start));
            const width = byteLength(m.text);
            highlights.push({ name, hl_group, col, width });
          }
        }
      }
    }

    ranked.sort((a, b) => b.result.score - a.result.score);
    return ranked.map(({ item }) => item);
  }

  /** Default parameters: highlighting off, minimum lengths of 1. */
  params(): Params {
    const defaults: Params = {
      highlightMatched: "",
      minMatchLength: 1,
      minMatchHighlightLength: 1,
    };
    return defaults;
  }
}