-
Notifications
You must be signed in to change notification settings - Fork 67
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
后缀自动机 #18
Comments
求多个字符串的最长公共子串(方法1) function getLCS(strs) {
var s = strs.shift();
var sam = new SuffixAutomaton();
sam.extend(s);
var nodes = sam.nodes;
for (var i = 1; i < nodes.length; i++) {
var node = nodes[i]
node.nlcs = node.len; //表示多个串的最小值
node.lcs = 0 //表示当前匹配的最大值
}
//根据节点的len属性进行计数排序
var count = [];
for (var i = 0; i <= s.length; i++) {
count[i] = 0;
}
for (var i = 0; i < nodes.length; i++) {
count[nodes[i].len]++;
}
for (var i = 1; i <= s.length; i++) {
count[i] += count[i - 1];//求前缀和
}
var sa = [];
for (var i = 0; i < nodes.length; i++) {
var index = --count[nodes[i].len]
sa[index] = nodes[i];
}
function compute(str) {
var f = []
var i = 0, len = 0, n = str.length, node = sam.root
while (i < n) {
var c = str[i];
if (node.children[c]) {
len++;
node = node.children[c]
} else {
while (node && !node.children[c]) {
node = node.fail
}
if (!node) {
len = 0
node = sam.root;
} else {
len = node.len + 1;
node = node.children[c]
}
}
i++
node.lcs = Math.max(node.lcs, len)
}
for (var i = sa.length - 1; i >= 0; i--) {
var node = sa[i];
node.nlcs = Math.min(node.nlcs, node.lcs)
if (node.fail && node.fail.lcs < node.lcs) {
node.fail.lcs = node.lcs
}
node.lcs = 0
}
}
var ans = 0;
while (str = strs.shift()) {
compute(str)
}
for (var i = 1; i < nodes.length; i++) {
ans = Math.max(ans, nodes[i].nlcs)
}
console.log(count, sa);//only test
return ans
}
console.log(getLCS(["uabcdefo", "dabcdui", "eabc87eew"])); //abc |
求K小子串 function findK(str, k, op) {
//op为0则表示不同位置的相同子串算作一个(不重复子串)。po=1则表示不同位置的相同子串算作多个(重复子串)
var sam = new SuffixAutomaton();
sam.extend(str);
var nodes = sam.nodes;
var total = nodes.length;
var indegree = new Array(total).fill(0);
var rank = [];
/*
我们令cnt[i]为i这个状态在原串中出现的次数。再在每个状态维护一个size。
若T=0,siz[i[siz[i[表示ii的所有儿子的size之和+1(子树大小),
否则siz[i[siz[i[表示ii的所有儿子的sizsiz之和+cnt[i](类似子树大小的东西)。
接下来我们用size像平衡树一样查询就好了。
————————————————*/
for (var i = 0; i < total; i++) {
nodes[i].endposCount = 0;
nodes[i].sum = 0;
}
if (op) {
var p = nodes[0];
for (var i = 0; i < str.length; i++) {
p = p.children[str[i]];
p.endposCount++;
}
} else {
for (var i = 0; i < total; i++) {
nodes[i].endposCount = 1;
}
}
//计数排序
for (var i = 0; i < total; i++) {
indegree[nodes[i].len]++;// 统计相同度数的节点的个数
}
for (var i = 1; i < total; i++) {
indegree[i] += indegree[i - 1];// 统计度数小于等于 i 的节点的总数
}
for (var i = 0; i < total; i++) {
rank[--indegree[nodes[i].len]] = nodes[i];// 为每个节点编号,节点度数越大编号越靠后
}
if (op) {
for (var i = total - 1; i >= 0; i--) {
if (rank[i].fail) {
rank[i].fail.endposCount += rank[i].endposCount;
}
}
}
console.log(rank, "rank");
// console.log(pos)
for (var i = total - 1; i >= 0; i--) {
var node = rank[i];
node.sum += node.endposCount;
for (var c in node.children) {
node.sum += node.children[c].sum;
}
}
var res = 0;
var root = nodes[0];
for (var c in root.children) {
res += root.children[c].sum;
}
if (res < k) {
console.log(-1, nodes);
return -1;
}
var ok = false,
ans = [];
function dfs(node, k) {
if (ok) {
return;
}
if (k <= 0) {
ok = 1;
return;
}
for (var c in node.children) {
var child = node.children[c];
if (ok) {
break;
}
if (k > child.sum) {
k -= child.sum;
} else {
k -= child.endposCount;
ans.push(c);
dfs(child, k);
}
}
}
dfs(root, k);
if (ok) {
return ans.join("");
} else {
return -1;
}
}
// https://blog.csdn.net/blankcqk/article/details/47337959
// https://www.cnblogs.com/shuaihui520/p/10695207.html
console.log(findK("aaa", 2, 0));
console.log(findK("aabc", 3, 0)); |
function findK(str, k, op) {
//op为0则表示不同位置的相同子串算作一个(不重复子串)。po=1则表示不同位置的相同子串算作多个(重复子串)
var sam = new SuffixAutomaton();
sam.extend(str);
var nodes = sam.nodes;
var total = nodes.length;
//计数排序
var indegree = new Array(total).fill(0);
var rank = [];
for (var i = 0; i < total; i++) {
nodes[i].index = i;
indegree[nodes[i].len]++;// 统计相同度数的节点的个数
}
for (var i = 1; i < total; i++) {
indegree[i] += indegree[i - 1];// 统计度数小于等于 i 的节点的总数
}
for (var i = 0; i < total; i++) {
rank[--indegree[nodes[i].len]] = nodes[i]// 为每个节点编号,节点度数越大编号越靠后
}
var endpos = new Array(total).fill(1), sum = [0]
for (var i = total - 1; i >= 1; i--) {
var node = rank[i]
if (op === 1) {
endpos[node.fail.index] += endpos[node.index]
}
}
endpos[0] = 0;
for (var i = total - 1; i >= 1; i--) {
var node = rank[i];
var index = node.index
sum[index] = endpos[index]
for (var c in node.children) {
sum[index] += sum[node.children[c].index];
}
}
var ok = false,
ans = [];
function dfs(node, k) {
if (k <= 0) {
ok = true
return
}
for (var c in node.children) {
var child = node.children[c];
if (ok) {
break
}
if (k > sum[child.index]) {
k -= sum[child.index]
} else {
k -= endpos[child.index];
ans.push(c);
dfs(child, k);
}
}
}
dfs(sam.root, k);
return ok ? ans.join("") : -1
}
// https://blog.csdn.net/blankcqk/article/details/47337959
// https://www.cnblogs.com/shuaihui520/p/10695207.html
console.log(findK("aaa", 2, 0));
console.log(findK("aabc", 3, 0)); |
统计一个字符串P在另一个字符串T中出现了多少 次 如果在创建后缀自动机时,为狀态添加一个count属性, 默认为1, 当它添加一个失配指针(不为root)加1 或 对它的克隆对象 的count+1 做了改动,可以简化成这样 function Node(key) {
this.children = {};
this.len = 0; //最长子串的长度(该节点的子串数量=this.len - this.fail.len)
this.isEnd = false;
this.key = key;
this.count = 0
this.fail = null;
}
class SuffixAutomaton {
constructor() {
var node = (this.root = this.last = new Node(""));
this.nodes = [node];
}
extend(word) {
for (var i = 0; i < word.length; i++) {
this.insert(word[i]);
}
}
insert(c, n) {
var p = this.last;
var node = new Node(c);
node.len = p.len + 1;
this.last = node;
this.nodes.push(node);
node.count++
while (p && !p.children[c]) {
//如果p 没有一条 c 的出边
p.children[c] = node; //为a为一个c
p = p.fail;
}
if (!p) {
node.fail = this.root;
} else {
var q = p.children[c];
if (p.len + 1 == q.len) {
node.fail = q;
q.count++
q.isEnd = true; //主轴上冲突
} else {
var clone = new Node(c); //克隆节点
clone.count = q.count + 1
this.nodes.push(clone);
clone.children = q.children;
clone.fail = q.fail;
clone.len = p.len + 1;
clone.isEnd = true;
while (p && p.children[c] == q) {
p.children[c] = clone; //将树中所有q相关的链接替换成clone
p = p.fail;
}
q.fail = node.fail = clone;
}
}
}
}
function timeOf(t, p) {
var sam = new SuffixAutomaton()
sam.extend(t);
var node = sam.root, time = Infinity;
for (var i = 0; i < p.length; i++) {
var c = p[i];
var node = node.children[c];
if (node) {
time = Math.min(time, node.count)
} else {
time = 0;
}
}
var res = time === Infinity ? 0 : time
console.log(`${p}在${t}中出现了${res}次`)
return res;
}
console.log(timeOf('abcdabcabasab', 'abc'))
console.log(timeOf('abcdabcabasab', 'ab'))
console.log(timeOf('abcdabcabasab', 'da')) 每个狀态对应的字符肯定会出现一次,当出现失配时,它会重新指向第一次匹配的同字符的狀态上,因此我们把这个数字加1。圆圈中的数字就是字符出现的次数了。但由于我们并没有在构建自动机时处理出现次数,我们需要曲线求国,通过拓朴排序来求。
```javascript
function topoSort(nodes) {
var total = nodes.length;
var indegree = new Array(total).fill(0);
//统计每个字符出现的次数
var counts = new Array(total).fill(1);
var rank = [];
for (var i = 0; i < total; i++) {
nodes[i].index = i;//index用于关联counts数组
indegree[nodes[i].len]++;// 将endpos相同的节点放在同一个桶中
}
for (var i = 1; i < total; i++) {
indegree[i] += indegree[i - 1];// 统计endpos小于等于 i 的节点的总数
}
for (var i = 1; i < total; i++) {
rank[--indegree[nodes[i].len]] = nodes[i]// 为每个节点编号,节点度数越大编号越靠后
}
for (var i = total - 1; i >= 1; i--) {
var node = rank[i] //求得每个字符的出现次数
counts[node.fail.index] += counts[node.index]
}
return counts
}
function timeOf(t, p) {
var sam = new SuffixAutomaton()
sam.extend(t);
var counts = topoSort(sam.nodes)
var node = sam.root, time = Infinity;
for (var i = 0; i < p.length; i++) {
var c = p[i];
var node = node.children[c];
if (node) {
time = Math.min(time, counts[node.index])
}
}
var res = time === Infinity ? 0 : time
console.log(`${p}在${t}中出现了${res}次`); // only test
return res;
}
console.log(timeOf('abcdababc', 'abc'))
console.log(timeOf('abcdababc', 'ab'))
console.log(timeOf('abcdababc', 'da')) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The text was updated successfully, but these errors were encountered: