forked from wangzheng0822/algo
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
unknown
authored and
unknown
committed
Dec 14, 2018
1 parent
6b50ac0
commit 9fd6594
Showing
2 changed files
with
95 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
#!/usr/bin/python | ||
# -*- coding: UTF-8 -*- | ||
|
||
from trie_ import Node, Trie | ||
from queue import Queue | ||
|
||
|
||
class ACNode(Node): | ||
def __init__(self, c: str): | ||
super(ACNode, self).__init__(c) | ||
self.fail = None | ||
self.length = 0 | ||
|
||
def insert_child(self, c: str): | ||
self._insert_child(ACNode(c)) | ||
|
||
|
||
class ACTrie(Trie): | ||
def __init__(self): | ||
self.root = ACNode(None) | ||
|
||
|
||
def ac_automata(main: str, ac_trie: ACTrie) -> list: | ||
root = ac_trie.root | ||
build_failure_pointer(ac_trie) | ||
|
||
ret = [] | ||
p = root | ||
for i, c in enumerate(main): | ||
while p != root and not p.has_child(c): | ||
p = p.fail | ||
|
||
if p.has_child(c): # a char matched, try to find all potential pattern matched | ||
q = p.get_child(c) | ||
while q != root: | ||
if q.is_ending_char: | ||
ret.append((i-q.length+1, i)) | ||
# ret.append(main[i-q.length+1:i+1]) | ||
q = q.fail | ||
p = p.get_child(c) | ||
|
||
return ret | ||
|
||
|
||
def build_failure_pointer(ac_trie: ACTrie) -> None: | ||
root = ac_trie.root | ||
|
||
# queue: [(node, node.length) ....] | ||
node_queue = Queue() | ||
node_queue.put((root, root.length)) | ||
|
||
root.fail = None | ||
while not node_queue.empty(): | ||
p, length = node_queue.get() | ||
for pc in p.children: | ||
pc.length = length + 1 | ||
if p == root: | ||
pc.fail = root | ||
else: | ||
q = p.fail | ||
# same as kmp | ||
while q != root and not q.has_child(pc.data): | ||
q = q.fail | ||
|
||
# cases now: | ||
# 1. q == root | ||
# 2. q != root and q.has_child(pc.data) | ||
if q.has_child(pc.data): | ||
pc.fail = q.get_child(pc.data) | ||
else: | ||
pc.fail = root | ||
node_queue.put((pc, pc.length)) | ||
|
||
|
||
if __name__ == '__main__': | ||
ac_trie = ACTrie() | ||
ac_trie.gen_tree(['fuck', 'shit', 'TMD', '傻叉']) | ||
|
||
print('--- ac automata ---') | ||
m_str = 'fuck you, what is that shit, TMD你就是个傻叉傻叉傻叉叉' | ||
print('original str : {}'.format(m_str)) | ||
|
||
filter_range_list = ac_automata(m_str, ac_trie) | ||
str_filtered = m_str | ||
for start, end in filter_range_list: | ||
str_filtered = str_filtered.replace(str_filtered[start:end+1], '*'*(end+1-start)) | ||
|
||
print('after filtered: {}'.format(str_filtered)) |