Skip to content

Commit

Permalink
ac automata in python
Browse files Browse the repository at this point in the history
  • Loading branch information
unknown authored and unknown committed Dec 14, 2018
1 parent 6b50ac0 commit 9fd6594
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 2 deletions.
9 changes: 7 additions & 2 deletions python/35_trie/trie_.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,18 @@ def __init__(self, c):
self.children = []

def insert_child(self, c):
    """Wrap character ``c`` in a new ``Node`` and insert it as a child."""
    child = Node(c)
    self._insert_child(child)

def _insert_child(self, node):
"""
插入一个子节点
:param c:
:return:
"""
v = ord(c)
v = ord(node.data)
idx = self._find_insert_idx(v)
length = len(self.children)

node = Node(c)
if idx == length:
self.children.append(node)
else:
Expand All @@ -33,6 +35,9 @@ def insert_child(self, c):
self.children[i] = self.children[i-1]
self.children[idx] = node

def has_child(self, c):
    """Return whether this node has a child for character ``c``.

    A child counts as present whenever ``self.get_child(c)`` returns
    anything other than ``None``.

    :param c: the character to look up
    :return: ``True`` if a child was found, ``False`` otherwise
    """
    # The comparison already yields a bool; no conditional expression needed.
    return self.get_child(c) is not None

def get_child(self, c):
"""
搜索子节点并返回
Expand Down
88 changes: 88 additions & 0 deletions python/36_ac_automata/ac_automata_.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-

from trie_ import Node, Trie
from queue import Queue


class ACNode(Node):
    """Trie node extended with the bookkeeping used by the AC automaton.

    Attributes set here:
        fail: failure pointer; starts as ``None`` and is assigned by
            ``build_failure_pointer``.
        length: starts at 0; ``build_failure_pointer`` overwrites it with
            the parent's ``length`` plus one.
    """

    def __init__(self, c: str):
        super().__init__(c)
        self.length = 0
        self.fail = None

    def insert_child(self, c: str):
        """Insert a child for character ``c`` as an ``ACNode`` instance."""
        child = ACNode(c)
        self._insert_child(child)


class ACTrie(Trie):
    """Trie whose root node is an ``ACNode``."""

    def __init__(self):
        # The parent initializer is not invoked; only ``root`` is set here.
        root_node = ACNode(None)
        self.root = root_node


def ac_automata(main: str, ac_trie: ACTrie) -> list:
    """Find every occurrence of the trie's patterns inside ``main``.

    The failure pointers of ``ac_trie`` are (re)built on every call via
    ``build_failure_pointer`` before the scan starts.

    :param main: the text to scan
    :param ac_trie: trie holding the patterns to search for
    :return: list of ``(start, end)`` index pairs into ``main``, both
        inclusive, one per matched pattern occurrence, in scan order
    """
    root = ac_trie.root
    build_failure_pointer(ac_trie)

    matches = []
    state = root
    for i, c in enumerate(main):
        # Follow failure pointers until some state has a transition on c,
        # or we are back at the root. One get_child() call per visited
        # state replaces the former has_child() + get_child() pairs.
        nxt = state.get_child(c)
        while nxt is None and state is not root:
            state = state.fail
            nxt = state.get_child(c)

        if nxt is None:
            # Only reachable with state at root: no pattern continues with c.
            continue

        state = nxt
        # A char matched: walk the failure chain to report every pattern
        # that ends at position i (the node itself and its suffixes).
        candidate = nxt
        while candidate is not root:
            if candidate.is_ending_char:
                # candidate.length is the node's depth as assigned by
                # build_failure_pointer, used here as the match length.
                matches.append((i - candidate.length + 1, i))
            candidate = candidate.fail

    return matches


def build_failure_pointer(ac_trie: ACTrie) -> None:
    """Assign ``fail`` and ``length`` to every node below the trie root.

    Nodes are visited breadth-first, so the failure pointer of a parent is
    always set before its children are processed. For each child:

    * ``length`` becomes the parent's ``length`` plus one;
    * ``fail`` becomes the root for direct children of the root; otherwise
      the parent's failure chain is followed until a node with a child for
      the same character is found, falling back to the root.

    The root's own ``fail`` is set to ``None``.

    :param ac_trie: the trie whose nodes are updated in place
    :return: None
    """
    root = ac_trie.root
    root.fail = None

    # Level-by-level BFS with plain lists: no thread-safe queue is needed
    # for this single-threaded traversal.
    level = [root]
    while level:
        next_level = []
        for parent in level:
            for child in parent.children:
                child.length = parent.length + 1
                if parent is root:
                    child.fail = root
                else:
                    # Same idea as KMP: fall back along the parent's failure
                    # chain until a node can be extended by child.data.
                    fallback = parent.fail
                    target = fallback.get_child(child.data)
                    while target is None and fallback is not root:
                        fallback = fallback.fail
                        target = fallback.get_child(child.data)

                    # Either a matching child was found, or we reached the
                    # root without one and fall back to the root itself.
                    child.fail = target if target is not None else root
                next_level.append(child)
        level = next_level


if __name__ == '__main__':
    # Demo: build a trie of words to filter, then mask each occurrence
    # found in the sample text with asterisks.
    ac_trie = ACTrie()
    ac_trie.gen_tree(['fuck', 'shit', 'TMD', '傻叉'])

    print('--- ac automata ---')
    m_str = 'fuck you, what is that shit, TMD你就是个傻叉傻叉傻叉叉'
    print('original str : {}'.format(m_str))

    filter_range_list = ac_automata(m_str, ac_trie)
    # Mask by index instead of str.replace(): replace() rewrites every
    # occurrence of the sliced text rather than just the matched range,
    # and rescans the whole string for each match.
    masked_chars = list(m_str)
    for start, end in filter_range_list:
        masked_chars[start:end + 1] = '*' * (end + 1 - start)
    str_filtered = ''.join(masked_chars)

    print('after filtered: {}'.format(str_filtered))

0 comments on commit 9fd6594

Please sign in to comment.