Skip to content

Commit

Permalink
bf and rk in python
Browse files Browse the repository at this point in the history
  • Loading branch information
KPatr1ck committed Dec 12, 2018
1 parent 17bbd62 commit 3fd9a80
Showing 1 changed file with 93 additions and 0 deletions.
93 changes: 93 additions & 0 deletions python/32_bf_rk/bf_rk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#!/usr/bin/python
# -*- coding: UTF-8 -*-

from time import time


def bf(main, pattern):
    """
    Brute-force (BF) substring search.

    Tries every possible alignment of the pattern against the main string
    and compares the two character by character.

    :param main: the string to search in
    :param pattern: the string to search for
    :return: index of the first occurrence of ``pattern`` in ``main``,
        or -1 if there is none; an empty pattern matches at index 0
    """
    n = len(main)
    m = len(pattern)

    # An empty pattern matches at the very start (same convention as
    # str.find); without this guard the inner loop below never runs and
    # the function would wrongly report "not found".
    if m == 0:
        return 0

    # A pattern at least as long as main can only match main as a whole.
    if n <= m:
        return 0 if pattern == main else -1

    for i in range(n - m + 1):
        for j in range(m):
            if main[i + j] != pattern[j]:
                break
        else:
            # The inner loop ran to completion: every character matched.
            return i
    return -1


def simple_hash(s, start, end):
    """
    Hash a substring by adding up the code points of its characters.

    :param s: source string
    :param start: index of the first character to include
    :param end: index of the last character to include (inclusive)
    :return: sum of ``ord(c)`` over the characters of ``s[start:end+1]``
    """
    assert start <= end

    window = s[start:end + 1]
    return sum(map(ord, window))


def rk(main, pattern):
    """
    Rabin-Karp (RK) substring search with an additive rolling hash.

    The hash of a window is the sum of the code points of its characters,
    so sliding the window one position to the right only needs the
    outgoing character subtracted and the incoming one added. Different
    strings can share the same sum, so every hash hit is confirmed with a
    direct comparison before it is reported.

    :param main: the string to search in
    :param pattern: the string to search for
    :return: index of the first occurrence of ``pattern`` in ``main``,
        or -1 if there is none; an empty pattern matches at index 0
    """
    n = len(main)
    m = len(pattern)

    # An empty pattern matches at the very start (same convention as
    # str.find); hashing an empty window is pointless.
    if m == 0:
        return 0

    # A pattern at least as long as main can only match main as a whole.
    if n <= m:
        return 0 if pattern == main else -1

    # Hash of the pattern and of the first window of main.
    hash_p = sum(map(ord, pattern))
    window_hash = sum(map(ord, main[:m]))

    last_start = n - m
    for i in range(last_start + 1):
        # Equal hashes may be a collision, so verify the actual text.
        if window_hash == hash_p and main[i:i + m] == pattern:
            return i
        if i < last_start:
            # Roll the window: drop main[i], take in main[i + m].
            window_hash += ord(main[i + m]) - ord(main[i])
    return -1


if __name__ == '__main__':
    # Near-worst case for brute force: a long run of 'a' and a pattern
    # that only differs from it in its final character.
    haystack = 'a' * 10000
    needle = 'a' * 200 + 'b'

    print('--- time consume ---')
    started = time()
    print('[bf] result:', bf(haystack, needle))
    elapsed = time() - started
    print('[bf] time cost: {0:.5}s'.format(elapsed))

    started = time()
    print('[rk] result:', rk(haystack, needle))
    elapsed = time() - started
    print('[rk] time cost: {0:.5}s'.format(elapsed))

    print('')
    print('--- search ---')
    # Plain search demo: both algorithms should report the same index.
    haystack = 'thequickbrownfoxjumpsoverthelazydog'
    needle = 'jump'
    print('[bf] result:', bf(haystack, needle))
    print('[rk] result:', rk(haystack, needle))

0 comments on commit 3fd9a80

Please sign in to comment.