-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathburrows_wheeler.py
67 lines (46 loc) · 1.8 KB
/
burrows_wheeler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import numpy as np
class BWT:
""" Implement the Burrow Wheeler Transform for a given string """
def __init__(self,string:str) -> str:
self.string = string
self.processed_string = string + '$'
self.length = len(self.string)
self.processed_length = len(self.processed_string)
def bwt_matrix(self,lexical_sort) -> np.ndarray:
"""Return the permuted matrix for the given string"""
m = self.processed_length
char_list = [i for i in self.processed_string]
p_array = np.array(np.zeros([m,m]),dtype=object)
for i in range(m):
new_items = char_list[1:] + char_list[:1]
char_list = new_items
p_array[i,:] = new_items
p_words = np.sum(p_array,axis=1)
if lexical_sort == False:
p_words = p_words.reshape(m,1)
else :
p_vec = p_words.reshape(m,)
p_vec.sort()
p_words = p_vec.reshape(m,1)
return p_words
def bwt(self):
p_mat = self.bwt_matrix(lexical_sort=True)
bwt_char = [word[0][-1] for word in p_mat]
bwt = ''.join(bwt_char)
return bwt
def ibwt_matrix(self):
m = self.length
char_list = np.array([i for i in self.string])
sorted_bwt = np.array(sorted(char_list))
for i in range(m-1):
arr = np.array(np.vstack((char_list,sorted_bwt)),dtype=object)
arr_sum = np.sum(arr,axis=0)
sorted_bwt = np.array(sorted(arr_sum))
ibwt_matrix = sorted_bwt.reshape(m,1)
return ibwt_matrix
def ibwt(self):
ibwt_mat = self.ibwt_matrix()
last_col = [i[0][-1] for i in ibwt_mat]
carot_index = last_col.index('$')
ibwt = ibwt_mat[carot_index][0]
return ibwt