-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathw_1.py
130 lines (105 loc) · 4.46 KB
/
w_1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import networkx as nx
def inputFile(file):
    """Read k-mers from a text file, one per line.

    Args:
        file: Path of the file to read.

    Returns:
        A list with one string per line of the file, in file order, with the
        line terminator removed. Blank lines yield empty strings.
    """
    with open(file) as f:
        # Strip only the line terminator. Unconditionally slicing off the
        # last character would truncate the final k-mer whenever the file
        # does not end with a newline.
        return [line.rstrip('\r\n') for line in f]
def Composition(dna, k):
    """Return every length-k window of *dna*, left to right.

    Args:
        dna: Sequence to slice (normally a string).
        k: Window length.

    Returns:
        A list of the ``len(dna) - k + 1`` consecutive slices ``dna[i:i + k]``
        in order of their start position; empty when *dna* is shorter than k.
    """
    window_count = len(dna) - k + 1
    return [dna[start:start + k] for start in range(window_count)]
def StringSpelledByAGenomePath(kmers):
    """Reconstruct the string spelled by a path of overlapping k-mers.

    The result is the first k-mer followed by one character taken from each
    subsequent k-mer.

    Args:
        kmers: Sequence of k-mers in path order.

    Returns:
        The spelled string, or ``''`` when *kmers* is empty.
    """
    # An empty path spells the empty string; indexing kmers[0] would raise.
    if not kmers:
        return ''
    first = kmers[0]
    # The appended character sits at the index of the first k-mer's last
    # position, which is the final character when all k-mers share a length.
    last_index = len(first) - 1
    return first + ''.join(kmer[last_index] for kmer in kmers[1:])
def Prefix(kmer):
    """Return *kmer* without its last character."""
    return kmer[:-1]
def Suffix(kmer):
    """Return *kmer* without its first character."""
    return kmer[1:len(kmer)]
'''
def Overlap(kmers):
file = open('data.txt', 'w')
G = nx.DiGraph()
G.add_nodes_from(kmers)
for kmer in kmers:
for kmer_ in kmers:
if (Suffix(kmer) == Prefix(kmer_)) and (kmer != kmer_):
G.add_edge(kmer, kmer_)
for kmer in kmers:
l = list(G.adj[kmer])
if len(l) > 0:
my_list = ','.join(l)
print kmer,' -> ',my_list
file.write(str(kmer + ' -> ' + my_list + '\n'))
file.close()
return None
'''
def Overlap(kmers):
    """Print the overlap graph of *kmers* as an adjacency list.

    There is an edge from k-mer ``a`` to k-mer ``b`` when ``Suffix(a)``
    equals ``Prefix(b)`` and ``a != b``. One line ``a -> b1,b2,...`` is
    printed per entry of *kmers* that has at least one successor, in input
    order, with successors listed in input order. Repeated k-mers in the
    input are not deduplicated: each occurrence contributes its edges and
    its own output line.

    Args:
        kmers: Sequence of k-mers (strings).

    Returns:
        None. The graph is only written to standard output.
    """
    # Index the k-mers by prefix once so each k-mer finds its successors with
    # a dictionary lookup instead of a scan over every other k-mer.
    by_prefix = {}
    for kmer in kmers:
        by_prefix.setdefault(Prefix(kmer), []).append(kmer)

    adjacency = {kmer: [] for kmer in kmers}
    for kmer in kmers:
        candidates = by_prefix.get(Suffix(kmer), [])
        # Equal k-mers are excluded, so no k-mer is linked to itself.
        adjacency[kmer].extend(other for other in candidates if other != kmer)

    for kmer in kmers:
        successors = adjacency[kmer]
        if successors:
            print(kmer, '->', ','.join(successors))
    return None
def DeBruijn(dna, k, out_path='hello.txt'):
    """Print and save the de Bruijn graph of the k-mers of *dna*.

    Each k-mer of *dna* contributes an edge from its prefix to its suffix.
    For every node with outgoing edges, a line ``node -> s1,s2,...`` (with
    successors sorted) is printed and also written to *out_path*. Nodes
    appear in order of first occurrence in *dna*.

    Args:
        dna: Sequence whose k-mers define the graph.
        k: Length of the k-mers.
        out_path: File that receives the adjacency list; it is created or
            truncated even when the graph is empty. Defaults to
            ``'hello.txt'``, the file name this function has always used.

    Returns:
        None.
    """
    # Reuse the module's own building blocks rather than repeating the
    # k-mer extraction and prefix/suffix grouping here.
    graph = DeBruijnGraphFromKmers(Composition(dna, k))

    # The context manager closes the file even if printing or writing fails.
    with open(out_path, 'w') as out:
        # Every node in the graph has at least one successor, so no
        # emptiness check is needed before formatting a line.
        for node, successors in graph.items():
            line = node + ' -> ' + ','.join(sorted(successors))
            print(line)
            out.write(line + '\n')
    return None
def DeBruijnGraphFromKmers(kmers):
    """Build the de Bruijn graph of a collection of k-mers.

    Each k-mer contributes one edge from ``Prefix(kmer)`` to ``Suffix(kmer)``.

    Args:
        kmers: Iterable of k-mers (strings). It is traversed exactly once, so
            one-shot iterables such as generators are handled correctly.

    Returns:
        A dict mapping each prefix to the list of suffixes of the k-mers that
        start with it. Keys are in order of first occurrence and each list is
        in input order (unsorted, duplicates kept).
    """
    graph = {}
    for kmer in kmers:
        # setdefault creates the node on first sight and records the edge in
        # the same pass.
        graph.setdefault(Prefix(kmer), []).append(Suffix(kmer))
    return graph
#dna = 'CCACCAAGCTCAAAACTTGACCGCCTAAGAGCACCATTTACTGCTTCGTATGTTAGGAAGATGTCATTACGACGGGAGTCTATGTCCCGCTCACGCATCTGGGTGAGGATGAATGTAGGGGAACCCCCCGGCTTTTCTACCGGACGGAAGTTGGTAGCGACCGTGCCTGCAAAATGTACTGATGGCGTTGGCCAGACATTAACCCTACTACCCCACCCGCTCATTGTCACGTCTGCCTTCGTCAAGAGTCGGTGACCCCTAACCGGACGGTGGAGGCATGGTTATTTCTTGCTTGGGTTGGTTGATGGTAAAAGCAGCTGCAAGCAGTATCGTGTCGGAAGCTTAAAGTTCACGCAAAAGTCCGTCCATTACCGGTACTGATGGAGTGAACGAGACCGAATACATGCTCACGCATCTTAGATGAATACAGCAGAGCATGCTACACGGTCGGTGGAGAGCTTGCCAGAGTTAATACGAGCGATGGGGAGTTTGGATCCTACGTGCGATAGCCTAAACTTTGCTTCCTCAGCATTGACTCATTAACGCCTAGCTATTCAGCATGCTAGATTGCATGCTGATATGTCTGATTTTCGTTTGCACTCATGCAGTAGTGGTCAGCTCCCACGGCGTATCTCCGCATTCACACCACTTTCTTCCTACGGACCGTATTACAGTCGGTAAGCGTGATGCGAGAACGTTCAGCGGACACGGGCTATTGACTTTGTCACATCTTGTCTTTTGCGTTACGCGTGCGTTAGCTACAGTCGAACAGTCCAACGCCCCTGTTACGGTATGTTAGCTCTTGTAGATTACGTCCAGCGAACAATGCATACCTGGTCGCGGCTCCGGGGCGCCAGAAACATTCCAGATCTATAGTTACACCTCCTTCCAGATACTAGAGAAGTCTTTACTCTATTGATAAGTACCCGACGGACCAGTAAGGATTTACGTGGTGGATGCGCGGAGGGGGTGTGCGCAGTTACACAGCGGGCCTAGGTTGGGCTTTGTATAAGGCGCGCATCTAAGGTACCTCGGGGGTCGACCGGGATTCTACAGTCGTAACGGGGCGTTAGTCTCACACGGATTAAATCTTTACGTCAAAGCGCATAGGAGGGCGGTATGAGCAATACCAGCCCGAGTCGAGTTCTTTATAATGCTTATGTGGCAACGTGATAGTGCTCATGGGATAATGGGCTGAGCCCAGGCACCTGTCCGACCTCTGATGGGGGTAAAGCACTCGATTTTTGGTTATGTAGTGCGAGTAGGTTTCTAGTCACCCTAAGGACGGCACGCAATCATCTTCTTCCCCCCTGTCGTATGATTGTGAGTTGGGCATTCAAGTCGCACTGCCAGGGGTCATTCAGTGCCGGCGTCTGGGTTAATTCCTCGGTAGGCGGTAGCGGTGCTAATGAAGAGATTGATTTGGCCTGCAAGCCTTCTTGCTTCATTCTAGACAGGACTAGCTAGTGTATCCCCGCTGAGAGCAACCAGCCTGCCACCGACACTAGGTCAAACTAAAAGCTGAGCCCTTCGGAACAGCGCCTTCTGCCCATTCCAACTGCGGCAGCCTGTCTAACTGCGGCCATTGTCCATGCAGGTCGAGTATTGGTTTAGGTCACGCCACTAGACAGCTTCATATCAAATTGCTGAAATGAGAAAGACGCGGTGTTGGTCGGGTAAGGGGGTTTGACCGCTGACCCCGGACGGAGCAGAGACTTATTTAACGTTACATGCTCGAATATTTTTACAGGATTTGGTATCAGTCGTGACCCGGGTGGCTAAGGCTCAGCATGCCTCTTCCTTCAGTGTACCATGATGAAGATTCATCTAAAGACCCTCTGACGATAAGTCTTCTCTAACACCGTTGGAATCGGATCGTGACAGATCAACCTTCGGGTCGCAAAGCGCCGTTTGCATGGTTAAGTAAGTGGCAATAGTCCATGGTTGAGAGTTCCTGCTAAACCTACCCCGTCGCAGATGCTCTACACTGCA
#ACAATCCT'
#k = 12
#DeBruijn(dna, k)
#kmers = 'GCGA CAAG AAGA GCCG ACAA AGTA TAGG AGTA ACGT AGCC TTCG AGTT AGTA CGTA GCGC GCGA GGTC GCAT AAGC TAGA ACAG TAGA TCCT CCCC GCGC ATCC AGTA AAGA GCGA CGTA'
#kmers = inputFile('../Downloads/dataset_200_8.txt')
#kmers = 'GAGG CAGG GGGG GGGA CAGG AGGG GGAG'
#kmers = kmers.split(' ')
#DeBruijnGraphFromKmers(kmers)
#DeBruijn(dna, k)