forked from dib-lab/khmer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathedit-kmers.py
51 lines (36 loc) · 1.03 KB
/
edit-kmers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import khmer
import sys
import screed
K = 32
HASHTABLE_SIZE=int(1e9)
def get_neighbors(kmer):
neighbors = []
bases = ['A', 'C', 'G', 'T']
for n, base in enumerate(kmer):
for new_base in bases:
if new_base == base:
continue
new_kmer = kmer[0:n] + new_base + kmer[n+1:]
neighbors.append(new_kmer)
return neighbors
ht = khmer.new_hashtable(K, HASHTABLE_SIZE)
filename = sys.argv[1]
ht.consume_fasta(filename)
for n, record in enumerate(screed.fasta.fasta_iter(open(filename))):
read = record['sequence']
name = record['name']
if len(read) < K:
continue
seq_len = len(read)
for n in range(0,seq_len+1-K):
kmer = read[n:n+K]
kmer_count = ht.get(kmer)
if kmer_count == 1:
neighbors = get_neighbors(kmer)
for neighbor in neighbors:
neig_count = ht.get(neighbor)
if neig_count > 1:
read = read[0:n] + neighbor + read[n+K:]
break
print ">" + name
print read