forked from kyclark/biofx_python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
solution1_manual.py
executable file
·121 lines (95 loc) · 3.67 KB
/
solution1_manual.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/env python3
""" Annotate BLAST output """
import argparse
import csv
import os
from typing import NamedTuple, TextIO
class Args(NamedTuple):
    """ Command-line arguments

    Immutable bundle of the parsed options that ``get_args`` returns and
    ``main`` consumes.
    """
    # Readable text handle on the BLAST hits file
    hits: TextIO
    # Readable text handle on the annotations file
    annotations: TextIO
    # Writable text handle for the output file
    outfile: TextIO
    # Field delimiter used when writing the output
    delimiter: str
    # Minimum percent identity a hit must have to be reported
    pctid: float
# --------------------------------------------------
def get_args():
    """Parse the command line and return the options as an ``Args`` tuple.

    The BLAST hits (``-b``) and annotations (``-a``) files are required and
    are opened for reading in text mode; the output file (``-o``, default
    ``out.csv``) is opened for writing in text mode. When no delimiter
    (``-d``) is given, one is guessed from the output file's name.

    Returns:
        Args: The open file handles, the output delimiter, and the minimum
        percent identity.
    """
    # One reader factory serves both input options.
    text_reader = argparse.FileType('rt')

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='Annotate BLAST output',
    )

    parser.add_argument(
        '-b',
        '--blasthits',
        required=True,
        type=text_reader,
        metavar='FILE',
        help='BLAST -outfmt 6',
    )

    parser.add_argument(
        '-a',
        '--annotations',
        required=True,
        type=text_reader,
        metavar='FILE',
        help='Annotations file',
    )

    parser.add_argument(
        '-o',
        '--outfile',
        default='out.csv',
        type=argparse.FileType('wt'),
        metavar='FILE',
        help='Output file',
    )

    parser.add_argument(
        '-d',
        '--delimiter',
        default='',
        type=str,
        metavar='DELIM',
        help='Output field delimiter',
    )

    parser.add_argument(
        '-p',
        '--pctid',
        default=0.0,
        type=float,
        metavar='PCTID',
        help='Minimum percent identity',
    )

    parsed = parser.parse_args()

    # An empty delimiter means "not given": derive it from the output name.
    delimiter = parsed.delimiter
    if not delimiter:
        delimiter = guess_delimiter(parsed.outfile.name)

    return Args(
        hits=parsed.blasthits,
        annotations=parsed.annotations,
        outfile=parsed.outfile,
        delimiter=delimiter,
        pctid=parsed.pctid,
    )
# --------------------------------------------------
def main():
    """Annotate BLAST hits with metadata and write the merged table.

    Reads the annotations into a lookup keyed by ``seq_id``, then streams the
    BLAST hits, keeping each hit whose percent identity is at least
    ``args.pctid`` and whose ``qseqid`` has an annotation. For every kept hit,
    one line with the quoted fields ``qseqid``, ``pident``, ``depth`` and
    ``lat_lon`` is written to the output file, after an unquoted header line.
    Hit rows with a missing or empty ``pident`` are skipped. A summary count
    is printed at the end.

    Raises:
        KeyError: If the annotations file has no ``seq_id`` column.
        ValueError: If a hit's ``pident`` field is present but not numeric.
    """
    args = get_args()

    # Index annotations by sequence ID so each hit needs one dict lookup;
    # the handle is released as soon as the file has been consumed.
    with args.annotations:
        annots = {
            row['seq_id']: row
            for row in csv.DictReader(args.annotations, delimiter=',')
        }

    headers = ['qseqid', 'pident', 'depth', 'lat_lon']

    # Column names are supplied explicitly, so the first line of the hits
    # file is read as data rather than as a header.
    hit_fields = [
        'qseqid', 'sseqid', 'pident', 'length', 'mismatch', 'gapopen',
        'qstart', 'qend', 'sstart', 'send', 'evalue', 'bitscore'
    ]

    num_written = 0

    # Both handles are closed on exit, including when a row raises.
    with args.hits, args.outfile:
        args.outfile.write(args.delimiter.join(headers) + '\n')

        hits = csv.DictReader(args.hits,
                              delimiter=',',
                              fieldnames=hit_fields)
        for hit in hits:
            # DictReader fills columns missing from a short row with None,
            # so a ``.get`` default never applies; skip such rows instead
            # of crashing inside ``float``.
            pident = hit.get('pident')
            if not pident or float(pident) < args.pctid:
                continue

            if seq_id := hit.get('qseqid'):
                if seq := annots.get(seq_id):
                    num_written += 1
                    fields = [
                        seq_id,
                        pident,
                        seq.get('depth'),
                        seq.get('lat_lon'),
                    ]
                    args.outfile.write(
                        args.delimiter.join(f'"{field}"'
                                            for field in fields) + '\n')

    print(f'Exported {num_written:,} to "{args.outfile.name}".')
# --------------------------------------------------
def guess_delimiter(filename: str) -> str:
    """Guess the field separator from the file extension.

    Args:
        filename: Name or path of the file whose extension is inspected.

    Returns:
        ``','`` when the extension is ``.csv`` (compared case-insensitively,
        so ``.CSV`` also counts), otherwise a tab character.
    """
    ext = os.path.splitext(filename)[1]
    return ',' if ext.lower() == '.csv' else '\t'
# --------------------------------------------------
# Run the program only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()