forked from kyclark/biofx_python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
solution1_regex.py
executable file
·88 lines (66 loc) · 2.49 KB
/
solution1_regex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/env python3
""" Find locations of N-glycosylation motif """
import argparse
import os
import re
import sys
from typing import NamedTuple, List, TextIO
import requests
from Bio import SeqIO
class Args(NamedTuple):
    """Immutable record of the parsed command-line options."""

    # Open text handle for the input file of protein IDs
    file: TextIO
    # Directory where the FASTA files are stored
    download_dir: str
# --------------------------------------------------
def get_args() -> Args:
    """Parse the command line and return the options as an ``Args`` tuple.

    Returns:
        Args holding the opened input file handle and the download
        directory name (``fasta`` unless ``-d/--download_dir`` is given).
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='Find locations of N-glycosylation motif')

    # Positional argument: argparse opens the file for reading as text
    parser.add_argument('file',
                        metavar='FILE',
                        type=argparse.FileType('rt'),
                        help='Input text file of UniProt IDs')

    # Optional argument: where downloaded FASTA files are kept
    parser.add_argument('-d',
                        '--download_dir',
                        metavar='DIR',
                        type=str,
                        default='fasta',
                        help='Directory for downloads')

    parsed = parser.parse_args()
    return Args(file=parsed.file, download_dir=parsed.download_dir)
# --------------------------------------------------
def main() -> None:
    """Print the N-glycosylation motif positions for each protein.

    Fetches a FASTA file for every ID in the input file, reads the first
    record of each, and searches its sequence for the motif. For every
    protein with at least one match, prints the protein ID (taken from
    the FASTA file name) followed by a line of 1-based start positions.
    """
    args = get_args()
    files = fetch_fasta(args.file, args.download_dir)

    # The pattern sits inside a zero-width lookahead so that overlapping
    # occurrences of the motif are all found.
    regex = re.compile(r'(?=(N[^P][ST][^P]))')

    for file in files:
        prot_id, _ = os.path.splitext(os.path.basename(file))

        # A file without any FASTA record (e.g. an empty download) yields
        # nothing; the default keeps next() from raising StopIteration and
        # aborting the remaining files.
        rec = next(SeqIO.parse(file, 'fasta'), None)
        if rec is None:
            continue

        if matches := list(regex.finditer(str(rec.seq))):
            print(prot_id)
            # match.start() is 0-based; report 1-based positions
            print(*[match.start() + 1 for match in matches])
# --------------------------------------------------
def fetch_fasta(fh: TextIO, fasta_dir: str) -> List[str]:
    """Fetch the FASTA files into the download directory.

    Reads one protein ID per line from ``fh``. For each ID the file
    ``<fasta_dir>/<ID>.fasta`` is reused when it already exists; otherwise
    it is requested from UniProt and written to that path. When the
    response status is not 200, an error is printed to STDERR and the ID
    is left out of the result.

    Args:
        fh: Open text handle with one protein ID per line. Blank lines
            are ignored.
        fasta_dir: Directory for the FASTA files; created if missing.

    Returns:
        Paths of the FASTA files available locally, in input order.
    """
    # exist_ok replaces the separate isdir() check and its
    # check-then-create race.
    os.makedirs(fasta_dir, exist_ok=True)

    files = []
    for prot_id in map(str.rstrip, fh):
        if not prot_id:
            # A blank line would otherwise turn into a request for ".fasta"
            continue

        fasta = os.path.join(fasta_dir, prot_id + '.fasta')
        if not os.path.isfile(fasta):
            url = f'http://www.uniprot.org/uniprot/{prot_id}.fasta'
            response = requests.get(url)
            if response.status_code != 200:
                print(f'Error fetching "{url}": "{response.status_code}"',
                      file=sys.stderr)
                continue

            # Context manager guarantees the file is flushed and closed
            # before the caller parses it.
            with open(fasta, 'wt') as out_fh:
                print(response.text, file=out_fh)

        files.append(fasta)

    return files
# --------------------------------------------------
# Run main() only when executed as a script, not when imported as a module
if __name__ == '__main__':
    main()