#!/usr/bin/env python3
# download_alleles_bigsdb.py
# Example script to download alleles from a sequence definition database
# Written by Keith Jolley
# Copyright (c) 2017, University of Oxford
# E-mail: [email protected]
#
# This file is part of Bacterial Isolate Genome Sequence Database (BIGSdb).
#
# BIGSdb is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# BIGSdb is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
import argparse
import os
import requests
# Command-line interface: which BIGSdb REST API to query, which database
# configuration to read, where to write the FASTA files, and an optional
# scheme to restrict the set of loci.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--base_url",
    help="pubmlst or other bigsdb API urls",
    default="http://rest.pubmlst.org",
)
parser.add_argument(
    "--database",
    help="Database configuration name",
    required=True,
)
parser.add_argument(
    "--dir",
    help="Output directory",
)
parser.add_argument(
    "--scheme_id",
    help=(
        "Only return loci belonging to scheme. If this option is not used then all "
        "loci from the database will be downloaded"
    ),
    type=int,
)

# The command line is parsed at import time; main() reads `args` and
# `base_uri` as module-level globals.
args = parser.parse_args()
base_uri = args.base_url
def main():
    """Download the allele FASTA file of every selected locus.

    Reads the module-level ``args`` (parsed command line) and ``base_uri``.
    The loci are those of scheme ``args.scheme_id`` when that option is
    given, otherwise every locus in ``args.database``.  One file named
    ``<locus id>.fas`` is written per locus into ``args.dir`` (created if
    necessary) or the current directory.

    Raises:
        SystemExit: with status 1 when the database or the scheme does not
            exist (HTTP 404); a message is printed first.
        requests.HTTPError: for any other unsuccessful HTTP response, so
            that an error page is never parsed as JSON or saved as FASTA.
    """
    out_dir = args.dir or "./"
    if args.dir:
        # exist_ok avoids the race between an existence check and creation.
        os.makedirs(args.dir, exist_ok=True)

    db_url = base_uri + "/db/" + args.database
    r = requests.get(db_url)
    if r.status_code == 404:
        print("Database " + args.database + " does not exist.")
        # SystemExit (unlike os._exit) lets the interpreter flush stdout, so
        # the message above is not lost when output is piped or redirected.
        raise SystemExit(1)
    r.raise_for_status()

    # Compare against None so that an explicit "--scheme_id 0" is treated as
    # a (non-existent) scheme rather than silently downloading every locus.
    if args.scheme_id is not None:
        r = requests.get(db_url + "/schemes/" + str(args.scheme_id))
        if r.status_code == 404:
            print("Scheme " + str(args.scheme_id) + " does not exist.")
            raise SystemExit(1)
    else:
        r = requests.get(db_url + "/loci?return_all=1")
    r.raise_for_status()
    loci = r.json()["loci"]

    for locus_path in loci:
        r = requests.get(locus_path)
        r.raise_for_status()
        locus_info = r.json()  # parse the locus record once, not per field
        locus = locus_info["id"]

        # Skip loci whose record has no (or an empty) FASTA link.
        fasta_url = locus_info.get("alleles_fasta")
        if not fasta_url:
            continue

        fasta = requests.get(fasta_url)
        fasta.raise_for_status()
        # The context manager closes the file even if the write fails.
        with open(os.path.join(out_dir, locus + ".fas"), "w") as fasta_file:
            fasta_file.write(fasta.text)
# Run the download only when executed as a script, not when imported.
if __name__ == "__main__":
    main()