-
Notifications
You must be signed in to change notification settings - Fork 2
/
run_me.py
70 lines (50 loc) · 2.57 KB
/
run_me.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import os, sys, time
from Bio.Blast import NCBIXML
from Bio.Blast import NCBIWWW
from Bio import SearchIO, SeqIO
current_dir = os.getcwd()
util_path = current_dir + "/util/"
sys.path.insert(0, util_path)
from util import *
def analyze_BLAST_result(input_fasta_name_wo_path, result_handle):
show_header("Step 2. Analyzing the BLAST result.")
output_file_name = "retrieved_from_" + str(input_fasta_name_wo_path)[:-6] + ".xml"
if not os.path.exists('sample/output'):
os.makedirs('sample/output')
current_dir = os.getcwd()
output_folder = os.path.join(current_dir, "sample/output")
os.chdir(output_folder)
output_file = open(output_file_name, "w") # since it is 'w', an existing file will be overwritten. (if this is "a", new info will be appended to an existing file)
output_file.write(result_handle.read())
output_file.close()
blast_qresult = SearchIO.read(output_file_name, "blast-xml") # query_result
filter_for_no_predicted_hypothetical = lambda hit: ("PREDICTED" in hit.description == False)
filtered_qresult = blast_qresult.hit_filter(filter_for_no_predicted_hypothetical)
for hit in filtered_qresult:
print("%s" %(hit.description))
######## end of def analyze_BLAST_result(blast_records)
def run_BLAST_by_NCBI(input_fasta_name):
show_header("Step 1. Searching NCBI website (blastp) with " + str(fasta_file_name))
print "\tPlease wait."
print "\tAs references,"
print "\t\t 41 amino acids take ~1 minute."
print "\t\t451 amino acids take ~2 minutes."
record = SeqIO.read(input_fasta_name, format="fasta")
time_start_of_searching = time.time()
result_handle = NCBIWWW.qblast("blastp", "nr", record.format("fasta"))
# "nr" means "Non-redundant protein sequences"
time_end_of_searching = time.time()
print show_time("\tBLAST searching", time_start_of_searching, time_end_of_searching)
input_fasta_name_wo_path = os.path.basename(input_fasta_name)
return input_fasta_name_wo_path, result_handle
######## end of def run_BLAST_by_NCBI(fasta_file_name)
if (__name__ == "__main__") :
args=sys.argv[1:]
if len(args) < 1:
print "Input format: python run_me.py <a fasta file name that NCBI BLAST will be ran against> \n"
print "Example usage: python run_me.py DGAT_target.fasta \n"
exit(1)
fasta_file_name = args[0]
input_fasta_name_wo_path, result_handle = run_BLAST_by_NCBI(fasta_file_name)
#print "\tSee retrieved results at the output folder.\n"
analyze_BLAST_result(input_fasta_name_wo_path, result_handle)