forked from HudsonAlpha/aRNAPipe
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspider.py
232 lines (218 loc) · 12.4 KB
/
spider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
import os
import sys
import optparse
import lib.vcrparser as vcrparser
import lib.spider_stats as spider_stats
import lib.html_lib as html
#########################################################################
# COMMAND-LINE PARSER
#########################################################################
# The module takes a single option, -p/--path (stored as opt.path, empty
# string when omitted). Positional leftovers end up in args.
desc = "aRNApipe: SPIDER module"
parser = optparse.OptionParser(description=desc)
parser.add_option(
    "-p", "--path",
    dest="path",
    default="",
    help="Required: Path to the project folder"
)
opt, args = parser.parse_args()
#########################################################################
# INITIAL ARRANGEMENTS
#########################################################################
# Resolve the script directory to an absolute path first:
# os.path.dirname(sys.argv[0]) is "" when the script is started as
# "python spider.py", which would turn "<dir>/R/" into the root path "/R/".
script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
pathscript = script_dir + "/R/"  # PATH TO R SCRIPTS
project, path = html.check_project(opt.path)  # CHECKS IF PROJECT FOLDER, SAMPLES FILE AND CONFIGURATION FILE EXIST
html.skeleton(path, script_dir)  # CREATES THE SKELETON FOR THE HTML AND OUTPUT RESULTS
config = html.check_config(path)  # PARSES THE CONFIGURATION FILE
# NOTE(review): path.replace(project, "") strips EVERY occurrence of the
# project name from the path, not only the last component - confirm that
# project names never repeat inside the parent path.
try:
    # PARSES THE SAMPLE FILE AND ASSOCIATED FASTQ FILES
    samples = vcrparser.get_samples(path.replace(project, ""), project, path + "/samples.list")
except (Exception, SystemExit):
    # Fall back to the non-checking parser. SystemExit is still caught (as the
    # original bare except did) in case the checking parser aborts via exit();
    # KeyboardInterrupt is now allowed to propagate.
    samples = vcrparser.get_samples_nocheck(path.replace(project, ""), project, path + "/samples.list")
# Sample identifiers in file order: first tab-separated column of every
# non-blank line after the header. Line endings are stripped so that a
# single-column or trailing blank line cannot produce names such as "s1\n".
with open(path + "/samples.list", 'r') as samples_file:
    samples_file.readline()  # header line, not needed
    samples_ordered = [line.rstrip("\r\n").split("\t")[0]
                       for line in samples_file if line.strip()]
lmenu = html.get_menu(config, len(samples))
#########################################################################
# PROCESSES AND ARRANGES LOGS AND OUTPUT DATA FROM THE ANALYSIS MODULES
# (VARSCAN, GATK, TRIMGALORE, STAR-FUSION, LSF LOGS, KALLISTO, STAR, HTSEQ)
#########################################################################
# Each entry pairs a deferred spider_stats call with the message printed when
# that call fails. The calls are wrapped in lambdas so that every attribute
# lookup and invocation happens inside the try below, exactly as before.
stats_jobs = [
    (lambda: spider_stats.stats_varcall(path, "varscan"),
     "Unexpected error generating stats of VARSCAN."),
    (lambda: spider_stats.stats_varcall(path, "gatk"),
     "Unexpected error generating stats of GATK."),
    (lambda: spider_stats.stats_trimgalore(path),
     "Unexpected error generating stats of TRIMGALORE."),
    (lambda: spider_stats.stats_fusion(path),
     "Unexpected error generating stats of Star-Fusion."),
    # PARSES THE ASSOCIATED LSF LOG FILES AND WRITES THE RELEVANT PARAMETERS IN OUTPUTS
    (lambda: spider_stats.stats_log(path + "/logs/"),
     "Unexpected error generating stats of HPC."),
    # GENERATES STATISTICS, ESTIMATED COUNTS, TPM AND ANNOTATION MATRICES
    (lambda: spider_stats.stats_kallisto(path + "/results_kallisto/", samples_ordered),
     "Error: Statistics of KALLISTO not yet ready or unexpected error."),
    # GENERATES STATISTICS, COUNTS, RPKM AND ANNOTATION MATRICES
    (lambda: spider_stats.stats_star(path + "/results_star/", samples_ordered),
     "Error: Statistics of STAR not yet ready or unexpected error."),
    (lambda: spider_stats.stats_htseq(path + "/results_htseq-gene/", samples_ordered, "gene"),
     "Error: Statistics of HTSEQ-GENE not yet ready or unexpected error."),
    (lambda: spider_stats.stats_htseq(path + "/results_htseq-exon/", samples_ordered, "exon"),
     "Error: Statistics of HTSEQ-EXON not yet ready or unexpected error."),
]
for run_stats, failure_msg in stats_jobs:
    try:
        run_stats()
    except (Exception, SystemExit):
        # Best effort: a module that fails (or has not produced output yet)
        # must not stop the remaining ones. KeyboardInterrupt is deliberately
        # not caught, so Ctrl-C aborts the whole run instead of one step.
        print(failure_msg)
#########################################################################
# HTML SUMMARY WEBPAGE
#########################################################################
# Builds HTML/summary.html from the samples table and the configuration table.
try:
    print("> Generating webpage with samples list and configuration...")
    print(" - " + path + "/HTML/summary.html")
    # Absolute script directory: dirname(sys.argv[0]) alone is "" when the
    # script is started as "python spider.py", which would make the template
    # path start at the filesystem root.
    template_dir = os.path.dirname(os.path.abspath(sys.argv[0])) + "/template/"
    html_table = html.print_samples(path, config)  # PROVIDES HTML TABLE WITH SAMPLES STATS
    html_table2 = html.config_file(path, "config.txt")  # PROVIDES HTML TABLE WITH CONFIGURATION SETTINGS
    html.build_from_template(
        "SUMMARY", project, "", html_table, html_table2,
        path + "/HTML/summary.html",
        template_dir + "TEMPLATE_SUMMARY.html",
        lmenu)
except (Exception, SystemExit):
    # Best effort: the page is skipped when its inputs are missing.
    # KeyboardInterrupt is not caught, so Ctrl-C still aborts the run.
    print(" - Not ready")
#########################################################################
# HTML DOWNLOADS
#########################################################################
# Builds HTML/downloads.html; the template receives no data or tables.
try:
    print("> Generating webpage with download links...")
    print(" - " + path + "/HTML/downloads.html")
    # abspath() keeps the template path valid when sys.argv[0] carries no
    # directory part (dirname would otherwise return "").
    downloads_template = (os.path.dirname(os.path.abspath(sys.argv[0]))
                          + "/template/TEMPLATE_DOWNLOAD.html")
    html.build_from_template(
        "DOWNLOADS", project, "", "", "",
        path + "/HTML/downloads.html",
        downloads_template,
        lmenu)
except (Exception, SystemExit):
    # Best effort; KeyboardInterrupt is deliberately left uncaught.
    print(" - Not ready")
#########################################################################
# HPC STATS
#########################################################################
# Builds HTML/hpc.html: a table read from outputs/log_stats.txt plus one
# image row per outputs/log_stats_<run>.png file.
try:
    print("> Generating webpage with HPC and LOG statistics...")
    print(" - " + path + "/HTML/hpc.html")
    # PLOT OF HPC USAGE (the R script presumably writes the log_stats_<run>.png
    # files collected below - confirm against R/log_stats.R).
    # NOTE(review): the command line is built by string concatenation, so a
    # project path containing spaces will be split by the shell.
    os.system("Rscript " + pathscript + "/log_stats.R " + path + "/outputs/log_stats.txt")
    html_table = html.print_table_default(path + "/outputs/log_stats.txt", 2, [])  # PROVIDES HTML TABLE WITH HPC STATS
    plot_files = [name for name in os.listdir(path + "/outputs/")
                  if name.startswith("log_stats_") and name.endswith(".png")]
    # os.listdir() returns entries in arbitrary order; sort by (length, name)
    # so that run 2 is listed before run 10 and the page is reproducible.
    plot_files.sort(key=lambda name: (len(name), name))
    plot_rows = []
    for plot in plot_files:
        run_number = plot.split("_")[2].replace(".png", "")
        plot_rows.append(
            '<tr bgcolor="#A8A8A8"><td><center><b>Analysis run number ' + run_number
            + '</b></center></td></tr><tr bgcolor="#00CC66"><td><img src="../outputs/'
            + plot + '" style="width:800px;"></td></tr>')
    html_table2 = "<table>" + "".join(plot_rows) + "</table>"
    # abspath() keeps the template path valid when sys.argv[0] has no
    # directory part (dirname would otherwise return "").
    hpc_template = os.path.dirname(os.path.abspath(sys.argv[0])) + "/template/TEMPLATE_HPC.html"
    html.build_from_template(
        "HPC", project, "", html_table, html_table2,
        path + "/HTML/hpc.html", hpc_template, lmenu)
except (Exception, SystemExit):
    # Best effort; KeyboardInterrupt is deliberately left uncaught so that
    # Ctrl-C aborts the run instead of only skipping this page.
    print(" - Not ready")
#########################################################################
# TRIM_GALORE
#########################################################################
# Builds HTML/trim.html, only when the configuration has a "trimgalore" entry.
try:
    if "trimgalore" in config:
        print("> Generating webpage with TrimGalore/Cutadapt statistics...")
        print(" - " + path + "/HTML/trim.html")
        html_table = html.print_table_default(path + "/outputs/stats_trim.txt", -1, [])  # HTML TABLE WITH TRIMMING STATS
        data = html.bar_getdata(path + "/outputs/stats_trim_plot.txt", 0, [], [])  # DATA FOR THE BAR PLOT
        # abspath() keeps the template path valid when sys.argv[0] has no
        # directory part (dirname would otherwise return "").
        trim_template = os.path.dirname(os.path.abspath(sys.argv[0])) + "/template/TEMPLATE_TRIMG.html"
        html.build_from_template(
            "TrimGalore", project, data, html_table, "",
            path + "/HTML/trim.html", trim_template, lmenu)
except (Exception, SystemExit):
    # Best effort; KeyboardInterrupt is deliberately left uncaught.
    print(" - Not ready")
#########################################################################
# FASTQ
#########################################################################
# Builds HTML/fastqc.html, only when the configuration has a "fastqc" entry.
try:
    if "fastqc" in config:
        print("> Generating webpage with fastqc statistics...")
        print(" - " + path + "/HTML/fastqc.html")
        html_table = spider_stats.stats_fastq(path, samples, config)  # PROVIDES HTML TABLE WITH FASTQ STATS
        # abspath() keeps the template path valid when sys.argv[0] has no
        # directory part (dirname would otherwise return "").
        fastqc_template = os.path.dirname(os.path.abspath(sys.argv[0])) + "/template/TEMPLATE_FASTQC.html"
        html.build_from_template(
            "FASTQC", project, "", html_table, "",
            path + "/HTML/fastqc.html", fastqc_template, lmenu)
except (Exception, SystemExit):
    # Best effort; KeyboardInterrupt is deliberately left uncaught.
    print(" - Not ready")
#########################################################################
# PICARD
#########################################################################
# Builds HTML/picard.html, only when the configuration has a "picard" entry.
try:
    if "picard" in config:
        print("> Generating webpage with picard statistics...")
        print(" - " + path + "/HTML/picard.html")
        html_table = html.stats_picard(path, samples, config)  # PROVIDES HTML TABLE WITH PICARD STATS
        # Two column groups (1-6 and 7-10) are handed to the plot helper.
        data = html.bar_getdata(path + "/outputs/stats_picard.txt", 0, range(1, 7), range(7, 11))
        # abspath() keeps the template path valid when sys.argv[0] has no
        # directory part (dirname would otherwise return "").
        picard_template = os.path.dirname(os.path.abspath(sys.argv[0])) + "/template/TEMPLATE_PICARD.html"
        html.build_from_template(
            "PICARD", project, data, html_table, "",
            path + "/HTML/picard.html", picard_template, lmenu)
except (Exception, SystemExit):
    # Best effort; KeyboardInterrupt is deliberately left uncaught.
    print(" - Not ready")
#########################################################################
# HTML STAR-FUSION WEBPAGE
#########################################################################
# Builds HTML/star-fusion.html, only when the configuration has a
# "star-fusion" entry.
try:
    if "star-fusion" in config:
        print("> Generating webpage with Star-Fusion results...")
        print(" - " + path + "/HTML/star-fusion.html")
        html_table = html.print_table_default(path + "/outputs/starfusion_aggregate.txt", -1, [])  # HTML TABLE WITH AGGREGATED STAR-FUSION RESULTS
        # abspath() keeps the template path valid when sys.argv[0] has no
        # directory part (dirname would otherwise return "").
        fusion_template = os.path.dirname(os.path.abspath(sys.argv[0])) + "/template/TEMPLATE_STARFUSION.html"
        html.build_from_template(
            "STAR-FUSION", project, "", html_table, "",
            path + "/HTML/star-fusion.html", fusion_template, lmenu)
except (Exception, SystemExit):
    # Best effort; KeyboardInterrupt is deliberately left uncaught.
    print(" - Not ready")
#########################################################################
# STAR_QC
#########################################################################
# Builds HTML/star.html, only when the configuration has a "star" entry.
# NOTE(review): the unstranded stats file is read regardless of the
# configured strandedness - confirm this is intended.
try:
    if "star" in config:
        print("> Generating webpage with STAR statistics...")
        print(" - " + path + "/HTML/star.html")
        star_stats_file = path + "/outputs/star_unstranded_stats.txt"
        html_table = html.print_table_default(star_stats_file, -1, [])  # HTML TABLE WITH STAR STATS
        data = html.bar_getdata(star_stats_file, 0, range(1, 6), [])  # DATA FOR THE BAR PLOT
        # abspath() keeps the template path valid when sys.argv[0] has no
        # directory part (dirname would otherwise return "").
        star_template = os.path.dirname(os.path.abspath(sys.argv[0])) + "/template/TEMPLATE_STAR.html"
        html.build_from_template(
            "STAR", project, data, html_table, "",
            path + "/HTML/star.html", star_template, lmenu)
except (Exception, SystemExit):
    # Best effort; KeyboardInterrupt is deliberately left uncaught.
    print(" - Not ready")
#########################################################################
# HTSEQ_QC
#########################################################################
# Builds HTML/htseq-gene.html and HTML/htseq-exon.html, each only when the
# configuration has the matching entry. Every page has its own try so that a
# failure on the gene page no longer silently skips the exon page.
for htseq_mode in ["htseq-gene", "htseq-exon"]:
    try:
        if htseq_mode in config:
            print("> Generating webpage with " + htseq_mode + " statistics...")
            print(" - " + path + "/HTML/" + htseq_mode + ".html")
            htseq_stats_file = path + "/outputs/" + htseq_mode + "_stats.txt"
            html_table = html.print_table_default(htseq_stats_file, -1, [])  # HTML TABLE WITH HTSEQ STATS
            data = html.bar_getdata(htseq_stats_file, 0, range(1, 7), [])  # DATA FOR THE BAR PLOT
            # abspath() keeps the template path valid when sys.argv[0] has no
            # directory part (dirname would otherwise return "").
            htseq_template = os.path.dirname(os.path.abspath(sys.argv[0])) + "/template/TEMPLATE_HTSEQ.html"
            html.build_from_template(
                htseq_mode.upper(), project, data, html_table, "",
                path + "/HTML/" + htseq_mode + ".html", htseq_template, lmenu)
    except (Exception, SystemExit):
        # Best effort; KeyboardInterrupt is deliberately left uncaught.
        print(" - Not ready")
#########################################################################
# STAR & HTSEQ STATS ON COUNTS/RPKMS
#########################################################################
# For every configured quantifier, runs R/stats_algs.R on the outputs folder
# and builds HTML/<program>2.html from the resulting <prefix>_pca.txt table.
# Only done when there is more than one sample.
try:
    if len(samples) > 1:
        # The STAR output prefix depends on the configured strandedness; any
        # other value raises KeyError and the section is reported as not ready
        # (the original reached the same message through an undefined name).
        star_prefix = {
            "yes": "star_stranded",
            "no": "star_unstranded",
            "reverse": "star_stranded-reverse",
        }[config["programs"]["strandedness"]]
        # program key -> [display label, output file prefix]
        count_sources = {
            "star": ["STAR", star_prefix],
            "htseq-gene": ["HTseq-count Gene", "htseq-gene"],
            "htseq-exon": ["HTseq-count Exon", "htseq-exon"],
        }
        # abspath() keeps the template path valid when sys.argv[0] has no
        # directory part (dirname would otherwise return "").
        pca_template = os.path.dirname(os.path.abspath(sys.argv[0])) + "/template/TEMPLATE_PCA.html"
        for prog, pname in count_sources.items():
            if prog in config:
                # Arguments passed to the R script: outputs folder and file prefix.
                os.system("Rscript " + pathscript + "/stats_algs.R " + path + "/outputs/ " + pname[1])
                html_table = html.print_table_default(
                    path + "/outputs/" + pname[1] + "_pca.txt", -1,
                    [0, 1, 2, 3, 4, 6, 7, 8, 12, 13, 14, 15, 17, 18, 19])
                html.build_amcharts(
                    pca_template, path + "/HTML/" + prog + "2.html",
                    prog, pname, path, html_table, project, lmenu)
except (Exception, SystemExit):
    # Best effort; KeyboardInterrupt is deliberately left uncaught.
    print(" - Not ready")
#########################################################################
# VARSCAN
#########################################################################
# Builds HTML/varscan.html, only when the configuration has a "varscan" entry.
try:
    if "varscan" in config:
        print("> Generating webpage with VARSCAN statistics...")
        print(" - " + path + "/HTML/varscan.html")
        varscan_stats_file = path + "/outputs/stats_varscan.txt"
        html_table = html.print_table_default(varscan_stats_file, -1, [])  # HTML TABLE WITH VARSCAN STATS
        data = html.bar_getdata(varscan_stats_file, 0, [], [])  # DATA FOR THE BAR PLOT
        # NOTE(review): the GATK template is reused for VARSCAN - confirm that
        # this is intentional.
        # abspath() keeps the template path valid when sys.argv[0] has no
        # directory part (dirname would otherwise return "").
        varscan_template = os.path.dirname(os.path.abspath(sys.argv[0])) + "/template/TEMPLATE_GATK.html"
        html.build_from_template(
            "VARSCAN", project, data, html_table, "",
            path + "/HTML/varscan.html", varscan_template, lmenu)
except (Exception, SystemExit):
    # Best effort; KeyboardInterrupt is deliberately left uncaught.
    print(" - Not ready")
#########################################################################
# GATK
#########################################################################
# Builds HTML/gatk.html, only when the configuration has a "gatk" entry.
try:
    if "gatk" in config:
        print("> Generating webpage with GATK statistics...")
        print(" - " + path + "/HTML/gatk.html")
        gatk_stats_file = path + "/outputs/stats_gatk.txt"
        html_table = html.print_table_default(gatk_stats_file, -1, [])  # HTML TABLE WITH GATK STATS
        data = html.bar_getdata(gatk_stats_file, 0, [], [])  # DATA FOR THE BAR PLOT
        # abspath() keeps the template path valid when sys.argv[0] has no
        # directory part (dirname would otherwise return "").
        gatk_template = os.path.dirname(os.path.abspath(sys.argv[0])) + "/template/TEMPLATE_GATK.html"
        html.build_from_template(
            "GATK", project, data, html_table, "",
            path + "/HTML/gatk.html", gatk_template, lmenu)
except (Exception, SystemExit):
    # Best effort; KeyboardInterrupt is deliberately left uncaught.
    print(" - Not ready")