forked from DRL/blobtools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot.py
188 lines (165 loc) · 7.74 KB
/
plot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""usage: blobtools plot -i BLOBDB [-p INT] [-l INT] [-c] [-n] [-s]
[-r RANK] [-x TAXRULE] [--label GROUPS...]
[-o PREFIX] [-m] [--sort ORDER] [--hist HIST] [--title]
[--colours FILE] [--include FILE] [--exclude FILE]
[--format FORMAT] [--noblobs] [--noreads]
[--refcov FILE] [--catcolour FILE]
[-h|--help]
Options:
-h --help show this
-i, --infile BLOBDB BlobDB file (created with "blobtools create")
-p, --plotgroups INT Number of (taxonomic) groups to plot, remaining
groups are placed in 'other' [default: 7]
-l, --length INT Minimum sequence length considered for plotting [default: 100]
-c, --cindex Colour blobs by 'c index' [default: False]
-n, --nohit Hide sequences without taxonomic annotation [default: False]
-s, --noscale Do not scale sequences by length [default: False]
-o, --out PREFIX Output prefix
-m, --multiplot Multi-plot. Print plot after addition of each (taxonomic) group
[default: False]
--sort <ORDER> Sort order for plotting [default: span]
span : plot with decreasing span
count : plot with decreasing count
--hist <HIST> Data for histograms [default: span]
span : span-weighted histograms
count : count histograms
--title Add title of BlobDB to plot [default: False]
-r, --rank RANK Taxonomic rank used for colouring of blobs [default: phylum]
(Supported: species, genus, family, order, phylum, superkingdom)
-x, --taxrule TAXRULE Taxrule which has been used for computing taxonomy
(Supported: bestsum, bestsumorder) [default: bestsum]
--label GROUPS... Relabel (taxonomic) groups (not 'all' or 'other'),
e.g. "Bacteria=Actinobacteria,Proteobacteria"
--colours COLOURFILE File containing colours for (taxonomic) groups
--exclude GROUPS.. Place these (taxonomic) groups in 'other',
e.g. "Actinobacteria,Proteobacteria"
--format FORMAT Figure format for plot (png, pdf, eps, jpeg,
ps, svg, svgz, tiff) [default: png]
--noblobs Omit blobplot [default: False]
--noreads Omit plot of reads mapping [default: False]
--refcov FILE File containing number of "total" and "mapped" reads
per coverage file. (e.g.: bam0,900,100). If provided, info
will be used in read coverage plot(s).
--catcolour FILE Colour plot based on categories from FILE
(format : "seq\tcategory").
"""
from __future__ import division
from docopt import docopt
import lib.BtCore as bt
import lib.BtLog as BtLog
import lib.BtIO as BtIO
import lib.BtPlot as BtPlot
from os.path import dirname, isfile
def parse_exclude_groups(exclude_groups):
    """Turn the raw --exclude value into a list of group names.

    The option value is handled as one comma-separated string, e.g.
    "Actinobacteria,Proteobacteria". A value without a comma used to be
    passed on as a bare string, which made the later "_".join() in the
    output file name join single characters; it is now always split into
    a list.

    A falsy value (option not given) is returned unchanged.
    """
    if not exclude_groups:
        return exclude_groups
    return exclude_groups.split(",")


def build_out_name(title, hist_type, rank, max_group_plot, cov_lib,
                   min_length, taxrule, out_prefix=None, catcolour=False,
                   noscale=False, c_index=False, exclude_groups=None,
                   label_names=None):
    """Assemble the dot-separated output name for one coverage library.

    Layout:
        [out_prefix.]title.hist_type.rank.p<max_group_plot>.cov_lib
        [.catcolour][.noscale][.c_index][.exclude<g1_g2...>]
        [.label_<l1_l2...>].min_length.taxrule

    label_names may be any iterable of label strings, or None when no
    --label was given. Duplicates are dropped and the names are sorted so
    the resulting file name does not depend on set iteration order.
    """
    name = "%s.%s.%s.p%s.%s" % (title, hist_type, rank, max_group_plot, cov_lib)
    if out_prefix:
        name = "%s.%s" % (out_prefix, name)
    tags = []
    if catcolour:
        tags.append("catcolour")
    if noscale:
        tags.append("noscale")
    if c_index:
        tags.append("c_index")
    if exclude_groups:
        tags.append("exclude" + "_".join(exclude_groups))
    if label_names is not None:
        tags.append("label_" + "_".join(sorted(set(label_names))))
    tags.append("%s" % min_length)
    tags.append("%s" % taxrule)
    return ".".join([name] + tags)


if __name__ == '__main__':
    TAXRULES = ['bestsum', 'bestsumorder']
    RANKS = ['species', 'genus', 'family', 'order', 'phylum', 'superkingdom']

    # ---- command line -----------------------------------------------------
    args = docopt(__doc__)
    blobdb_f = args['--infile']
    rank = args['--rank']
    c_index = args['--cindex']
    min_length = int(args['--length'])
    multiplot = args['--multiplot']
    hide_nohits = args['--nohit']
    out_prefix = args['--out']
    max_group_plot = int(args['--plotgroups'])
    sort_order = args['--sort']
    taxrule = args['--taxrule']
    hist_type = args['--hist']
    plot_title = args['--title']
    ignore_contig_length = args['--noscale']
    labels = args['--label']
    colour_f = args['--colours']
    exclude_groups = args['--exclude']
    # Named fig_format so the builtin format() is not shadowed.
    fig_format = args['--format']
    no_plot_blobs = args['--noblobs']
    no_plot_reads = args['--noreads']
    refcov_f = args['--refcov']
    catcolour_f = args['--catcolour']

    # ---- sanity checks ----------------------------------------------------
    # BtLog.error() is given an error code plus context values; it presumably
    # reports the problem and aborts -- confirm in lib.BtLog.
    # Does blobdb_f exist ?
    if not isfile(blobdb_f):
        BtLog.error('0', blobdb_f)
    # Are ranks sane ?
    if rank not in RANKS:
        BtLog.error('9', rank)
    # Are sort_order and hist_type sane?
    if sort_order not in ['span', 'count']:
        BtLog.error('14', sort_order)
    if hist_type not in ['span', 'count']:
        BtLog.error('15', hist_type)
    # is taxrule provided?
    if taxrule not in TAXRULES:
        BtLog.error('8', taxrule)

    # ---- optional inputs --------------------------------------------------
    # compute labels if supplied
    user_labels = BtPlot.parse_labels(labels)

    # Always a list when given, even for a single group.
    exclude_groups = parse_exclude_groups(exclude_groups)

    refcov_dict = {}
    if refcov_f:
        refcov_dict = BtPlot.parseRefCov(refcov_f)

    # --catcolour and --cindex are rejected when combined (error '24').
    catcolour_dict = {}
    if catcolour_f and c_index:
        BtLog.error('24')
    elif catcolour_f:
        catcolour_dict = BtPlot.parseCatColour(catcolour_f)

    # ---- load BlobDb ------------------------------------------------------
    # Parenthesised so the line is valid on Python 2 and Python 3 alike.
    print(BtLog.status_d['9'] % blobdb_f)
    blobDB = bt.BlobDb('new')
    blobDB.load(blobdb_f)
    title = blobDB.title
    # NOTE(review): plot_title is computed here but never read again;
    # plotObj.title below is always set from the BlobDB title, so --title
    # appears to have no effect. Left as-is pending a check of how PlotObj
    # uses .title.
    if plot_title:
        plot_title = title

    # Is taxrule sane and was it computed?
    if taxrule not in blobDB.taxrules:
        BtLog.error('11', taxrule, blobDB.taxrules)

    # ---- set up the plot object -------------------------------------------
    data_dict, max_cov, cov_libs, cov_libs_total_reads = blobDB.getPlotData(
        rank, min_length, hide_nohits, taxrule, c_index, catcolour_dict)
    plotObj = BtPlot.PlotObj(data_dict, cov_libs, cov_libs_total_reads)
    plotObj.exclude_groups = exclude_groups
    plotObj.format = fig_format
    plotObj.max_cov = max_cov
    plotObj.title = title
    plotObj.multiplot = multiplot
    plotObj.hist_type = hist_type
    plotObj.ignore_contig_length = ignore_contig_length
    plotObj.max_group_plot = max_group_plot
    plotObj.group_order = BtPlot.getSortedGroups(data_dict, sort_order)
    plotObj.labels.update(plotObj.group_order)
    if user_labels:
        # User-defined labels are registered next to the group names.
        plotObj.labels.update(user_labels.values())
    plotObj.group_labels = {group: set() for group in plotObj.group_order}
    plotObj.relabel_and_colour(colour_f, user_labels)
    plotObj.compute_stats()

    # ---- one blobplot per coverage library --------------------------------
    # The original `if labels:` guard is kept: the label tag is added exactly
    # when --label was given on the command line.
    label_names = list(user_labels.values()) if labels else None

    # info_flag is 1 only for the first plotBlobs() call and 0 afterwards.
    info_flag = 1
    for cov_lib in plotObj.cov_libs:
        if plotObj.title:
            plotObj.title = "%s.%s.%s" % (title, taxrule, cov_lib)
        plotObj.out_f = build_out_name(
            title, hist_type, rank, max_group_plot, cov_lib,
            min_length, taxrule,
            out_prefix=out_prefix,
            catcolour=bool(catcolour_dict),
            noscale=ignore_contig_length,
            c_index=c_index,
            exclude_groups=exclude_groups,
            label_names=label_names)
        if not no_plot_blobs:
            plotObj.plotBlobs(cov_lib, info_flag)
            info_flag = 0

    plotObj.write_stats()
    if not no_plot_reads:
        plotObj.plotReadCov(refcov_dict)