forked from karel-brinda/MiniPhy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
stats.smk
55 lines (45 loc) Β· 1.4 KB
/
stats.smk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
"""
Two types of statistics are computed:
- batch statistics: one record per batch, all collected in a single file
- genome statistics: one record per genome, one file per batch
"""
rule stats_batches:
    """
    Produce the global batch-statistics file.

    The merge script is invoked with the per-batch statistics files of
    every batch returned by get_batches(); whatever it prints to stdout
    becomes the output file.
    """
    input:
        [fn_stats_batches_1batch(_batch=b) for b in get_batches()],
    output:
        fn_stats_batches(),
    conda:
        "../envs/basic_env.yaml"
    params:
        # Path of the merging script, substituted into the shell command below.
        s="workflow/scripts/merge_global_stats.py",
    shell:
        """
        {params.s} {input} > {output}
        """
def get_stats_files(protocol):
    """
    List the summary files that feed the per-batch statistics of one protocol.

    The batch is left as the literal "{batch}" placeholder in every file name
    (presumably resolved later by Snakemake's wildcard substitution).

    Args:
        protocol: Protocol name; looked up in the config as "protocol_<protocol>".

    Returns:
        An empty list when the protocol is switched off in the config.
        Otherwise a list holding, in this order: the histogram summary (only
        if "kmer_statistics" is set), the nscl summary (only if
        "sequence_statistics" is set), and the compression summary.
    """
    # Disabled protocol: nothing to collect.
    if not config[f"protocol_{protocol}"]:
        return []

    # Keyword arguments shared by all file-name builders.
    name_kwargs = dict(_batch="{batch}", _protocol=protocol)

    paths = []
    if config["kmer_statistics"]:
        paths += [fn_hist_summary(**name_kwargs)]
    if config["sequence_statistics"]:
        paths += [fn_nscl_summary(**name_kwargs)]
    paths += [fn_compr_summary(**name_kwargs)]
    return paths
rule stats_batches_1batch:
    """
    Assemble the statistics file of a single batch.

    The output starts with a "batch<TAB><batch name>" line and continues with
    the concatenated contents of the summary files that get_stats_files()
    returns for the "asm", "pre" and "post" protocols.
    """
    input:
        [get_stats_files(protocol=p) for p in ("asm", "pre", "post")],
    output:
        fn_stats_batches_1batch(_batch="{batch}"),
    shell:
        """
        (
            printf 'batch\t%s\n' {wildcards.batch}
            cat {input}
        )> {output}
        """