-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathDEEPSIGNAL2.nf
167 lines (138 loc) · 5.62 KB
/
DEEPSIGNAL2.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
/*
=========================================================================================
NANOME (Nanopore methylation) pipeline for Oxford Nanopore sequencing
=========================================================================================
NANOME Analysis Pipeline.
#### Homepage / Documentation
https://github.com/LabShengLi/nanome
@Author : Yang Liu
@FileName : DEEPSIGNAL2.nf
@Software : NANOME project
@Organization : JAX Sheng Li Lab
----------------------------------------------------------------------------------------
*/
/*
 * DEEPSIGNAL2: run DeepSignal2 feature extraction and methylation calling on
 * one resquiggled batch directory.
 *
 * Inputs:
 *   resquiggle              - batch directory; its `workspace/` sub-directory is
 *                             passed to `deepsignal2 extract`.
 *   reference_genome,
 *   ch_src, ch_utils        - staged into the task directory; not referenced by
 *                             the script text below.
 *   deepsignal2_model_file  - tar.gz archive that is unpacked in the task
 *                             directory; presumably it expands to the path named
 *                             by params.DEEPSIGNAL2_MODEL_NAME (confirm).
 *
 * Outputs (all optional):
 *   deepsignal2_batch_per_read - gzipped per-read call table for this batch.
 *   deepsignal2_batch_feature  - gzipped extracted-feature table for this batch.
 *   runlog                     - combined stdout/stderr of both deepsignal2 steps.
 *
 * Runs only when params.runBasecall, params.runMethcall and
 * params.runDeepSignal are all true.
 *
 * NOTE(review): the process carries both a `script:` and a `shell:` label; the
 * `cores` assignment sits under the former and the command string under the
 * latter. Kept as in the original.
 */
process DEEPSIGNAL2 {
    tag "${resquiggle.baseName}"

    publishDir "${params.outdir}/${params.dsname}_intermediate/deepsignal2",
        mode: "copy",
        enabled: params.outputIntermediate

    publishDir "${params.outdir}/${params.dsname}-run-log",
        mode: "copy", pattern: "*.DeepSignal2.run.log"

    input:
    path resquiggle
    each path (reference_genome)
    each path (ch_src)
    each path (ch_utils)
    each path (deepsignal2_model_file) // online model file

    output:
    path "batch_${resquiggle.baseName}_deepsignal2_per_read.tsv.gz", emit: deepsignal2_batch_per_read, optional: true
    path "batch_${resquiggle.baseName}_deepsignal2_feature.tsv.gz", emit: deepsignal2_batch_feature, optional: true
    path "*.DeepSignal2.run.log", optional:true, emit: runlog

    when:
    params.runBasecall && params.runMethcall && params.runDeepSignal

    script:
    // Worker count handed to both deepsignal2 sub-commands via --nproc.
    cores = task.cpus * params.highProcTimes
    shell:
    '''
    ## Tracing and nounset are switched off while conda is sourced and activated
    set +xu
    . /opt/conda/etc/profile.d/conda.sh
    conda activate /opt/conda/envs/deepsignal2
    set -x

    ## Prepend the conda lib dir. The separator is only added when
    ## LD_LIBRARY_PATH already has a value, so no empty entry (which the
    ## dynamic loader reads as the current directory) is left at the end.
    export LD_LIBRARY_PATH="${CONDA_PREFIX}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
    export HDF5_PLUGIN_PATH="$CONDA_PREFIX/hdf5/lib/plugin"

    which deepsignal2

    ## wget !{params.DEEPSIGNAL2_MODEL_FILE}
    tar -xzf !{deepsignal2_model_file}

    ## Choose CPU or GPU options depending on whether CUDA_VISIBLE_DEVICES is set
    echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-}"
    if [[ "${CUDA_VISIBLE_DEVICES:-}" == "" ]] ; then
        echo "Detect no GPU, using CPU commandType"
        commandType='cpu'
        gpuOptions=" "
    else
        echo "Detect GPU, using GPU commandType"
        commandType='gpu'
        gpuOptions="--nproc_gpu 1"
    fi

    ## Step 1: extract CG-motif features from the resquiggled reads
    deepsignal2 extract \
        -i !{resquiggle}/workspace/ \
        -o batch_!{resquiggle.baseName}_deepsignal2_feature.tsv \
        --corrected_group !{params.ResquiggleCorrectedGroup} \
        --nproc !{cores} --motifs CG \
        &>> !{resquiggle.baseName}.DeepSignal2.run.log
    echo "### DeepSignal2 feature extract DONE"

    ## Step 2: call modifications from the extracted features
    deepsignal2 call_mods \
        --model_path !{params.DEEPSIGNAL2_MODEL_NAME} \
        --input_path batch_!{resquiggle.baseName}_deepsignal2_feature.tsv \
        --result_file batch_!{resquiggle.baseName}_deepsignal2_per_read.tsv \
        --nproc !{cores} ${gpuOptions} \
        &>> !{resquiggle.baseName}.DeepSignal2.run.log
    echo "### DeepSignal2 methylation DONE"

    ## Step 3: compress both tables. The targets are truncated first and the
    ## cat-pipe form is kept on purpose: a .gz file is still written even if
    ## a tsv is missing.
    > batch_!{resquiggle.baseName}_deepsignal2_feature.tsv.gz
    > batch_!{resquiggle.baseName}_deepsignal2_per_read.tsv.gz

    cat batch_!{resquiggle.baseName}_deepsignal2_feature.tsv | gzip -f >> \
        batch_!{resquiggle.baseName}_deepsignal2_feature.tsv.gz
    cat batch_!{resquiggle.baseName}_deepsignal2_per_read.tsv | gzip -f >> \
        batch_!{resquiggle.baseName}_deepsignal2_per_read.tsv.gz

    ## Clean
    if [[ !{params.cleanStep} == "true" ]]; then
        rm -rf batch_!{resquiggle.baseName}_deepsignal2_feature.tsv \
            batch_!{resquiggle.baseName}_deepsignal2_per_read.tsv \
            !{params.DEEPSIGNAL2_MODEL_NAME}
    fi
    echo "### DeepSignal2 batch DONE"
    '''
}
/*
 * DEEPSIGNAL2COMB: merge the per-batch DeepSignal2 outputs into dataset-level
 * files and convert the per-read calls with the unify-format helper script.
 *
 * Inputs:
 *   deepsignal2_batch_per_read_collect - staged per-read batch files
 *                                        (matched as batch_*deepsignal2_per_read.tsv.gz).
 *   deepsignal2_batch_feature_collect  - staged feature batch files
 *                                        (matched as batch_*deepsignal2_feature.tsv.gz).
 *   ch_src, ch_utils                   - staged helper directories; the script
 *                                        calls utils/unify_format_for_calls.sh,
 *                                        presumably provided through ch_utils
 *                                        (confirm against the caller).
 *
 * Outputs:
 *   deepsignal2_per_read_combine - concatenated (optionally de-duplicated) per-read calls.
 *   deepsignal2_feature_combine  - concatenated feature table.
 *   read_unify / site_unify      - files matched under Read_Level-<dsname>/ and
 *                                  Site_Level-<dsname>/ after the unify script runs.
 *
 * Runs only when params.runCombine is true.
 *
 * Every continued shell line ends with whitespace before the backslash so that
 * adjacent arguments stay separate words regardless of how the following line
 * is indented.
 */
process DEEPSIGNAL2COMB {
    tag "${params.dsname}"

    publishDir "${params.outdir}/${params.dsname}-methylation-callings/Raw_Results-${params.dsname}",
        mode: "copy",
        pattern: "${params.dsname}_deepsignal2_per_read_combine.*.gz",
        enabled: params.outputRaw

    publishDir "${params.outdir}/${params.dsname}-methylation-callings/Features-${params.dsname}",
        mode: "copy",
        pattern: "${params.dsname}_deepsignal2_feature_combine.*.gz"

    publishDir "${params.outdir}/${params.dsname}-methylation-callings",
        mode: "copy",
        pattern: "Read_Level-${params.dsname}/${params.dsname}_*-perRead-score*.gz"

    publishDir "${params.outdir}/${params.dsname}-methylation-callings",
        mode: "copy",
        pattern: "Site_Level-${params.dsname}/*-perSite-cov1*.gz"

    input:
    path deepsignal2_batch_per_read_collect
    path deepsignal2_batch_feature_collect
    path ch_src
    path ch_utils

    output:
    path "${params.dsname}_deepsignal2_per_read_combine.tsv.gz", emit: deepsignal2_per_read_combine
    path "${params.dsname}_deepsignal2_feature_combine.tsv.gz", emit: deepsignal2_feature_combine
    path "Read_Level-${params.dsname}/${params.dsname}_*-perRead-score*.gz", emit: read_unify
    path "Site_Level-${params.dsname}/*-perSite-cov*.gz", emit: site_unify

    when:
    params.runCombine

    script:
    // Worker count forwarded to the unify-format script.
    cores = task.cpus * params.highProcTimes
    shell:
    '''
    ## combine batches
    > !{params.dsname}_deepsignal2_feature_combine.tsv.gz
    > !{params.dsname}_deepsignal2_per_read_combine.tsv.gz

    ## gzip files can be concatenated directly, so the batches are joined with cat
    cat batch_*deepsignal2_feature.tsv.gz > !{params.dsname}_deepsignal2_feature_combine.tsv.gz
    cat batch_*deepsignal2_per_read.tsv.gz > !{params.dsname}_deepsignal2_per_read_combine.tsv.gz

    if [[ !{params.deduplicate} == true ]] ; then
        echo "### Deduplicate for read-level outputs"
        ## sort order: Chr, Start, (End), ID, Strand
        zcat !{params.dsname}_deepsignal2_per_read_combine.tsv.gz | \
            sort -V -u -k1,1 -k2,2n -k5,5 -k3,3 | \
            gzip -f > !{params.dsname}_deepsignal2_per_read_combine.sort.tsv.gz
        ## replace the combined file with its sorted, de-duplicated version
        rm !{params.dsname}_deepsignal2_per_read_combine.tsv.gz && \
            mv !{params.dsname}_deepsignal2_per_read_combine.sort.tsv.gz \
                !{params.dsname}_deepsignal2_per_read_combine.tsv.gz
    fi

    ## Unify format output
    bash utils/unify_format_for_calls.sh \
        !{params.dsname} DeepSignal2 DeepSignal \
        !{params.dsname}_deepsignal2_per_read_combine.tsv.gz \
        . !{cores} 12 !{params.sort ? true : false} \
        "!{params.chrSet1.replaceAll(',', ' ')}"
    '''
}