@@ -35,48 +35,34 @@ if (! params.input) exit 1, "Missing --input option for input data, check comma
35
35
// if ( !file(params.input.toString()).exists() ) exit 1, "input does not exist, check params: --input ${params.input}"
36
36
37
37
// Parse genome params
38
- genome_map = params. genome_map
38
+ gbl_genome_map = params. genome_map
39
39
40
- if (genome_map[params. genome]) { genome_path = genome_map[params. genome] }
41
- else { genome_path = params. genome }
40
+ gbl_genome_path = gbl_genome_map[params. genome] ? gbl_genome_map[params. genome] : params. genome
42
41
43
42
// Infer dataType and chrSet from the reference genome name: hg - human, mm - mouse, ecoli - ecoli; anything else is treated as another reference genome
44
43
humanChrSet = ' chr1,chr2,chr3,chr4,chr5,chr6,chr7,chr8,chr9,chr10,chr11,chr12,chr13,chr14,chr15,chr16,chr17,chr18,chr19,chr20,chr21,chr22,chrX,chrY'
45
- if (params. genome. contains(' hg' ) || (params. dataType && params. dataType == ' human' )) {
46
- dataType = " human"
47
- if (! params. chrSet) {
48
- // default for human, if false or 'false' (string), using ' '
49
- chrSet = humanChrSet
50
- } else {
51
- chrSet = params. chrSet
52
- }
53
- } else if (params. dataType && params. dataType == ' mouse' ) {
54
- dataType = " mouse"
55
- if (! params. chrSet) {
56
- // default for human, if false or 'false' (string), using ' '
57
- chrSet = humanChrSet
58
- } else {
59
- chrSet = params. chrSet
60
- }
61
- } else if (params. genome. contains(' ecoli' ) || (params. dataType && params. dataType == ' ecoli' )) {
62
- dataType = " ecoli"
63
- if (! params. chrSet) {
64
- // default for ecoli
65
- chrSet = ' NC_000913.3'
66
- } else {
67
- chrSet = params. chrSet
68
- }
44
+
45
+ genome_basefn = (new File (params. genome)). name
46
+ if (genome_basefn. startsWith(' hg' ) || (params. dataType && params. dataType == ' human' )) {
47
+ dataType = params. dataType ? params. dataType : " human"
48
+ // default for human chr
49
+ chrSet = params. chrSet ? params. chrSet : humanChrSet
50
+ } else if (genome_basefn. startsWith(' mm' ) || (params. dataType && params. dataType == ' mouse' ) ){
51
+ dataType = params. dataType ? params. dataType : " mouse"
52
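+ // default chromosome set for mouse: currently falls back to humanChrSet -- NOTE(review): confirm this list is intended for mouse genomes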
53
+ chrSet = params. chrSet ? params. chrSet : humanChrSet
54
+ } else if (genome_basefn. startsWith(' ecoli' ) || (params. dataType && params. dataType == ' ecoli' )) {
55
+ dataType = params. dataType ? params. dataType : " ecoli"
56
+ chrSet = params. chrSet ? params. chrSet : ' NC_000913.3'
69
57
} else {
70
- // default will not found name, use other
71
- if (! params. dataType) { dataType = ' other' } else { dataType = params. dataType }
72
- if (! params. chrSet) {
73
- // No default value for other reference genome
74
- exit 1 , " Missing --chrSet option for other reference genome, please specify chromosomes used in reference genome [${ params.genome} ]"
75
- }
76
- chrSet = params. chrSet
58
+ // if the data type cannot be inferred from the genome name, fall back to 'other'
59
+ dataType = params. dataType ? params. dataType : " other"
60
+
61
+ if (params. chrSet) chrSet = params. chrSet
62
+ else exit 1 , " Missing --chrSet option for other reference genome, please specify chromosomes used in reference genome [${ params.genome} ]"
77
63
}
78
64
79
- // chrSet1 and dataType1 is the infered params, defined from chrSet and dataType (not in scope of params)
65
+ // chrSet1 and dataType1 are the inferred params, derived from chrSet and dataType (which are not in the scope of params); they are used by every module
80
66
params. chrSet1 = chrSet
81
67
params. dataType1 = dataType
82
68
@@ -85,7 +71,7 @@ projectDir = workflow.projectDir
85
71
ch_utils = Channel . fromPath(" ${ projectDir} /utils" , type : ' dir' , followLinks : false )
86
72
ch_src = Channel . fromPath(" ${ projectDir} /src" , type : ' dir' , followLinks : false )
87
73
88
- // Reference genome, chom size file
74
+ // Reference genome and chrom sizes file names; used by every module
89
75
params. referenceGenome = " ${ params.GENOME_DIR} /${ params.GENOME_FN} "
90
76
params. chromSizesFile = " ${ params.GENOME_DIR} /${ params.CHROM_SIZE_FN} "
91
77
@@ -106,23 +92,23 @@ if (params.input.endsWith(".filelist.txt")) {
106
92
return file(it[0 ])
107
93
}
108
94
}
109
- .set{ inputCh }
95
+ .set{ ch_inputs }
110
96
} else if (params. input. contains(' *' ) || params. input. contains(' ?' )) {
111
97
// match all files in the folder; note: the input must be a quoted string ('...') to prevent the wildcard from being expanded in advance
112
98
// such as --input '/fastscratch/liuya/nanome/NA12878/NA12878_CHR22/input_chr22/*'
113
99
Channel . fromPath(params. input, type : ' any' , checkIfExists : true )
114
- .set{ inputCh }
100
+ .set{ ch_inputs }
115
101
} else {
116
102
// For single file/wildcard matched files
117
- Channel . fromPath( params. input, checkIfExists : true ). set{ inputCh }
103
+ Channel . fromPath( params. input, checkIfExists : true ). set{ ch_inputs }
118
104
}
119
105
120
106
// Header log info
121
107
def summary = [:]
122
108
summary[' dsname' ] = params. dsname
123
109
summary[' input' ] = params. input
124
110
125
- if (genome_map [params. genome] != null ) { summary[' genome' ] = " ${ params.genome} - [${ genome_path } ]" }
111
+ if (gbl_genome_map [params. genome]) { summary[' genome' ] = " ${ params.genome} - [${ gbl_genome_path } ]" }
126
112
else { summary[' genome' ] = params. genome }
127
113
128
114
summary[' \n Running settings' ] = " --------"
@@ -196,6 +182,11 @@ if (params.runMethcall && params.runDeepMod) {
196
182
summary[' DEEPMOD_RNN_MODEL' ] = " ${ params.DEEPMOD_RNN_MODEL} "
197
183
}
198
184
}
185
+ if (params. runNANOME) {
186
+ summary[' NANOME_MODEL' ] = " ${ params.NANOME_MODEL} "
187
+ summary[' CS_MODEL_FILE' ] = " ${ params.CS_MODEL_FILE} "
188
+ summary[' CS_MODEL_SPEC' ] = " ${ params.CS_MODEL_SPEC} "
189
+ }
199
190
200
191
summary[' \n Pipeline settings' ] = " --------"
201
192
summary[' Working dir' ] = workflow. workDir
@@ -284,46 +275,45 @@ include { DEEPSIGNAL; DPSIGCOMB } from './modules/DEEPSIGNAL'
284
275
285
276
include { DEEPSIGNAL2 ; DEEPSIGNAL2COMB } from ' ./modules/DEEPSIGNAL2'
286
277
287
- include { REPORT } from ' ./modules/REPORT'
288
-
289
278
include { Guppy ; GuppyComb ; Tombo ; TomboComb ; DeepMod ; DpmodComb ; METEORE } from ' ./modules/OLDTOOLS'
290
279
291
280
include { NewTool ; NewToolComb } from ' ./modules/NEWTOOLS'
292
281
293
282
include { CLAIR3 ; PHASING } from ' ./modules/PHASING'
294
283
284
+ include { CONSENSUS } from ' ./modules/CONSENSUS'
285
+
286
+ include { EVAL } from ' ./modules/EVAL'
287
+
288
+ include { REPORT } from ' ./modules/REPORT'
289
+
290
+ // placeholder channels, used to stand in for a file input when a channel has no real file
291
+ null1 = Channel . fromPath(" ${ projectDir} /utils/null1" )
292
+ null2 = Channel . fromPath(" ${ projectDir} /utils/null2" )
293
+ null3 = Channel . fromPath(" ${ projectDir} /utils/null3" )
295
294
296
295
workflow {
297
- if ( ! file(genome_path . toString()). exists() )
296
+ if ( ! file(gbl_genome_path . toString()). exists() )
298
297
exit 1 , " genome reference path does not exist, check params: --genome ${ params.genome} "
299
298
300
- genome_ch = Channel . fromPath(genome_path , type : ' any' , checkIfExists : true )
299
+ ch_genome = Channel . fromPath(gbl_genome_path , type : ' any' , checkIfExists : true )
301
300
302
- if (! params. rerioDir) { // default if null, will online downloading
303
- // This is only a place holder for input
304
- rerioDir = Channel . fromPath(" ${ projectDir} /utils/null1" , type : ' any' , checkIfExists : false )
305
- } else {
306
- // User provide the dir
307
- if ( ! file(params. rerioDir. toString()). exists() )
308
- exit 1 , " rerioDir does not exist, check params: --rerioDir ${ params.rerioDir} "
309
- rerioDir = Channel . fromPath(params. rerioDir, type : ' any' , checkIfExists : true )
310
- }
301
+ // rerio model dir will be downloaded in ENVCHECK if needed
302
+ ch_rerio_dir = (params. rerio && params. rerioDir) ? Channel . fromPath(params. rerioDir, type : ' any' , checkIfExists : true ) :
303
+ null1
311
304
312
- if (! params. runDeepSignal) {
313
- // use null placeholder
314
- deepsignalDir = Channel . fromPath(" ${ projectDir} /utils/null2" , type : ' any' , checkIfExists : true )
315
- } else if (! params. deepsignalDir) {
316
- // default if null, will online staging
317
- deepsignalDir = Channel . fromPath(params. DEEPSIGNAL_MODEL_ONLINE , type : ' any' , checkIfExists : true )
305
+ // deepsignal model dir will be downloaded in ENVCHECK if needed
306
+ if (params. runDeepSignal) {
307
+ ch_deepsignal_dir = params. deepsignalDir ?
308
+ Channel . fromPath(params. deepsignalDir, type : ' any' , checkIfExists : true ) :
309
+ Channel . fromPath(params. DEEPSIGNAL_MODEL_ONLINE , type : ' any' , checkIfExists : true )
318
310
} else {
319
- // User provide the dir
320
- if ( ! file(params. deepsignalDir. toString()). exists() )
321
- exit 1 , " deepsignalDir does not exist, check params: --deepsignalDir ${ params.deepsignalDir} "
322
- deepsignalDir = Channel . fromPath(params. deepsignalDir, type : ' any' , checkIfExists : true )
311
+ // use null placeholder
312
+ ch_deepsignal_dir = null2
323
313
}
324
314
325
- ENVCHECK (genome_ch , ch_utils, rerioDir, deepsignalDir )
326
- UNTAR (inputCh )
315
+ ENVCHECK (ch_genome , ch_utils, ch_rerio_dir, ch_deepsignal_dir )
316
+ UNTAR (ch_inputs )
327
317
328
318
if (params. runBasecall) {
329
319
BASECALL (UNTAR . out. untar)
@@ -334,9 +324,12 @@ workflow {
334
324
}
335
325
336
326
// Run resquiggle if Tombo, DeepSignal or DeepSignal2 methylation calling is enabled, or if resquiggle is explicitly requested
337
- if (((params. runDeepSignal || params. runTombo || params. runDeepSignal2) && params. runMethcall) || params. runResquiggle) {
338
- // BASECALL.out.basecall.subscribe({ println("BASECALL.out.basecall: $it") })
339
- RESQUIGGLE (BASECALL . out. basecall, ENVCHECK . out. reference_genome)
327
+ if (((params. runDeepSignal || params. runTombo || params. runDeepSignal2) && params. runMethcall)
328
+ || params. runResquiggle) {
329
+ resquiggle = RESQUIGGLE (BASECALL . out. basecall, ENVCHECK . out. reference_genome)
330
+ f1 = params. feature_extract ? resquiggle. feature_extract : Channel . empty()
331
+ } else {
332
+ f1 = Channel . empty()
340
333
}
341
334
342
335
if (params. runNanopolish && params. runMethcall) {
@@ -373,12 +366,19 @@ workflow {
373
366
}
374
367
375
368
if (params. runDeepSignal2 && params. runMethcall) {
376
- DEEPSIGNAL2 (RESQUIGGLE . out. resquiggle. collect(),
369
+ deepsignal2 = DEEPSIGNAL2 (RESQUIGGLE . out. resquiggle. collect(),
377
370
ENVCHECK . out. reference_genome,
378
371
ch_src, ch_utils)
379
- DEEPSIGNAL2COMB (DEEPSIGNAL2 . out. deepsignal2_combine_out,
372
+ comb_deepsignal2 = DEEPSIGNAL2COMB (DEEPSIGNAL2 . out. deepsignal2_combine_out,
380
373
ch_src, ch_utils
381
374
)
375
+ f2 = deepsignal2. deepsignal2_feature_out
376
+ s3_1 = comb_deepsignal2. site_unify
377
+ r3_1 = comb_deepsignal2. read_unify
378
+ } else {
379
+ f2 = Channel . empty()
380
+ s3_1 = Channel . empty()
381
+ r3_1 = Channel . empty()
382
382
}
383
383
384
384
if (params. runGuppy && params. runMethcall) {
@@ -453,23 +453,58 @@ workflow {
453
453
r_new = Channel . empty()
454
454
}
455
455
456
- // Site level combine a list
457
- Channel . fromPath(" ${ projectDir} /utils/null1" ). concat(
458
- s1, s2, s3, s4, s5, s6, s7, s_new
459
- ). toList(). set { tools_site_unify }
456
+ null2. concat(
457
+ r1, r2, r3, f1, f2
458
+ ). toList(). set { top3_tools_read_unify }
460
459
461
- Channel . fromPath(" ${ projectDir} /utils/null2" ). concat(
462
- r1, r2, r3
460
+ if (params. runNANOME) {
461
+ consensus = CONSENSUS (top3_tools_read_unify, ch_src, ch_utils)
462
+ s8 = consensus. site_unify
463
+ r8 = consensus. read_unify
464
+ } else {
465
+ s8 = Channel . empty()
466
+ r8 = Channel . empty()
467
+ }
468
+
469
+ null2. concat(
470
+ r1, r2, r3, r8, f1, f2
463
471
). toList(). set { tools_read_unify }
464
472
465
- REPORT (tools_site_unify, tools_read_unify,
473
+ // perform evaluation of tools' methylation results
474
+ if (params. runEval) {
475
+ bg1 = params. bg1 ? Channel . fromPath(params. bg1) : Channel . empty()
476
+ bg2 = params. bg2 ? Channel . fromPath(params. bg2) : Channel . empty()
477
+
478
+ null1. concat(
479
+ bg1, bg2
480
+ ). toList(). set { bg_list }
481
+
482
+ if (params. genome_annotation_dir) {
483
+ genome_annotation_ch = Channel . fromPath(params. genome_annotation_dir)
484
+ } else {
485
+ genome_annotation_ch = null3
486
+ }
487
+
488
+ EVAL (tools_read_unify, bg_list, ch_src, ch_utils, genome_annotation_ch)
489
+ }
490
+
491
+ // Site level combine a list
492
+ null1. concat(
493
+ s1, s2, s3, s4, s5, s6, s7, s_new, s8
494
+ ). toList(). set { tools_site_unify }
495
+
496
+ REPORT (tools_site_unify, top3_tools_read_unify,
466
497
ENVCHECK . out. tools_version_tsv, QCEXPORT . out. qc_report,
467
498
ENVCHECK . out. reference_genome, ch_src, ch_utils)
468
499
469
500
if (params. phasing) {
470
501
CLAIR3 (QCEXPORT . out. bam_data, ENVCHECK . out. reference_genome)
471
- Channel . fromPath(" ${ projectDir} /utils/null1" ). concat(
472
- MGLDNCOMB . out. megalodon_combine, REPORT . out. nanome_combine_out
502
+ null1. concat(
503
+ MGLDNCOMB . out. megalodon_combine,
504
+ MGLDNCOMB . out. read_unify,
505
+ CONSENSUS . out. nanome_combine_out,
506
+ CONSENSUS . out. read_unify,
507
+ NPLSHCOMB . out. nanopolish_combine_out_ch
473
508
). toList(). set { mega_and_nanome_ch }
474
509
PHASING (mega_and_nanome_ch, CLAIR3 . out. clair3_out_ch,
475
510
ch_src, QCEXPORT . out. bam_data, ENVCHECK . out. reference_genome)
0 commit comments