This tutorial shows how to convert the basic rnaseq-nf pipeline to Nextflow DSL-2.
Clone this repository to your computer, then change into the repository directory:
# Clone the tutorial repository, then enter its directory if the clone succeeded
git clone https://github.com/nextflow-io/nfcamp-tutorial.git \
  && cd nfcamp-tutorial
Pull the required container:
docker pull nextflow/rnaseq-nf:latest
Change the standard profile in the nextflow.config file
as follows:
standard {
    // Container image applied to the processes
    process {
        container = 'nextflow/rnaseq-nf:latest'
    }

    // Turn on Docker execution
    docker {
        enabled = true
    }

    // Enable resume for this profile
    resume = true
}
Run the following scripts, checking the differences between them:
nextflow run main1.nf
nextflow run main2.nf -params-file reads.yml
nextflow run main3.nf
nextflow run main4.nf
Save the processes to the file rnaseq-processes.nf
nextflow run main6.nf
include index from './rnaseq-processes' params(params)
include quant from './rnaseq-processes' params(params)
include fastqc from './rnaseq-processes' params(params)
include multiqc from './rnaseq-processes' params(params)
// Sub-workflow chaining the index, quant, fastqc and multiqc processes.
// Inputs: `transcriptome` (passed to index) and `read_pairs_ch`
// (passed to both quant and fastqc).
workflow rnaseq_analysis {
    get:
        transcriptome
        read_pairs_ch

    main:
        index(transcriptome)
        quant(index.out, read_pairs_ch)
        fastqc(read_pairs_ch)

        // Merge the quant and fastqc outputs and gather them into one
        // collected item for the multiqc process
        multiqc_input = quant.out.mix(fastqc.out).collect()
        multiqc(multiqc_input, params.multiqc)
}
// Entry workflow: runs rnaseq_analysis on params.transcriptome and the
// read pairs matched by params.reads.
workflow rnaseqForTranscrip1 {
    // checkExists: true is passed so the factory checks that the files exist
    read_pairs_ch = Channel.fromFilePairs(params.reads, checkExists: true)

    rnaseq_analysis(params.transcriptome, read_pairs_ch)
}
// Entry workflow: runs rnaseq_analysis on params.transcriptome and the
// read pairs matched by params.reads.
// NOTE(review): this reads the same params.transcriptome as
// rnaseqForTranscrip1 — presumably each entry is meant to get its own
// transcriptome parameter; confirm.
workflow rnaseqForTranscrip2 {
    // checkExists: true is passed so the factory checks that the files exist
    read_pairs_ch = Channel.fromFilePairs(params.reads, checkExists: true)

    rnaseq_analysis(params.transcriptome, read_pairs_ch)
}
nextflow run main9.nf -entry rnaseqForTranscrip1
nextflow run main9.nf -entry rnaseqForTranscrip2
// Default paths of the two transcriptome files, built from `baseDir`
params.transcript1 = "$baseDir/data/ggal/transcriptome_1.fa"
params.transcript2 = "$baseDir/data/ggal/transcriptome_2.fa"
Invoke both workflows:
// Implicit entry workflow: calls both named workflows in a single run.
workflow {
    rnaseqForTranscrip1()
    rnaseqForTranscrip2()
}
// Entry workflow: runs rnaseq_analysis for every transcriptome / read-pair
// combination found under data/ggal.
workflow {
    reads = Channel.fromFilePairs('data/ggal/ggal_*_{1,2}.fq')
    transcripts = Channel.fromPath('data/ggal/transcriptome_*.fa')

    // Combine each transcriptome with the read pairs, then fork every
    // combined item into two named outputs: element 0 on its own, and
    // elements 1 and 2 regrouped as a pair.
    transcripts
        .combine(reads)
        .fork { item ->
            trascript: item[0]
            reads: [ item[1], item[2] ]
        }
        .set { fork_out }

    rnaseq_analysis(fork_out)
}
// Entry workflow: same combine/fork pipeline, written with the pipe operator.
workflow {
    Channel.fromFilePairs('data/ggal/ggal_*_{1,2}.fq').set { reads }

    // transcriptomes -> combine with reads -> fork into two outputs -> analysis
    Channel.fromPath('data/ggal/transcriptome_*.fa') \
        | combine(reads) \
        | fork { item ->
            trascript: item[0]
            reads: [ item[1], item[2] ]
        } \
        | rnaseq_analysis
}
// Entry workflow: the fork criteria are defined once with forkCriteria and
// then handed to the fork operator by name.
workflow {
    // Element 0 goes to one output; elements 1 and 2 are regrouped as a pair
    separateTranscriptFromReads = forkCriteria({ item ->
        trascript: item[0]
        reads: [ item[1], item[2] ]
    })

    Channel.fromFilePairs('data/ggal/ggal_*_{1,2}.fq').set { reads }

    Channel.fromPath('data/ggal/transcriptome_*.fa') \
        | combine(reads) \
        | fork(separateTranscriptFromReads) \
        | rnaseq_analysis
}
/*
 * Builds the forked input for rnaseq_analysis.
 *
 * transcriptsPath - path or glob passed to Channel.fromPath
 * readsPath       - glob passed to Channel.fromFilePairs
 *
 * Returns the result of the final pipe expression, i.e. the output of `fork`.
 */
def getInputForRnaseq( transcriptsPath, readsPath ) {
    // Element 0 goes to one output; elements 1 and 2 are regrouped as a pair
    def separateTranscriptFromReads = forkCriteria({ item ->
        trascript: item[0]
        reads: [ item[1], item[2] ]
    })

    def reads = Channel.fromFilePairs(readsPath)

    // Last expression of the function, so its value is what the caller gets
    Channel.fromPath(transcriptsPath) \
        | combine(reads) \
        | fork(separateTranscriptFromReads)
}
// Entry workflow: pipes the output of getInputForRnaseq, built from
// params.transcripts and params.reads, into rnaseq_analysis.
workflow {
    getInputForRnaseq(params.transcripts, params.reads) | rnaseq_analysis
}
- Remove publishDir from processes
- Add emit/out to rnaseq_analysis
- Add publish to the main workflow
// Entry workflow: runs the analysis, then publishes each named output of
// rnaseq_analysis to its own results directory.
workflow {
    main:
        getInputForRnaseq(params.transcripts, params.reads) | rnaseq_analysis

    // One `<channel> to: <directory>` entry per line
    publish:
        rnaseq_analysis.out.fastqc to: 'results/fastqc_files'
        rnaseq_analysis.out.quant to: 'results/quant_files'
        rnaseq_analysis.out.multiqc to: 'results/multiqc_report'
}