diff --git a/modules/nf-core/custom/tabulartogseachip/environment.yml b/modules/nf-core/custom/tabulartogseachip/environment.yml
new file mode 100644
index 00000000000..cc49deffd16
--- /dev/null
+++ b/modules/nf-core/custom/tabulartogseachip/environment.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "conda-forge::gawk=5.1.0"
diff --git a/modules/nf-core/custom/tabulartogseachip/main.nf b/modules/nf-core/custom/tabulartogseachip/main.nf
new file mode 100644
index 00000000000..3065b5f925f
--- /dev/null
+++ b/modules/nf-core/custom/tabulartogseachip/main.nf
@@ -0,0 +1,65 @@
+process CUSTOM_TABULARTOGSEACHIP {
+
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gawk:5.1.0' :
+        'biocontainers/gawk:5.1.0' }"
+
+    input:
+    path tabular
+    tuple val(id), val(symbol)
+
+    output:
+    path "*.chip"      , emit: chip
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // NOTE(review): '9.1' does not match the gawk 5.1.0 container pinned above; confirm the
+    // intended value. Changing it alters versions.yml and requires regenerating the test snapshot.
+    def VERSION = '9.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+    // The output is named after the input with its last extension replaced by '.chip'.
+    def outfile = "${tabular.baseName}.chip"
+    """
+    # Print the 1-based position of the header column named exactly \$2 in the
+    # tab-separated file \$1. Fails when the column is missing or duplicated.
+    function find_column_number {
+        local file="\$1"
+        local column="\$2"
+        local matches
+
+        # -x -F compares whole header fields literally, so regex metacharacters in
+        # a column name (e.g. '.') cannot match a different column. '|| true' keeps
+        # a no-match from aborting under pipefail before the error is reported.
+        matches=\$(head -n 1 "\$file" | tr '\\t' '\\n' | grep -n -x -F -- "\$column" | cut -d':' -f1 || true)
+
+        if [ -z "\$matches" ]; then
+            echo "ERROR: column '\$column' not found in the header of \$file" >&2
+            return 1
+        fi
+        if [ "\$(wc -l <<< "\$matches")" -ne 1 ]; then
+            echo "ERROR: column '\$column' occurs more than once in the header of \$file" >&2
+            return 1
+        fi
+
+        echo "\$matches"
+    }
+
+    id_col=\$(find_column_number "$tabular" "$id")
+    symbol_col=\$(find_column_number "$tabular" "$symbol")
+
+    # Build the file under a temporary name so a failed run leaves no '*.chip' output.
+    printf 'Probe Set ID\\tGene Symbol\\tGene Title\\n' > "${outfile}.tmp"
+    tail -n +2 "$tabular" | awk -F'\\t' -v OFS='\\t' -v id="\$id_col" -v symbol="\$symbol_col" '{print \$id, \$symbol, "NA"}' >> "${outfile}.tmp"
+    mv "${outfile}.tmp" "${outfile}"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        version: \$(echo $VERSION)
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/custom/tabulartogseachip/meta.yml b/modules/nf-core/custom/tabulartogseachip/meta.yml
new file mode 100644
index 00000000000..abbb01156dd
--- /dev/null
+++ b/modules/nf-core/custom/tabulartogseachip/meta.yml
@@ -0,0 +1,46 @@
+name: "custom_tabulartogseachip"
+description: Make a GSEA chip annotation file (.chip) from tabular inputs
+keywords:
+  - gsea
+  - chip
+  - convert
+  - tabular
+tools:
+  - custom:
+      description: "Make a GSEA chip annotation file (.chip) from tabular inputs"
+      tool_dev_url: "https://github.com/nf-core/modules/blob/master/modules/nf-core/custom/tabulartogseachip/main.nf"
+      identifier: ""
+input:
+  - - tabular:
+        type: file
+        description: |
+          Tab-separated (TSV) file with a header row, containing a column of
+          feature ids and a column of feature symbols. Other delimiters are
+          not currently supported.
+        pattern: "*.tsv"
+  - - id:
+        type: string
+        description: The name of the column containing feature ids
+    - symbol:
+        type: string
+        description: The name of the column containing feature symbols
+output:
+  - chip:
+      - "*.chip":
+          type: file
+          description: |
+            A chip annotation file (.chip) with the columns Probe Set ID,
+            Gene Symbol and Gene Title (always NA), as defined by the Broad
+            documentation at
+            https://software.broadinstitute.org/cancer/software/gsea/wiki/index.php/Data_formats
+          pattern: "*.chip"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@pinin4fjords"
+  - "@suzannejin"
+maintainers:
+  - "@pinin4fjords"
diff --git a/modules/nf-core/custom/tabulartogseachip/tests/main.nf.test b/modules/nf-core/custom/tabulartogseachip/tests/main.nf.test
new file mode 100644
index 00000000000..08d82ef598c
--- /dev/null
+++ b/modules/nf-core/custom/tabulartogseachip/tests/main.nf.test
@@ -0,0 +1,36 @@
+nextflow_process {
+
+    name "Test Process CUSTOM_TABULARTOGSEACHIP"
+    script "../main.nf"
+    process "CUSTOM_TABULARTOGSEACHIP"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "custom"
+    tag "custom/tabulartogseachip"
+
+    test("test tsv to chip") {
+        when {
+            process {
+                """
+                input[0] = Channel.fromPath(
+                    file(params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.gene_counts.top1000cov.tsv', checkIfExists: true),
+                )
+                input[1] = Channel.of(['gene_id', 'gene_name'])
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.chip,
+                    process.out.versions
+                ).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/custom/tabulartogseachip/tests/main.nf.test.snap b/modules/nf-core/custom/tabulartogseachip/tests/main.nf.test.snap
new file mode 100644
index 00000000000..052333c70d7
--- /dev/null
+++ b/modules/nf-core/custom/tabulartogseachip/tests/main.nf.test.snap
@@ -0,0 +1,17 @@
+{
+    "test tsv to chip": {
+        "content": [
+            [
+                "SRP254919.salmon.merged.gene_counts.top1000cov.chip:md5,2ab8a685c675ce2fb97142526766044a"
+            ],
+            [
+                "versions.yml:md5,8416d7d8e5723a32c360278d4a1c4597"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.2",
+            "nextflow": "24.10.2"
+        },
+        "timestamp": "2024-12-13T15:37:38.643112727"
+    }
+}
\ No newline at end of file