[tabulartogseachip] Add the TABULAR_TO_GSEA_CHIP

module used in nf-core/differentialabundance. This module was originally written by Jonathan Manning. Co-authored-by: Jonathan Manning <[email protected]>
nf-core · suzannejin · Dec 13, 2024 · Dec 13, 2024 · Dec 13, 2024 · Dec 13, 2024
commit fe0271d2d924a68efb645435ec2ec3e4b9b48aa9
diff --git a/modules/nf-core/custom/tabulartogseachip/environment.yml b/modules/nf-core/custom/tabulartogseachip/environment.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "conda-forge::gawk=5.1.0"
diff --git a/modules/nf-core/custom/tabulartogseachip/main.nf b/modules/nf-core/custom/tabulartogseachip/main.nf
@@ -0,0 +1,62 @@
+// Build a GSEA chip annotation file (.chip) from a tab-separated feature table.
+//
+// Inputs:
+//   tabular      - TSV file whose first line is a header row
+//   [id, symbol] - header names of the feature-id and feature-symbol columns
+// Outputs:
+//   chip         - <input basename>.chip with columns Probe Set ID, Gene Symbol, Gene Title (always NA)
+//   versions     - versions.yml recording the bash version
+process TABULAR_TO_GSEA_CHIP {
+
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gawk:5.1.0' :
+        'biocontainers/gawk:5.1.0' }"
+
+    input:
+    path tabular
+    tuple val(id), val(symbol)
+
+    output:
+    path "*.chip"       , emit: chip
+    path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // The output keeps the input basename and swaps the last extension for .chip
+    def outfile = "${tabular.baseName}.chip"
+    """
+    # Print the 1-based index of the first header column whose name equals the
+    # second argument, in the TSV file given as first argument. Prints nothing if absent.
+    function find_column_number {
+        local file="\$1"
+        local column="\$2"
+
+        # -x -F: compare whole header fields as literal strings, so names holding
+        # regex metacharacters cannot match the wrong column; -m 1 keeps the first hit.
+        head -n 1 "\$file" | tr '\\t' '\\n' | grep -n -x -F -m 1 -e "\$column" | cut -d':' -f1
+    }
+
+    id_col=\$(find_column_number "$tabular" "$id" || true)
+    symbol_col=\$(find_column_number "$tabular" "$symbol" || true)
+
+    # Fail loudly: with an empty column index awk would silently print whole lines.
+    if [ -z "\$id_col" ] || [ -z "\$symbol_col" ]; then
+        echo "ERROR: column '$id' and/or '$symbol' not found in the header of $tabular" >&2
+        exit 1
+    fi
+
+    echo -e "Probe Set ID\\tGene Symbol\\tGene Title" > "${outfile}.tmp"
+    tail -n +2 "$tabular" | awk -F'\\t' -v OFS='\\t' -v id="\$id_col" -v symbol="\$symbol_col" '{print \$id, \$symbol, "NA"}' >> "${outfile}.tmp"
+    mv "${outfile}.tmp" "${outfile}"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bash: \$(echo \$(bash --version | grep -Eo 'version [[:alnum:].]+' | sed 's/version //'))
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/custom/tabulartogseachip/meta.yml b/modules/nf-core/custom/tabulartogseachip/meta.yml
@@ -0,0 +1,45 @@
+name: "custom_tabulartogseachip"
+description: Make a GSEA chip annotation file (.chip) from tabular inputs
+keywords:
+  - gsea
+  - chip
+  - convert
+  - tabular
+tools:
+  - custom:
+      description: "Make a GSEA chip annotation file (.chip) from tabular inputs"
+      tool_dev_url: "https://github.com/nf-core/modules/blob/master/modules/nf-core/custom/tabulartogseachip/main.nf"
+      identifier: ""
+input:
+  - - tabular:
+        type: file
+        description: |
+          Tab-separated (TSV) file with a header row, containing a column with
+          the feature ids and another column with the feature symbols.
+          NOTE: for the moment only TSV files are supported.
+        pattern: "*.tsv"
+  - - id:
+        type: string
+        description: The name of the column containing feature ids
+    - symbol:
+        type: string
+        description: The name of the column containing feature symbols
+output:
+  - chip:
+      - "*.chip":
+          type: file
+          description: |
+            A chip annotation file (.chip) as defined by the Broad
+            documentation at
+            https://software.broadinstitute.org/cancer/software/gsea/wiki/index.php/Data_formats
+          pattern: "*.chip"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@pinin4fjords"
+  - "@suzannejin"
+maintainers:
+  - "@pinin4fjords"
diff --git a/modules/nf-core/custom/tabulartogseachip/tests/main.nf.test b/modules/nf-core/custom/tabulartogseachip/tests/main.nf.test
@@ -0,0 +1,60 @@
+// TODO nf-core: Once you have added the required tests, please run the following command to build this file:
+// nf-core modules test custom/tabulartogseachip
+nextflow_process {
+
+    name "Test Process TABULAR_TO_GSEA_CHIP"
+    script "../main.nf"
+    process "TABULAR_TO_GSEA_CHIP"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "custom"
+    tag "custom/tabulartogseachip"
+
+    test("mus_musculus - gene_meta - tsv") {
+
+        // NOTE(review): confirm that this test-data file exists and that its header
+        // contains the 'gene_id' and 'gene_name' columns before generating the snapshot.
+        when {
+            process {
+                """
+                input[0] = file(params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/SRP254919.gene_meta.tsv', checkIfExists: true)
+                input[1] = [ 'gene_id', 'gene_name' ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    // Same inputs as above, run with -stub. Inputs mirror the process signature:
+    // input[0] is the tabular path, input[1] is the [id, symbol] column-name tuple.
+    test("mus_musculus - gene_meta - tsv - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = file(params.modules_testdata_base_path + 'genomics/mus_musculus/rnaseq_expression/SRP254919.gene_meta.tsv', checkIfExists: true)
+                input[1] = [ 'gene_id', 'gene_name' ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}