Skip to content

Commit

Permalink
added downsampled chr22 RNA workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
lhhunghimself committed May 3, 2023
1 parent b7322f7 commit d32ac7f
Show file tree
Hide file tree
Showing 2,471 changed files with 790,686 additions and 0 deletions.
57 changes: 57 additions & 0 deletions RNA/GDC_mRNA_multi_dr32chr22/GDC_mRNA_multi_dr32chr22.ows

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Small Alpine-based image that bundles a Perl interpreter with the fpkm.pl script.
FROM alpine:3.12.1
MAINTAINER Hong Hung
# Install perl and remove the apk cache files in the same layer so they are not kept in the image.
RUN apk add --update perl && rm -rf /var/cache/apk/*
# Copy the script to /usr/local/bin (presumably on PATH so it can be run as "fpkm.pl" - confirm).
# NOTE(review): COPY keeps the source file's mode; the script must already be executable in the build context.
COPY fpkm.pl /usr/local/bin/fpkm.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#!/usr/bin/perl
#Ling-Hong Hung 2020
#
# fpkm.pl - write counts, FPKM and FPKM-UQ tables for a tab-separated counts file.
#
# Usage: fpkm.pl <gene info file> <counts file>
#
# Inputs
#   gene info file  Tab-separated with one header line. Column 1 is the gene
#                   id, column 7 the gene type and column 11 the length used
#                   for normalisation (presumably the GDC gencode.gene.info
#                   layout - confirm against the file that is supplied).
#   counts file     Tab-separated. Column 1 is the gene id, every following
#                   column is one set of counts.
#   STARFLAG        Environment variable. When true the counts file is read
#                   as STAR output: four leading information lines, no header
#                   and three count columns per gene.
#
# Outputs (named after the counts file, with a three character extension
# such as ".tsv" removed):
#   <base>_counts.tsv  counts of the genes found in the gene info file
#   <base>_fpkm.tsv    count * 1e9 / (gene length * sum of protein coding counts)
#   <base>_fpkmUQ.tsv  count * 1e9 / (gene length * upper quartile count)
# A value is written as "NA" whenever its denominator is zero or unavailable.
use strict;
use warnings;

# Zero-based columns of the gene info table.
my $GENE_ID_COL   = 0;
my $GENE_TYPE_COL = 6;
my $GENE_LEN_COL  = 10;
#For STAR counts - the first four lines are information lines and there is no header
#The columns are geneID unstrandedCounts 1stReadCounts 2ndReadCounts
my $STAR_INFO_LINES = 4;
my $STAR_COUNT_COLS = 3;
# Scaling constant of the FPKM formula.
my $SCALE = 1000000000;

my ($gencodeFile,$sampleFile)=@ARGV;
die "Usage: $0 <gene info file> <counts file>\n"
    unless defined $gencodeFile && defined $sampleFile;
my $STARFLAG = $ENV{'STARFLAG'};

# Output files share the counts file name minus a ".xxx" style extension.
my $basename=$sampleFile;
if (length($sampleFile) >= 4 && substr($sampleFile,-4,1) eq "."){
    $basename=substr($sampleFile,0,-4);
}

# ---- gene lengths ----------------------------------------------------------
print STDERR "Opening gene code summary $gencodeFile\n";
open (my $geneFh, '<', $gencodeFile) or die "can't open $gencodeFile: $!";
my $geneHeader=<$geneFh>;    # column titles are not needed
my %pcLength;                # gene id -> length, protein coding genes only
my %geneLength;              # gene id -> length, every gene
while (defined(my $line=<$geneFh>)){
    chomp($line);
    my @parts=split(/\t/,$line);
    my $name=$parts[$GENE_ID_COL];
    next unless defined $name;    # blank line
    my $len=$parts[$GENE_LEN_COL];
    $geneLength{$name}=$len;
    if (defined $parts[$GENE_TYPE_COL] && $parts[$GENE_TYPE_COL] eq "protein_coding"){
        $pcLength{$name}=$len;
    }
}
close ($geneFh);

# ---- counts ----------------------------------------------------------------
open (my $sampleFh, '<', $sampleFile) or die "can't open $sampleFile: $!";
my $header=<$sampleFh>;
$header='' unless defined $header;    # empty counts file
my $nfields;
if ($STARFLAG){
    # The first information line has just been read; discard the remaining
    # ones. STAR output has no header, so nothing is copied to the outputs.
    foreach my $skip (2..$STAR_INFO_LINES){
        my $infoLine=<$sampleFh>;
    }
    $header='';
    $nfields=$STAR_COUNT_COLS;
}
else{
    # Number of count columns = number of header fields after the gene id.
    my @parts=split(/\t/,$header);
    $nfields=$#parts;
}

my @names;                                # gene ids kept, in file order
my @counts=map { [] } (1..$nfields);      # $counts[column][gene index]
my @pcSums=(0) x $nfields;                # protein coding total per column

while (defined(my $line=<$sampleFh>)){
    chomp($line);
    my @parts=split(/\t/,$line);
    next if $#parts != $nfields;          # wrong number of columns
    my $name=$parts[0];
    next unless $geneLength{$name};       # unknown gene or zero length
    push(@names,$name);
    foreach my $i (1..$nfields){
        push(@{$counts[$i-1]},$parts[$i]);
        $pcSums[$i-1]+=$parts[$i] if $pcLength{$name};
    }
}
close ($sampleFh);

# ---- upper quartile of each count column ------------------------------------
my @uq75s;
foreach my $i (0..$nfields-1){
    my @sorted=sort{$a <=> $b} @{$counts[$i]};
    my $uq75index=int(($#sorted+1)*.75);
    $uq75s[$i]=$sorted[$uq75index];       # undef when no gene was kept
}

# ---- output -----------------------------------------------------------------
my $fpkmPath="${basename}_fpkm.tsv";
my $fpkmUQPath="${basename}_fpkmUQ.tsv";
my $countsPath="${basename}_counts.tsv";
open (my $fpkmFh, '>', $fpkmPath) or die "can't write $fpkmPath: $!";
open (my $fpkmUQFh, '>', $fpkmUQPath) or die "can't write $fpkmUQPath: $!";
open (my $countsFh, '>', $countsPath) or die "can't write $countsPath: $!";
print $countsFh $header;
print $fpkmFh $header;
print $fpkmUQFh $header;
foreach my $i (0..$#names){
    my $name=$names[$i];
    my $length=$geneLength{$name};
    print $countsFh $name;
    print $fpkmFh $name;
    print $fpkmUQFh $name;
    foreach my $j (0..$nfields-1){
        my $count=$counts[$j][$i];
        # Guard both denominators so that an all-zero column yields NA
        # instead of aborting with a division by zero.
        my $pcDenom=$length*$pcSums[$j];
        my $uqDenom=defined $uq75s[$j] ? $length*$uq75s[$j] : 0;
        my $fpkm=$pcDenom ? $count*$SCALE/$pcDenom : "NA";
        my $fpkmUQ=$uqDenom ? $count*$SCALE/$uqDenom : "NA";
        print $countsFh "\t$count";
        print $fpkmFh "\t$fpkm";
        print $fpkmUQFh "\t$fpkmUQ";
    }
    print $countsFh "\n";
    print $fpkmFh "\n";
    print $fpkmUQFh "\n";
}
# Buffered write errors only surface when the handle is closed.
close ($fpkmFh) or die "error writing $fpkmPath: $!";
close ($fpkmUQFh) or die "error writing $fpkmUQPath: $!";
close ($countsFh) or die "error writing $countsPath: $!";
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"autoMap": null, "buildCommand": null, "command": null, "description": null, "docker_image_name": null, "docker_image_tag": null, "icon": null, "inputs": null, "name": "GDC_mRNA_multi_dr32_fpkm_fpkm", "outputs": null, "parameters": null, "priority": null, "volumes": null, "runDockerMap": null}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"name": "GDC_mRNA_multi_dr32_fpkm_fpkm", "description": "Calculates fpkm and fpkm-UC using gdc formula", "docker_image_name": "biodepot/fpkm", "docker_image_tag": "latest", "priority": "10", "icon": null, "inputs": {"py/reduce": [{"py/type": "collections.OrderedDict"}, {"py/tuple": []}, null, null, {"py/tuple": [{"py/tuple": ["geneinfofile", {"callback": null, "type": {"py/type": "builtins.str"}}]}, {"py/tuple": ["countsfile", {"callback": null, "type": {"py/type": "builtins.str"}}]}, {"py/tuple": ["countsfilecreated", {"callback": null, "type": {"py/type": "builtins.str"}}]}]}]}, "outputs": {"py/reduce": [{"py/type": "collections.OrderedDict"}, {"py/tuple": []}, null, null, {"py/tuple": [{"py/tuple": ["countsfile", {"default": null, "type": {"py/type": "builtins.str"}}]}]}]}, "volumes": null, "ports": null, "parameters": {"py/reduce": [{"py/type": "collections.OrderedDict"}, {"py/tuple": []}, null, null, {"py/tuple": [{"py/tuple": ["geneinfofile", {"default": "/data/gencode.gene.info.v22.tsv", "flag": null, "argument": true, "label": "Gene info file", "type": "file"}]}, {"py/tuple": ["countsfile", {"flag": null, "argument": true, "label": "Counts file", "type": "file"}]}, {"py/tuple": ["starformat", {"flag": null, "label": "STAR Count format", "type": "bool", "env": "STARFLAG"}]}]}]}, "command": ["fpkm.pl "], "autoMap": true, "runDockerMap": null, "persistentSettings": "all", "requiredParameters": ["geneinfofile", "countsfile"]}
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import os
import glob
import sys
import functools
import jsonpickle
from collections import OrderedDict
from Orange.widgets import widget, gui, settings
import Orange.data
from Orange.data.io import FileFormat
from DockerClient import DockerClient
from BwBase import OWBwBWidget, ConnectionDict, BwbGuiElements, getIconName, getJsonName
from PyQt5 import QtWidgets, QtGui

class OWGDC_mRNA_multi_dr32_fpkm_fpkm(OWBwBWidget):
    """Widget that runs the biodepot/fpkm container on a gene info file and a counts file."""

    # Widget metadata.
    name = "GDC_mRNA_multi_dr32_fpkm_fpkm"
    description = "Calculates fpkm and fpkm-UC using gdc formula"
    priority = 10
    icon = getIconName(__file__, "normalize_pl.png")
    want_main_area = False

    # Container image handed to the base class constructor.
    docker_image_name = "biodepot/fpkm"
    docker_image_tag = "latest"

    # Signals: (name, type, handler method name) for inputs, (name, type) for outputs.
    inputs = [
        ("geneinfofile", str, "handleInputsgeneinfofile"),
        ("countsfile", str, "handleInputscountsfile"),
        ("countsfilecreated", str, "handleInputscountsfilecreated"),
    ]
    outputs = [("countsfile", str)]

    # Every setting below is created with schema_only=True.
    pset = functools.partial(settings.Setting, schema_only=True)
    runMode = pset(0)
    exportGraphics = pset(False)
    runTriggers = pset([])
    triggerReady = pset({})
    inputConnectionsStore = pset({})
    optionsChecked = pset({})
    geneinfofile = pset("/data/gencode.gene.info.v22.tsv")
    countsfile = pset(None)
    starformat = pset(False)

    def __init__(self):
        """Initialise the base widget, load the JSON definition and draw the GUI."""
        super().__init__(self.docker_image_name, self.docker_image_tag)
        json_path = getJsonName(__file__, "GDC_mRNA_multi_dr32_fpkm_fpkm")
        # The context manager closes the file once the definition is decoded.
        with open(json_path) as f:
            self.data = jsonpickle.decode(f.read())
        self.initVolumes()
        self.inputConnections = ConnectionDict(self.inputConnectionsStore)
        self.drawGUI()

    def handleInputsgeneinfofile(self, value, *args):
        """Forward a value received on the "geneinfofile" input to handleInputs."""
        if not args:
            # No extra positional data: use the generic "inputFile" call.
            self.handleInputs("inputFile", value, None, False)
            return
        source = args[0]
        self.handleInputs("geneinfofile", value, source[0], test=source[3])

    def handleInputscountsfile(self, value, *args):
        """Forward a value received on the "countsfile" input to handleInputs."""
        if not args:
            self.handleInputs("inputFile", value, None, False)
            return
        source = args[0]
        self.handleInputs("countsfile", value, source[0], test=source[3])

    def handleInputscountsfilecreated(self, value, *args):
        """Forward a value received on the "countsfilecreated" input to handleInputs."""
        if not args:
            self.handleInputs("inputFile", value, None, False)
            return
        source = args[0]
        self.handleInputs("countsfilecreated", value, source[0], test=source[3])

    def handleOutputs(self):
        """Send the current "countsfile" attribute (None if missing) on the "countsfile" output."""
        outputValue = getattr(self, "countsfile", None)
        self.send("countsfile", outputValue)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"addBuildDate": [true, true], "autoMap": [true, true], "buildCommand": [true, ""], "command": [true, "fpkm.pl "], "description": [[null, null], [true, "description:"], [true, "Calculates fpkm and fpkm-UC using gdc formula"]], "docker_image_name": [[null, null], [true, "docker_image_name:"], [true, "biodepot/fpkm"]], "docker_image_tag": [[null, null], [true, "docker_image_tag:"], [true, "latest"]], "icon": [[null, null], [true, "icon:"], [true, ""]], "inputs": [[[[null, null], [true, "Name"], [true, "geneinfofile"]], [[true, false], [true, "callback"], [false, ""]], [[true, 0, ["str", "dict", "Orange.data.Table"]], [true, "Type:"]]], [[[null, null], [true, "Name"], [true, "countsfile"]], [[true, false], [true, "callback"], [false, ""]], [[true, 0, ["str", "dict", "Orange.data.Table"]], [true, "Type:"]]], [[[null, null], [true, "Name"], [true, "countsfilecreated"]], [[true, false], [true, "callback"], [false, ""]], [[true, 0, ["str", "dict", "Orange.data.Table"]], [true, "Type:"]]]], "outputs": [[[[null, null], [true, "Name"], [true, "countsfile"]], [[true, false], [true, "Default value"], [false, ""]], [[true, 0, ["str", "dict", "Orange.data.Table"]], [true, "Type:"]]]], "parameters": [[[[null, null], [true, "Name"], [true, "geneinfofile"]], [[true, 1, ["str", "file", "file list", "directory", "directory list", "bool", "bool list", "text list", "int", "int list", "double", "double list", "patternQuery"]], [true, "Type:"]], [[true, false], [true, "flag"], [false, ""]], [true, true], [[true, false], [true, "env"], [false, ""]], [[true, true], [true, "label"], [true, "Gene info file"]], [[true, true], [true, "default"], [true, "/data/gencode.gene.info.v22.tsv"]], [[true, false], [true, "group"], [false, ""]], [true, false]], [[[null, null], [true, "Name"], [true, "countsfile"]], [[true, 1, ["str", "file", "file list", "directory", "directory list", "bool", "bool list", "text list", "int", "int list", "double", "double list", "patternQuery"]], [true, "Type:"]], [[true, 
false], [true, "flag"], [false, ""]], [true, true], [[true, false], [true, "env"], [false, ""]], [[true, true], [true, "label"], [true, "Counts file"]], [[true, false], [true, "default"], [false, ""]], [[true, false], [true, "group"], [false, ""]], [true, false]], [[[null, null], [true, "Name"], [true, "starformat"]], [[true, 5, ["str", "file", "file list", "directory", "directory list", "bool", "bool list", "text list", "int", "int list", "double", "double list", "patternQuery"]], [true, "Type:"]], [[true, false], [true, "flag"], [false, ""]], [true, false], [[true, true], [true, "env"], [true, "STARFLAG"]], [[true, true], [true, "label"], [true, "STAR Count format"]], [[true, false], [true, "default"], [false, ""]], [[true, false], [true, "group"], [false, ""]], [true, true]]], "priority": [[null, null], [true, "priority:"], [true, "10"]], "name": "GDC_mRNA_multi_dr32_fpkm_fpkm", "runDockerMap": [true, false]}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Image with samtools, the htslib command line tools and STAR-Fusion.
FROM debian:buster-slim
Maintainer Ling-Hong Hung
ENV DEBIAN_FRONTEND=noninteractive
# Versions of the tools downloaded below.
ENV STAR_FUSION_VERSION 1.9.0
ENV SAMTOOLS_VERSION 1.9
ENV HTSLIB_VERSION 1.9

# Single layer: install the build dependencies, compile samtools and the htslib
# bundled with it, install cpanminus plus the listed Perl modules, unpack
# STAR-Fusion into /root/STAR-Fusion, then remove the build tools and apt lists.
# The cpanminus bootstrap is fetched over https because the downloaded script is
# piped straight into perl and executed during the build.
RUN apt-get update && apt-get install -y libgomp1 libbz2-dev wget build-essential libdb-dev libz-dev libcurl4-openssl-dev liblzma-dev libncurses5-dev \
    && wget -qO- https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VERSION}/samtools-${SAMTOOLS_VERSION}.tar.bz2 | tar -xjvf - \
    && cd /samtools-${SAMTOOLS_VERSION} && ./configure && make \
    && mv /samtools-${SAMTOOLS_VERSION}/samtools /bin/ \
    && cd htslib-${HTSLIB_VERSION}/ && ./configure && make \
    && mv htsfile libhts.so* tabix bgzip /bin \
    && cd /root && rm -rf /samtools* \
    && wget -O - https://cpanmin.us | perl - --self-upgrade \
    && cpanm Data::Dumper FindBin DB_File URI::Escape Set::IntervalTree Carp::Assert JSON::XS PerlIO::gzip \
    && wget -qO- https://github.com/STAR-Fusion/STAR-Fusion/releases/download/STAR-Fusion-v${STAR_FUSION_VERSION}/STAR-Fusion.v${STAR_FUSION_VERSION}.FULL.tar.gz \
    | tar -xzvf - \
    && mv STAR-Fusion.v${STAR_FUSION_VERSION} STAR-Fusion \
    && apt-get remove -y wget build-essential \
    && apt-get clean -y \
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/*
# Create an empty executable file named STAR.
# NOTE(review): presumably a placeholder because STAR itself is not installed in this image - confirm.
RUN touch /usr/local/bin/STAR
RUN chmod +x /usr/local/bin/STAR
# Comment out the call to check_compatible_STAR in the STAR-Fusion script.
# In the replacement "&" stands for the matched text, so the line becomes "#&check_compatible_STAR...".
RUN sed -i 's/&check_compatible_STAR/#&/' /root/STAR-Fusion/STAR-Fusion
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"autoMap": null, "buildCommand": null, "command": null, "description": null, "docker_image_name": null, "docker_image_tag": null, "icon": null, "inputs": null, "name": "StarFusion", "outputs": null, "parameters": null, "priority": null, "volumes": null, "runDockerMap": null}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"name": "StarFusion", "description": "Detect gene fusions from STAR junctions", "docker_image_name": "biodepot/star-fusion", "docker_image_tag": "1.9.0", "priority": "10", "icon": null, "inputs": {"py/reduce": [{"py/type": "collections.OrderedDict"}, {"py/tuple": []}, null, null, {"py/tuple": [{"py/tuple": ["genomelibdir", {"callback": null, "type": {"py/type": "builtins.str"}}]}, {"py/tuple": ["chimeric", {"callback": null, "type": {"py/type": "builtins.str"}}]}, {"py/tuple": ["Trigger", {"callback": null, "type": {"py/type": "builtins.str"}}]}]}]}, "outputs": {"py/reduce": [{"py/type": "collections.OrderedDict"}, {"py/tuple": []}, null, null, {"py/tuple": [{"py/tuple": ["outputdir", {"default": null, "type": {"py/type": "builtins.str"}}]}]}]}, "volumes": null, "ports": null, "parameters": {"py/reduce": [{"py/type": "collections.OrderedDict"}, {"py/tuple": []}, null, null, {"py/tuple": [{"py/tuple": ["chimeric", {"flag": "-J", "label": "Chimeric.out.junction file", "type": "file"}]}, {"py/tuple": ["genomelibdir", {"flag": "--genome_lib_dir", "label": "Genome lib directory", "type": "directory"}]}, {"py/tuple": ["cpu", {"flag": "--CPU", "label": "Number of threads", "type": "int"}]}, {"py/tuple": ["outputdir", {"default": "STAR-Fusion_outdir", "flag": "-O ", "label": "Output directory", "type": "directory"}]}, {"py/tuple": ["fullusage", {"flag": "--show_full_usage_info", "label": "Provide full usage info", "type": "bool"}]}]}]}, "command": ["/root/STAR-Fusion/STAR-Fusion "], "autoMap": true, "runDockerMap": null, "persistentSettings": "all", "requiredParameters": ["chimeric", "genomelibdir"]}
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import os
import glob
import sys
import functools
import jsonpickle
from collections import OrderedDict
from Orange.widgets import widget, gui, settings
import Orange.data
from Orange.data.io import FileFormat
from DockerClient import DockerClient
from BwBase import OWBwBWidget, ConnectionDict, BwbGuiElements, getIconName, getJsonName
from PyQt5 import QtWidgets, QtGui

class OWStarFusion(OWBwBWidget):
    """Widget that runs the biodepot/star-fusion container."""

    # Widget metadata.
    name = "StarFusion"
    description = "Detect gene fusions from STAR junctions"
    priority = 10
    icon = getIconName(__file__, "starfusion.png")
    want_main_area = False

    # Container image handed to the base class constructor.
    docker_image_name = "biodepot/star-fusion"
    docker_image_tag = "1.9.0"

    # Signals: (name, type, handler method name) for inputs, (name, type) for outputs.
    inputs = [
        ("genomelibdir", str, "handleInputsgenomelibdir"),
        ("chimeric", str, "handleInputschimeric"),
        ("Trigger", str, "handleInputsTrigger"),
    ]
    outputs = [("outputdir", str)]

    # Every setting below is created with schema_only=True.
    pset = functools.partial(settings.Setting, schema_only=True)
    runMode = pset(0)
    exportGraphics = pset(False)
    runTriggers = pset([])
    triggerReady = pset({})
    inputConnectionsStore = pset({})
    optionsChecked = pset({})
    chimeric = pset(None)
    genomelibdir = pset(None)
    cpu = pset(None)
    outputdir = pset("STAR-Fusion_outdir")
    fullusage = pset(False)

    def __init__(self):
        """Initialise the base widget, load the JSON definition and draw the GUI."""
        super().__init__(self.docker_image_name, self.docker_image_tag)
        json_path = getJsonName(__file__, "StarFusion")
        # The context manager closes the file once the definition is decoded.
        with open(json_path) as f:
            self.data = jsonpickle.decode(f.read())
        self.initVolumes()
        self.inputConnections = ConnectionDict(self.inputConnectionsStore)
        self.drawGUI()

    def handleInputsgenomelibdir(self, value, *args):
        """Forward a value received on the "genomelibdir" input to handleInputs."""
        if not args:
            # No extra positional data: use the generic "inputFile" call.
            self.handleInputs("inputFile", value, None, False)
            return
        source = args[0]
        self.handleInputs("genomelibdir", value, source[0], test=source[3])

    def handleInputschimeric(self, value, *args):
        """Forward a value received on the "chimeric" input to handleInputs."""
        if not args:
            self.handleInputs("inputFile", value, None, False)
            return
        source = args[0]
        self.handleInputs("chimeric", value, source[0], test=source[3])

    def handleInputsTrigger(self, value, *args):
        """Forward a value received on the "Trigger" input to handleInputs."""
        if not args:
            self.handleInputs("inputFile", value, None, False)
            return
        source = args[0]
        self.handleInputs("Trigger", value, source[0], test=source[3])

    def handleOutputs(self):
        """Send the current "outputdir" attribute (None if missing) on the "outputdir" output."""
        outputValue = getattr(self, "outputdir", None)
        self.send("outputdir", outputValue)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"addBuildDate": [true, true], "autoMap": [true, true], "buildCommand": [true, ""], "command": [true, "/root/STAR-Fusion/STAR-Fusion "], "description": [[null, null], [true, "description:"], [true, "Detect gene fusions from STAR junctions"]], "docker_image_name": [[null, null], [true, "docker_image_name:"], [true, "biodepot/star-fusion"]], "docker_image_tag": [[null, null], [true, "docker_image_tag:"], [true, "1.9.0"]], "icon": [[null, null], [true, "icon:"], [true, ""]], "inputs": [[[[null, null], [true, "Name"], [true, "genomelibdir"]], [[true, false], [true, "callback"], [false, ""]], [[true, 0, ["str", "dict", "Orange.data.Table"]], [true, "Type:"]]], [[[null, null], [true, "Name"], [true, "chimeric"]], [[true, false], [true, "callback"], [false, ""]], [[true, 0, ["str", "dict", "Orange.data.Table"]], [true, "Type:"]]], [[[null, null], [true, "Name"], [true, "Trigger"]], [[true, false], [true, "callback"], [false, ""]], [[true, 0, ["str", "dict", "Orange.data.Table"]], [true, "Type:"]]]], "outputs": [[[[null, null], [true, "Name"], [true, "outputdir"]], [[true, false], [true, "Default value"], [false, ""]], [[true, 0, ["str", "dict", "Orange.data.Table"]], [true, "Type:"]]]], "parameters": [[[[null, null], [true, "Name"], [true, "chimeric"]], [[true, 1, ["str", "file", "file list", "directory", "directory list", "bool", "bool list", "text list", "int", "int list", "double", "double list", "patternQuery"]], [true, "Type:"]], [[true, true], [true, "flag"], [true, "-J"]], [true, false], [[true, false], [true, "env"], [false, ""]], [[true, true], [true, "label"], [true, "Chimeric.out.junction file"]], [[true, false], [true, "default"], [false, ""]], [[true, false], [true, "group"], [false, ""]], [true, false]], [[[null, null], [true, "Name"], [true, "genomelibdir"]], [[true, 3, ["str", "file", "file list", "directory", "directory list", "bool", "bool list", "text list", "int", "int list", "double", "double list", "patternQuery"]], [true, "Type:"]], [[true, true], 
[true, "flag"], [true, "--genome_lib_dir"]], [true, false], [[true, false], [true, "env"], [false, ""]], [[true, true], [true, "label"], [true, "Genome lib directory"]], [[true, false], [true, "default"], [false, ""]], [[true, false], [true, "group"], [false, ""]], [true, false]], [[[null, null], [true, "Name"], [true, "cpu"]], [[true, 8, ["str", "file", "file list", "directory", "directory list", "bool", "bool list", "text list", "int", "int list", "double", "double list", "patternQuery"]], [true, "Type:"]], [[true, true], [true, "flag"], [true, "--CPU"]], [true, false], [[true, false], [true, "env"], [false, ""]], [[true, true], [true, "label"], [true, "Number of threads"]], [[true, false], [true, "default"], [false, ""]], [[true, false], [true, "group"], [false, ""]], [true, true]], [[[null, null], [true, "Name"], [true, "outputdir"]], [[true, 3, ["str", "file", "file list", "directory", "directory list", "bool", "bool list", "text list", "int", "int list", "double", "double list", "patternQuery"]], [true, "Type:"]], [[true, true], [true, "flag"], [true, "-O "]], [true, false], [[true, false], [true, "env"], [false, ""]], [[true, true], [true, "label"], [true, "Output directory"]], [[true, true], [true, "default"], [true, "STAR-Fusion_outdir"]], [[true, false], [true, "group"], [false, ""]], [true, true]], [[[null, null], [true, "Name"], [true, "fullusage"]], [[true, 5, ["str", "file", "file list", "directory", "directory list", "bool", "bool list", "text list", "int", "int list", "double", "double list", "patternQuery"]], [true, "Type:"]], [[true, true], [true, "flag"], [true, "--show_full_usage_info"]], [true, false], [[true, false], [true, "env"], [false, ""]], [[true, true], [true, "label"], [true, "Provide full usage info"]], [[true, false], [true, "default"], [false, ""]], [[true, false], [true, "group"], [false, ""]], [true, true]]], "priority": [[null, null], [true, "priority:"], [true, "10"]], "name": "StarFusion", "runDockerMap": [true, false]}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Alpine-based image that provides bash and the start.sh script.
FROM alpine:3.12.1
MAINTAINER Hong Hung
# Refresh the package index, install bash and clean the apk cache in one layer.
# NOTE(review): "upgrade" is passed as an argument to "apk update" rather than run as its own
# "apk upgrade" step - confirm whether installed packages were meant to be upgraded.
# NOTE(review): the cleanup glob is /var/cache/apk* (no "/" before "*"), so it matches the
# directory itself, unlike a /var/cache/apk/* pattern - confirm this is intended.
RUN apk update upgrade --no-cache && apk add bash && rm -rf /var/cache/apk*
# Copy the script into /usr/local/bin under its original file name.
COPY start.sh /usr/local/bin/.
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# Print the environment, then make sure the directories named by the
# work_dir and genome_dir environment variables exist.
# Exits with status 1 as soon as one of the directories cannot be created.

# Dump every environment variable to stdout.
printenv

# The variables are quoted so that paths containing spaces or glob characters
# are created as a single directory instead of being split or expanded.
mkdir -p "$work_dir" || exit 1
mkdir -p "$genome_dir" || exit 1
Loading

0 comments on commit d32ac7f

Please sign in to comment.