Skip to content

Commit

Permalink
GL-1432 Optimus will not fail when empty drops has problem running (b…
Browse files Browse the repository at this point in the history
…roadinstitute#248)

* emptyDrop creates NA for cases where it cannot find a cutoff rate

* updated LoomUtils.wdl and pipeline versions

* reverting loomUtils

* Increased boot disk size

* Increased boot disk size again 15 ->20

* running only Optimus smart tests

* indentation error

* status 17 changed to 0 for wdl code to pass

* update emptydrops docker version
  • Loading branch information
khajoue2 authored Mar 5, 2021
1 parent 3983d7e commit 6031492
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 71 deletions.
107 changes: 53 additions & 54 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,46 +6,46 @@ jobs:
steps:
- checkout
- run:
command: |
./tests/skylab/trigger_test.sh optimus
no_output_timeout: 1.5h
command: |
./tests/skylab/trigger_test.sh optimus
no_output_timeout: 1.5h


test_optimus_snrna:
machine: true
steps:
- checkout
- run:
command: |
./tests/skylab/trigger_test.sh optimus_snrna
no_output_timeout: 1.5h
command: |
./tests/skylab/trigger_test.sh optimus_snrna
no_output_timeout: 1.5h

test_optimus_v3:
machine: true
steps:
- checkout
- run:
command: |
./tests/skylab/trigger_test.sh optimus prV3
no_output_timeout: 1.5h
command: |
./tests/skylab/trigger_test.sh optimus prV3
no_output_timeout: 1.5h

test_smartseq2:
machine: true
steps:
- checkout
- run:
command: |
./tests/skylab/trigger_test.sh smartseq2_single_sample
no_output_timeout: 1.5h
command: |
./tests/skylab/trigger_test.sh smartseq2_single_sample
no_output_timeout: 1.5h

test_smartseq2_multisample:
machine: true
steps:
- checkout
- run:
command: |
command: |
./tests/skylab/trigger_test.sh smartseq2_multisample
no_output_timeout: 1.5h
no_output_timeout: 1.5h

test_smartseq2_multisample_single_end:
machine: true
Expand All @@ -61,82 +61,81 @@ jobs:
steps:
- checkout
- run:
command: |
./tests/skylab/trigger_test.sh smartseq2_single_sample pr_single_end
no_output_timeout: 1.5h
command: |
./tests/skylab/trigger_test.sh smartseq2_single_sample pr_single_end
no_output_timeout: 1.5h

test_optimus_mouse:
machine: true

steps:
- checkout
- run:
command: |
./tests/skylab/trigger_test.sh optimus_mouse
no_output_timeout: 1.5h
command: |
./tests/skylab/trigger_test.sh optimus_mouse
no_output_timeout: 1.5h

test_emptyDropsWrapper:
docker:
- image: quay.io/humancellatlas/secondary-analysis-dropletutils:0.1.0
steps:
- checkout
- run:
command: |
cd docker/emptydrops/emptyDropsWrapper/test/
./test_emptyDropsWrapper.sh
command: |
cd docker/emptydrops/emptyDropsWrapper/test/
./test_emptyDropsWrapper.sh
test_npz2rds:
docker:
- image: quay.io/humancellatlas/secondary-analysis-dropletutils:0.1.2
steps:
- checkout
- run:
command: |
cd docker/emptydrops/npz2rds/test/
./test.sh
command: |
cd docker/emptydrops/npz2rds/test/
./test.sh
test_bulk_rna:
machine: true
steps:
- checkout
- run:
command: |
./tests/skylab/trigger_test.sh bulk_rna_pipeline
no_output_timeout: 1.5h
command: |
./tests/skylab/trigger_test.sh bulk_rna_pipeline
no_output_timeout: 1.5h

test_sc_atac:
machine: true
steps:
- checkout
- run:
command: |
./tests/skylab/trigger_test.sh scATAC
no_output_timeout: 1.5h
command: |
./tests/skylab/trigger_test.sh scATAC
no_output_timeout: 1.5h

test_atac:
machine: true
steps:
- checkout
- run:
command: |
./tests/skylab/trigger_test.sh ATAC
no_output_timeout: 3.0h
machine: true
steps:
- checkout
- run:
command: |
./tests/skylab/trigger_test.sh ATAC
no_output_timeout: 3.0h

workflows:
version: 2
test_all:
jobs:
- test_optimus
- test_optimus_snrna
# - test_emptyDropsWrapper
- test_optimus_mouse
- test_smartseq2
- test_smartseq2_single_end
# - test_npz2rds
- test_sc_atac
- test_atac
# - test_bulk_rna
- test_optimus_v3
- test_smartseq2_multisample
- test_smartseq2_multisample_single_end

- test_optimus
- test_optimus_snrna
# - test_emptyDropsWrapper
- test_optimus_mouse
- test_smartseq2
- test_smartseq2_single_end
# - test_npz2rds
- test_sc_atac
- test_atac
# - test_bulk_rna
- test_optimus_v3
- test_smartseq2_multisample
- test_smartseq2_multisample_single_end
50 changes: 35 additions & 15 deletions dockers/skylab/emptydrops/emptyDropsWrapper/emptyDropsWrapper.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ library('optparse')
#' Prints a message to stderr and exits R with error code 1
#' @param msg message to standard error
errorExit <- function(msg) {
cat(msg,file=stderr());
quit(save='no',status=1);
cat(msg,file=stderr())
quit(save='no',status=1)
}

#' Prints a message with cat only if verbose is TRUE
Expand Down Expand Up @@ -93,15 +93,15 @@ option_list <- list(
)

## Parse the arguments
opt_parser <- OptionParser(option_list=option_list);
opt <- parse_args(opt_parser);
opt_parser <- OptionParser(option_list=option_list)
opt <- parse_args(opt_parser)

## Check the parsed arguments
if(is.null(opt$input_rds)) errorExit("Input RDS is not specified\n");
if(is.null(opt$output_csv)) errorExit("Output CSV is not specified\n");
if(!file.exists(opt$input_rds)) errorExit("Input RDS doesn't exist!\n");
if(file.exists(opt$output_csv)) errorExit("Output CSV file exists!\n");
if(is.null(opt$min_molecules)) errorExit("Minimum number of molecules is not specified\n");
if(is.null(opt$input_rds)) errorExit("Input RDS is not specified\n")
if(is.null(opt$output_csv)) errorExit("Output CSV is not specified\n")
if(!file.exists(opt$input_rds)) errorExit("Input RDS doesn't exist!\n")
if(file.exists(opt$output_csv)) errorExit("Output CSV file exists!\n")
if(is.null(opt$min_molecules)) errorExit("Minimum number of molecules is not specified\n")

## Load the required libraries here
## NOTE: We do this after parsing arguments so that --help returns immediately
Expand Down Expand Up @@ -146,7 +146,7 @@ if(!class(inputMatrix) %in% c( 'dgCMatrix','dgRMatrix' )) {
if(any(dim(inputMatrix) == 0)) {
## If the matrix is empty we can't run emptyDrops; write an empty table header
cat('Warning: one or more dimensions of the input matrix are empty. Generating empty result table.',file=stderr())
outputFile <- file(output_csv);
outputFile <- file(output_csv)
writeLines('"CellId","Total","LogProb","PValue","Limited","FDR","IsCell"',outputFile)
close(outputFile)
q(save="no",status=0)
Expand Down Expand Up @@ -182,7 +182,7 @@ if(is.null(colnames(inputMatrix))) {
## can be done in a single step by re-interpreting the indexes
if (opt$transpose) {
catv('Transposing input matrix...')
inputMatrix <- Matrix::t(inputMatrix);
inputMatrix <- Matrix::t(inputMatrix)
catv('done\n')
}

Expand All @@ -198,9 +198,29 @@ tryCatch({
alpha=ed_param_alpha,
BPPARAM=ed_param_BPPARAM)
},error=function(e) {
cat('Error: an error occured while running emptyDrops!\n',file=stderr())
cat('Error: ', e$message,'\n',file=stderr())
quit(save="no",status=1)
if(grepl("need at least four unique 'x' values",e$message,fixed=TRUE))
{
cat('Error: an error occured while running emptyDrops!\n',file=stderr())
cat('Error: ', e$message,'\n',file=stderr())
# Write an empty_drops_results.csv file that has only NAs
n_rows = dim(inputMatrix)[2] # Get number of rows
emptyDrops_result <- matrix(data=NA,nrow=n_rows,ncol=7)
## Convert the NA-filled matrix to a data.frame
emptyDrops_result <- as.data.frame(emptyDrops_result)
colnames(emptyDrops_result) <- c("CellId","Total", "LogProb", "PValue", "Limited", "FDR", "IsCell")
emptyDrops_result[,"CellId"] = colnames(inputMatrix)
## Write the output file
catv('Writing output CSV with NA\'s instead of emptydrops metrics ...')
write.csv(x=emptyDrops_result, file=output_csv,row.names=FALSE)
catv('done\n')
quit(save="no",status=0)
}
else
{
cat('Error: an error occured while running emptyDrops!\n',file=stderr())
cat('Error: ', e$message,'\n',file=stderr())
quit(save="no",status=1)
}
})
t1 <- Sys.time()
emptyDrop_runtime <- t1 - t0
Expand All @@ -223,6 +243,6 @@ emptyDrops_result <- emptyDrops_result[,colOrder]
catv('done\n') # Preparing output matrix

## Write the output file
catv('Writing output CSV...');
catv('Writing output CSV...')
write.csv(x=emptyDrops_result, file=output_csv,row.names=FALSE)
catv('done\n')
7 changes: 7 additions & 0 deletions pipelines/skylab/optimus/Optimus.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# 4.2.3

2021-02-23 (Date of Last Commit)

* Changed the emptydrops tool wrapper script so that it no longer fails in cases with a small number of cells; instead, it creates empty drop result files with NAs.
* Updated the docker in the RunEmptyDrops.wdl task to 0.1.4; updated emptyDropsWrapper.R in the docker.

# 4.2.2

2021-01-04 (Date of Last Commit)
Expand Down
2 changes: 1 addition & 1 deletion pipelines/skylab/optimus/Optimus.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ workflow Optimus {

# version of this pipeline
String pipeline_version = "4.2.2"
String pipeline_version = "4.2.3"

# this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays
Array[Int] indices = range(length(r1_fastq))
Expand Down
3 changes: 2 additions & 1 deletion tasks/skylab/RunEmptyDrops.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ task RunEmptyDrops {
Int emptydrops_lower = 100

# runtime values
String docker = "quay.io/humancellatlas/secondary-analysis-dropletutils:0.1.2"
String docker = "quay.io/humancellatlas/secondary-analysis-dropletutils:0.1.4"
Int machine_mem_mb = 16000
Int cpu = 1
Int disk = 20
Expand Down Expand Up @@ -47,6 +47,7 @@ task RunEmptyDrops {
disks: "local-disk ${disk} HDD"
cpu: cpu
preemptible: preemptible
bootDiskSizeGb: "20"
}

output {
Expand Down

0 comments on commit 6031492

Please sign in to comment.