From d90b85a614ee44b681337e05536e8b9918ef51b5 Mon Sep 17 00:00:00 2001 From: Jessica Way Date: Wed, 17 Feb 2021 14:08:19 -0500 Subject: [PATCH 1/3] fix base file name for new cram to unmapped bam input (#241) --- .../test_inputs/Scientific/G96830.NA12878.WGS.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/broad/reprocessing/cram_to_unmapped_bams/test_inputs/Scientific/G96830.NA12878.WGS.json b/pipelines/broad/reprocessing/cram_to_unmapped_bams/test_inputs/Scientific/G96830.NA12878.WGS.json index f1ddd84449..0e05934c17 100644 --- a/pipelines/broad/reprocessing/cram_to_unmapped_bams/test_inputs/Scientific/G96830.NA12878.WGS.json +++ b/pipelines/broad/reprocessing/cram_to_unmapped_bams/test_inputs/Scientific/G96830.NA12878.WGS.json @@ -2,7 +2,7 @@ "CramToUnmappedBams.input_cram": "gs://broad-gotc-test-storage/germline_single_sample/wgs/scientific/truth/{TRUTH_BRANCH}/G96830.NA12878/NA12878.cram", "CramToUnmappedBams.output_map": "gs://broad-gotc-test-storage/germline_single_sample/wgs/scientific/bams/G96830.NA12878/readgroupid_to_bamfilename_map.txt", - "CramToUnmappedBams.base_file_name": "G96830.NA12878", + "CramToUnmappedBams.base_file_name": "G96830.NA12878.WGS", "CramToUnmappedBams.unmapped_bam_suffix": ".unmapped.bam", "CramToUnmappedBams.ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", "CramToUnmappedBams.ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai" From ace01df38bb1cbdb6c223b3aece6baf96b922221 Mon Sep 17 00:00:00 2001 From: George Grant Date: Thu, 18 Feb 2021 20:59:33 -0500 Subject: [PATCH 2/3] =?UTF-8?q?No=20longer=20have=20ExternalReprocessingTe?= =?UTF-8?q?ster=20override=20testOptions=20(it=20wa=E2=80=A6=20(#243)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Have the ExternalReprocessingTester always set the google_project to broad-exomes-dev1 explicitly. 
--- ...ernalExomeReprocessing.plumbing.input.json | 66 ------------------- .../tester/ExternalReprocessingTester.scala | 6 +- 2 files changed, 5 insertions(+), 67 deletions(-) delete mode 100644 pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.plumbing.input.json diff --git a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.plumbing.input.json b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.plumbing.input.json deleted file mode 100644 index bed2a00be4..0000000000 --- a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.plumbing.input.json +++ /dev/null @@ -1,66 +0,0 @@ -{ - "ExternalExomeReprocessing.input_cram": "gs://broad-gotc-test-storage/exome/plumbing/truth/master/NA12878_PLUMBING.cram", - - "ExternalExomeReprocessing.sample_name": "NA12878 PLUMBING", - "ExternalExomeReprocessing.base_file_name": "NA12878_PLUMBING", - "ExternalExomeReprocessing.final_gvcf_base_name": "NA12878_PLUMBING", - "ExternalExomeReprocessing.unmapped_bam_suffix": ".unmapped.bam", - - "ExternalExomeReprocessing.cram_ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", - "ExternalExomeReprocessing.cram_ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - - "ExternalExomeReprocessing.references": { - "haplotype_database_file": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.haplotype_database.txt", - "contamination_sites_ud": "gs://gcp-public-data--broad-references/hg38/v0/contamination-resources/1000g/1000g.phase3.100k.b38.vcf.gz.dat.UD", - "contamination_sites_bed": "gs://gcp-public-data--broad-references/hg38/v0/contamination-resources/1000g/1000g.phase3.100k.b38.vcf.gz.dat.bed", - "contamination_sites_mu": "gs://gcp-public-data--broad-references/hg38/v0/contamination-resources/1000g/1000g.phase3.100k.b38.vcf.gz.dat.mu", - "calling_interval_list": 
"gs://gcp-public-data--broad-references/hg38/v0/exome_calling_regions.v1.interval_list", - "reference_fasta": { - "ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", - "ref_fasta": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", - "ref_fasta_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", - "ref_alt": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.alt", - "ref_sa": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.sa", - "ref_amb": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.amb", - "ref_bwt": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.bwt", - "ref_ann": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.ann", - "ref_pac": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.pac" - }, - "known_indels_sites_vcfs": [ - "gs://gcp-public-data--broad-references/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz", - "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.known_indels.vcf.gz" - ], - "known_indels_sites_indices": [ - "gs://gcp-public-data--broad-references/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi", - "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.known_indels.vcf.gz.tbi" - ], - "dbsnp_vcf": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dbsnp138.vcf", - "dbsnp_vcf_index": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dbsnp138.vcf.idx", - "evaluation_interval_list": "gs://gcp-public-data--broad-references/hg38/v0/exome_evaluation_regions.v1.interval_list" - }, - - "ExternalExomeReprocessing.target_interval_list": 
"gs://broad-references-private/HybSelOligos/whole_exome_illumina_coding_v1/whole_exome_illumina_coding_v1.Homo_sapiens_assembly38.targets.interval_list", - "ExternalExomeReprocessing.bait_interval_list": "gs://broad-references-private/HybSelOligos/whole_exome_illumina_coding_v1/whole_exome_illumina_coding_v1.Homo_sapiens_assembly38.baits.interval_list", - "ExternalExomeReprocessing.bait_set_name": "whole_exome_illumina_coding_v1", - - "ExternalExomeReprocessing.papi_settings": { - "preemptible_tries": 3, - "agg_preemptible_tries": 3 - }, - - "ExternalExomeReprocessing.scatter_settings": { - "haplotype_scatter_count": 50, - "break_bands_at_multiples_of": 0 - }, - - "ExternalExomeReprocessing.ExomeReprocessing.ExomeGermlineSingleSample.UnmappedBamToAlignedBam.ApplyBQSR.gatk_docker": "us.gcr.io/broad-gatk/gatk:4.1.8.0", - "ExternalExomeReprocessing.ExomeReprocessing.ExomeGermlineSingleSample.UnmappedBamToAlignedBam.BaseRecalibrator.gatk_docker": "us.gcr.io/broad-gatk/gatk:4.1.8.0", - "ExternalExomeReprocessing.ExomeReprocessing.ExomeGermlineSingleSample.UnmappedBamToAlignedBam.GatherBqsrReports.gatk_docker": "us.gcr.io/broad-gatk/gatk:4.1.8.0", - "ExternalExomeReprocessing.ExomeReprocessing.ExomeGermlineSingleSample.UnmappedBamToAlignedBam.CheckContamination.disable_sanity_check": true, - "ExternalExomeReprocessing.ExomeReprocessing.ExomeGermlineSingleSample.AggregatedBamQC.CollectReadgroupBamQualityMetrics.collect_gc_bias_metrics": false, - "ExternalExomeReprocessing.ExomeReprocessing.ExomeGermlineSingleSample.AggregatedBamQC.CollectAggregationMetrics.collect_gc_bias_metrics": false, - - "ExternalExomeReprocessing.destination_cloud_path": "{DESTINATION_CLOUD_PATH}", - "ExternalExomeReprocessing.vault_token_path": "{VAULT_TOKEN_PATH}", - "ExternalExomeReprocessing.google_account_vault_path": "secret/dsde/gotc/prod/picard/picard-account.pem" -} diff --git a/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/ExternalReprocessingTester.scala 
b/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/ExternalReprocessingTester.scala index 33c290d0c1..e54c82315d 100644 --- a/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/ExternalReprocessingTester.scala +++ b/tests/broad/scala_test/src/main/scala/org/broadinstitute/dsp/pipelines/tester/ExternalReprocessingTester.scala @@ -60,13 +60,17 @@ class ExternalReprocessingTester(testerConfig: GermlineCloudWorkflowConfig)( ) } + // Note - we are explicitly setting the google_project here so that when running in a non-dev environment, + // the workflow can still access the test data AND can then read from the vault. override def readTestOptions( releaseDir: File, environment: CromwellEnvironment ): String = { val defaultOptions = Array( "read_from_cache" -> testerConfig.useCallCaching.asJson, - "backend" -> testerConfig.papiVersion.entryName.asJson + "backend" -> testerConfig.papiVersion.entryName.asJson, + "monitoring_script" -> "gs://broad-gotc-test-storage/cromwell_monitoring_script.sh".asJson, + "google_project" -> "broad-exomes-dev1".asJson ) val optionsJson = defaultOptions ++ environment.environmentOptions From c1f20c46ad61aa3b87ce9d88fd9facc871ae6d4b Mon Sep 17 00:00:00 2001 From: Jessica Way Date: Mon, 22 Feb 2021 11:09:04 -0500 Subject: [PATCH 3/3] Add sorting_collection_size_ratio as an optional task input for mark duplicates (#244) * add an optional task input for mark duplicates * update versions and changelogs --- .../exome/ExomeGermlineSingleSample.changelog.md | 5 +++++ .../single_sample/exome/ExomeGermlineSingleSample.wdl | 2 +- .../wgs/WholeGenomeGermlineSingleSample.changelog.md | 5 +++++ .../single_sample/wgs/WholeGenomeGermlineSingleSample.wdl | 2 +- .../broad/reprocessing/exome/ExomeReprocessing.changelog.md | 5 +++++ pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl | 2 +- .../external/exome/ExternalExomeReprocessing.changelog.md | 5 +++++ 
.../external/exome/ExternalExomeReprocessing.wdl | 2 +- .../wgs/ExternalWholeGenomeReprocessing.changelog.md | 5 +++++ .../external/wgs/ExternalWholeGenomeReprocessing.wdl | 2 +- .../reprocessing/wgs/WholeGenomeReprocessing.changelog.md | 5 +++++ pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl | 2 +- tasks/broad/BamProcessing.wdl | 3 +++ 13 files changed, 39 insertions(+), 6 deletions(-) diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md index d5ae98bb54..775be7d37b 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md +++ b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md @@ -1,3 +1,8 @@ +# 2.4.2 +2021-02-22 + +* Added SORTING_COLLECTION_SIZE_RATIO as an optional task input to MarkDuplicates + # 2.4.1 2020-12-21 diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl index aa761ca07d..eb72b716cc 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl +++ b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl @@ -40,7 +40,7 @@ import "../../../../../../structs/dna_seq/DNASeqStructs.wdl" workflow ExomeGermlineSingleSample { - String pipeline_version = "2.4.1" + String pipeline_version = "2.4.2" input { PapiSettings papi_settings diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md index e079febc3e..1a4567fa38 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md +++ 
b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md @@ -1,3 +1,8 @@ +# 2.3.2 +2021-02-22 + +* Added SORTING_COLLECTION_SIZE_RATIO as an optional task input to MarkDuplicates + # 2.3.1 2020-12-21 diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl index 9874cc826b..870a3c1831 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl +++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl @@ -38,7 +38,7 @@ import "../../../../../../structs/dna_seq/DNASeqStructs.wdl" # WORKFLOW DEFINITION workflow WholeGenomeGermlineSingleSample { - String pipeline_version = "2.3.1" + String pipeline_version = "2.3.2" input { SampleAndUnmappedBams sample_and_unmapped_bams diff --git a/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md b/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md index c5f300e45a..4ba8c9834f 100644 --- a/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md +++ b/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md @@ -1,3 +1,8 @@ +# 2.4.4 +2021-02-22 + +* Added SORTING_COLLECTION_SIZE_RATIO as an optional task input to MarkDuplicates + # 2.4.3 2021-02-08 diff --git a/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl b/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl index 5537407486..a37bdbe81a 100644 --- a/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl +++ b/pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl @@ -6,7 +6,7 @@ import "../../../../structs/dna_seq/DNASeqStructs.wdl" workflow ExomeReprocessing { - String pipeline_version = "2.4.3" + String pipeline_version = "2.4.4" input { File? 
input_cram diff --git a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md index 1858688b89..f5087e4afa 100644 --- a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md +++ b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md @@ -1,3 +1,8 @@ +# 2.4.4 +2021-02-22 + +* Added SORTING_COLLECTION_SIZE_RATIO as an optional task input to MarkDuplicates + # 2.4.3 2021-02-08 diff --git a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl index d431611fe7..a9d864f6ee 100644 --- a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl +++ b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.wdl @@ -5,7 +5,7 @@ import "../../../../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy workflow ExternalExomeReprocessing { - String pipeline_version = "2.4.3" + String pipeline_version = "2.4.4" input { File? 
input_cram diff --git a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md index 7504769966..288046a2d4 100644 --- a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md +++ b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md @@ -1,3 +1,8 @@ +# 1.3.4 +2021-02-22 + +* Added SORTING_COLLECTION_SIZE_RATIO as an optional task input to MarkDuplicates + # 1.3.3 2021-02-08 diff --git a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl index 0e64c62ca2..c9e5283fec 100644 --- a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl +++ b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.wdl @@ -5,7 +5,7 @@ import "../../../../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy workflow ExternalWholeGenomeReprocessing { - String pipeline_version = "1.3.3" + String pipeline_version = "1.3.4" input { File? 
input_cram diff --git a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md index 826e7fc748..f05bf47c42 100644 --- a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md +++ b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md @@ -1,3 +1,8 @@ +# 2.3.4 +2021-02-22 + +* Added SORTING_COLLECTION_SIZE_RATIO as an optional task input to MarkDuplicates + # 2.3.3 2021-02-08 diff --git a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl index 365575bbef..1ee7c287ed 100644 --- a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl +++ b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl @@ -6,7 +6,7 @@ import "../../../../structs/dna_seq/DNASeqStructs.wdl" workflow WholeGenomeReprocessing { - String pipeline_version = "2.3.3" + String pipeline_version = "2.3.4" input { File? input_cram diff --git a/tasks/broad/BamProcessing.wdl b/tasks/broad/BamProcessing.wdl index e2113ee772..75b2d85ac7 100644 --- a/tasks/broad/BamProcessing.wdl +++ b/tasks/broad/BamProcessing.wdl @@ -108,6 +108,8 @@ task MarkDuplicates { String? read_name_regex Int memory_multiplier = 1 Int additional_disk = 20 + + Float? sorting_collection_size_ratio } # The merged bam will be smaller than the sum of the parts so we need to account for the unmerged inputs and the merged output. @@ -130,6 +132,7 @@ task MarkDuplicates { METRICS_FILE=~{metrics_filename} \ VALIDATION_STRINGENCY=SILENT \ ~{"READ_NAME_REGEX=" + read_name_regex} \ + ~{"SORTING_COLLECTION_SIZE_RATIO=" + sorting_collection_size_ratio} \ OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500 \ ASSUME_SORT_ORDER="queryname" \ CLEAR_DT="false" \