Commit

Merge branch 'develop' into wd_update_wreleaser
wjdingman1 authored Nov 3, 2021
2 parents 48e7991 + f7456ed commit d49b06a
Showing 5 changed files with 179 additions and 108 deletions.
13 changes: 12 additions & 1 deletion projects/optimus/CreateOptimusAdapterMetadata.wdl
@@ -26,6 +26,9 @@ workflow CreateOptimusAdapterMetadata {
Array[String] all_species
Array[String] all_project_ids
Array[String] all_project_names

# Flag which tells data importers if this run is a re-run of a previously processed project
Boolean is_update

String output_basename

@@ -169,6 +172,11 @@ workflow CreateOptimusAdapterMetadata {
pipeline_version = MergeOptimusLooms.pipeline_version_string
}

call Tasks.CreateStagingAreaFile as CreateStagingAreaFile {
input:
is_update = is_update
}

# store variable resulting from project run
Array[File] project_links = CreateProjectOptimusAdapters.links_outputs
Array[File] project_analysis_process_objects = CreateProjectOptimusAdapters.analysis_process_outputs
@@ -185,6 +193,7 @@ workflow CreateOptimusAdapterMetadata {
Array[File] reference_metadata_objects = CreateReferenceMetadata.reference_metadata_outputs
Array[File] reference_file_descriptor_objects = CreateReferenceMetadata.reference_file_descriptor_outputs
Array[File] data_objects = flatten([reference_fasta_array, project_loom_array, output_bams, output_looms])
File is_update_file = CreateStagingAreaFile.is_update_file

call Tasks.CopyToStagingBucket {
input:
@@ -196,7 +205,8 @@ workflow CreateOptimusAdapterMetadata {
analysis_protocol_objects = analysis_protocol_objects,
reference_metadata_objects = reference_metadata_objects,
reference_file_descriptor_objects = reference_file_descriptor_objects,
data_objects = data_objects
data_objects = data_objects,
is_update_file = is_update_file
}

output {
@@ -208,6 +218,7 @@ workflow CreateOptimusAdapterMetadata {
Array[File] output_reference_metadata_objects = reference_metadata_objects
Array[File] output_reference_file_descriptor_objects = reference_file_descriptor_objects
Array[File] output_data_objects = data_objects
File output_is_update_file = is_update_file
}
}

13 changes: 12 additions & 1 deletion projects/smartseq2/CreateSs2AdapterMetadata.wdl
@@ -26,6 +26,9 @@ workflow CreateSs2AdapterMetadata {
Array[String] all_project_ids
Array[String] all_project_names

# Flag which tells data importers if this run is a re-run of a previously processed project
Boolean is_update

String cromwell_url = "https://api.firecloud.org/"
String staging_area = "gs://broad-dsp-monster-hca-prod-lantern/"
String pipeline_type = "SS2"
@@ -188,6 +191,11 @@ workflow CreateSs2AdapterMetadata {
pipeline_type = pipeline_type
}

call Tasks.CreateStagingAreaFile as CreateStagingAreaFile {
input:
is_update = is_update
}

Array[File] project_links = GetLinksFileMetadata.links_outputs

########################## Copy Files to Staging Bucket ##########################
@@ -199,6 +207,7 @@ workflow CreateSs2AdapterMetadata {
Array[File] reference_metadata_objects = CreateReferenceMetadata.reference_metadata_outputs
Array[File] reference_file_descriptor_objects = CreateReferenceMetadata.reference_file_descriptor_outputs
Array[File] data_objects = flatten([reference_fasta_array, [output_loom], output_bams, output_bais])
File is_update_file = CreateStagingAreaFile.is_update_file

call Tasks.CopyToStagingBucket {
input:
@@ -210,7 +219,8 @@ workflow CreateSs2AdapterMetadata {
analysis_protocol_objects = analysis_protocol_objects,
reference_metadata_objects = reference_metadata_objects,
reference_file_descriptor_objects = reference_file_descriptor_objects,
data_objects = data_objects
data_objects = data_objects,
is_update_file = is_update_file
}

output {
@@ -222,5 +232,6 @@ workflow CreateSs2AdapterMetadata {
Array[File] output_reference_metadata_objects = reference_metadata_objects
Array[File] output_reference_file_descriptor_objects = reference_file_descriptor_objects
Array[File] output_data_objects = data_objects
File output_is_update_file = is_update_file
}
}
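
Note that the new is_update flag has no default in either workflow, so it becomes a required input. As a sketch (not part of this commit), the corresponding entry in a Cromwell inputs JSON would look as follows, using the standard WorkflowName.inputName key convention; every other required input is omitted here:

{
  "CreateOptimusAdapterMetadata.is_update": false
}
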
62 changes: 48 additions & 14 deletions projects/tasks/AdapterTasks.wdl
@@ -65,7 +65,7 @@ task GetCromwellMetadata {
Boolean include_subworkflows = false
String? include_keys
String docker = "us.gcr.io/broad-gotc-prod/pipeline-tools:latest"
String docker = "us.gcr.io/broad-gotc-prod/hca-adapter-tools:main_1635791030"
Int cpu = 1
Int memory_mb = ceil((size(output_path, "MiB")))
Int disk_size_gb = ceil((size(output_path, "GiB")))
@@ -106,9 +106,9 @@ task MergeLooms {
String project_name
String output_basename
String docker = "us.gcr.io/broad-gotc-prod/pipeline-tools:latest"
String docker = "us.gcr.io/broad-gotc-prod/hca-adapter-tools:main_1635791030"
Int cpu = 1
Int memory_mb = ceil(size(output_looms, "MiB")) * length(output_looms)
Int memory_mb = ceil(size(output_looms, "MiB")) * length(output_looms) * 5
Int disk_size_gb = ceil((size(output_looms, "GiB") * 2)) + 5
}
@@ -146,7 +146,7 @@ task GetAnalysisFileMetadata {
String? ss2_bam_file # Individual bam file to be used for intermediate analysis file for ss2 runs
String? ss2_bai_file # Individual bai file to be used for intermediate analysis file for ss2 runs
String docker = "us.gcr.io/broad-gotc-prod/pipeline-tools:latest"
String docker = "us.gcr.io/broad-gotc-prod/hca-adapter-tools:main_1635791030"
Int cpu = 1
Int memory_mb = if defined(input_file) then ceil(size(input_file, "MiB")) else 2000
Int disk_size_gb = if defined(input_file) then ceil(size(input_file, "GiB")) else 5
@@ -200,7 +200,7 @@ task GetAnalysisProcessMetadata {
Boolean project_level
Int? ss2_index
String docker = "us.gcr.io/broad-gotc-prod/pipeline-tools:latest"
String docker = "us.gcr.io/broad-gotc-prod/hca-adapter-tools:main_1635791030"
Int cpu = 1
Int memory_mb = ceil((size(input_file, "MiB"))) + 2000
Int disk_size_gb = ceil((size(input_file, "GiB"))) + 3
@@ -238,7 +238,7 @@ task GetAnalysisProtocolMetadata {
String pipeline_version
Boolean project_level
String docker = "us.gcr.io/broad-gotc-prod/pipeline-tools:latest"
String docker = "us.gcr.io/broad-gotc-prod/hca-adapter-tools:main_1635791030"
Int cpu = 1
Int memory_mb = 2000
Int disk_size_gb = 5
@@ -285,7 +285,7 @@ task GetLinksFileMetadata {
Array[String]? fastq1_array
Array[String]? fastq2_array
String docker = "us.gcr.io/broad-gotc-prod/pipeline-tools:latest"
String docker = "us.gcr.io/broad-gotc-prod/hca-adapter-tools:main_1635791030"
Int cpu = 1
Int memory_mb = ceil(size(output_file_path, "MiB"))
Int disk_size_gb = ceil(size(output_file_path, "GiB"))
@@ -368,7 +368,7 @@ task GetFileDescriptor {
String file_path_string # Must be the gs:// file path to properly hash and size
File file_path
String docker = "us.gcr.io/broad-gotc-prod/pipeline-tools:latest"
String docker = "us.gcr.io/broad-gotc-prod/hca-adapter-tools:main_1635791030"
Int cpu = 1
Int memory_mb = ceil(size(file_path, "MiB")) + 2000
Int disk_size_gb = ceil(size(file_path, "GiB")) + 5
@@ -416,7 +416,7 @@ task GetReferenceFileMetadata {
String version_timestamp
String reference_version
String docker = "us.gcr.io/broad-gotc-prod/pipeline-tools:latest"
String docker = "us.gcr.io/broad-gotc-prod/hca-adapter-tools:main_1635791030"
Int cpu = 1
Int memory_mb = 2000
Int disk_size_gb = 5
@@ -479,7 +479,7 @@ task ParseCromwellMetadata {
File cromwell_metadata
String pipeline_type
String docker = "us.gcr.io/broad-gotc-prod/pipeline-tools:latest"
String docker = "us.gcr.io/broad-gotc-prod/hca-adapter-tools:main_1635791030"
Int cpu = 1
Int memory_mb = ceil((size(cromwell_metadata, "MiB")))
Int disk_size_gb = ceil(size(cromwell_metadata, "GiB"))
@@ -510,7 +510,7 @@ task GetReferenceDetails {
File ref_fasta
String species
String docker = "us.gcr.io/broad-gotc-prod/pipeline-tools:latest"
String docker = "us.gcr.io/broad-gotc-prod/hca-adapter-tools:main_1635791030"
Int cpu = 1
Int memory_mb = ceil((size(ref_fasta, "MiB")) * 2) + 1000
Int disk_size_gb = ceil((size(ref_fasta, "GiB") * 2)) + 5
@@ -542,7 +542,7 @@ task GetProjectLevelInputIds {
input {
Array[File] intermediate_analysis_files
String docker = "us.gcr.io/broad-gotc-prod/pipeline-tools:latest"
String docker = "us.gcr.io/broad-gotc-prod/hca-adapter-tools:main_1635791030"
Int cpu = 1
Int memory_mb = ceil((size(intermediate_analysis_files, "MiB")) * 2) + 1000
Int disk_size_gb = ceil((size(intermediate_analysis_files, "GiB") * 2)) + 5
@@ -575,10 +575,11 @@ task CopyToStagingBucket {
Array[String] data_objects
Array[String] reference_metadata_objects
Array[String] reference_file_descriptor_objects
String is_update_file
String staging_bucket
String? cache_invalidate
String docker = "us.gcr.io/broad-gotc-prod/pipeline-tools:latest"
String docker = "us.gcr.io/broad-gotc-prod/hca-adapter-tools:main_1635791030"
Int cpu = 1
Int memory_mb = 20000
Int disk_size_gb = 50
@@ -609,6 +610,7 @@ task CopyToStagingBucket {
printf '%s\n' "${DATA_FILES[@]}" | jq -R . | jq -s . > $TMP_DIR/data_files.json
printf '%s\n' "${REFERENCE_METADATA[@]}" | jq -R . | jq -s . > $TMP_DIR/reference_metadata.json
printf '%s\n' "${REFERENCE_FILE_DESCRIPTORS[@]}" | jq -R . | jq -s . > $TMP_DIR/reference_file_descriptors.json
printf '%s\n' ~{is_update_file} | jq -R . | jq -s . > $TMP_DIR/is_update.json
copy-adapter-outputs \
--analysis_files_metadata_jsons "$TMP_DIR/analysis_file_metadata.json" \
@@ -619,6 +621,7 @@
--data_files "$TMP_DIR/data_files.json" \
--reference_metadata_jsons "$TMP_DIR/reference_metadata.json" \
--reference_file_descriptor_jsons "$TMP_DIR/reference_file_descriptors.json" \
--is_update_file "$TMP_DIR/is_update.json" \
--staging-bucket ~{staging_bucket}
>>>
@@ -663,7 +666,7 @@ task GetBucketCreationDate {
input {
String bucket_path
String docker = "us.gcr.io/broad-gotc-prod/pipeline-tools:latest"
String docker = "us.gcr.io/broad-gotc-prod/hca-adapter-tools:main_1635791030"
Int cpu = 1
Int memory_mb = 1000
Int disk_size_gb = 1
@@ -692,3 +695,34 @@ task GetBucketCreationDate {
disks: "local-disk ${disk_size_gb} HDD"
}
}
# See spec https://github.com/HumanCellAtlas/dcp2/blob/main/docs/dcp2_system_design.rst#311staging-area-properties
task CreateStagingAreaFile {
input {
Boolean is_update
String docker = "us.gcr.io/broad-gotc-prod/hca-adapter-tools:main_1635791030"
Int cpu = 1
Int memory_mb = 1000
Int disk_size_gb = 1
}
command
<<<
cat <<EOF > staging_area.json
{ "is_delta" : ~{is_update} }
EOF
>>>
runtime {
docker: docker
cpu: cpu
memory: "${memory_mb} MiB"
disks: "local-disk ${disk_size_gb} HDD"
}
output{
File is_update_file = "staging_area.json"
}
}
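
For reference, a minimal sketch of driving the new task on its own (not part of this commit). The wrapper workflow name, the relative import path, and the version pragma are assumptions; the task itself writes staging_area.json containing { "is_delta" : true } or { "is_delta" : false }, matching the DCP2 staging-area spec referenced above.

version 1.0

# Hypothetical driver for illustration only; assumes AdapterTasks.wdl
# (which defines CreateStagingAreaFile) sits in the same directory.
import "AdapterTasks.wdl" as Tasks

workflow StagingAreaFileSmokeTest {
  input {
    # true means this run is a re-run of a previously processed project
    Boolean is_update = false
  }

  call Tasks.CreateStagingAreaFile {
    input:
      is_update = is_update
  }

  output {
    # staging_area.json, e.g. { "is_delta" : false }
    File is_update_file = CreateStagingAreaFile.is_update_file
  }
}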