Skip to content

Commit

Permalink
Merge pull request broadinstitute#454 from broadinstitute/gg_MinorCle…
Browse files Browse the repository at this point in the history
…anup

Some cleanups / Improvements in Arrays
  • Loading branch information
gbggrant authored Oct 4, 2021
2 parents 39146f3 + 811dc3a commit 45c474d
Show file tree
Hide file tree
Showing 8 changed files with 122 additions and 49 deletions.
2 changes: 1 addition & 1 deletion pipelines/broad/arrays/single_sample/Arrays.changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# 2.4.3
2021-09-28
2021-10-01

* Enabled pipeline to look up the extended_illumina_manifest_file using an alternate method
* If the path to the file is not provided, it will look in the arrays_metadata_path for a map file that contains a mapping of chip to extended_illumina_manifest
Expand Down
44 changes: 17 additions & 27 deletions pipelines/broad/arrays/single_sample/Arrays.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,6 @@ workflow Arrays {

String pipeline_version = "2.4.3"

# This is the autocall_version, needed for the case where autocall fails (likely due to normalization errors)
# In this case it no longer emits the version in its output, so we store it here.
String autocall_version = "3.0.0"

input {
String chip_well_barcode
Int? analysis_version_number
Expand Down Expand Up @@ -122,24 +118,10 @@ workflow Arrays {

String service_account_filename = "service-account.json"

# Authorization block to be sourced by Mercury-accessing tasks.
# Sets up access to vault, key to read Mercury authentication, Mercury FP storage URL
Array[String] mercury_auth_block = [
"export VAULT_ADDR=https://clotho.broadinstitute.org:8200",
"export VAULT_TOKEN=~{read_lines(vault_token_path)[0]}",
"if [ ~{environment} == prod ]; then",
" export MERCURY_AUTH_KEY=secret/dsde/gotc/prod/wdl/secrets",
" export MERCURY_FP_STORE_URI=https://portals.broadinstitute.org/portal/mercury-ws/fingerprint",
"else",
" export MERCURY_AUTH_KEY=secret/dsde/gotc/dev/wdl/secrets",
" export MERCURY_FP_STORE_URI=https://portals.broadinstitute.org/portal-test/mercury-ws/fingerprint",
"fi"]

# Authorization block to be sourced by tasks that access cloud SQL database
# Sets up access to vault, reads authentication information from vault, sets permissions for accessing cloud sql.
Array[String] authentication_block = [
"export VAULT_ADDR=https://clotho.broadinstitute.org:8200",
"export VAULT_TOKEN=~{read_lines(vault_token_path)[0]}",
"declare -r secrets=secret/dsde/gotc/~{environment}/metrics/wdl/secrets",
"for field in user password jdbc_string",
"do",
Expand Down Expand Up @@ -304,6 +286,7 @@ workflow Arrays {
input:
chip_well_barcode = chip_well_barcode,
preemptible_tries = preemptible_tries,
vault_token_path = vault_token_path,
authentication = authentication_block,
service_account_filename = service_account_filename
}
Expand All @@ -327,8 +310,9 @@ workflow Arrays {
ref_fasta = ref_fasta,
ref_fasta_index = ref_fasta_index,
ref_dict = ref_dict,
preemptible_tries = preemptible_tries,
source_block = mercury_auth_block
environment = environment,
vault_token_path = vault_token_path,
preemptible_tries = preemptible_tries
}
}

Expand All @@ -339,6 +323,7 @@ workflow Arrays {
params_file = select_first([CreateChipWellBarcodeParamsFile.params_file, params_file]),
disk_size = disk_size,
preemptible_tries = preemptible_tries,
vault_token_path = vault_token_path,
authentication = authentication_block,
service_account_filename = service_account_filename
}
Expand All @@ -347,7 +332,6 @@ workflow Arrays {

call IlluminaGenotyping.IlluminaGenotypingArray as IlluminaGenotypingArray {
input:
autocall_version = autocall_version,
sample_alias = sample_alias,
analysis_version_number = analysis_version,
call_rate_threshold = call_rate_threshold,
Expand Down Expand Up @@ -388,18 +372,19 @@ workflow Arrays {
sample_alias = sample_alias,
chip_type = chip_type,
reported_gender = reported_gender,
autocall_version = autocall_version,
autocall_version = IlluminaGenotypingArray.autocall_version,
output_metrics_basename = sample_alias,
cluster_filename = egt_filename,
analysis_version_number = analysis_version,
preemptible_tries = preemptible_tries
}

call InternalArraysTasks.UploadArraysMetrics as UploadEmptyArraysMetrics {
call InternalArraysTasks.UploadEmptyArraysMetrics {
input:
arrays_variant_calling_detail_metrics = GenerateEmptyVariantCallingMetricsFile.detail_metrics,
disk_size = disk_size,
preemptible_tries = preemptible_tries,
vault_token_path = vault_token_path,
authentication = authentication_block,
service_account_filename = service_account_filename
}
Expand All @@ -412,6 +397,7 @@ workflow Arrays {
preemptible_tries = preemptible_tries,
reason = "DATA_QUALITY",
notes = "Normalization Failed",
vault_token_path = vault_token_path,
authentication = authentication_block,
service_account_filename = service_account_filename
}
Expand All @@ -435,8 +421,9 @@ workflow Arrays {
input:
fingerprint_json_file = VcfToMercuryFingerprintJson.output_json_file,
gtc_file = IlluminaGenotypingArray.gtc,
preemptible_tries = preemptible_tries,
source_block = mercury_auth_block
environment = environment,
vault_token_path = vault_token_path,
preemptible_tries = preemptible_tries
}
}

Expand All @@ -453,8 +440,8 @@ workflow Arrays {
call InternalArraysTasks.UploadArraysMetrics {
input:
arrays_variant_calling_detail_metrics = select_first([IlluminaGenotypingArray.arrays_variant_calling_detail_metrics]),
arrays_variant_calling_summary_metrics = IlluminaGenotypingArray.arrays_variant_calling_summary_metrics,
arrays_control_code_summary_metrics = IlluminaGenotypingArray.arrays_variant_calling_control_metrics,
arrays_variant_calling_summary_metrics = select_first([IlluminaGenotypingArray.arrays_variant_calling_summary_metrics]),
arrays_control_code_summary_metrics = select_first([IlluminaGenotypingArray.arrays_variant_calling_control_metrics]),
fingerprinting_detail_metrics = IlluminaGenotypingArray.fingerprint_detail_metrics,
fingerprinting_summary_metrics = IlluminaGenotypingArray.fingerprint_summary_metrics,
genotype_concordance_summary_metrics = IlluminaGenotypingArray.genotype_concordance_summary_metrics,
Expand All @@ -464,6 +451,7 @@ workflow Arrays {
bafregress_metrics = CreateBafRegressMetricsFile.output_metrics_file,
disk_size = disk_size,
preemptible_tries = preemptible_tries,
vault_token_path = vault_token_path,
authentication = authentication_block,
service_account_filename = service_account_filename
}
Expand All @@ -477,6 +465,7 @@ workflow Arrays {
preemptible_tries = preemptible_tries,
reason = "GENOTYPE_CONCORDANCE",
notes = "Genotype concordance below threshold: ~{genotype_concordance_threshold}",
vault_token_path = vault_token_path,
authentication = authentication_block,
service_account_filename = service_account_filename
}
Expand All @@ -492,6 +481,7 @@ workflow Arrays {
preemptible_tries = preemptible_tries,
reason = "SAMPLE_MIXUP",
notes = "Fingerprint LOD below -3.0",
vault_token_path = vault_token_path,
authentication = authentication_block,
service_account_filename = service_account_filename
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# 1.13.4
2021-09-28
2021-10-01

* Task wdls used by ValidateChip were updated with changes that don't affect ValidateChip wdl

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 1.11.6
2021-10-01

* Changed the way the version of autocall is returned for the case of arrays that fail gencall

# 1.11.5
2021-09-08

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,9 @@ import "../../../../tasks/broad/IlluminaGenotypingArrayTasks.wdl" as GenotypingT
workflow IlluminaGenotypingArray {

String pipeline_version = "1.11.5"
String pipeline_version = "1.11.6"

input {

# This is the autocall_version, needed for the case where autocall fails (likely due to normalization errors)
# In this case it no longer emits the version in its output, so we store it here.
String autocall_version = "3.0.0"
String sample_alias
Int analysis_version_number
Float call_rate_threshold
Expand Down Expand Up @@ -337,6 +333,7 @@ workflow IlluminaGenotypingArray {
output {
String chip_well_barcode_output = chip_well_barcode
Int analysis_version_number_output = analysis_version_number
String autocall_version = AutoCall.autocall_version
File gtc = AutoCall.gtc_file
File red_idat_md5_cloud_path = RedIdatMd5Sum.md5_cloud_path
File green_idat_md5_cloud_path = GreenIdatMd5Sum.md5_cloud_path
Expand Down
8 changes: 8 additions & 0 deletions tasks/broad/IlluminaGenotypingArrayTasks.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,13 @@ task AutoCall {

String gtc_filename = "~{chip_well_barcode}.gtc"

# This is the autocall_version. It is normally output by autocall (gencall) itself, except for the case
# where autocall fails (likely due to normalization errors)
# In this case it no longer emits the version in its output, so we have it here so that it can be output and
# stored in the database.
# NB - this should be returned from the docker ideally.
String autocall_ver = "3.0.0"

command <<<
set -e
rm -rf ~{chip_well_barcode}
Expand Down Expand Up @@ -550,6 +557,7 @@ task AutoCall {

output {
File gtc_file = gtc_filename
String autocall_version = autocall_ver
}
}

Expand Down
73 changes: 64 additions & 9 deletions tasks/broad/InternalArraysTasks.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ task BlacklistBarcode {
String chip_well_barcode
Int analysis_version_number
Int preemptible_tries
File vault_token_path
Array[String] authentication
String service_account_filename
String reason
Expand All @@ -56,6 +57,7 @@ task BlacklistBarcode {
command <<<
set -eo pipefail

export VAULT_TOKEN=$(cat ~{vault_token_path})
AUTH=~{write_lines(authentication)} && chmod +x $AUTH && $AUTH
export GOOGLE_APPLICATION_CREDENTIALS=/cromwell_root/~{service_account_filename}

Expand Down Expand Up @@ -164,8 +166,8 @@ task CreateBafRegressMetricsFile {
task UploadArraysMetrics {
input {
File arrays_variant_calling_detail_metrics
File? arrays_variant_calling_summary_metrics
File? arrays_control_code_summary_metrics
File arrays_variant_calling_summary_metrics
File arrays_control_code_summary_metrics
File? fingerprinting_detail_metrics
File? fingerprinting_summary_metrics
File? genotype_concordance_summary_metrics
Expand All @@ -174,6 +176,7 @@ task UploadArraysMetrics {
File? verify_id_metrics
File? bafregress_metrics

File vault_token_path
Array[String] authentication
String service_account_filename

Expand All @@ -188,13 +191,18 @@ task UploadArraysMetrics {
command <<<
set -eo pipefail

export VAULT_TOKEN=$(cat ~{vault_token_path})
AUTH=~{write_lines(authentication)} && chmod +x $AUTH && $AUTH
export GOOGLE_APPLICATION_CREDENTIALS=/cromwell_root/~{service_account_filename}

rm -rf metrics_upload_dir &&
mkdir metrics_upload_dir &&

# check that files are passed in before copying them -- [ -z FILE ] evaluates to true if FILE not there
cp ~{arrays_control_code_summary_metrics} metrics_upload_dir
cp ~{arrays_variant_calling_detail_metrics} metrics_upload_dir
cp ~{arrays_variant_calling_summary_metrics} metrics_upload_dir

# check that optional files exist before copying them -- [ -z FILE ] evaluates to true if FILE not there
! [ -z ~{genotype_concordance_summary_metrics} ] &&
cp ~{genotype_concordance_summary_metrics} metrics_upload_dir
! [ -z ~{genotype_concordance_detail_metrics} ] &&
Expand All @@ -211,12 +219,6 @@ task UploadArraysMetrics {
! [ -z ~{fingerprinting_summary_metrics} ] &&
cp ~{fingerprinting_summary_metrics} metrics_upload_dir

cp ~{arrays_variant_calling_detail_metrics} metrics_upload_dir
! [ -z ~{arrays_variant_calling_summary_metrics} ] &&
cp ~{arrays_variant_calling_summary_metrics} metrics_upload_dir

! [ -z ~{arrays_control_code_summary_metrics} ] &&
cp ~{arrays_control_code_summary_metrics} metrics_upload_dir
java -Xms2g -Dpicard.useLegacyParser=false -jar /usr/gitc/picard-private.jar \
UploadArraysMetrics \
--ANALYSIS_DIRECTORY metrics_upload_dir \
Expand All @@ -238,6 +240,55 @@ task UploadArraysMetrics {
}
}

# Uploads a single arrays variant-calling detail metrics file to the metrics database
# by staging it in a fresh directory and running the picard-private UploadArraysMetrics tool
# on that directory. Unlike the sibling UploadArraysMetrics task, only the detail metrics
# file is staged; no summary, control, fingerprinting or concordance metrics are accepted.
#
# Inputs:
#   arrays_variant_calling_detail_metrics - the one metrics file copied into the upload directory
#   vault_token_path         - file whose contents are exported as VAULT_TOKEN
#   authentication           - lines of a shell script that is written out, made executable and run
#   service_account_filename - file name (under /cromwell_root) used for GOOGLE_APPLICATION_CREDENTIALS
#   disk_size                - size used in the local-disk runtime attribute
#   preemptible_tries        - value of the preemptible runtime attribute
#
# Output:
#   upload_metrics_empty_file - empty marker file, created only if the upload command succeeds
task UploadEmptyArraysMetrics {
input {
File arrays_variant_calling_detail_metrics

File vault_token_path
Array[String] authentication
String service_account_filename

Int disk_size
Int preemptible_tries
}

# Cromwell-specific hint: a volatile task is not call-cached, so the upload is re-executed on every run.
meta {
volatile: true
}

# Command outline:
#   1. Export VAULT_TOKEN from vault_token_path, then run the authentication script as a child process
#      (presumably that script produces the cloudsql.db_*.txt files consumed below -- TODO confirm
#      against the authentication_block defined by the calling workflow).
#   2. Recreate metrics_upload_dir from scratch and copy the detail metrics file into it.
#   3. Run UploadArraysMetrics against that directory; the marker file is touched only when the
#      java command exits successfully (chained with &&).
command <<<
set -eo pipefail

export VAULT_TOKEN=$(cat ~{vault_token_path})
AUTH=~{write_lines(authentication)} && chmod +x $AUTH && $AUTH
export GOOGLE_APPLICATION_CREDENTIALS=/cromwell_root/~{service_account_filename}

rm -rf metrics_upload_dir &&
mkdir metrics_upload_dir &&

cp ~{arrays_variant_calling_detail_metrics} metrics_upload_dir

java -Xms2g -Dpicard.useLegacyParser=false -jar /usr/gitc/picard-private.jar \
UploadArraysMetrics \
--ANALYSIS_DIRECTORY metrics_upload_dir \
--DB_USERNAME_FILE cloudsql.db_user.txt \
--DB_PASSWORD_FILE cloudsql.db_password.txt \
--DB_JDBC_FILE cloudsql.db_jdbc.txt &&
touch empty_file_for_dependency
>>>

# Same docker image, disk type and memory settings as used by the sibling UploadArraysMetrics task's tooling.
# NOTE(review): the image tag is pinned here; confirm it matches the other InternalArraysTasks when bumping.
runtime {
docker: "us.gcr.io/broad-arrays-prod/arrays-picard-private:4.0.10-1631039849"
disks: "local-disk " + disk_size + " HDD"
memory: "3.5 GiB"
preemptible: preemptible_tries
}

# The empty file carries no data; it gives downstream calls something to depend on.
output {
File upload_metrics_empty_file = "empty_file_for_dependency"
}
}

task CreateChipWellBarcodeParamsFile {
input {
String chip_type_name
Expand Down Expand Up @@ -299,6 +350,7 @@ task CreateChipWellBarcodeParamsFile {
task UpdateChipWellBarcodeIndex {
input {
File params_file
File vault_token_path
Array[String] authentication
String service_account_filename
Int disk_size
Expand All @@ -312,6 +364,7 @@ task UpdateChipWellBarcodeIndex {
command <<<
set -eo pipefail

export VAULT_TOKEN=$(cat ~{vault_token_path})
AUTH=~{write_lines(authentication)} && chmod +x $AUTH && $AUTH
export GOOGLE_APPLICATION_CREDENTIALS=/cromwell_root/~{service_account_filename}
java -Xms2g -Dpicard.useLegacyParser=false -jar /usr/gitc/picard-private.jar \
Expand All @@ -335,6 +388,7 @@ task GetNextArraysQcAnalysisVersionNumber {
input {
String chip_well_barcode
Int preemptible_tries
File vault_token_path
Array[String] authentication
String service_account_filename
}
Expand All @@ -346,6 +400,7 @@ task GetNextArraysQcAnalysisVersionNumber {
command <<<
set -eo pipefail

export VAULT_TOKEN=$(cat ~{vault_token_path})
AUTH=~{write_lines(authentication)} && chmod +x $AUTH && $AUTH
export GOOGLE_APPLICATION_CREDENTIALS=/cromwell_root/~{service_account_filename}

Expand Down
Loading

0 comments on commit 45c474d

Please sign in to comment.