Skip to content

Commit

Permalink
Some Cleanups.
Browse files Browse the repository at this point in the history
1) Don't pass down the autocall_version - rather return it from autocall (this is in there for the special case where autocall fails and doesn't return its version, but we still wanted to track it).
2) Break UploadEmptyArraysMetrics out into its own task so that there are fewer exposed task-level optional inputs.
3) Modify vault token handling. Localize token to the VM and then read it directly into an environment variable to prevent the token ever being viewable in the metadata, etc.
  • Loading branch information
gbggrant committed Oct 1, 2021
1 parent 39146f3 commit 811dc3a
Show file tree
Hide file tree
Showing 8 changed files with 122 additions and 49 deletions.
2 changes: 1 addition & 1 deletion pipelines/broad/arrays/single_sample/Arrays.changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# 2.4.3
2021-09-28
2021-10-01

* Enabled pipeline to lookup the extended_illumina_manifest_file using an alternate method
* If the path to the file is not provided, it will look in the arrays_metadata_path for a map file that contains a mapping of chip to extended_illumina_manifest
Expand Down
44 changes: 17 additions & 27 deletions pipelines/broad/arrays/single_sample/Arrays.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,6 @@ workflow Arrays {

String pipeline_version = "2.4.3"

# This is the autocall_version, needed for the case where autocall fails (likely due to normalization errors)
# In this case it no longer emits the version in its output, so we store it here.
String autocall_version = "3.0.0"

input {
String chip_well_barcode
Int? analysis_version_number
Expand Down Expand Up @@ -122,24 +118,10 @@ workflow Arrays {

String service_account_filename = "service-account.json"

# Authorization block to be sourced by Mercury-accessing tasks.
# Sets up access to vault, key to read Mercury authentication, Mercury FP storage URL
Array[String] mercury_auth_block = [
"export VAULT_ADDR=https://clotho.broadinstitute.org:8200",
"export VAULT_TOKEN=~{read_lines(vault_token_path)[0]}",
"if [ ~{environment} == prod ]; then",
" export MERCURY_AUTH_KEY=secret/dsde/gotc/prod/wdl/secrets",
" export MERCURY_FP_STORE_URI=https://portals.broadinstitute.org/portal/mercury-ws/fingerprint",
"else",
" export MERCURY_AUTH_KEY=secret/dsde/gotc/dev/wdl/secrets",
" export MERCURY_FP_STORE_URI=https://portals.broadinstitute.org/portal-test/mercury-ws/fingerprint",
"fi"]

# Authorization block to be sourced by tasks that access cloud SQL database
# Sets up access to vault, reads authentication information from vault, sets permissions for accessing cloud sql.
Array[String] authentication_block = [
"export VAULT_ADDR=https://clotho.broadinstitute.org:8200",
"export VAULT_TOKEN=~{read_lines(vault_token_path)[0]}",
"declare -r secrets=secret/dsde/gotc/~{environment}/metrics/wdl/secrets",
"for field in user password jdbc_string",
"do",
Expand Down Expand Up @@ -304,6 +286,7 @@ workflow Arrays {
input:
chip_well_barcode = chip_well_barcode,
preemptible_tries = preemptible_tries,
vault_token_path = vault_token_path,
authentication = authentication_block,
service_account_filename = service_account_filename
}
Expand All @@ -327,8 +310,9 @@ workflow Arrays {
ref_fasta = ref_fasta,
ref_fasta_index = ref_fasta_index,
ref_dict = ref_dict,
preemptible_tries = preemptible_tries,
source_block = mercury_auth_block
environment = environment,
vault_token_path = vault_token_path,
preemptible_tries = preemptible_tries
}
}

Expand All @@ -339,6 +323,7 @@ workflow Arrays {
params_file = select_first([CreateChipWellBarcodeParamsFile.params_file, params_file]),
disk_size = disk_size,
preemptible_tries = preemptible_tries,
vault_token_path = vault_token_path,
authentication = authentication_block,
service_account_filename = service_account_filename
}
Expand All @@ -347,7 +332,6 @@ workflow Arrays {

call IlluminaGenotyping.IlluminaGenotypingArray as IlluminaGenotypingArray {
input:
autocall_version = autocall_version,
sample_alias = sample_alias,
analysis_version_number = analysis_version,
call_rate_threshold = call_rate_threshold,
Expand Down Expand Up @@ -388,18 +372,19 @@ workflow Arrays {
sample_alias = sample_alias,
chip_type = chip_type,
reported_gender = reported_gender,
autocall_version = autocall_version,
autocall_version = IlluminaGenotypingArray.autocall_version,
output_metrics_basename = sample_alias,
cluster_filename = egt_filename,
analysis_version_number = analysis_version,
preemptible_tries = preemptible_tries
}

call InternalArraysTasks.UploadArraysMetrics as UploadEmptyArraysMetrics {
call InternalArraysTasks.UploadEmptyArraysMetrics {
input:
arrays_variant_calling_detail_metrics = GenerateEmptyVariantCallingMetricsFile.detail_metrics,
disk_size = disk_size,
preemptible_tries = preemptible_tries,
vault_token_path = vault_token_path,
authentication = authentication_block,
service_account_filename = service_account_filename
}
Expand All @@ -412,6 +397,7 @@ workflow Arrays {
preemptible_tries = preemptible_tries,
reason = "DATA_QUALITY",
notes = "Normalization Failed",
vault_token_path = vault_token_path,
authentication = authentication_block,
service_account_filename = service_account_filename
}
Expand All @@ -435,8 +421,9 @@ workflow Arrays {
input:
fingerprint_json_file = VcfToMercuryFingerprintJson.output_json_file,
gtc_file = IlluminaGenotypingArray.gtc,
preemptible_tries = preemptible_tries,
source_block = mercury_auth_block
environment = environment,
vault_token_path = vault_token_path,
preemptible_tries = preemptible_tries
}
}

Expand All @@ -453,8 +440,8 @@ workflow Arrays {
call InternalArraysTasks.UploadArraysMetrics {
input:
arrays_variant_calling_detail_metrics = select_first([IlluminaGenotypingArray.arrays_variant_calling_detail_metrics]),
arrays_variant_calling_summary_metrics = IlluminaGenotypingArray.arrays_variant_calling_summary_metrics,
arrays_control_code_summary_metrics = IlluminaGenotypingArray.arrays_variant_calling_control_metrics,
arrays_variant_calling_summary_metrics = select_first([IlluminaGenotypingArray.arrays_variant_calling_summary_metrics]),
arrays_control_code_summary_metrics = select_first([IlluminaGenotypingArray.arrays_variant_calling_control_metrics]),
fingerprinting_detail_metrics = IlluminaGenotypingArray.fingerprint_detail_metrics,
fingerprinting_summary_metrics = IlluminaGenotypingArray.fingerprint_summary_metrics,
genotype_concordance_summary_metrics = IlluminaGenotypingArray.genotype_concordance_summary_metrics,
Expand All @@ -464,6 +451,7 @@ workflow Arrays {
bafregress_metrics = CreateBafRegressMetricsFile.output_metrics_file,
disk_size = disk_size,
preemptible_tries = preemptible_tries,
vault_token_path = vault_token_path,
authentication = authentication_block,
service_account_filename = service_account_filename
}
Expand All @@ -477,6 +465,7 @@ workflow Arrays {
preemptible_tries = preemptible_tries,
reason = "GENOTYPE_CONCORDANCE",
notes = "Genotype concordance below threshold: ~{genotype_concordance_threshold}",
vault_token_path = vault_token_path,
authentication = authentication_block,
service_account_filename = service_account_filename
}
Expand All @@ -492,6 +481,7 @@ workflow Arrays {
preemptible_tries = preemptible_tries,
reason = "SAMPLE_MIXUP",
notes = "Fingerprint LOD below -3.0",
vault_token_path = vault_token_path,
authentication = authentication_block,
service_account_filename = service_account_filename
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# 1.13.4
2021-09-28
2021-10-01

* Task wdls used by ValidateChip were updated with changes that don't affect ValidateChip wdl

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 1.11.6
2021-10-01

* Changed the way the version of autocall is returned for the case of arrays that fail gencall

# 1.11.5
2021-09-08

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,9 @@ import "../../../../tasks/broad/IlluminaGenotypingArrayTasks.wdl" as GenotypingT
workflow IlluminaGenotypingArray {

String pipeline_version = "1.11.5"
String pipeline_version = "1.11.6"

input {

# This is the autocall_version, needed for the case where autocall fails (likely due to normalization errors)
# In this case it no longer emits the version in its output, so we store it here.
String autocall_version = "3.0.0"
String sample_alias
Int analysis_version_number
Float call_rate_threshold
Expand Down Expand Up @@ -337,6 +333,7 @@ workflow IlluminaGenotypingArray {
output {
String chip_well_barcode_output = chip_well_barcode
Int analysis_version_number_output = analysis_version_number
String autocall_version = AutoCall.autocall_version
File gtc = AutoCall.gtc_file
File red_idat_md5_cloud_path = RedIdatMd5Sum.md5_cloud_path
File green_idat_md5_cloud_path = GreenIdatMd5Sum.md5_cloud_path
Expand Down
8 changes: 8 additions & 0 deletions tasks/broad/IlluminaGenotypingArrayTasks.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,13 @@ task AutoCall {

String gtc_filename = "~{chip_well_barcode}.gtc"

# This is the autocall version. It is normally emitted by autocall (gencall) itself, except in the case
# where autocall fails (likely due to normalization errors).
# In that case autocall no longer emits the version in its output, so we keep it here so that it can be output and
# stored in the database.
# NB - ideally this should be returned from the docker image.
String autocall_ver = "3.0.0"

command <<<
set -e
rm -rf ~{chip_well_barcode}
Expand Down Expand Up @@ -550,6 +557,7 @@ task AutoCall {

output {
File gtc_file = gtc_filename
String autocall_version = autocall_ver
}
}

Expand Down
73 changes: 64 additions & 9 deletions tasks/broad/InternalArraysTasks.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ task BlacklistBarcode {
String chip_well_barcode
Int analysis_version_number
Int preemptible_tries
File vault_token_path
Array[String] authentication
String service_account_filename
String reason
Expand All @@ -56,6 +57,7 @@ task BlacklistBarcode {
command <<<
set -eo pipefail

export VAULT_TOKEN=$(cat ~{vault_token_path})
AUTH=~{write_lines(authentication)} && chmod +x $AUTH && $AUTH
export GOOGLE_APPLICATION_CREDENTIALS=/cromwell_root/~{service_account_filename}

Expand Down Expand Up @@ -164,8 +166,8 @@ task CreateBafRegressMetricsFile {
task UploadArraysMetrics {
input {
File arrays_variant_calling_detail_metrics
File? arrays_variant_calling_summary_metrics
File? arrays_control_code_summary_metrics
File arrays_variant_calling_summary_metrics
File arrays_control_code_summary_metrics
File? fingerprinting_detail_metrics
File? fingerprinting_summary_metrics
File? genotype_concordance_summary_metrics
Expand All @@ -174,6 +176,7 @@ task UploadArraysMetrics {
File? verify_id_metrics
File? bafregress_metrics

File vault_token_path
Array[String] authentication
String service_account_filename

Expand All @@ -188,13 +191,18 @@ task UploadArraysMetrics {
command <<<
set -eo pipefail

export VAULT_TOKEN=$(cat ~{vault_token_path})
AUTH=~{write_lines(authentication)} && chmod +x $AUTH && $AUTH
export GOOGLE_APPLICATION_CREDENTIALS=/cromwell_root/~{service_account_filename}

rm -rf metrics_upload_dir &&
mkdir metrics_upload_dir &&

# check that files are passed in before copying them -- [ -z FILE ] evaluates to true if FILE not there
cp ~{arrays_control_code_summary_metrics} metrics_upload_dir
cp ~{arrays_variant_calling_detail_metrics} metrics_upload_dir
cp ~{arrays_variant_calling_summary_metrics} metrics_upload_dir

# check that optional files were supplied before copying them -- [ -z STRING ] is true when STRING is empty, which is what an optional input that was not supplied expands to
! [ -z ~{genotype_concordance_summary_metrics} ] &&
cp ~{genotype_concordance_summary_metrics} metrics_upload_dir
! [ -z ~{genotype_concordance_detail_metrics} ] &&
Expand All @@ -211,12 +219,6 @@ task UploadArraysMetrics {
! [ -z ~{fingerprinting_summary_metrics} ] &&
cp ~{fingerprinting_summary_metrics} metrics_upload_dir

cp ~{arrays_variant_calling_detail_metrics} metrics_upload_dir
! [ -z ~{arrays_variant_calling_summary_metrics} ] &&
cp ~{arrays_variant_calling_summary_metrics} metrics_upload_dir

! [ -z ~{arrays_control_code_summary_metrics} ] &&
cp ~{arrays_control_code_summary_metrics} metrics_upload_dir
java -Xms2g -Dpicard.useLegacyParser=false -jar /usr/gitc/picard-private.jar \
UploadArraysMetrics \
--ANALYSIS_DIRECTORY metrics_upload_dir \
Expand All @@ -238,6 +240,55 @@ task UploadArraysMetrics {
}
}

# Uploads a lone arrays variant-calling detail metrics file by running the picard-private
# UploadArraysMetrics tool on a directory that contains only that file.
#
# Inputs:
#   arrays_variant_calling_detail_metrics - the detail metrics file to upload
#   vault_token_path         - file holding the vault token; it is localized to the VM and read into
#                              VAULT_TOKEN at run time rather than being interpolated into the command text
#   authentication           - lines of a shell script that is written to disk and executed before the upload
#                              (presumably the step that creates the cloudsql.db_*.txt files -- confirm against caller)
#   service_account_filename - file name under /cromwell_root used for GOOGLE_APPLICATION_CREDENTIALS
#   disk_size                - size of the local HDD, in the units Cromwell's "local-disk" expects
#   preemptible_tries        - value passed to the 'preemptible' runtime attribute
#
# Output:
#   upload_metrics_empty_file - empty marker file, created only after the upload command succeeded,
#                               that downstream calls can depend on
task UploadEmptyArraysMetrics {
  input {
    File arrays_variant_calling_detail_metrics

    File vault_token_path
    Array[String] authentication
    String service_account_filename

    Int disk_size
    Int preemptible_tries
  }

  meta {
    # The task's effect is an external upload, so it is marked volatile.
    volatile: true
  }

  command <<<
    set -eo pipefail

    # Assign first and export afterwards: 'export VAR=$(cmd)' returns the exit status of 'export',
    # which would hide a failure to read the token from 'set -e'.
    VAULT_TOKEN=$(cat "~{vault_token_path}")
    export VAULT_TOKEN

    # Each step is its own statement so that 'set -e' aborts on the first failure
    # (failures inside an '&&' chain, other than in its last command, do not trigger 'set -e').
    AUTH="~{write_lines(authentication)}"
    chmod +x "$AUTH"
    "$AUTH"
    export GOOGLE_APPLICATION_CREDENTIALS=/cromwell_root/~{service_account_filename}

    # Start from a clean upload directory holding only the detail metrics file.
    rm -rf metrics_upload_dir
    mkdir metrics_upload_dir
    cp "~{arrays_variant_calling_detail_metrics}" metrics_upload_dir

    java -Xms2g -Dpicard.useLegacyParser=false -jar /usr/gitc/picard-private.jar \
      UploadArraysMetrics \
      --ANALYSIS_DIRECTORY metrics_upload_dir \
      --DB_USERNAME_FILE cloudsql.db_user.txt \
      --DB_PASSWORD_FILE cloudsql.db_password.txt \
      --DB_JDBC_FILE cloudsql.db_jdbc.txt

    # Only reached when the upload succeeded.
    touch empty_file_for_dependency
  >>>

  runtime {
    docker: "us.gcr.io/broad-arrays-prod/arrays-picard-private:4.0.10-1631039849"
    disks: "local-disk " + disk_size + " HDD"
    memory: "3.5 GiB"
    preemptible: preemptible_tries
  }

  output {
    File upload_metrics_empty_file = "empty_file_for_dependency"
  }
}

task CreateChipWellBarcodeParamsFile {
input {
String chip_type_name
Expand Down Expand Up @@ -299,6 +350,7 @@ task CreateChipWellBarcodeParamsFile {
task UpdateChipWellBarcodeIndex {
input {
File params_file
File vault_token_path
Array[String] authentication
String service_account_filename
Int disk_size
Expand All @@ -312,6 +364,7 @@ task UpdateChipWellBarcodeIndex {
command <<<
set -eo pipefail

export VAULT_TOKEN=$(cat ~{vault_token_path})
AUTH=~{write_lines(authentication)} && chmod +x $AUTH && $AUTH
export GOOGLE_APPLICATION_CREDENTIALS=/cromwell_root/~{service_account_filename}
java -Xms2g -Dpicard.useLegacyParser=false -jar /usr/gitc/picard-private.jar \
Expand All @@ -335,6 +388,7 @@ task GetNextArraysQcAnalysisVersionNumber {
input {
String chip_well_barcode
Int preemptible_tries
File vault_token_path
Array[String] authentication
String service_account_filename
}
Expand All @@ -346,6 +400,7 @@ task GetNextArraysQcAnalysisVersionNumber {
command <<<
set -eo pipefail

export VAULT_TOKEN=$(cat ~{vault_token_path})
AUTH=~{write_lines(authentication)} && chmod +x $AUTH && $AUTH
export GOOGLE_APPLICATION_CREDENTIALS=/cromwell_root/~{service_account_filename}

Expand Down
Loading

0 comments on commit 811dc3a

Please sign in to comment.