diff --git a/.dockstore.yml b/.dockstore.yml
index 5797c6abdd..5793c254c1 100644
--- a/.dockstore.yml
+++ b/.dockstore.yml
@@ -61,3 +61,7 @@ workflows:
     subclass: WDL
     primaryDescriptorPath: /pipelines/broad/arrays/single_sample/Arrays.wdl
+  - name: Imputation
+    subclass: WDL
+    primaryDescriptorPath: /pipelines/broad/arrays/imputation/Imputation.wdl
+
diff --git a/dockers/broad/imputation/build_push_docker.sh b/dockers/broad/imputation/build_push_docker.sh
new file mode 100644
index 0000000000..cd837173dd
--- /dev/null
+++ b/dockers/broad/imputation/build_push_docker.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+set -e
+
+while test $# -gt 0; do
+  case "$1" in
+    -h|--help)
+      echo "Build and push a docker image from the specified Dockerfile."
+      echo "Call from anywhere in the warp repo. Supply the following options:"
+      echo "Usage: ./build_push_docker.sh [OPTIONS]"
+      echo "Options:"
+      echo " --directory/-d         (required) directory containing the Dockerfile (eg. imputation_eagle_docker)"
+      echo " --image-version-tag/-i (required) tag for this image (suggest using the image version)"
+      echo " --ubuntu-version/-u    (optional) version of ubuntu base image (defaults to 20.04)"
+      echo " --no-push/-p           (optional) build but do not push"
+      echo " --dry-run/-r           (optional) dry run (no build or push; can use to inspect variables)"
+      echo " --no-cache/-c          (optional) build docker image with no cache"
+      exit 0
+      ;;
+    -d|--directory)
+      shift
+      DOCKER_DIR=$1
+      shift
+      ;;
+    -i|--image-version-tag)
+      shift
+      IMG_TAG=$1
+      shift
+      ;;
+    -u|--ubuntu-version)
+      shift
+      UBUNTU=$1
+      shift
+      ;;
+    -r|--dry-run)
+      shift
+      DRY="true"
+      ;;
+    -p|--no-push)
+      shift
+      PUSH="false"
+      ;;
+    -c|--no-cache)
+      shift
+      NOCACHE="--no-cache"
+      ;;
+    *)
+      echo "Invalid argument. Use --help or -h flags for usage information."
+      exit 1
+      ;;
+  esac
+done
+
+
+if [[ -z $DOCKER_DIR ]]; then
+  echo "No docker path specified. Please specify a directory containing a Dockerfile with -d or --directory."
+  exit 1
+fi
+
+# check for missing arguments, fill defaults
+if [[ -z $UBUNTU ]]; then
+  UBUNTU=20.04
+  echo "No ubuntu version specified. Using ${UBUNTU} as default."
+fi
+if [[ -z $IMG_TAG ]]; then
+  echo "No image version specified. Please specify an image version with -i or --image-version-tag."
+  exit 1
+fi
+[[ -z "${DRY}" ]] && DRY=false
+[[ -z "${PUSH}" ]] && PUSH=true
+
+
+docker_dir=$(basename ${DOCKER_DIR})
+docker_path=$(find .. -type d -name "${docker_dir}")
+image_name=us.gcr.io/broad-dsde-methods/${docker_dir}:${IMG_TAG}
+while true; do
+  if [[ "${DRY}" == "true" ]]; then
+    break;
+  fi
+  echo "This script will build and push ${image_name}. Do you want to proceed? (y/[n])"
+  read yn
+  [[ -z ${yn} ]] && yn=n
+  case $yn in
+    [Yy]* ) break;;
+    [Nn]* ) exit 1;;
+    * ) echo "Please answer yes or no.";;
+  esac
+done
+
+# Execute commands only if this is not a dry run
+function execute(){
+  # Irrespective of whether dry run is enabled or not, we display
+  # the command on the screen
+  # shellcheck disable=SC2145
+  echo "COMMAND: ${@}"
+  # if dry run is enabled then simply return
+  if [[ ${DRY} == "false" ]]; then
+    eval "$@"
+  fi
+}
+
+# Check the docker path
+echo "Directory: ${docker_path}"
+if ! [[ $(find "${docker_path}" -name Dockerfile) ]]; then
+  echo "No Dockerfile found in this directory."
+  exit 1
+fi
+
+echo "Ubuntu version: ${UBUNTU}"
+echo "Image version tag: ${IMG_TAG}"
+
+build_opts="-t ${image_name} --build-arg UBUNTU_VERSION=${UBUNTU} ${NOCACHE}"
+execute "docker build ${docker_path} ${build_opts}"
+if [[ "${PUSH}" == "true" ]]; then
+  execute "docker push ${image_name}"
+fi
\ No newline at end of file
diff --git a/dockers/broad/imputation/imputation_bcftools_vcftools/Dockerfile b/dockers/broad/imputation/imputation_bcftools_vcftools/Dockerfile
new file mode 100644
index 0000000000..8c989a1f0e
--- /dev/null
+++ b/dockers/broad/imputation/imputation_bcftools_vcftools/Dockerfile
@@ -0,0 +1,40 @@
+# default ubuntu version: 20.04
+ARG UBUNTU_VERSION=20.04
+FROM ubuntu:${UBUNTU_VERSION}
+
+# the following argument is needed for pkg-config to
+# install without hanging on user input
+ARG DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update \
+  && apt-get install -y wget \
+  && apt-get install -y libgomp1 \
+  && apt-get install -y build-essential \
+  && apt-get install -y libz-dev \
+  && apt-get install -y libbz2-dev \
+  && apt-get install -y liblzma-dev \
+  && apt-get install -y pkg-config \
+  && apt-get install -y tabix \
+  && apt-get install -y python3 \
+  && apt-get install -y python3-pip \
+  && rm -rf /var/lib/apt/lists/*
+
+RUN wget https://github.com/samtools/bcftools/releases/download/1.10.2/bcftools-1.10.2.tar.bz2 \
+  && tar xf bcftools-1.10.2.tar.bz2 \
+  && cd bcftools-1.10.2 \
+  && ./configure \
+  && make \
+  && make install \
+  && cd ../ \
+  && rm -r bcftools-1.10.2.tar.bz2
+
+RUN wget https://github.com/vcftools/vcftools/releases/download/v0.1.16/vcftools-0.1.16.tar.gz \
+  && tar xf vcftools-0.1.16.tar.gz \
+  && cd vcftools-0.1.16 \
+  && ./configure \
+  && make \
+  && make install \
+  && cd ../ \
+  && rm -r vcftools-0.1.16.tar.gz
+
+RUN pip3 install docopt
diff --git a/dockers/broad/imputation/imputation_bcftools_vcftools/build_push_vcftoolsbcftools_docker.sh b/dockers/broad/imputation/imputation_bcftools_vcftools/build_push_vcftoolsbcftools_docker.sh
new file mode 100644
index 0000000000..aea18c441c
--- /dev/null
+++ b/dockers/broad/imputation/imputation_bcftools_vcftools/build_push_vcftoolsbcftools_docker.sh
@@ -0,0 +1,19 @@
+# Build the docker image from the Dockerfile in this directory.
+# This is to help keep track of versions, etc.
+# Wraps around ../build_push_docker.sh (dockers/broad/imputation/build_push_docker.sh).
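+#
+# A minimal usage sketch (values are examples; the flags are those defined in
+# the parent build_push_docker.sh above):
+#   ../build_push_docker.sh --directory imputation_bcftools_vcftools_docker \
+#     --image-version-tag v1.0.0 --ubuntu-version 20.04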
+
+dockerfile_directory=imputation_bcftools_vcftools_docker
+image_version=v1.0.0 # as of Jan 25 2021
+ubuntu_version=20.04 # as of Jan 25 2021
+
+wd=$(pwd)
+cd "$(dirname $0)" || exit
+
+../build_push_docker.sh \
+  --directory ${dockerfile_directory} \
+  --ubuntu-version ${ubuntu_version} \
+  --image-version-tag ${image_version} \
+# --dry-run
+
+
+cd "${wd}" || exit
diff --git a/dockers/broad/imputation/imputation_eagle_docker/Dockerfile b/dockers/broad/imputation/imputation_eagle_docker/Dockerfile
new file mode 100644
index 0000000000..54a93a2130
--- /dev/null
+++ b/dockers/broad/imputation/imputation_eagle_docker/Dockerfile
@@ -0,0 +1,31 @@
+# default ubuntu version: 20.04
+ARG UBUNTU_VERSION=20.04
+FROM ubuntu:${UBUNTU_VERSION}
+
+# the following argument is needed for pkg-config to
+# install without hanging on user input
+ARG DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update \
+  && apt-get install -y wget \
+  && apt-get install -y libgomp1 \
+  && apt-get install -y build-essential \
+  && apt-get install -y libz-dev \
+  && apt-get install -y libbz2-dev \
+  && apt-get install -y liblzma-dev \
+  && apt-get install -y pkg-config \
+  && apt-get install -y tabix \
+  && apt-get install -y python3 \
+  && apt-get install -y python3-pip \
+  && rm -rf /var/lib/apt/lists/*
+
+# Should be the same Eagle version as the Michigan Imputation Server
+ARG EAGLE_VERSION=2.4
+# RUN wget https://github.com/genepi/imputationserver/raw/master/files/bin/eagle
+RUN wget https://storage.googleapis.com/broad-alkesgroup-public/Eagle/downloads/old/Eagle_v${EAGLE_VERSION}.tar.gz
+RUN tar xf Eagle_v${EAGLE_VERSION}.tar.gz
+RUN mv Eagle_v${EAGLE_VERSION}/eagle .
+RUN rm -r Eagle_v${EAGLE_VERSION}.tar.gz
+RUN ["chmod", "+x", "eagle"]
+
+RUN wget https://data.broadinstitute.org/alkesgroup/Eagle/downloads/tables/genetic_map_hg19_withX.txt.gz
diff --git a/dockers/broad/imputation/imputation_eagle_docker/build_push_eagle_docker.sh b/dockers/broad/imputation/imputation_eagle_docker/build_push_eagle_docker.sh
new file mode 100644
index 0000000000..5b68bb3c6a
--- /dev/null
+++ b/dockers/broad/imputation/imputation_eagle_docker/build_push_eagle_docker.sh
@@ -0,0 +1,19 @@
+# Build the docker image from the Dockerfile in this directory.
+# This is to help keep track of versions, etc.
+# Wraps around ../build_push_docker.sh (dockers/broad/imputation/build_push_docker.sh).
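+#
+# To inspect the resolved image name and build options without building or
+# pushing anything, the parent script's dry-run flag can be used, e.g.:
+#   ../build_push_docker.sh -d imputation_eagle_docker -i v1.0.0 -r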
+
+dockerfile_directory=imputation_eagle_docker
+image_version=v1.0.0 # as of Jan 25 2021
+ubuntu_version=20.04 # as of Jan 25 2021
+
+wd=$(pwd)
+cd "$(dirname $0)" || exit
+
+../build_push_docker.sh \
+  --directory ${dockerfile_directory} \
+  --ubuntu-version ${ubuntu_version} \
+  --image-version-tag ${image_version} \
+# --dry-run
+
+
+cd "${wd}" || exit
\ No newline at end of file
diff --git a/dockers/broad/imputation/imputation_minimac_docker/Dockerfile b/dockers/broad/imputation/imputation_minimac_docker/Dockerfile
new file mode 100644
index 0000000000..f222ee4f96
--- /dev/null
+++ b/dockers/broad/imputation/imputation_minimac_docker/Dockerfile
@@ -0,0 +1,39 @@
+# default ubuntu version: 20.04
+ARG UBUNTU_VERSION=20.04
+FROM ubuntu:${UBUNTU_VERSION}
+
+# the following argument is needed for pkg-config to
+# install without hanging on user input
+ARG DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update \
+  && apt-get install -y wget \
+  && apt-get install -y libgomp1 \
+  && apt-get install -y build-essential \
+  && apt-get install -y libz-dev \
+  && apt-get install -y libbz2-dev \
+  && apt-get install -y liblzma-dev \
+  && apt-get install -y pkg-config \
+  && apt-get install -y tabix \
+  && apt-get install -y python3 \
+  && apt-get install -y python3-pip \
+  && apt-get install -y software-properties-common cmake \
+  && rm -rf /var/lib/apt/lists/*
+
+RUN pip3 install cget
+RUN wget https://github.com/statgen/Minimac4/archive/v1.0.2.tar.gz \
+  && tar xf v1.0.2.tar.gz \
+  && cd Minimac4-1.0.2 \
+  && bash install.sh \
+  && cp release-build/minimac4 .. \
+  && cd .. \
+  && rm -r v1.0.2.tar.gz
+
+RUN wget https://github.com/samtools/bcftools/releases/download/1.10.2/bcftools-1.10.2.tar.bz2 \
+  && tar xf bcftools-1.10.2.tar.bz2 \
+  && cd bcftools-1.10.2 \
+  && ./configure \
+  && make \
+  && make install \
+  && cd ../ \
+  && rm -r bcftools-1.10.2.tar.bz2
diff --git a/dockers/broad/imputation/imputation_minimac_docker/build_push_minimac_docker.sh b/dockers/broad/imputation/imputation_minimac_docker/build_push_minimac_docker.sh
new file mode 100644
index 0000000000..61b444e8b7
--- /dev/null
+++ b/dockers/broad/imputation/imputation_minimac_docker/build_push_minimac_docker.sh
@@ -0,0 +1,19 @@
+# Build the docker image from the Dockerfile in this directory.
+# This is to help keep track of versions, etc.
+# Wraps around ../build_push_docker.sh (dockers/broad/imputation/build_push_docker.sh).
+
+dockerfile_directory=imputation_minimac_docker
+image_version=v1.0.0 # as of Jan 25 2021
+ubuntu_version=20.04 # as of Jan 25 2021
+
+wd=$(pwd)
+cd "$(dirname $0)" || exit
+
+../build_push_docker.sh \
+  --directory ${dockerfile_directory} \
+  --ubuntu-version ${ubuntu_version} \
+  --image-version-tag ${image_version} \
+# --dry-run
+
+
+cd "${wd}" || exit
\ No newline at end of file
diff --git a/pipelines/broad/arrays/imputation/Imputation.changelog.md b/pipelines/broad/arrays/imputation/Imputation.changelog.md
new file mode 100644
index 0000000000..76f16b5aee
--- /dev/null
+++ b/pipelines/broad/arrays/imputation/Imputation.changelog.md
@@ -0,0 +1,8 @@
+# 1.0.0
+
+2021-09-09 (Date of Last Commit)
+
+* Initial public release of the Imputation pipeline. Read more in the [Imputation pipeline overview](https://broadinstitute.github.io/warp/docs/Pipelines/Imputation_Pipeline/README).
+
+  * The Imputation pipeline imputes missing genotypes from either a multi-sample VCF or an array of single sample VCFs using a large genomic reference panel. It is based on the Michigan Imputation Server pipeline.
Overall, the pipeline filters, phases, and performs imputation on a multi-sample VCF. It outputs the imputed VCF along with key imputation metrics.
\ No newline at end of file
diff --git a/pipelines/broad/arrays/imputation/Imputation.options.json b/pipelines/broad/arrays/imputation/Imputation.options.json
new file mode 100644
index 0000000000..0253eeff3c
--- /dev/null
+++ b/pipelines/broad/arrays/imputation/Imputation.options.json
@@ -0,0 +1,5 @@
+{
+  "read_from_cache": true,
+  "write_to_cache": true,
+  "monitoring_script": "gs://broad-gotc-test-storage/cromwell_monitoring_script.sh"
+}
diff --git a/pipelines/broad/arrays/imputation/Imputation.wdl b/pipelines/broad/arrays/imputation/Imputation.wdl
new file mode 100644
index 0000000000..7c7d272ea3
--- /dev/null
+++ b/pipelines/broad/arrays/imputation/Imputation.wdl
@@ -0,0 +1,328 @@
+version 1.0
+
+import "../../../../structs/imputation/ImputationStructs.wdl" as structs
+import "../../../../tasks/broad/ImputationTasks.wdl" as tasks
+import "../../../../tasks/broad/Utilities.wdl" as utils
+
+workflow ImputationPipeline {
+
+  String pipeline_version = "1.0.0"
+
+  input {
+    Int chunkLength = 25000000
+    Int chunkOverlaps = 5000000 # padding added to the beginning and end of each chunk to reduce edge effects
+
+    # You can either input a multi-sample VCF or an array of single sample VCFs.
+    # The pipeline will merge the single sample VCFs into one multi-sample VCF
+    # and then impute that multi-sample VCF.
+    # To run a single sample VCF, set the multi_sample_vcf input to that
+    # single sample VCF.
+    File? multi_sample_vcf
+    File? multi_sample_vcf_index
+    Array[File]? single_sample_vcfs
+    Array[File]? single_sample_vcf_indices
+
+    Boolean perform_extra_qc_steps = false # optional additional QC steps from Amit's group that should only be
+    # run for large, diverse sample sets (they further limit sites to a 95% call rate and filter by a HWE p-value threshold)
+    Float? optional_qc_max_missing
+    Float? optional_qc_hwe
+    File ref_dict # for reheadering / adding contig lengths in the header of the output VCF, and calculating contig lengths
+    Array[ReferencePanelContig] referencePanelContigs
+    File genetic_maps_eagle
+    String output_callset_name
+    Boolean split_output_to_single_sample = false
+    File haplotype_database
+    Int merge_ssvcf_mem_gb = 3 # the memory allocation for MergeSingleSampleVcfs (in GiB)
+
+    Float frac_well_imputed_threshold = 0.9 # require fraction of sites well imputed to be greater than this to pass
+    Int chunks_fail_threshold = 1 # require fewer than this many chunks to fail in order to pass
+  }
+  # Docker images here
+  String bcftools_docker_tag = "us.gcr.io/broad-dsde-methods/imputation_bcftools_vcftools_docker:v1.0.0"
+  String bcftools_vcftools_docker_tag = "us.gcr.io/broad-dsde-methods/imputation_bcftools_vcftools_docker:v1.0.0"
+  String gatk_docker_tag = "us.gcr.io/broad-gatk/gatk:4.1.9.0"
+  String minimac4_docker_tag = "us.gcr.io/broad-dsde-methods/imputation-minimac-docker:v1.0.0"
+  String eagle_docker_tag = "us.gcr.io/broad-dsde-methods/imputation_eagle_docker:v1.0.0"
+  String ubuntu_docker_tag = "ubuntu:20.04"
+  String rtidyverse_docker_tag = "rocker/tidyverse:4.1.0"
+
+  if (defined(single_sample_vcfs) && defined(multi_sample_vcf)) {
+    call utils.ErrorWithMessage as ErrorMessageDoubleInput {
+      input:
+        message = "single_sample_vcfs and multi_sample_vcf cannot both be defined as input"
+    }
+  }
+
+  if (!defined(single_sample_vcfs) && !defined(multi_sample_vcf)) {
+    call utils.ErrorWithMessage as ErrorMessageNoInput {
+      input:
+        message = "One (and only one) of single_sample_vcfs and multi_sample_vcf must be defined as input"
+    }
+  }
+
+  if (defined(single_sample_vcfs)) {
+    call tasks.MergeSingleSampleVcfs {
+      input:
+        input_vcfs = select_first([single_sample_vcfs]),
+        input_vcf_indices = select_first([single_sample_vcf_indices]),
+        output_vcf_basename = "merged_input_samples",
+        bcftools_docker = bcftools_docker_tag,
+        mem = merge_ssvcf_mem_gb
+    }
+  }
+
+  File vcf_to_impute = select_first([multi_sample_vcf, MergeSingleSampleVcfs.output_vcf])
+  File vcf_index_to_impute = select_first([multi_sample_vcf_index, MergeSingleSampleVcfs.output_vcf_index])
+
+  call tasks.SetIDs as SetIdsVcfToImpute {
+    input:
+      vcf = vcf_to_impute,
+      output_basename = "input_samples_with_variant_ids",
+      bcftools_docker = bcftools_docker_tag
+  }
+
+  call tasks.ExtractIDs as ExtractIdsVcfToImpute {
+    input:
+      vcf = SetIdsVcfToImpute.output_vcf,
+      output_basename = "imputed_sites",
+      bcftools_docker = bcftools_docker_tag
+  }
+
+  call tasks.CountSamples {
+    input:
+      vcf = vcf_to_impute,
+      bcftools_docker = bcftools_docker_tag
+  }
+
+  scatter (referencePanelContig in referencePanelContigs) {
+    call tasks.CalculateChromosomeLength {
+      input:
+        ref_dict = ref_dict,
+        chrom = referencePanelContig.contig
+    }
+
+    Float chunkLengthFloat = chunkLength
+    Int num_chunks = ceil(CalculateChromosomeLength.chrom_length / chunkLengthFloat)
+
+    scatter (i in range(num_chunks)) {
+      String chunk_contig = referencePanelContig.contig
+      Int start = (i * chunkLength) + 1
+      Int startWithOverlaps = if (start - chunkOverlaps < 1) then 1 else start - chunkOverlaps
+      Int end = if (CalculateChromosomeLength.chrom_length < ((i + 1) * chunkLength)) then CalculateChromosomeLength.chrom_length else ((i + 1) * chunkLength)
+      Int endWithOverlaps = if (CalculateChromosomeLength.chrom_length < end + chunkOverlaps) then CalculateChromosomeLength.chrom_length else end + chunkOverlaps
+
+      call
tasks.GenerateChunk { + input: + vcf = vcf_to_impute, + vcf_index = vcf_index_to_impute, + start = startWithOverlaps, + end = endWithOverlaps, + chrom = referencePanelContig.contig, + basename = "chrom_" + referencePanelContig.contig + "_chunk_" + i, + gatk_docker = gatk_docker_tag + } + + if (perform_extra_qc_steps) { + call tasks.OptionalQCSites { + input: + input_vcf = GenerateChunk.output_vcf, + input_vcf_index = GenerateChunk.output_vcf_index, + output_vcf_basename = "chrom_" + referencePanelContig.contig + "_chunk_" + i, + bcftools_vcftools_docker = bcftools_vcftools_docker_tag, + optional_qc_max_missing = optional_qc_max_missing, + optional_qc_hwe = optional_qc_hwe + } + } + + call tasks.CountVariantsInChunks { + input: + vcf = select_first([OptionalQCSites.output_vcf, GenerateChunk.output_vcf]), + vcf_index = select_first([OptionalQCSites.output_vcf_index, GenerateChunk.output_vcf_index]), + panel_vcf = referencePanelContig.vcf, + panel_vcf_index = referencePanelContig.vcf_index, + gatk_docker = gatk_docker_tag + } + call tasks.CheckChunks { + input: + vcf = select_first([OptionalQCSites.output_vcf, GenerateChunk.output_vcf]), + vcf_index = select_first([OptionalQCSites.output_vcf_index, GenerateChunk.output_vcf_index]), + panel_vcf = referencePanelContig.vcf, + panel_vcf_index = referencePanelContig.vcf_index, + var_in_original = CountVariantsInChunks.var_in_original, + var_in_reference = CountVariantsInChunks.var_in_reference, + bcftools_docker = bcftools_docker_tag + } + + if (CheckChunks.valid) { + + call tasks.PhaseVariantsEagle { + input: + dataset_bcf = CheckChunks.valid_chunk_bcf, + dataset_bcf_index = CheckChunks.valid_chunk_bcf_index, + reference_panel_bcf = referencePanelContig.bcf, + reference_panel_bcf_index = referencePanelContig.bcf_index, + chrom = referencePanelContig.contig, + genetic_map_file = genetic_maps_eagle, + eagle_docker = eagle_docker_tag, + start = startWithOverlaps, + end = endWithOverlaps + } + + call tasks.Minimac4 { + input: + ref_panel = referencePanelContig.m3vcf, + phased_vcf = PhaseVariantsEagle.dataset_prephased_vcf, + prefix = "chrom_" + referencePanelContig.contig + "_chunk_" + i +"_imputed", + chrom = referencePanelContig.contig, + minimac4_docker = minimac4_docker_tag, + start = start, + end = end, + window = chunkOverlaps + } + + call tasks.AggregateImputationQCMetrics { + input: + infoFile = Minimac4.info, + nSamples = CountSamples.nSamples, + basename = output_callset_name + "chrom_" + referencePanelContig.contig + "_chunk_" + i, + rtidyverse_docker = rtidyverse_docker_tag + } + + call tasks.UpdateHeader { + input: + vcf = Minimac4.vcf, + vcf_index = Minimac4.vcf_index, + ref_dict = ref_dict, + basename = "chrom_" + referencePanelContig.contig + "_chunk_" + i +"_imputed", + gatk_docker = gatk_docker_tag + } + + call tasks.SeparateMultiallelics { + input: + original_vcf = UpdateHeader.output_vcf, + original_vcf_index = UpdateHeader.output_vcf_index, + output_basename = "chrom" + referencePanelContig.contig + "_chunk_" + i +"_imputed", + bcftools_docker = bcftools_docker_tag + } + + call tasks.RemoveSymbolicAlleles { + input: + original_vcf = SeparateMultiallelics.output_vcf, + original_vcf_index = SeparateMultiallelics.output_vcf_index, + output_basename = "chrom" + referencePanelContig.contig + "_chunk_" + i +"_imputed", + gatk_docker = gatk_docker_tag + } + + call tasks.SetIDs { + input: + vcf = RemoveSymbolicAlleles.output_vcf, + output_basename = "chrom" + referencePanelContig.contig + "_chunk_" + i +"_imputed", + bcftools_docker = 
bcftools_docker_tag + } + } + } + Array[File] aggregatedImputationMetrics = select_all(AggregateImputationQCMetrics.aggregated_metrics) + Array[File] chromosome_vcfs = select_all(SetIDs.output_vcf) + Array[File] chromosome_vcf_indices = select_all(SetIDs.output_vcf_index) + } + + Array[File] phased_vcfs = flatten(chromosome_vcfs) + Array[File] phased_vcf_indices = flatten(chromosome_vcf_indices) + + call tasks.GatherVcfs { + input: + input_vcfs = phased_vcfs, + input_vcf_indices = phased_vcf_indices, + output_vcf_basename = output_callset_name, + gatk_docker = gatk_docker_tag + } + + call tasks.ExtractIDs { + input: + vcf = GatherVcfs.output_vcf, + output_basename = "imputed_sites", + bcftools_docker = bcftools_docker_tag + } + + call tasks.FindSitesUniqueToFileTwoOnly { + input: + file1 = ExtractIDs.ids, + file2 = ExtractIdsVcfToImpute.ids, + ubuntu_docker = ubuntu_docker_tag + } + + call tasks.SelectVariantsByIds { + input: + vcf = SetIdsVcfToImpute.output_vcf, + ids = FindSitesUniqueToFileTwoOnly.missing_sites, + basename = "imputed_sites_to_recover", + gatk_docker = gatk_docker_tag + } + + call tasks.RemoveAnnotations { + input: + vcf = SelectVariantsByIds.output_vcf, + basename = "imputed_sites_to_recover_annotations_removed", + bcftools_docker = bcftools_docker_tag + } + + call tasks.InterleaveVariants { + input: + vcfs = [RemoveAnnotations.output_vcf, GatherVcfs.output_vcf], + basename = output_callset_name, + gatk_docker = gatk_docker_tag + } + + call tasks.MergeImputationQCMetrics { + input: + metrics = flatten(aggregatedImputationMetrics), + basename = output_callset_name, + rtidyverse_docker = rtidyverse_docker_tag + } + + if (MergeImputationQCMetrics.frac_well_imputed < frac_well_imputed_threshold) { + call utils.ErrorWithMessage as FailQCWellImputedFrac { + input: + message = "Well imputed fraction was " + MergeImputationQCMetrics.frac_well_imputed + ", QC failure threshold was set at " + frac_well_imputed_threshold + } + } + + call tasks.StoreChunksInfo { + input: + chroms = flatten(chunk_contig), + starts = flatten(start), + ends = flatten(end), + vars_in_array = flatten(CountVariantsInChunks.var_in_original), + vars_in_panel = flatten(CountVariantsInChunks.var_in_reference), + valids = flatten(CheckChunks.valid), + basename = output_callset_name, + rtidyverse_docker = rtidyverse_docker_tag + } + + if (StoreChunksInfo.n_failed_chunks >= chunks_fail_threshold) { + call utils.ErrorWithMessage as FailQCNChunks { + input: + message = StoreChunksInfo.n_failed_chunks + " chunks failed imputation, QC threshold was set to " + chunks_fail_threshold + } + } + + if (split_output_to_single_sample) { + call tasks.SplitMultiSampleVcf { + input: + multiSampleVcf = InterleaveVariants.output_vcf, + bcftools_docker = bcftools_docker_tag + } + } + + + output { + Array[File]? imputed_single_sample_vcfs = SplitMultiSampleVcf.single_sample_vcfs + Array[File]? 
imputed_single_sample_vcf_indices = SplitMultiSampleVcf.single_sample_vcf_indices + File imputed_multisample_vcf = InterleaveVariants.output_vcf + File imputed_multisample_vcf_index = InterleaveVariants.output_vcf_index + File aggregated_imputation_metrics = MergeImputationQCMetrics.aggregated_metrics + File chunks_info = StoreChunksInfo.chunks_info + File failed_chunks = StoreChunksInfo.failed_chunks + Int n_failed_chunks = StoreChunksInfo.n_failed_chunks + } +} \ No newline at end of file diff --git a/pipelines/broad/arrays/imputation/example_inputs.json b/pipelines/broad/arrays/imputation/example_inputs.json new file mode 100644 index 0000000000..3e5c484281 --- /dev/null +++ b/pipelines/broad/arrays/imputation/example_inputs.json @@ -0,0 +1,52 @@ +{ + "ImputationPipeline.single_sample_vcfs": [ + "gs://broad-gotc-test-storage/imputation/plumbing/vcfs/101342370027_R02C01.vcf.gz", + "gs://broad-gotc-test-storage/imputation/plumbing/vcfs/101342370027_R12C02.vcf.gz", + "gs://broad-gotc-test-storage/imputation/plumbing/vcfs/101342370134_R12C02.vcf.gz" + ], + "ImputationPipeline.single_sample_vcf_indices":[ + "gs://broad-gotc-test-storage/imputation/plumbing/vcfs/101342370027_R02C01.vcf.gz.tbi", + "gs://broad-gotc-test-storage/imputation/plumbing/vcfs/101342370027_R12C02.vcf.gz.tbi", + "gs://broad-gotc-test-storage/imputation/plumbing/vcfs/101342370134_R12C02.vcf.gz.tbi" + ], + + "ImputationPipeline.ref_dict": "gs://gcp-public-data--broad-references/hg19/v0/Homo_sapiens_assembly19.dict", + "ImputationPipeline.referencePanelContigs": [ + { + "vcf": "gs://broad-gotc-test-storage/imputation/1000G_reference_panel/ALL.chr21.phase3_integrated.20130502.genotypes.cleaned.vcf.gz", + "vcf_index": "gs://broad-gotc-test-storage/imputation/1000G_reference_panel/ALL.chr21.phase3_integrated.20130502.genotypes.cleaned.vcf.gz.tbi", + "bcf": "gs://broad-gotc-test-storage/imputation/1000G_reference_panel/ALL.chr21.phase3_integrated.20130502.genotypes.cleaned.bcf", + "bcf_index":"gs://broad-gotc-test-storage/imputation/1000G_reference_panel/ALL.chr21.phase3_integrated.20130502.genotypes.cleaned.bcf.csi", + "m3vcf": "gs://broad-gotc-test-storage/imputation/1000G_reference_panel/ALL.chr21.phase3_integrated.20130502.genotypes.cleaned.cleaned.m3vcf.gz", + "contig": "21" + }, + { + "vcf": "gs://broad-gotc-test-storage/imputation/1000G_reference_panel/ALL.chr22.phase3_integrated.20130502.genotypes.cleaned.vcf.gz", + "vcf_index": "gs://broad-gotc-test-storage/imputation/1000G_reference_panel/ALL.chr22.phase3_integrated.20130502.genotypes.cleaned.vcf.gz.tbi", + "bcf": "gs://broad-gotc-test-storage/imputation/1000G_reference_panel/ALL.chr22.phase3_integrated.20130502.genotypes.cleaned.bcf", + "bcf_index": "gs://broad-gotc-test-storage/imputation/1000G_reference_panel/ALL.chr22.phase3_integrated.20130502.genotypes.cleaned.bcf.csi", + "m3vcf": "gs://broad-gotc-test-storage/imputation/1000G_reference_panel/ALL.chr22.phase3_integrated.20130502.genotypes.cleaned.cleaned.m3vcf.gz", + "contig": "22" + } + ], + "ImputationPipeline.genetic_maps_eagle": "gs://broad-gotc-test-storage/imputation/eagle_genetic_map/genetic_map_hg19_withX.txt.gz", + "ImputationPipeline.output_callset_name": "plumbing_test", + "ImputationPipeline.split_output_to_single_sample": false, + "ImputationPipeline.haplotype_database": "gs://gcp-public-data--broad-references/hg19/v0/Homo_sapiens_assembly19.haplotype_database.txt", + "ImputationPipeline.perform_extra_qc_steps": false, + "ImputationPipeline.optional_qc_max_missing": 0.05, + 
"ImputationPipeline.optional_qc_hwe": 0.000001 +} + + + + + + + + + + + + + diff --git a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md index f74ece0edd..7b1c63d4ed 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md +++ b/pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.changelog.md @@ -1,8 +1,8 @@ # 2.4.7 2021-09-22 -* Updated Utilities.wdl task definitions to include a new task that is NOT used in this pipeline. - +* Updated Utilities.wdl task definitions to include a new ErrorWithMessage task that is NOT used in the ExomeGermlineSingleSample pipeline. + # 2.4.6 2021-08-02 diff --git a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md index d5862f3580..7e391fdd3a 100644 --- a/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md +++ b/pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.changelog.md @@ -1,7 +1,7 @@ # 2.3.7 2021-09-22 -* Updated Utilities.wdl task definitions to include a new task that is NOT used in this pipeline. +* Updated Utilities.wdl task definitions to include a new ErrorWithMessage task that is NOT used in the WholeGenomeGermlineSingleSample pipeline. # 2.3.6 2021-08-02 diff --git a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.changelog.md b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.changelog.md index 53744c6aa8..ad297acb3b 100644 --- a/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.changelog.md +++ b/pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.changelog.md @@ -1,7 +1,7 @@ # 1.0.2 2021-09-22 -* Updated Utilities.wdl task definitions to include a new task that is NOT used in this pipeline. +* Updated Utilities.wdl task definitions to include a new ErrorWithMessage task that is NOT used in the VariantCalling pipeline. # 1.0.1 2021-06-22 diff --git a/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md b/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md index c9db65e502..522e4b94ec 100644 --- a/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md +++ b/pipelines/broad/reprocessing/exome/ExomeReprocessing.changelog.md @@ -1,7 +1,7 @@ # 2.4.9 2021-09-22 -* Updated Utilities.wdl task definitions to include a new task that is NOT used in this pipeline. +* Updated Utilities.wdl task definitions to include a new ErrorWithMessage task that is NOT used in the ExomeReprocessing pipeline. # 2.4.8 2021-08-02 diff --git a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md index d52bbb4dcf..490827e6de 100644 --- a/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md +++ b/pipelines/broad/reprocessing/external/exome/ExternalExomeReprocessing.changelog.md @@ -1,7 +1,7 @@ # 2.4.9 2021-09-22 -* Updated Utilities.wdl task definitions to include a new task that is NOT used in this pipeline. +* Updated Utilities.wdl task definitions to include a new ErrorWithMessage task that is NOT used in the ExternalExomeReprocessing pipeline. 
# 2.4.8 2021-08-02 diff --git a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md index 66b5a86a6f..277537d8f5 100644 --- a/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md +++ b/pipelines/broad/reprocessing/external/wgs/ExternalWholeGenomeReprocessing.changelog.md @@ -1,7 +1,7 @@ # 1.3.9 2021-09-22 -* Updated Utilities.wdl task definitions to include a new task that is NOT used in this pipeline. +* Updated Utilities.wdl task definitions to include a new ErrorWithMessage task that is NOT used in the ExternalWholeGenomeReprocessing pipeline. # 1.3.8 2021-08-02 diff --git a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md index 3e7b3da3a0..e7def3a6d9 100644 --- a/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md +++ b/pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.changelog.md @@ -1,7 +1,7 @@ # 2.3.9 2021-09-22 -* Updated Utilities.wdl task definitions to include a new task that is NOT used in this pipeline. +* Updated Utilities.wdl task definitions to include a new ErrorWithMessage task that is NOT used in the WholeGenomeReprocessing pipeline. # 2.3.8 2021-08-02 diff --git a/structs/imputation/ImputationStructs.wdl b/structs/imputation/ImputationStructs.wdl new file mode 100644 index 0000000000..1bfd250127 --- /dev/null +++ b/structs/imputation/ImputationStructs.wdl @@ -0,0 +1,10 @@ +version 1.0 + +struct ReferencePanelContig { + File vcf + File vcf_index + File bcf + File bcf_index + File m3vcf + String contig +} \ No newline at end of file diff --git a/tasks/broad/ImputationTasks.wdl b/tasks/broad/ImputationTasks.wdl new file mode 100644 index 0000000000..c735ade3c5 --- /dev/null +++ b/tasks/broad/ImputationTasks.wdl @@ -0,0 +1,729 @@ +version 1.0 + +task CalculateChromosomeLength { + input { + File ref_dict + Int chrom + } + + Int disk_size = ceil(2*size(ref_dict, "GiB")) + 5 + + command { + grep -P "SN:~{chrom}\t" ~{ref_dict} | sed 's/.*LN://' | sed 's/\t.*//' + } + runtime { + docker: "ubuntu:18.04" + disks: "local-disk " + disk_size + " HDD" + memory: "2 GiB" + } + output { + Int chrom_length = read_int(stdout()) + } +} + +task GenerateChunk { + input { + Int start + Int end + String chrom + String basename + String vcf + String vcf_index + Int disk_size = 400 # not sure how big the disk size needs to be since we aren't downloading the entire VCF here + String gatk_docker + } + command { + gatk SelectVariants \ + -V ~{vcf} \ + --select-type-to-include SNP \ + --max-nocall-fraction 0.1 \ + -xl-select-type SYMBOLIC \ + --select-type-to-exclude MIXED \ + --restrict-alleles-to BIALLELIC \ + -L ~{chrom}:~{start}-~{end} \ + -O ~{basename}.vcf.gz \ + --exclude-filtered true + } + runtime { + docker: gatk_docker + disks: "local-disk " + disk_size + " HDD" + memory: "8 GiB" + } + parameter_meta { + vcf: { + description: "vcf", + localization_optional: true + } + vcf_index: { + description: "vcf index", + localization_optional: true + } + } + output { + File output_vcf = "~{basename}.vcf.gz" + File output_vcf_index = "~{basename}.vcf.gz.tbi" + } +} + +task CountVariantsInChunks { + input { + File vcf + File vcf_index + File panel_vcf + File panel_vcf_index + Int disk_size = ceil(2*size([vcf, vcf_index, panel_vcf, panel_vcf_index], "GiB")) + String gatk_docker + } + command <<< + echo $(gatk CountVariants -V 
~{vcf} | sed 's/Tool returned://') > var_in_original + echo $(gatk CountVariants -V ~{vcf} -L ~{panel_vcf} | sed 's/Tool returned://') > var_in_reference + >>> + output { + Int var_in_original = read_int("var_in_original") + Int var_in_reference = read_int("var_in_reference") + } + runtime { + docker: gatk_docker + disks: "local-disk " + disk_size + " HDD" + memory: "4 GiB" + } + +} +task CheckChunks { + input { + File vcf + File vcf_index + File panel_vcf + File panel_vcf_index + Int var_in_original + Int var_in_reference + Int disk_size =ceil(2*size([vcf, vcf_index, panel_vcf, panel_vcf_index], "GiB")) + String bcftools_docker + } + command <<< + if [ $(( ~{var_in_reference} * 2 - ~{var_in_original})) -gt 0 ] && [ ~{var_in_reference} -gt 3 ]; then + echo true > valid_file.txt + else + echo false > valid_file.txt + fi + + bcftools convert -Ob ~{vcf} > valid_variants.bcf + bcftools index -f valid_variants.bcf + >>> + output { + File valid_chunk_bcf = "valid_variants.bcf" + File valid_chunk_bcf_index = "valid_variants.bcf.csi" + Boolean valid = read_boolean("valid_file.txt") + } + runtime { + docker: bcftools_docker + disks: "local-disk " + disk_size + " HDD" + memory: "4 GiB" + } + +} + +task PhaseVariantsEagle { + input { + File dataset_bcf + File dataset_bcf_index + File reference_panel_bcf + File reference_panel_bcf_index + String chrom + File genetic_map_file + Int start + Int end + String eagle_docker + } + Int disk_size = ceil(3 * size([dataset_bcf, reference_panel_bcf, dataset_bcf_index, reference_panel_bcf_index], "GiB")) + command <<< + /eagle \ + --vcfTarget ~{dataset_bcf} \ + --vcfRef ~{reference_panel_bcf} \ + --geneticMapFile ~{genetic_map_file} \ + --outPrefix pre_phased_~{chrom} \ + --vcfOutFormat z \ + --bpStart ~{start} \ + --bpEnd ~{end} \ + --allowRefAltSwap + >>> + output { + File dataset_prephased_vcf="pre_phased_~{chrom}.vcf.gz" + } + runtime { + docker: eagle_docker + memory: "32 GiB" + cpu: "8" + disks: "local-disk " + disk_size + " HDD" + } +} + +task Minimac4 { + input { + File ref_panel + File phased_vcf + String prefix + String chrom + Int start + Int end + String minimac4_docker + Int window + } + command <<< + /Minimac4 \ + --refHaps ~{ref_panel} \ + --haps ~{phased_vcf} \ + --start ~{start} \ + --end ~{end} \ + --window ~{window} \ + --chr ~{chrom} \ + --noPhoneHome \ + --format GT,DS,GP \ + --allTypedSites \ + --prefix ~{prefix} \ + --minRatio 0.00001 + + bcftools index -t ~{prefix}.dose.vcf.gz + >>> + output { + File vcf = "~{prefix}.dose.vcf.gz" + File vcf_index = "~{prefix}.dose.vcf.gz.tbi" + File info = "~{prefix}.info" + } + runtime { + docker: minimac4_docker + memory: "4 GiB" + cpu: "1" + disks: "local-disk 100 HDD" + } +} + +task GatherVcfs { + input { + Array[File] input_vcfs + Array[File] input_vcf_indices + String output_vcf_basename + String gatk_docker + } + + Int disk_size = ceil(3*size(input_vcfs, "GiB")) + + command <<< + gatk GatherVcfs \ + -I ~{sep=' -I ' input_vcfs} \ + -O ~{output_vcf_basename}.vcf.gz + + gatk IndexFeatureFile -I ~{output_vcf_basename}.vcf.gz + + >>> + runtime { + docker: gatk_docker + disks: "local-disk " + disk_size + " HDD" + memory: "16 GiB" + } + output { + File output_vcf = "~{output_vcf_basename}.vcf.gz" + File output_vcf_index = "~{output_vcf_basename}.vcf.gz.tbi" + } +} + +task UpdateHeader { + input { + File vcf + File vcf_index + File ref_dict + String basename + Int disk_size = ceil(4*(size(vcf, "GiB") + size(vcf_index, "GiB"))) + 20 + String gatk_docker + } + command <<< + + ## update the header of the merged 
vcf + gatk UpdateVCFSequenceDictionary \ + --source-dictionary ~{ref_dict} \ + --output ~{basename}.vcf.gz \ + --replace -V ~{vcf} \ + --disable-sequence-dictionary-validation + >>> + runtime { + docker: gatk_docker + disks: "local-disk " + disk_size + " HDD" + memory: "8 GiB" + } + output { + File output_vcf = "~{basename}.vcf.gz" + File output_vcf_index = "~{basename}.vcf.gz.tbi" + } +} + +task RemoveSymbolicAlleles { + input { + File original_vcf + File original_vcf_index + String output_basename + Int disk_size = ceil(3*(size(original_vcf, "GiB") + size(original_vcf_index, "GiB"))) + String gatk_docker + } + command { + gatk SelectVariants -V ~{original_vcf} -xl-select-type SYMBOLIC -O ~{output_basename}.vcf.gz + } + output { + File output_vcf = "~{output_basename}.vcf.gz" + File output_vcf_index = "~{output_basename}.vcf.gz.tbi" + } + runtime { + docker: gatk_docker + disks: "local-disk " + disk_size + " HDD" + memory: "4 GiB" + } +} + +task SeparateMultiallelics { + input { + File original_vcf + File original_vcf_index + String output_basename + Int disk_size = ceil(2*(size(original_vcf, "GiB") + size(original_vcf_index, "GiB"))) + String bcftools_docker + } + command { + bcftools norm -m - ~{original_vcf} -Oz -o ~{output_basename}.vcf.gz + bcftools index -t ~{output_basename}.vcf.gz + } + output { + File output_vcf = "~{output_basename}.vcf.gz" + File output_vcf_index = "~{output_basename}.vcf.gz.tbi" + } + runtime { + docker: bcftools_docker + disks: "local-disk " + disk_size + " HDD" + memory: "4 GiB" + } +} + +task OptionalQCSites { + input { + File input_vcf + File input_vcf_index + String output_vcf_basename + String bcftools_vcftools_docker + Float? optional_qc_max_missing + Float? optional_qc_hwe + } + Float max_missing = select_first([optional_qc_max_missing, 0.05]) + Float hwe = select_first([optional_qc_hwe, 0.000001]) + + Int disk_size = ceil(2*(size(input_vcf, "GiB") + size(input_vcf_index, "GiB"))) + + command <<< + # site missing rate < 5% ; hwe p > 1e-6 + vcftools --gzvcf ~{input_vcf} --max-missing ~{max_missing} --hwe ~{hwe} --recode -c | bgzip -c > ~{output_vcf_basename}.vcf.gz + bcftools index -t ~{output_vcf_basename}.vcf.gz # Note: this is necessary because vcftools doesn't have a way to output a zipped vcf, nor a way to index one (hence needing to use bcf). 
+ >>> + + runtime { + docker: bcftools_vcftools_docker + memory: "16 GiB" + disks: "local-disk " + disk_size + " HDD" + } + output { + File output_vcf = "~{output_vcf_basename}.vcf.gz" + File output_vcf_index = "~{output_vcf_basename}.vcf.gz.tbi" + } +} + +task MergeSingleSampleVcfs { + input { + Array[File] input_vcfs + Array[File] input_vcf_indices + String output_vcf_basename + String bcftools_docker + Int mem + } + + Int disk_size = 3 * ceil(size(input_vcfs, "GiB") + size(input_vcf_indices, "GiB")) + 20 + + command <<< + bcftools merge ~{sep=' ' input_vcfs} -O z -o ~{output_vcf_basename}.vcf.gz + bcftools index -t ~{output_vcf_basename}.vcf.gz + >>> + + runtime { + docker: bcftools_docker + memory: mem + " GiB" + disks: "local-disk " + disk_size + " HDD" + } + output { + File output_vcf = "~{output_vcf_basename}.vcf.gz" + File output_vcf_index = "~{output_vcf_basename}.vcf.gz.tbi" + } +} + +task CountSamples { + input { + File vcf + String bcftools_docker + } + + Int disk_size = 100 + ceil(size(vcf, "GiB")) + + command <<< + bcftools query -l ~{vcf} | wc -l + >>> + + runtime { + docker: bcftools_docker + memory: "3 GiB" + disks: "local-disk " + disk_size + " HDD" + } + + output { + Int nSamples = read_int(stdout()) + } +} + +task AggregateImputationQCMetrics { + input { + File infoFile + Int nSamples + String basename + String rtidyverse_docker + } + + Int disk_size = 100 + ceil(size(infoFile, "GiB")) + + command <<< + Rscript -<< "EOF" + library(dplyr) + library(readr) + library(purrr) + library(ggplot2) + + sites_info <- read_tsv("~{infoFile}") + + nSites <- sites_info %>% nrow() + nSites_with_var <- sites_info %>% filter(MAF >= 0.3/(2*~{nSamples} - 0.7)) %>% nrow() + nSites_high_r2 <- sites_info %>% filter(Rsq>0.3) %>% nrow() + + aggregated_metrics <- tibble(total_sites=nSites, total_sites_with_var=nSites_with_var, total_sites_r2_gt_0.3=nSites_high_r2,) + + write_tsv(aggregated_metrics, "~{basename}_aggregated_imputation_metrics.tsv") + + EOF + >>> + + runtime { + docker: rtidyverse_docker + disks : "local-disk " + disk_size + " HDD" + preemptible : 3 + } + + output { + File aggregated_metrics = "~{basename}_aggregated_imputation_metrics.tsv" + } +} + +task StoreChunksInfo { + input { + Array[String] chroms + Array[Int] starts + Array[Int] ends + Array[Int] vars_in_array + Array[Int] vars_in_panel + Array[Boolean] valids + String basename + String rtidyverse_docker + } + + command <<< + Rscript -<< "EOF" + library(dplyr) + library(readr) + + chunk_info <- tibble(chrom = c("~{sep='", "' chroms}"), start = c("~{sep='", "' starts}"), ends = c("~{sep='", "' ends}"), vars_in_array = c("~{sep='", "' vars_in_array}"), vars_in_panel = c("~{sep='", "' vars_in_panel}"), chunk_was_imputed = as.logical(c("~{sep='", "' valids}"))) + failed_chunks <- chunk_info %>% filter(!chunk_was_imputed) %>% select(-chunk_was_imputed) + n_failed_chunks <- nrow(failed_chunks) + write_tsv(chunk_info, "~{basename}_chunk_info.tsv") + write_tsv(failed_chunks, "~{basename}_failed_chunks.tsv") + write(n_failed_chunks, "n_failed_chunks.txt") + EOF + >>> + + runtime { + docker: rtidyverse_docker + preemptible : 3 + } + + output { + File chunks_info = "~{basename}_chunk_info.tsv" + File failed_chunks = "~{basename}_failed_chunks.tsv" + Int n_failed_chunks = read_int("n_failed_chunks.txt") + } +} + +task MergeImputationQCMetrics { + input { + Array[File] metrics + String basename + String rtidyverse_docker + } + + Int disk_size = 100 + ceil(size(metrics, "GiB")) + + command <<< + Rscript -<< "EOF" + library(dplyr) + 
library(readr) + library(purrr) + library(ggplot2) + + metrics <- list("~{sep='", "' metrics}") %>% map(read_tsv) %>% reduce(`+`) %>% mutate(frac_sites_r2_gt_0.3=total_sites_r2_gt_0.3/total_sites, frac_sites_with_var_r2_gt_0.3=total_sites_r2_gt_0.3/total_sites_with_var) + + write_tsv(metrics, "~{basename}_aggregated_imputation_metrics.tsv") + write(metrics %>% pull(frac_sites_with_var_r2_gt_0.3), "frac_well_imputed.txt") + + EOF + >>> + + runtime { + docker: rtidyverse_docker + disks : "local-disk " + disk_size + " HDD" + preemptible : 3 + } + + output { + File aggregated_metrics = "~{basename}_aggregated_imputation_metrics.tsv" + Float frac_well_imputed = read_float("frac_well_imputed.txt") + } +} + +task SetIDs { + input { + File vcf + String output_basename + String bcftools_docker + } + + Int disk_size = 100 + ceil(2.2 * size(vcf, "GiB")) + + command <<< + bcftools annotate ~{vcf} --set-id '%CHROM\:%POS\:%REF\:%FIRST_ALT' -Ov | \ + awk -v OFS='\t' '{split($3, n, ":"); if ( !($1 ~ /^"#"/) && n[4] < n[3]) $3=n[1]":"n[2]":"n[4]":"n[3]; print $0}' | \ + bgzip -c > ~{output_basename}.vcf.gz + + bcftools index -t ~{output_basename}.vcf.gz + >>> + + runtime { + docker: bcftools_docker + disks: "local-disk " + disk_size + " HDD" + memory: "4 GiB" + } + + output { + File output_vcf = "~{output_basename}.vcf.gz" + File output_vcf_index = "~{output_basename}.vcf.gz.tbi" + } +} + +task ExtractIDs { + input { + File vcf + String output_basename + Int disk_size = 2*ceil(size(vcf, "GiB")) + 100 + String bcftools_docker + } + + command <<< + bcftools query -f "%ID\n" ~{vcf} -o ~{output_basename}.ids.txt + >>> + output { + File ids = "~{output_basename}.ids.txt" + } + runtime { + docker: bcftools_docker + disks: "local-disk " + disk_size + " HDD" + memory: "4 GiB" + } +} + +task SelectVariantsByIds { + input { + File vcf + File ids + String basename + String gatk_docker + } + + Int disk_size = ceil(1.2*size(vcf, "GiB")) + 100 + + parameter_meta { + vcf: { + description: "vcf", + localization_optional: true + } + } + + command <<< + cp ~{ids} sites.list + gatk SelectVariants -V ~{vcf} --exclude-filtered --keep-ids sites.list -O ~{basename}.vcf.gz + >>> + + runtime { + docker: gatk_docker + disks: "local-disk " + disk_size + " SSD" + memory: "16 GiB" + } + + output { + File output_vcf = "~{basename}.vcf.gz" + File output_vcf_index = "~{basename}.vcf.gz.tbi" + } +} + +task RemoveAnnotations { + input { + File vcf + String basename + String bcftools_docker + } + + Int disk_size = ceil(2.2*size(vcf, "GiB")) + 100 + + command <<< + bcftools annotate ~{vcf} -x FORMAT,INFO -Oz -o ~{basename}.vcf.gz + bcftools index -t ~{basename}.vcf.gz + >>> + + runtime { + docker: bcftools_docker + memory: "3 GiB" + disks: "local-disk " + disk_size + " HDD" + } + + output { + File output_vcf = "~{basename}.vcf.gz" + File output_vcf_index = "~{basename}.vcf.gz.tbi" + } +} + +task InterleaveVariants { + input { + Array[File] vcfs + String basename + String gatk_docker + } + + Int disk_size = ceil(3.2*size(vcfs, "GiB")) + 100 + + command <<< + gatk MergeVcfs -I ~{sep=" -I " vcfs} -O ~{basename}.vcf.gz + >>> + + + runtime { + docker: gatk_docker + disks: "local-disk " + disk_size + " SSD" + memory: "16 GiB" + } + + output { + File output_vcf = "~{basename}.vcf.gz" + File output_vcf_index = "~{basename}.vcf.gz.tbi" + } +} + +task FindSitesUniqueToFileTwoOnly { + input { + File file1 + File file2 + String ubuntu_docker + } + + Int disk_size = ceil(size(file1, "GiB") + 2*size(file2, "GiB")) + 100 + + command <<< + comm -13 <(sort 
~{file1} | uniq) <(sort ~{file2} | uniq) > missing_sites.ids
+  >>>
+
+  runtime {
+    docker: ubuntu_docker
+    disks: "local-disk " + disk_size + " HDD"
+    memory: "4 GiB"
+  }
+
+  output {
+    File missing_sites = "missing_sites.ids"
+  }
+}
+
+task SplitMultiSampleVcf {
+  input {
+    File multiSampleVcf
+    Int mem = 8
+    String bcftools_docker
+  }
+
+  Int disk_size = ceil(3*size(multiSampleVcf, "GiB")) + 100
+
+  command <<<
+    mkdir out_dir
+    bcftools +split ~{multiSampleVcf} -Oz -o out_dir
+    for vcf in out_dir/*.vcf.gz; do
+      bcftools index -t $vcf
+    done
+  >>>
+
+  runtime {
+    docker: bcftools_docker
+    disks: "local-disk " + disk_size + " SSD"
+    memory: mem + " GiB"
+  }
+
+  output {
+    Array[File] single_sample_vcfs = glob("out_dir/*.vcf.gz")
+    Array[File] single_sample_vcf_indices = glob("out_dir/*.vcf.gz.tbi")
+  }
+}
+
+task CrosscheckFingerprints {
+  input {
+    Array[File] firstInputs
+    Array[File] secondInputs
+    Array[File] firstInputIndices
+    Array[File] secondInputIndices
+    File haplotypeDatabase
+    String basename
+    Int mem = 8
+    String gatk_docker
+  }
+
+  Int disk_size = ceil(1.2*(size(firstInputs, "GiB") + size(secondInputs, "GiB") + size(haplotypeDatabase, "GiB"))) + 100
+
+  command <<<
+    # add links to ensure correctly located indices
+    array_vcfs=( ~{sep=" " firstInputs} )
+    array_indices=( ~{sep=" " firstInputIndices} )
+    for i in ${!array_vcfs[@]}; do
+      ln -s ${array_indices[i]} $(dirname ${array_vcfs[i]})
+    done
+
+    array_vcfs2=( ~{sep=" " secondInputs} )
+    array_indices2=( ~{sep=" " secondInputIndices} )
+    for i in ${!array_vcfs2[@]}; do
+      ln -s ${array_indices2[i]} $(dirname ${array_vcfs2[i]})
+    done
+
+    gatk CrosscheckFingerprints -I ~{sep=" -I " firstInputs} -SI ~{sep=" -SI " secondInputs} -H ~{haplotypeDatabase} -O ~{basename}.crosscheck
+  >>>
+
+  runtime {
+    docker: gatk_docker
+    disks: "local-disk " + disk_size + " HDD"
+    memory: "16 GiB"
+  }
+
+  output {
+    File crosscheck = "~{basename}.crosscheck"
+  }
+}
\ No newline at end of file
diff --git a/website/docs/Pipelines/Imputation_Pipeline/README.md b/website/docs/Pipelines/Imputation_Pipeline/README.md
new file mode 100644
index 0000000000..fe98ab2aa1
--- /dev/null
+++ b/website/docs/Pipelines/Imputation_Pipeline/README.md
@@ -0,0 +1,129 @@
+---
+sidebar_position: 1
+---
+
+# Imputation Overview
+
+| Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
+| :----: | :---: | :----: | :--------------: |
+| [Imputation_v1.0.0](https://github.com/broadinstitute/warp/releases) | August 2021 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in warp or contact [Kylee Degatano](mailto:kdegatano@broadinstitute.org) |
+
+## Introduction to the Imputation pipeline
+The Imputation pipeline imputes missing genotypes from either a multi-sample VCF or an array of single sample VCFs using a large genomic reference panel. It is based on the Michigan Imputation Server pipeline. Overall, the pipeline filters, phases, and performs imputation on a multi-sample VCF. It outputs the imputed VCF along with key imputation metrics.
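+
+As a quick orientation, a local run with Cromwell might look like the sketch below (the Cromwell jar name and file paths are placeholders; see [Set-up](#set-up) for details):
+
+```bash
+# launch the workflow in Cromwell's single-workflow "run" mode (hypothetical paths)
+java -jar cromwell.jar run Imputation.wdl \
+  --inputs example_inputs.json \
+  --options Imputation.options.json
+```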
+
+![](imputation.png)
+
+## Set-up
+
+### Workflow installation and requirements
+
+The [Imputation workflow](https://github.com/broadinstitute/warp/blob/develop/pipelines/broad/arrays/imputation/Imputation.wdl) is written in the Workflow Description Language (WDL) and can be deployed using a WDL-compatible execution engine like [Cromwell](https://github.com/broadinstitute/cromwell), a GA4GH-compliant, flexible workflow management system that supports multiple computing platforms.
+
+To identify the latest workflow version and release notes, please see the Imputation workflow [changelog](https://github.com/broadinstitute/warp/blob/develop/pipelines/broad/arrays/imputation/Imputation.changelog.md).
+
+The latest release of the workflow, example data, and dependencies are available from the WARP releases page. To discover and search releases, use the WARP command-line tool [Wreleaser](https://github.com/broadinstitute/warp/tree/develop/wreleaser).
+
+### Input descriptions
+The table below describes each of the Imputation pipeline inputs. The workflow requires either a multi-sample VCF or an array of single sample VCFs.
+For examples of how to specify each input in a configuration file, as well as cloud locations for different example input files, see the [example input configuration file (JSON)](https://github.com/broadinstitute/warp/blob/develop/pipelines/broad/arrays/imputation/example_inputs.json).
+
+| Input name | Description | Type |
+| --- | --- | --- |
+| chunkLength | Size of chunks; default set to 25,000,000 bases (25 Mb). | Int |
+| chunkOverlaps | Padding added to the beginning and end of each chunk to reduce edge effects; default set to 5,000,000 bases (5 Mb). | Int |
+| multi_sample_vcf | Merged VCF containing multiple samples; can also use an array of individual VCFs. | File |
+| multi_sample_vcf_index | Index for the merged VCF; can also use an array of index files if using an array of VCFs. | Index |
+| single_sample_vcfs | Array of VCFs, one for each sample; can be used in lieu of a merged VCF containing all samples. | Array of files |
+| single_sample_vcf_indices | Array of indices, one for each sample; can be used in lieu of a merged index for a multi-sample VCF. | Array of index files |
+| perform_extra_qc_steps | Boolean to indicate if additional QC steps should be performed before imputing; when true, sites with call rates below 95% or a low Hardy-Weinberg Equilibrium (HWE) p-value are removed before imputation. Default is set to false. | Boolean |
+| optional_qc_max_missing | Optional float used for the additional QC steps; sets the maximum rate of missing data allowed at a site; default set to 0.05. | Float |
+| optional_qc_hwe | Optional HWE p-value cutoff used for the additional QC steps; default set to 0.000001. | Float |
+| ref_dict | Reference dictionary. | File |
+| referencePanelContigs | Array of structs containing reference panel files; the struct is imported from the [ImputationStructs WDL](https://github.com/broadinstitute/warp/blob/develop/structs/imputation/ImputationStructs.wdl), and each input is specified in the configuration JSON. | Array of structs |
+| genetic_maps_eagle | Genetic map file for phasing. | File |
+| output_callset_name | Output callset name. | String |
+| split_output_to_single_sample | Boolean to split out the final combined VCF to individual sample VCFs; set to false by default. | Boolean |
+| merge_ssvcf_mem_gb | Memory allocation for MergeSingleSampleVcfs (in GiB). | Int |
+| frac_well_imputed_threshold | Threshold for the fraction of well-imputed sites; default set to 0.9. | Float |
+| chunks_fail_threshold | Maximum threshold for the number of chunks allowed to fail; default set to 1. | Int |
+| bcftools_docker_tag | Cloud path to the Docker image containing bcftools software. | String |
+| bcftools_vcftools_docker_tag | Cloud path to the Docker image containing bcftools and vcftools software. | String |
+| gatk_docker_tag | Cloud path to the Docker image containing GATK software for variant selection and manipulation. | String |
+| minimac4_docker_tag | Cloud path to the Docker image containing minimac4 software for imputation. | String |
+| eagle_docker_tag | Cloud path to the Docker image containing Eagle2 software for phasing. | String |
+| ubuntu_docker_tag | Cloud path to the Docker image containing Ubuntu software. | String |
+| rtidyverse_docker_tag | Cloud path to the Docker image containing R tidyverse packages. | String |
+
+### Imputation reference panel
+
+The reference panel files required for the Imputation workflow will soon be hosted in a public Google Bucket. See the [example input configuration](https://github.com/broadinstitute/warp/blob/develop/pipelines/broad/arrays/imputation/example_inputs.json) for the current reference panel files.
+
+## Workflow tasks and tools
+
+The [Imputation workflow](https://github.com/broadinstitute/warp/blob/develop/pipelines/broad/arrays/imputation/Imputation.wdl) imports a series of tasks from the ImputationTasks WDL, which is hosted in the Broad [tasks library](https://github.com/broadinstitute/warp/tree/develop/tasks/broad). The table below describes each workflow task, including the task name, tools, relevant software, and non-default parameters.
+
+| Task name (alias) in WDL | Tool | Software | Description |
+| --- | --- | --- | --- |
+| MergeSingleSampleVcfs | merge | [bcftools](http://samtools.github.io/bcftools/bcftools.html) | If an array of single sample VCFs is provided as pipeline input, the task merges them into a single VCF. |
+| SetIDs (SetIdsVcfToImpute) | annotate | [bcftools](http://samtools.github.io/bcftools/bcftools.html), bash | Adds variant IDs to the combined input VCF to create a new VCF. Sorts the alleles for a given variant ID so that REF:ALT is lexicographically consistent across IDs. |
+| ExtractIDs (ExtractIdsVcfToImpute) | query | [bcftools](http://samtools.github.io/bcftools/bcftools.html) | Extracts the variant IDs from the SetIDs output VCF to a new ".ids" file so that any missing variants can be added back to the final VCF after imputation. |
+| CountSamples | query | [bcftools](http://samtools.github.io/bcftools/bcftools.html) | Uses the merged input VCF file to count the number of samples and output a TXT file containing the count. |
+| CalculateChromosomeLength | grep | bash | Reads chromosome lengths from the reference dictionary and uses these to generate chunk intervals for the GenerateChunk task. |
+| GenerateChunk | SelectVariants | [GATK](https://gatk.broadinstitute.org/hc/en-us) | Performs site filtering by selecting SNPs only and excluding InDels, removing duplicate sites from the VCF, selecting biallelic variants, excluding symbolic/mixed variants, and removing sites with a maximum fraction of samples with no-call genotypes greater than 0.1. Also subsets to only a specified chunk of the genome. |
+### Imputation reference panel
+
+The reference panel files required for the Imputation workflow will soon be hosted in a public Google Cloud bucket. See the [example input configuration](https://github.com/broadinstitute/warp/blob/develop/pipelines/broad/arrays/imputation/example_inputs.json) for the current reference panel files.
+
+## Workflow tasks and tools
+
+The [Imputation workflow](https://github.com/broadinstitute/warp/blob/develop/pipelines/broad/arrays/imputation/Imputation.wdl) imports a series of tasks from the ImputationTasks WDL, which is hosted in the Broad [tasks library](https://github.com/broadinstitute/warp/tree/develop/tasks/broad). The table below describes each workflow task, including the task name, tools, relevant software, and non-default parameters.
+
+| Task name (alias) in WDL | Tool | Software | Description |
+| --- | --- | --- | --- |
+| MergeSingleSampleVcfs | merge | [bcftools](http://samtools.github.io/bcftools/bcftools.html) | If an array of single-sample VCFs is given as pipeline input, merges them into a single VCF. |
+| SetIDs (SetIdsVcfToImpute) | annotate | [bcftools](http://samtools.github.io/bcftools/bcftools.html), bash | Adds variant IDs to the combined input VCF to create a new VCF, sorting the alleles for a given variant ID so that REF:ALT is lexicographically consistent across IDs. |
+| ExtractIDs (ExtractIdsVcfToImpute) | query | [bcftools](http://samtools.github.io/bcftools/bcftools.html) | Extracts the variant IDs from the SetIDs output VCF to a new ".ids" file so that any missing variants can be added back to the final VCF after imputation. |
+| CountSamples | query | [bcftools](http://samtools.github.io/bcftools/bcftools.html) | Uses the merged input VCF file to count the number of samples and outputs a TXT file containing the count. |
+| CalculateChromosomeLength | grep | bash | Reads chromosome lengths from the reference dictionary and uses these to generate chunk intervals for the GenerateChunk task. |
+| GenerateChunk | SelectVariants | [GATK](https://gatk.broadinstitute.org/hc/en-us) | Performs site filtering: selects SNPs only (excluding InDels), removes duplicate sites, selects biallelic variants, excludes symbolic/mixed variants, and removes sites where the fraction of samples with no-call genotypes exceeds 0.1. Also subsets to the specified chunk of the genome. |
+| OptionalQCSites | --- | [vcftools](http://vcftools.sourceforge.net/), [bcftools](http://samtools.github.io/bcftools/bcftools.html) | If perform_extra_qc_steps is true, performs additional QC steps: excludes sites with more than 95% missing data and assesses sites for Hardy-Weinberg Equilibrium, excluding any site with a p-value less than 0.000001. |
+| CountVariantsInChunks | CountVariants | [GATK](https://gatk.broadinstitute.org/hc/en-us) | Counts variants in the filtered VCF file; returns the number of variants in the chunk and the number of those also present in the reference panel. |
+| CheckChunks | convert, index | [bcftools](http://samtools.github.io/bcftools/bcftools.html) | Confirms that each chunk contains at least 3 sites and that at least 50% of the sites in the array are also in the reference panel; if the chunk is valid, creates a new VCF output. |
+| PhaseVariantsEagle | eagle | [Eagle2](https://alkesgroup.broadinstitute.org/Eagle/Eagle_manual.html) | Performs phasing on the filtered, validated VCF using the phased reference panel; allows for REF/ALT swaps. |
+| Minimac4 | minimac4 | [minimac4](https://genome.sph.umich.edu/wiki/Minimac4_Documentation), [bcftools](http://samtools.github.io/bcftools/bcftools.html) | Performs imputation on the prephased VCF; parameterized to include variants that were genotyped but NOT in the reference panel and to specify a minRatio of 0.00001. |
+| AggregateImputationQCMetrics | --- | R | Uses an R script to calculate metrics from the minimac4 output info file, including total sites, total sites with variants, and sites with an [R2 metric](https://genome.sph.umich.edu/wiki/Minimac3_Info_File) greater than 0.3 (total_sites_r2_gt_0.3); adds the metrics to a new TSV output. |
+| UpdateHeader | UpdateVCFSequenceDictionary | [GATK](https://gatk.broadinstitute.org/hc/en-us) | Updates the header of the imputed VCF, adding contig lengths. |
+| SeparateMultiallelics | norm | [bcftools](http://samtools.github.io/bcftools/bcftools.html) | Splits multiallelic sites in the imputed VCF into biallelic records. |
+| RemoveSymbolicAlleles | SelectVariants | [GATK](https://gatk.broadinstitute.org/hc/en-us) | Removes SYMBOLIC alleles from the output VCF of the SeparateMultiallelics task. |
+| SetIds | annotate, index | [bcftools](http://samtools.github.io/bcftools/bcftools.html) | Sorts the alleles in the variant IDs from the RemoveSymbolicAlleles output VCF so that REF:ALT is lexicographically consistent across IDs. |
+| GatherVcfs | GatherVcfs | [GATK](https://gatk.broadinstitute.org/hc/en-us) | Gathers the array of imputed VCFs and merges them into one VCF output. |
+| ExtractIDs | query | [bcftools](http://samtools.github.io/bcftools/bcftools.html) | Extracts the variant IDs from the imputed VCF. |
+| FindSitesUniqueToFileTwoOnly | --- | Ubuntu | Uses the IDs extracted from the imputed VCF and from the original VCF to identify the variant sites present in the original VCF but missing from the imputed VCF; outputs those IDs to a file. |
+| SelectVariantsByIds | SelectVariants | [GATK](https://gatk.broadinstitute.org/hc/en-us) | Selects from the original input VCF any sites that were not included in the imputed VCF. |
+| RemoveAnnotations | annotate | [bcftools](http://samtools.github.io/bcftools/bcftools.html) | Removes the FORMAT and INFO annotations from the missing-variants VCF generated by the SelectVariantsByIds task. |
+| InterleaveVariants | MergeVcfs | [GATK](https://gatk.broadinstitute.org/hc/en-us) | Combines the missing variants from the original VCF and the imputed variants into a new VCF. |
+| MergeImputationQCMetrics | --- | R | Uses an R script to calculate the fraction of well-imputed sites and writes it to a TXT file; the fraction of "well-imputed" sites is based on the minimac-reported R2 metric, with R2 > 0.3 counting as "well-imputed." Since monomorphic sites lead to an R2 value of 0, the fraction of sites with any variation that are well-imputed is reported in addition to the fraction of total sites. |
+| StoreChunksInfo | --- | R | Uses an R script to record, for each imputation chunk, the chunk coordinates, the number of sites in the original array, and the number of those sites that are also in the reference panel. |
+| SplitMultiSampleVcf | split | [bcftools](http://samtools.github.io/bcftools/bcftools.html) | If split_output_to_single_sample is true, splits the InterleaveVariants output VCF into single-sample VCFs. |
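+To make the phasing and imputation steps concrete, the sketch below shows the general shape of the Eagle2 and minimac4 invocations for a single chunk. File names are placeholders and only a subset of the flags is shown; see the ImputationTasks WDL for the exact commands.
+
+```bash
+# Sketch: phase one chunk against the phased reference panel (placeholder paths).
+eagle \
+    --vcfTarget chunk.vcf.gz \
+    --vcfRef reference_panel.bcf \
+    --geneticMapFile genetic_map.txt.gz \
+    --allowRefAltSwap \
+    --vcfOutFormat z \
+    --outPrefix chunk_phased
+
+# Sketch: impute the phased chunk (minRatio as described in the table above).
+minimac4 \
+    --refHaps reference_panel.m3vcf.gz \
+    --haps chunk_phased.vcf.gz \
+    --minRatio 0.00001 \
+    --prefix chunk_imputed
+```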
+## Workflow outputs
+
+The table below summarizes the workflow outputs. If running the workflow on Cromwell, these outputs are found in the task execution directory.
+
+| Output name | Description | Type |
+| --- | --- | --- |
+| imputed_single_sample_vcfs | Array of imputed single-sample VCFs from the SplitMultiSampleVcf task. | Array |
+| imputed_single_sample_vcf_indices | Array of indices for the imputed VCFs from the SplitMultiSampleVcf task. | Array |
+| imputed_multisample_vcf | VCF from the InterleaveVariants task; contains imputed variants as well as missing variants from the input VCF. | VCF |
+| imputed_multisample_vcf_index | Index file for the VCF from the InterleaveVariants task. | Index |
+| aggregated_imputation_metrics | Aggregated QC metrics from the MergeImputationQCMetrics task; reports the fraction of well-imputed sites in a TXT file. The fraction of "well-imputed" sites is based on the minimac-reported R2 metric, with R2 > 0.3 counting as "well-imputed." Since monomorphic sites lead to an R2 value of 0, the fraction of sites with any variation that are well-imputed is reported in addition to the fraction of total sites. | TXT |
+| chunks_info | TSV from the StoreChunksInfo task; contains the chunk intervals as well as the number of variants in the array. | TSV |
+| failed_chunks | Number of failed chunks from the StoreChunksInfo task. | Int |
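+For a quick sanity check of the main callset, standard bcftools commands can be run on imputed_multisample_vcf. This is a sketch with a placeholder file name; the R2 query assumes minimac4-style INFO annotations, which are present on imputed records but were stripped from the re-added missing variants by RemoveAnnotations.
+
+```bash
+# Sketch: summary statistics for the imputed multi-sample VCF (placeholder path).
+bcftools stats imputed_callset.vcf.gz | head
+
+# Per-site imputation quality (R2 is a minimac4-style INFO field; "." where absent).
+bcftools query -f '%CHROM\t%POS\t%ID\t%INFO/R2\n' imputed_callset.vcf.gz | head
+```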
+
+## Important notes
+
+- Runtime parameters are optimized for Broad's Google Cloud Platform implementation.
+
+## Contact us
+
+Help us make our tools better by contacting [Kylee Degatano](mailto:kdegatano@broadinstitute.org) for pipeline-related suggestions or questions.
+
+## Licensing
+
+Copyright Broad Institute, 2020 | BSD-3
+
+The workflow script is released under the **WDL open source code license (BSD-3)** (full license text at https://github.com/broadinstitute/warp/blob/master/LICENSE). However, please note that the programs it calls may be subject to different licenses. Users are responsible for checking that they are authorized to run all programs before running this script.
+
+- [GATK](https://github.com/broadinstitute/gatk/blob/master/LICENSE.TXT)
+- [Picard](https://github.com/broadinstitute/picard/blob/master/LICENSE.txt)
+- [Eagle2](https://alkesgroup.broadinstitute.org/Eagle/#x1-340007)
+- [minimac4](https://github.com/statgen/Minimac4/blob/master/LICENSE)
+- [bcftools](https://github.com/samtools/bcftools/blob/develop/LICENSE)
+- [vcftools](http://vcftools.sourceforge.net/license.html)
+
+
+
diff --git a/website/docs/Pipelines/Imputation_Pipeline/_category_.json b/website/docs/Pipelines/Imputation_Pipeline/_category_.json
new file mode 100644
index 0000000000..8ebd7570bc
--- /dev/null
+++ b/website/docs/Pipelines/Imputation_Pipeline/_category_.json
@@ -0,0 +1,4 @@
+{
+  "label": "Imputation",
+  "position": 5
+}
diff --git a/website/docs/Pipelines/Imputation_Pipeline/imputation.png b/website/docs/Pipelines/Imputation_Pipeline/imputation.png
new file mode 100644
index 0000000000..64adee52bc
Binary files /dev/null and b/website/docs/Pipelines/Imputation_Pipeline/imputation.png differ
diff --git a/website/docs/Pipelines/Optimus_Pipeline/_category_.json b/website/docs/Pipelines/Optimus_Pipeline/_category_.json
index c2ce950657..fffb9c7596 100644
--- a/website/docs/Pipelines/Optimus_Pipeline/_category_.json
+++ b/website/docs/Pipelines/Optimus_Pipeline/_category_.json
@@ -1,4 +1,4 @@
 {
   "label": "Optimus",
-  "position": 5
+  "position": 6
 }
diff --git a/website/docs/Pipelines/Single_Cell_ATAC_Seq_Pipeline/_category_.json b/website/docs/Pipelines/Single_Cell_ATAC_Seq_Pipeline/_category_.json
index 165a911143..d2f1106214 100644
--- a/website/docs/Pipelines/Single_Cell_ATAC_Seq_Pipeline/_category_.json
+++ b/website/docs/Pipelines/Single_Cell_ATAC_Seq_Pipeline/_category_.json
@@ -1,4 +1,4 @@
 {
   "label": "Single Cell ATAC",
-  "position": 6
+  "position": 7
 }
diff --git a/website/docs/Pipelines/Smart-seq2_Multi_Sample_Pipeline/_category_.json b/website/docs/Pipelines/Smart-seq2_Multi_Sample_Pipeline/_category_.json
index 11775122da..247b76b57c 100644
--- a/website/docs/Pipelines/Smart-seq2_Multi_Sample_Pipeline/_category_.json
+++ b/website/docs/Pipelines/Smart-seq2_Multi_Sample_Pipeline/_category_.json
@@ -1,4 +1,4 @@
 {
   "label": "Smart-seq2 Multi-Sample",
-  "position": 7
+  "position": 8
 }
diff --git a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Pipeline/_category_.json b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Pipeline/_category_.json
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/website/docs/Pipelines/Smart-seq2_Single_Sample_Pipeline/_category_.json b/website/docs/Pipelines/Smart-seq2_Single_Sample_Pipeline/_category_.json
index 02dfb83186..5d15b40165 100644
--- a/website/docs/Pipelines/Smart-seq2_Single_Sample_Pipeline/_category_.json
+++ b/website/docs/Pipelines/Smart-seq2_Single_Sample_Pipeline/_category_.json
@@ -1,4 +1,4 @@
 {
   "label": "Smart-seq2 Single Sample",
-  "position": 10
+  "position": 11
 }
diff --git a/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/_category_.json b/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/_category_.json
index 94d04333c4..63b160b7b9 100644
--- a/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/_category_.json
+++ b/website/docs/Pipelines/Whole_Genome_Germline_Single_Sample_Pipeline/_category_.json
@@ -1,4 +1,4 @@
 {
   "label": "Whole Genome Germline Single Sample",
-  "position": 11
+  "position": 12
 }