Skip to content

Commit

Permalink
Imputation pipeline (broadinstitute#363)
Browse files Browse the repository at this point in the history
* Add new Imputation pipeline to Warp
  • Loading branch information
kachulis authored Sep 24, 2021
1 parent 950936c commit ebb1823
Show file tree
Hide file tree
Showing 30 changed files with 1,568 additions and 13 deletions.
4 changes: 4 additions & 0 deletions .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,7 @@ workflows:
subclass: WDL
primaryDescriptorPath: /pipelines/broad/arrays/single_sample/Arrays.wdl

- name: Imputation
subclass: WDL
primaryDescriptorPath: /pipelines/broad/arrays/imputation/Imputation.wdl

119 changes: 119 additions & 0 deletions dockers/broad/imputation/build_push_docker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#!/usr/bin/env bash
# Command-line parsing for build_push_docker.sh.
#
# Sets the following globals from the options given on the command line;
# anything not supplied is left unset so later code can apply defaults:
#   DOCKER_DIR  directory containing the Dockerfile   (-d / --directory)
#   IMG_TAG     tag for the image                     (-i / --image-version-tag)
#   UBUNTU      ubuntu base image version             (-u / --ubuntu-version)
#   DRY         "true" when --dry-run / -r is given
#   PUSH        "false" when --no-push / -p is given
#   NOCACHE     "--no-cache" when --no-cache / -c is given
set -e

# Print the usage text shown for -h / --help.
print_usage() {
  echo "Build and push a docker image from specified Dockerfile."
  echo "Call from anywhere in the palantir-workflows repo. Supply the following options:"
  echo "Usage: ./build_push_docker.sh [OPTIONS]"
  echo "Options:"
  echo " --directory/-d (required) directory containing the Dockerfile (eg. imputation_eagle_docker)"
  # The option name printed here must match the one accepted by parse_args.
  echo " --image-version-tag/-i (required) tag for this image (suggest using the image version)"
  echo " --ubuntu-version/-u (optional) version of ubuntu base image (defaults to 20.04)"
  echo " --no-push/-p (optional) build but do not push"
  echo " --dry-run/-r (optional) dry run (no build or push; can use to inspect variables)"
  echo " --no-cache/-c (optional) build docker image with no cache"
}

# Abort with a message when the option in $1 is not followed by a value.
# Without this check the second `shift` fails and `set -e` ends the script
# without any explanation.
require_value() {
  if (( $# < 2 )); then
    echo "Option $1 requires a value. Use --help or -h flags for usage information." >&2
    exit 1
  fi
}

# Parse the script's options into the globals listed at the top of this section.
# Arguments: the script's positional parameters ("$@").
# Exits 0 after printing help; exits 1 on an unknown option or missing value.
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      -h|--help)
        print_usage
        # Asking for help is not an error.
        exit 0
        ;;
      -d|--directory)
        require_value "$@"
        DOCKER_DIR=$2
        shift 2
        ;;
      # --image-tag and --image-version are accepted as aliases because the
      # script's own messages used to advertise those spellings.
      -i|--image-version-tag|--image-tag|--image-version)
        require_value "$@"
        IMG_TAG=$2
        shift 2
        ;;
      -u|--ubuntu-version)
        require_value "$@"
        UBUNTU=$2
        shift 2
        ;;
      -r|--dry-run)
        DRY="true"
        shift
        ;;
      -p|--no-push)
        PUSH="false"
        shift
        ;;
      -c|--no-cache)
        NOCACHE="--no-cache"
        shift
        ;;
      *)
        echo "Invalid argument. Use --help or -h flags for usage information." >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"


# The Dockerfile directory is mandatory: it selects what gets built.
if [[ -z "${DOCKER_DIR}" ]]; then
  echo "No docker path specified. Please specify a directory containing a dockerfile with -d or --directory."
  exit 1
fi

# check for missing arguments, fill defaults
if [[ -z "${UBUNTU}" ]]; then
  # Assign the default before reporting it; echoing first printed an empty
  # version ("Using  as default.").
  UBUNTU=20.04
  echo "No ubuntu version specified. Using ${UBUNTU} as default."
fi
if [[ -z "${IMG_TAG}" ]]; then
  echo "No image version specified. Please specify an image version with -i or --image-version-tag."
  exit 1
fi
# Flags that were not given on the command line fall back to
# "really run" (DRY=false) and "push after building" (PUSH=true).
DRY=${DRY:-false}
PUSH=${PUSH:-true}


# The image is named after the last path component of the requested directory.
# Quoted so that a path containing spaces is not split into several arguments.
docker_dir=$(basename -- "${DOCKER_DIR}")
# The directory is looked up by name below the parent of the current working
# directory, so the result depends on where the script is invoked from.
docker_path=$(find .. -type d -name "${docker_dir}")
image_name=us.gcr.io/broad-dsde-methods/${docker_dir}:${IMG_TAG}

# Describe what will really happen so the prompt is accurate under --no-push.
if [[ "${PUSH}" == "false" ]]; then
  planned_action="build (but not push)"
else
  planned_action="build and push"
fi

# Ask for confirmation before doing any real work; a dry run skips the prompt.
while true; do
  if [[ "${DRY}" == "true" ]]; then
    break
  fi
  echo "This script will ${planned_action} ${image_name}. Do you want to proceed? (y/[n])"
  # -r keeps backslashes literal; end-of-input counts as "no".
  read -r yn || yn=n
  # An empty answer takes the default, which is "no".
  yn=${yn:-n}
  case "${yn}" in
    [Yy]*) break ;;
    [Nn]*) exit 1 ;;
    *) echo "Please answer yes or no." ;;
  esac
done

#######################################
# Show a command line and run it unless this is a dry run.
# Globals:   DRY (read) - the command is evaluated only when this is "false"
# Arguments: the command line to display and evaluate
# Outputs:   "COMMAND: <command line>" on stdout, then the command's own output
# Returns:   0 when the command is not run, otherwise the command's status
#######################################
execute() {
  # The command line is always shown, dry run or not.
  echo "COMMAND: $*"
  # Anything other than DRY=false means "show only".
  [[ ${DRY} == "false" ]] || return 0
  eval "$@"
}

# Check the docker path: it must exist and contain a Dockerfile somewhere
# below it. An empty path (directory lookup found nothing) is rejected before
# it reaches find.
echo "Directory: ${docker_path}"
if [[ -z "${docker_path}" ]] || ! [[ $(find "${docker_path}" -name Dockerfile) ]]; then
  echo "No Dockerfile found in this directory."
  exit 1
fi

# Fully qualified image name: registry/project/<directory name>:<tag>.
image_name=us.gcr.io/broad-dsde-methods/${docker_dir}:${IMG_TAG}

echo "Ubuntu version: ${UBUNTU}"
echo "Image version tag: ${IMG_TAG}"

# NOCACHE is either empty or "--no-cache". The command is passed to execute as
# one string, which prints it and evaluates it only when DRY is "false".
build_opts="-t ${image_name} --build-arg UBUNTU_VERSION=${UBUNTU} ${NOCACHE}"
execute "docker build ${docker_path} ${build_opts}"

# Honor --no-push: previously the push ran unconditionally, so the flag had
# no effect. Any value other than "false" keeps the push.
if [[ "${PUSH}" == "false" ]]; then
  echo "Skipping docker push (--no-push was given)."
else
  execute "docker push ${image_name}"
fi
40 changes: 40 additions & 0 deletions dockers/broad/imputation/imputation_bcftools_vcftools/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Base image; the Ubuntu release can be overridden at build time (default 20.04).
ARG UBUNTU_VERSION=20.04
FROM ubuntu:${UBUNTU_VERSION}

# Keeps package installation (pkg-config in particular) from
# stopping to wait for user input.
ARG DEBIAN_FRONTEND=noninteractive

# System packages: download and build tooling, compression development
# libraries, tabix, and Python 3 with pip. The apt lists are removed in the
# same layer.
RUN apt-get update \
 && apt-get install -y \
      wget \
      libgomp1 \
      build-essential \
      libz-dev \
      libbz2-dev \
      liblzma-dev \
      pkg-config \
      tabix \
      python3 \
      python3-pip \
 && rm -rf /var/lib/apt/lists/*

# bcftools 1.10.2, built and installed from the release tarball.
RUN wget https://github.com/samtools/bcftools/releases/download/1.10.2/bcftools-1.10.2.tar.bz2 \
 && tar -xf bcftools-1.10.2.tar.bz2 \
 && (cd bcftools-1.10.2 && ./configure && make && make install) \
 && rm -r bcftools-1.10.2.tar.bz2

# vcftools 0.1.16, built and installed from the release tarball.
RUN wget https://github.com/vcftools/vcftools/releases/download/v0.1.16/vcftools-0.1.16.tar.gz \
 && tar -xf vcftools-0.1.16.tar.gz \
 && (cd vcftools-0.1.16 && ./configure && make && make install) \
 && rm -r vcftools-0.1.16.tar.gz

# Python dependency installed with pip.
RUN pip3 install docopt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env bash
# Build docker image from Dockerfile in this directory
# This is to help keep track of versions, etc.
# Wraps around build_push_docker.sh, which lives one directory above this script.

# NOTE(review): build_push_docker.sh searches for a directory with exactly
# this name, and the image is named after it. The Dockerfile added alongside
# this script appears to live in a directory called
# "imputation_bcftools_vcftools" (no "_docker" suffix) - confirm the two agree.
dockerfile_directory=imputation_bcftools_vcftools_docker
image_version=v1.0.0 # as of Jan 25 2021
ubuntu_version=20.04 # as of Jan 25 2021

# Run from this script's own directory so the relative path to the build
# script resolves. The subshell leaves the caller's working directory alone
# and lets the build script's exit status become this script's exit status
# (previously a trailing `cd` reset it to 0 even when the build failed).
(
  cd "$(dirname "$0")" || exit
  # Append --dry-run to the command below to inspect the variables without
  # building or pushing.
  ../build_push_docker.sh \
    --directory "${dockerfile_directory}" \
    --ubuntu-version "${ubuntu_version}" \
    --image-version-tag "${image_version}"
)
31 changes: 31 additions & 0 deletions dockers/broad/imputation/imputation_eagle_docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# default ubuntu version: 20.04
ARG UBUNTU_VERSION=20.04
FROM ubuntu:${UBUNTU_VERSION}

# the following argument is needed for pkg-config to
# install without hanging on user input
ARG DEBIAN_FRONTEND=noninteractive

RUN apt-get update \
 && apt-get install -y wget \
 && apt-get install -y libgomp1 \
 && apt-get install -y build-essential \
 && apt-get install -y libz-dev \
 && apt-get install -y libbz2-dev \
 && apt-get install -y liblzma-dev \
 && apt-get install -y pkg-config \
 && apt-get install -y tabix \
 && apt-get install -y python3 \
 && apt-get install -y python3-pip \
 && rm -rf /var/lib/apt/lists/*

# Should be same eagle version as Michigan imputation server
ARG EAGLE_VERSION=2.4
# RUN wget https://github.com/genepi/imputationserver/raw/master/files/bin/eagle

# Download, unpack and install the eagle binary in a single layer.
# EAGLE_VERSION is used for every file name (not just the URL), so overriding
# the build argument no longer breaks the unpack steps, and the tarball is
# deleted in the layer that downloaded it instead of lingering in an earlier one.
# NOTE(review): the URL points at the "downloads/old" folder - confirm that any
# other version you pass is published there.
RUN wget https://storage.googleapis.com/broad-alkesgroup-public/Eagle/downloads/old/Eagle_v${EAGLE_VERSION}.tar.gz \
 && tar xf Eagle_v${EAGLE_VERSION}.tar.gz \
 && mv Eagle_v${EAGLE_VERSION}/eagle . \
 && rm -r Eagle_v${EAGLE_VERSION}.tar.gz \
 && chmod +x eagle

# hg19 genetic map (including chromosome X) downloaded next to the binary.
RUN wget https://data.broadinstitute.org/alkesgroup/Eagle/downloads/tables/genetic_map_hg19_withX.txt.gz
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env bash
# Build docker image from Dockerfile in this directory
# This is to help keep track of versions, etc.
# Wraps around build_push_docker.sh, which lives one directory above this script.

dockerfile_directory=imputation_eagle_docker
image_version=v1.0.0 # as of Jan 25 2021
ubuntu_version=20.04 # as of Jan 25 2021

# Run from this script's own directory so the relative path to the build
# script resolves. The subshell leaves the caller's working directory alone
# and lets the build script's exit status become this script's exit status
# (previously a trailing `cd` reset it to 0 even when the build failed).
(
  cd "$(dirname "$0")" || exit
  # Append --dry-run to the command below to inspect the variables without
  # building or pushing.
  ../build_push_docker.sh \
    --directory "${dockerfile_directory}" \
    --ubuntu-version "${ubuntu_version}" \
    --image-version-tag "${image_version}"
)
39 changes: 39 additions & 0 deletions dockers/broad/imputation/imputation_minimac_docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Base image; the Ubuntu release can be overridden at build time (default 20.04).
ARG UBUNTU_VERSION=20.04
FROM ubuntu:${UBUNTU_VERSION}

# Keeps package installation (pkg-config in particular) from
# stopping to wait for user input.
ARG DEBIAN_FRONTEND=noninteractive

# System packages: download and build tooling (including cmake), compression
# development libraries, tabix, and Python 3 with pip. The apt lists are
# removed in the same layer.
RUN apt-get update \
 && apt-get install -y \
      wget \
      libgomp1 \
      build-essential \
      libz-dev \
      libbz2-dev \
      liblzma-dev \
      pkg-config \
      tabix \
      python3 \
      python3-pip \
      software-properties-common \
      cmake \
 && rm -rf /var/lib/apt/lists/*

# cget is installed before the Minimac4 install script is run.
RUN pip3 install cget

# Minimac4 1.0.2: run the project's install script, then copy the resulting
# binary up into the directory the tarball was unpacked in.
RUN wget https://github.com/statgen/Minimac4/archive/v1.0.2.tar.gz \
 && tar -xf v1.0.2.tar.gz \
 && (cd Minimac4-1.0.2 && bash install.sh && cp release-build/minimac4 ..) \
 && rm -r v1.0.2.tar.gz

# bcftools 1.10.2, built and installed from the release tarball.
RUN wget https://github.com/samtools/bcftools/releases/download/1.10.2/bcftools-1.10.2.tar.bz2 \
 && tar -xf bcftools-1.10.2.tar.bz2 \
 && (cd bcftools-1.10.2 && ./configure && make && make install) \
 && rm -r bcftools-1.10.2.tar.bz2
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env bash
# Build docker image from Dockerfile in this directory
# This is to help keep track of versions, etc.
# Wraps around build_push_docker.sh, which lives one directory above this script.

dockerfile_directory=imputation_minimac_docker
image_version=v1.0.0 # as of Jan 25 2021
ubuntu_version=20.04 # as of Jan 25 2021

# Run from this script's own directory so the relative path to the build
# script resolves. The subshell leaves the caller's working directory alone
# and lets the build script's exit status become this script's exit status
# (previously a trailing `cd` reset it to 0 even when the build failed).
(
  cd "$(dirname "$0")" || exit
  # Append --dry-run to the command below to inspect the variables without
  # building or pushing.
  ../build_push_docker.sh \
    --directory "${dockerfile_directory}" \
    --ubuntu-version "${ubuntu_version}" \
    --image-version-tag "${image_version}"
)
8 changes: 8 additions & 0 deletions pipelines/broad/arrays/imputation/Imputation.changelog.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# 1.0.0

2021-09-09 (Date of Last Commit)

* Initial public release of the Imputation pipeline. Read more in the [Imputation pipeline overview](https://broadinstitute.github.io/warp/docs/Pipelines/Imputation_Pipeline/README).


* The Imputation pipeline imputes missing genotypes from either a multi-sample VCF or an array of single-sample VCFs using a large genomic reference panel. It is based on the Michigan Imputation Server pipeline. Overall, the pipeline filters, phases, and performs imputation on a multi-sample VCF. It outputs the imputed VCF along with key imputation metrics.
5 changes: 5 additions & 0 deletions pipelines/broad/arrays/imputation/Imputation.options.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"read_from_cache": true,
"write_to_cache": true,
"monitoring_script": "gs://broad-gotc-test-storage/cromwell_monitoring_script.sh"
}
Loading

0 comments on commit ebb1823

Please sign in to comment.