Skip to content

Commit

Permalink
add dockerfile
Browse files Browse the repository at this point in the history
  • Loading branch information
bestpredicts committed Apr 18, 2023
1 parent 0bba376 commit 54d3b27
Showing 1 changed file with 56 additions and 0 deletions.
56 changes: 56 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel_22-08.html#rel_22-08
FROM nvcr.io/nvidia/pytorch:22.08-py3
LABEL maintainer="BELLE"

ARG DEBIAN_FRONTEND=noninteractive

ARG PYTORCH='2.0.0'
# Example: `cu102`, `cu113`, etc.
ARG CUDA='cu117'

RUN apt -y update
RUN apt install -y libaio-dev
RUN python3 -m pip install --no-cache-dir --upgrade pip
RUN python3 -m pip install -U pip
RUN python3 -m pip config set global.index-url http://mirrors.aliyun.com/pypi/simple
RUN python3 -m pip config set install.trusted-host mirrors.aliyun.com

ARG REF=main
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF

# Install latest release PyTorch
# (PyTorch must be installed before pre-compiling any DeepSpeed c++/cuda ops.)
# (https://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops)
RUN python3 -m pip install --no-cache-dir -U torch==$PYTORCH torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/$CUDA

RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing]

# Uninstall `torch-tensorrt` shipped with the base image
RUN python3 -m pip uninstall -y torch-tensorrt

# recompile apex
RUN python3 -m pip uninstall -y apex
RUN git clone https://github.com/NVIDIA/apex
# `MAX_JOBS=1` disables parallel building to avoid cpu memory OOM when building image on GitHub Action (standard) runners
RUN cd apex && MAX_JOBS=1 python3 -m pip install --global-option="--cpp_ext" --global-option="--cuda_ext" --no-cache -v --disable-pip-version-check .

# Pre-build **latest** DeepSpeed, so it would be ready for testing (otherwise, the 1st deepspeed test will timeout)
RUN python3 -m pip uninstall -y deepspeed
# This has to be run (again) inside the GPU VMs running the tests.
# The installation works here, but some tests fail, if we don't pre-build deepspeed again in the VMs running the tests.
# TODO: Find out why test fail.
RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_UTILS=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1

# When installing in editable mode, `transformers` is not recognized as a package.
# this line must be added in order for python to be aware of transformers.
RUN cd transformers && python3 setup.py develop

# The base image ships with `pydantic==1.8.2` which is not working - i.e. the next command fails
RUN python3 -m pip install -U --no-cache-dir pydantic
RUN python3 -c "from deepspeed.launcher.runner import main"

#customer
RUN python3 -m pip install datasets>=2.8.0
RUN python3 -m pip install sentencepiece>=0.1.97
RUN python3 -m pip install protobuf==3.20.3
RUN python3 -m pip install accelerate>=0.15.0

0 comments on commit 54d3b27

Please sign in to comment.