forked from LianjiaTech/BELLE
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
0bba376
commit 54d3b27
Showing
1 changed file
with
56 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel_22-08.html#rel_22-08
FROM nvcr.io/nvidia/pytorch:22.08-py3
LABEL maintainer="BELLE"

# Build-time only (ARG, not ENV): suppress interactive apt prompts without
# leaking DEBIAN_FRONTEND into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

ARG PYTORCH='2.0.0'
# Example: `cu102`, `cu113`, etc.
ARG CUDA='cu117'

# libaio-dev is needed to compile DeepSpeed's async-I/O ops.
# update + install in ONE layer (avoids the stale apt-cache bug), skip
# recommended packages, and drop the package lists to keep the layer small.
RUN apt-get update && apt-get install -y --no-install-recommends \
      libaio-dev \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip once (the original ran two redundant self-upgrades) and point it
# at the Aliyun PyPI mirror. Use HTTPS — the original used plain http://,
# which both weakens supply-chain integrity and is why trusted-host was needed.
RUN python3 -m pip install --no-cache-dir --upgrade pip \
    && python3 -m pip config set global.index-url https://mirrors.aliyun.com/pypi/simple \
    && python3 -m pip config set install.trusted-host mirrors.aliyun.com

# Check out the transformers source at the requested ref (default: main).
ARG REF=main
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF

# Install latest release PyTorch
# (PyTorch must be installed before pre-compiling any DeepSpeed c++/cuda ops.)
# (https://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops)
RUN python3 -m pip install --no-cache-dir -U torch==$PYTORCH torchvision torchaudio \
      --extra-index-url https://download.pytorch.org/whl/$CUDA

# transformers plus the extras required for the DeepSpeed test suite.
RUN python3 -m pip install --no-cache-dir ./transformers[deepspeed-testing]

# Uninstall `torch-tensorrt` shipped with the base image: it was built against
# the base image's torch, not the one installed above.
RUN python3 -m pip uninstall -y torch-tensorrt

# Recompile apex against the freshly installed torch (the base-image build is
# linked to the old torch ABI).
RUN python3 -m pip uninstall -y apex
RUN git clone https://github.com/NVIDIA/apex
# `MAX_JOBS=1` disables parallel building to avoid cpu memory OOM when
# building the image on GitHub Action (standard) runners.
RUN cd apex && MAX_JOBS=1 python3 -m pip install \
      --global-option="--cpp_ext" --global-option="--cuda_ext" \
      --no-cache -v --disable-pip-version-check .

# Pre-build **latest** DeepSpeed so it is ready for testing (otherwise the
# first deepspeed test will time out compiling the ops).
RUN python3 -m pip uninstall -y deepspeed
# This has to be run (again) inside the GPU VMs running the tests.
# The installation works here, but some tests fail if we don't pre-build
# deepspeed again in the VMs running the tests.
# TODO: Find out why the tests fail.
RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_UTILS=1 \
      python3 -m pip install deepspeed \
      --global-option="build_ext" --global-option="-j8" \
      --no-cache -v --disable-pip-version-check 2>&1

# When installing in editable mode, `transformers` is not recognized as a
# package; this step makes python aware of the transformers checkout.
RUN cd transformers && python3 setup.py develop

# The base image ships with `pydantic==1.8.2`, which breaks the deepspeed
# launcher import below — upgrade it, then smoke-test the import.
RUN python3 -m pip install -U --no-cache-dir pydantic
RUN python3 -c "from deepspeed.launcher.runner import main"

# Project-specific (BELLE) runtime dependencies.
# NOTE: specifiers MUST be quoted — unquoted `datasets>=2.8.0` makes the shell
# treat `>` as output redirection, so the original installed an unconstrained
# `datasets` and wrote a stray file named `=2.8.0`.
RUN python3 -m pip install --no-cache-dir \
      "accelerate>=0.15.0" \
      "datasets>=2.8.0" \
      "protobuf==3.20.3" \
      "sentencepiece>=0.1.97"