forked from MeetKai/functionary
Commit 2739d4d (1 parent: f4d7d80)
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing 7 changed files with 100 additions and 16 deletions.
@@ -1 +1,2 @@
 Dockerfile
+Dockerfile.cpu
@@ -0,0 +1,3 @@
+[submodule "vllm"]
+	path = vllm
+	url = https://github.com/vllm-project/vllm.git
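With vllm registered as a submodule, a fresh checkout needs the submodule populated before the Docker build can bind-mount files such as vllm/requirements-build.txt. A minimal sketch, assuming the fork is cloned over HTTPS (the repository URL is a placeholder, not part of the commit):

    # clone the fork together with the vllm submodule
    git clone --recurse-submodules https://github.com/<your-fork>/functionary.git
    # or, in an existing clone, fetch just the vllm submodule
    git submodule update --init vllm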
@@ -0,0 +1,63 @@
+# This vLLM Dockerfile is used to construct image that can build and run vLLM on x86 CPU platform.
+
+FROM ubuntu:22.04 AS cpu-test-1
+
+RUN --mount=type=cache,target=/var/cache/apt \
+    apt-get update -y \
+    && apt-get install -y curl ccache git wget vim numactl gcc-12 g++-12 python3 python3-pip libtcmalloc-minimal4 libnuma-dev \
+    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12
+
+# https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/performance_tuning/tuning_guide.html
+# intel-openmp provides additional performance improvement vs. openmp
+# tcmalloc provides better memory allocation efficiency, e.g, holding memory in caches to speed up access of commonly-used objects.
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install intel-openmp
+
+ENV LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:/usr/local/lib/libiomp5.so"
+
+RUN echo 'ulimit -c 0' >> ~/.bashrc
+
+RUN pip install https://intel-extension-for-pytorch.s3.amazonaws.com/ipex_dev/cpu/intel_extension_for_pytorch-2.4.0%2Bgitfbaa4bc-cp310-cp310-linux_x86_64.whl
+
+ENV PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=bind,src=vllm/requirements-build.txt,target=requirements-build.txt \
+    pip install --upgrade pip && \
+    pip install -r requirements-build.txt
+
+FROM cpu-test-1 AS build
+
+WORKDIR /workspace/vllm
+
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=bind,src=vllm/requirements-common.txt,target=requirements-common.txt \
+    --mount=type=bind,src=vllm/requirements-cpu.txt,target=requirements-cpu.txt \
+    pip install -v -r requirements-cpu.txt
+
+COPY ./vllm ./
+
+# Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
+ARG VLLM_CPU_DISABLE_AVX512
+ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
+
+ENV CCACHE_DIR=/root/.cache/ccache
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=cache,target=/root/.cache/ccache \
+    VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel && \
+    pip install dist/*.whl
+
+WORKDIR /workspace/
+
+RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
+
+WORKDIR /app/
+RUN pip show vllm | grep Version | awk '{print $2}'
+
+COPY requirements.cpu.txt /app/requirements.txt
+
+RUN python3.10 -m pip install -r requirements.txt
+# RUN pip show torch && sleep 60
+COPY . /app
+
+# CMD python3.10 server_vllm.py --model "hivata/functionary-small-v2.1-AWQ" --host 0.0.0.0 --trust-remote-code
+ENTRYPOINT [ "python3.10", "server_vllm.py" ]
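The Dockerfile's own comments point at the two usage knobs: the VLLM_CPU_DISABLE_AVX512 build argument and the commented-out CMD with a model name. A hedged sketch of building and running the image under those assumptions (the Dockerfile.cpu filename, image tag, and host port are illustrative, not confirmed by this page):

    # build from the CPU Dockerfile, optionally disabling AVX512 kernels as the comment above suggests
    docker build -f Dockerfile.cpu --build-arg VLLM_CPU_DISABLE_AVX512="true" -t functionary-cpu .

    # run the server; arguments after the image name are passed to the ENTRYPOINT (python3.10 server_vllm.py)
    docker run -p 8000:8000 functionary-cpu \
        --model "hivata/functionary-small-v2.1-AWQ" --host 0.0.0.0 --trust-remote-code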
@@ -0,0 +1,10 @@
+fastapi
+uvicorn
+pydantic
+scipy
+jsonref
+requests
+PyYAML
+typer
+protobuf
+triton
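Assuming this is the requirements.cpu.txt that the Dockerfile copies to /app/requirements.txt (the filename is inferred from that COPY step, not shown on this page), the same server dependencies can be installed outside the image with plain pip:

    python3.10 -m pip install -r requirements.cpu.txt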