-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
384 changed files
with
63,224 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
commit e50b714436f4c1512416dea693d4dd85a209ecda (HEAD -> main, origin/main, origin/HEAD) | ||
Author: YaoJiayi <[email protected]> | ||
Date: Tue May 28 11:20:34 2024 -0500 | ||
|
||
test commit | ||
|
||
commit 67883dd49e43a7bbdd427aba40acf409cbfa1f22 | ||
Author: Jiayi Yao <[email protected]> | ||
Date: Sun May 26 00:12:54 2024 -0500 | ||
|
||
Update README.md | ||
|
||
commit 99d43ec3c98766f149201b8e74a2604db072c9d5 | ||
Author: Jiayi Yao <[email protected]> | ||
Date: Sun May 26 00:12:28 2024 -0500 | ||
|
||
Update README.md | ||
|
||
commit 4bda3004445b16bbd12cbfac9ab8c577d1ca90b8 | ||
Author: Jiayi Yao <[email protected]> | ||
Date: Sun May 26 00:08:53 2024 -0500 | ||
|
||
Initial commit |
Empty file.
Empty file.
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
# This script is run by buildkite to run the benchmarks and upload the results to buildkite
#
# Steps:
#   1. run the latency and throughput benchmarks
#   2. start the OpenAI-compatible API server and run the serving benchmark
#   3. write a markdown summary and attach it to the build as an annotation
#   4. exit with the first non-zero benchmark exit code, if any; otherwise
#      upload the openai-*.json files as build artifacts

set -ex
set -o pipefail

# cd into parent directory of this file
cd "$(dirname "${BASH_SOURCE[0]}")/.."

# install wget and curl only if at least one of them is missing
if ! command -v wget >/dev/null || ! command -v curl >/dev/null; then
  apt-get update
  apt-get install -y wget curl
fi

# PID of the background API server; empty while no server is running
server_pid=""

# Stop the background API server if one is running. Registered as an EXIT trap
# so the server does not outlive the script when a later step fails.
stop_server() {
  if [[ -n "${server_pid}" ]]; then
    # the server may already be gone; that must not abort the script
    kill "${server_pid}" 2>/dev/null || true
    server_pid=""
  fi
}
trap stop_server EXIT

# run python-based benchmarks and upload the result to buildkite
# Under `set -e` with pipefail a failing pipeline would abort the script before
# `$?` could be read, so each status is captured with `||` instead. This lets
# the remaining benchmarks run and the results be reported even on failure.
bench_latency_exit_code=0
python3 benchmarks/benchmark_latency.py 2>&1 | tee benchmark_latency.txt \
  || bench_latency_exit_code=$?

bench_throughput_exit_code=0
python3 benchmarks/benchmark_throughput.py --input-len 256 --output-len 256 2>&1 \
  | tee benchmark_throughput.txt \
  || bench_throughput_exit_code=$?

# run server-based benchmarks and upload the result to buildkite
python3 -m vllm.entrypoints.openai.api_server --model meta-llama/Llama-2-7b-chat-hf &
server_pid=$!
wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json

# wait for server to start, timeout after 600 seconds
timeout 600 bash -c 'until curl localhost:8000/v1/models; do sleep 1; done' || exit 1

bench_serving_exit_code=0
python3 benchmarks/benchmark_serving.py \
  --backend openai \
  --dataset ./ShareGPT_V3_unfiltered_cleaned_split.json \
  --model meta-llama/Llama-2-7b-chat-hf \
  --num-prompts 20 \
  --endpoint /v1/completions \
  --tokenizer meta-llama/Llama-2-7b-chat-hf \
  --save-result \
  2>&1 | tee benchmark_serving.txt \
  || bench_serving_exit_code=$?
stop_server

# write the results into a markdown file
echo "### Latency Benchmarks" >> benchmark_results.md
sed -n '1p' benchmark_latency.txt >> benchmark_results.md # first line
echo "" >> benchmark_results.md
sed -n '$p' benchmark_latency.txt >> benchmark_results.md # last line

echo "### Throughput Benchmarks" >> benchmark_results.md
sed -n '1p' benchmark_throughput.txt >> benchmark_results.md # first line
echo "" >> benchmark_results.md
sed -n '$p' benchmark_throughput.txt >> benchmark_results.md # last line

echo "### Serving Benchmarks" >> benchmark_results.md
sed -n '1p' benchmark_serving.txt >> benchmark_results.md # first line
echo "" >> benchmark_results.md
tail -n 13 benchmark_serving.txt >> benchmark_results.md # last 13 lines

# upload the results to buildkite
/workspace/buildkite-agent annotate --style "info" --context "benchmark-results" < benchmark_results.md

# exit with the exit code of the benchmarks
if [[ "${bench_latency_exit_code}" -ne 0 ]]; then
  exit "${bench_latency_exit_code}"
fi

if [[ "${bench_throughput_exit_code}" -ne 0 ]]; then
  exit "${bench_throughput_exit_code}"
fi

if [[ "${bench_serving_exit_code}" -ne 0 ]]; then
  exit "${bench_serving_exit_code}"
fi

# the glob is left unquoted, as in the original invocation
/workspace/buildkite-agent artifact upload openai-*.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
# In this file, you can add more tests to run either by adding a new step or
# adding a new command to an existing step. See different options here for examples.
# This script will be fed into Jinja template in `test-template.j2` to generate
# the final pipeline yaml file.
#
# Keys used by the steps below:
#   label       - display name of the step
#   command     - a single command line to run
#   commands    - a list of command lines (used instead of `command`)
#   working_dir - directory the command runs in (optional)
#   num_gpus    - number of GPUs requested for the step (optional)
#   no_gpu      - set to True for steps that need no GPU (optional)
#   soft_fail   - set to true on steps that are allowed to fail (optional)
# NOTE(review): defaults and exact handling of each key are defined by the
# template, not by this file - confirm there when adding a new key.

steps:
- label: Regression Test
  command: pytest -v -s test_regression.py
  working_dir: "/vllm-workspace/tests" # optional

- label: AsyncEngine Test
  command: pytest -v -s async_engine

- label: Basic Correctness Test
  command: pytest -v -s --forked basic_correctness

- label: Core Test
  command: pytest -v -s core

- label: Distributed Comm Ops Test
  command: pytest -v -s --forked test_comm_ops.py
  working_dir: "/vllm-workspace/tests/distributed"
  num_gpus: 2 # only support 1 or 2 for now.

- label: Distributed Correctness Test
  command: pytest -v -s --forked test_basic_distributed_correctness.py
  working_dir: "/vllm-workspace/tests/distributed"
  num_gpus: 2 # only support 1 or 2 for now.

- label: Engine Test
  command: pytest -v -s engine test_sequence.py

- label: Entrypoints Test
  command: pytest -v -s entrypoints

- label: Kernels Test
  command: pytest -v -s kernels
  soft_fail: true

- label: Models Test
  commands:
    - pytest -v -s models --forked
  soft_fail: true

- label: Prefix Caching Test
  commands:
    - pytest -v -s prefix_caching

- label: Samplers Test
  command: pytest -v -s samplers --forked

- label: Worker Test
  command: pytest -v -s worker

- label: Speculative decoding tests
  command: pytest -v -s spec_decode

- label: LoRA Test
  command: pytest -v -s lora --forked

- label: Metrics Test
  command: pytest -v -s metrics

- label: Benchmarks
  working_dir: "/vllm-workspace/.buildkite"
  commands:
    - pip install aiohttp
    - bash run-benchmarks.sh

- label: Documentation Build
  working_dir: "/vllm-workspace/docs"
  no_gpu: True
  commands:
    - pip install -r requirements-docs.txt
    # the escaped quotes survive being embedded in the double-quoted command
    # string that the template generates
    - SPHINXOPTS=\"-W\" make html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
{# Pipeline template: builds and pushes the test image once, then emits one #}
{# Kubernetes-backed step per entry of the `steps` list passed to the template. #}
{# Per-step inputs read here: label, command or commands, working_dir, #}
{# num_gpus, no_gpu, soft_fail. #}
{% set docker_image = "us-central1-docker.pkg.dev/vllm-405802/vllm-ci-test-repo/vllm-test:$BUILDKITE_COMMIT" %}
{% set default_num_gpu = 1 %}
{% set default_working_dir = "/vllm-workspace/tests" %}

steps:
  - label: ":docker: build image"
    commands:
      - "docker build --build-arg max_jobs=16 --tag {{ docker_image }} --target test --progress plain ."
      - "docker push {{ docker_image }}"
    env:
      DOCKER_BUILDKIT: "1"
    retry:
      automatic:
        - exit_status: -1  # Agent was lost
          limit: 5
  - wait

  {% for step in steps %}
  - label: "{{ step.label }}"
    agents:
      queue: kubernetes
    soft_fail: {{ step.soft_fail or false }}
    retry:
      automatic:
        - exit_status: -1  # Agent was lost
          limit: 5
    plugins:
      - kubernetes:
          podSpec:
            volumes:
              {# memory-backed volume, mounted at /dev/shm in the container below #}
              - name: dshm
                emptyDir:
                  medium: Memory
            containers:
              - image: "{{ docker_image }}"
                command: ["bash"]
                args:
                  - '-c'
                  {# a single `command` wins; otherwise the `commands` list is joined with && #}
                  - "'cd {{ (step.working_dir or default_working_dir) | safe }} && {{ step.command or (step.commands | join(' && ')) | safe }}'"
                {% if not step.no_gpu %}
                resources:
                  requests:
                    nvidia.com/gpu: "{{ step.num_gpus or default_num_gpu }}"
                  limits:
                    nvidia.com/gpu: "{{ step.num_gpus or default_num_gpu }}"
                {% endif %}
                env:
                  - name: HF_TOKEN
                    valueFrom:
                      secretKeyRef:
                        name: hf-token-secret
                        key: token
                volumeMounts:
                  - mountPath: /dev/shm
                    name: dshm
  {% endfor %}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
vllm/*.so |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
# This workflow will upload a Python Package to Release asset
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions
#
# Two jobs: `release` creates the release for the pushed tag and exposes its
# upload URL; `wheel` builds one wheel per matrix entry and attaches it to
# that release.

name: Create Release

on:
  push:
    tags:
      - v*

# Needed to create release and upload assets
permissions:
  contents: write

jobs:
  release:
    # Retrieve tag and create release
    name: Create Release
    runs-on: ubuntu-latest
    outputs:
      upload_url: ${{ steps.create_release.outputs.upload_url }}
    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Extract branch info
        shell: bash
        run: |
          echo "release_tag=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV

      - name: Create Release
        id: create_release
        uses: "actions/github-script@v6"
        env:
          RELEASE_TAG: ${{ env.release_tag }}
        with:
          github-token: "${{ secrets.GITHUB_TOKEN }}"
          script: |
            const script = require('.github/workflows/scripts/create_release.js')
            await script(github, context, core)

  wheel:
    name: Build Wheel
    runs-on: ${{ matrix.os }}
    needs: release

    strategy:
      fail-fast: false
      matrix:
        os: ['ubuntu-20.04']
        python-version: ['3.8', '3.9', '3.10', '3.11']
        pytorch-version: ['2.1.2']  # Must be the most recent version that meets requirements.txt.
        cuda-version: ['11.8', '12.1']

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Set up Linux Env
        if: ${{ runner.os == 'Linux' }}
        run: |
          bash -x .github/workflows/scripts/env.sh

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install CUDA ${{ matrix.cuda-version }}
        run: |
          bash -x .github/workflows/scripts/cuda-install.sh ${{ matrix.cuda-version }} ${{ matrix.os }}

      - name: Install PyTorch ${{ matrix.pytorch-version }} with CUDA ${{ matrix.cuda-version }}
        run: |
          bash -x .github/workflows/scripts/pytorch-install.sh ${{ matrix.python-version }} ${{ matrix.pytorch-version }} ${{ matrix.cuda-version }}

      - name: Build wheel
        shell: bash
        run: |
          bash -x .github/workflows/scripts/build.sh ${{ matrix.python-version }} ${{ matrix.cuda-version }}
          wheel_name=$(ls dist/*whl | xargs -n 1 basename)
          asset_name=${wheel_name//"linux"/"manylinux1"}
          echo "wheel_name=${wheel_name}" >> $GITHUB_ENV
          echo "asset_name=${asset_name}" >> $GITHUB_ENV

      # The asset is uploaded under asset_name, i.e. the wheel file name with
      # "linux" replaced by "manylinux1" (computed in the previous step).
      - name: Upload Release Asset
        uses: actions/upload-release-asset@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ needs.release.outputs.upload_url }}
          asset_path: ./dist/${{ env.wheel_name }}
          asset_name: ${{ env.asset_name }}
          asset_content_type: application/*

      # (Danielkinz): This last step will publish the .whl to pypi. Warning: untested
      # - name: Publish package
      #   uses: pypa/gh-action-pypi-publish@release/v1.8
      #   with:
      #     repository-url: https://test.pypi.org/legacy/
      #     password: ${{ secrets.PYPI_API_TOKEN }}
      #     skip-existing: true
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# Lint workflow: runs ruff over the `vllm` and `tests` directories and a
# codespell spelling check configured from pyproject.toml.
name: ruff

on:
  # Trigger the workflow on push or pull request,
  # but only for the main branch
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  ruff:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.10"]
    steps:
      # Action versions match the ones the release workflow already uses.
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install ruff==0.1.5 codespell==2.2.6 tomli==2.0.1
      - name: Analysing the code with ruff
        run: |
          ruff vllm tests
      - name: Spelling check with codespell
        run: |
          codespell --toml pyproject.toml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#!/bin/bash
#
# Build a release wheel into ./dist.
#
# Arguments:
#   $1 - Python version suffix; the interpreter invoked is python$1
#   $2 - CUDA version; the toolkit is expected under /usr/local/cuda-$2

# Stop at the first failing command so a failed dependency install is not
# followed by a build attempt.
set -e

python_executable="python$1"
cuda_home="/usr/local/cuda-$2"

# Update paths
# Both are exported so the interpreter and the build tools it spawns see the
# CUDA toolkit even when LD_LIBRARY_PATH was not already in the environment.
# The ${VAR:+...} form avoids a trailing ':' (an empty search-path entry) when
# LD_LIBRARY_PATH is unset or empty.
export PATH="${cuda_home}/bin:${PATH}"
export LD_LIBRARY_PATH="${cuda_home}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

# Install requirements
"${python_executable}" -m pip install wheel packaging
"${python_executable}" -m pip install -r requirements.txt

# Limit the number of parallel jobs to avoid OOM
export MAX_JOBS=1
# Make sure punica is built for the release (for LoRA)
export VLLM_INSTALL_PUNICA_KERNELS=1

# Build
"${python_executable}" setup.py bdist_wheel --dist-dir=dist
Oops, something went wrong.