Skip to content

Commit

Permalink
tet commit
Browse files Browse the repository at this point in the history
  • Loading branch information
YaoJiayi committed May 28, 2024
1 parent 67883dd commit 2000655
Show file tree
Hide file tree
Showing 384 changed files with 63,224 additions and 0 deletions.
23 changes: 23 additions & 0 deletions et --soft HEAD~1d
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
commit e50b714436f4c1512416dea693d4dd85a209ecda (HEAD -> main, origin/main, origin/HEAD)
Author: YaoJiayi <[email protected]>
Date: Tue May 28 11:20:34 2024 -0500

test commit

commit 67883dd49e43a7bbdd427aba40acf409cbfa1f22
Author: Jiayi Yao <[email protected]>
Date: Sun May 26 00:12:54 2024 -0500

Update README.md

commit 99d43ec3c98766f149201b8e74a2604db072c9d5
Author: Jiayi Yao <[email protected]>
Date: Sun May 26 00:12:28 2024 -0500

Update README.md

commit 4bda3004445b16bbd12cbfac9ab8c577d1ca90b8
Author: Jiayi Yao <[email protected]>
Date: Sun May 26 00:08:53 2024 -0500

Initial commit
Empty file added examples/fuse_gen.py
Empty file.
Empty file added examples/normal_gen.py
Empty file.
Empty file added examples/preprocess.py
Empty file.
Empty file added inputs/1.json
Empty file.
69 changes: 69 additions & 0 deletions vllm_fuse/.buildkite/run-benchmarks.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# This script is run by buildkite to run the benchmarks and upload the results to buildkite

set -ex
set -o pipefail

# cd into parent directory of this file
cd "$(dirname "${BASH_SOURCE[0]}")/.."

(which wget && which curl) || (apt-get update && apt-get install -y wget curl)

# run python-based benchmarks and upload the result to buildkite
python3 benchmarks/benchmark_latency.py 2>&1 | tee benchmark_latency.txt
bench_latency_exit_code=$?

python3 benchmarks/benchmark_throughput.py --input-len 256 --output-len 256 2>&1 | tee benchmark_throughput.txt
bench_throughput_exit_code=$?

# run server-based benchmarks and upload the result to buildkite
python3 -m vllm.entrypoints.openai.api_server --model meta-llama/Llama-2-7b-chat-hf &
server_pid=$!
wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json

# wait for server to start, timeout after 600 seconds
timeout 600 bash -c 'until curl localhost:8000/v1/models; do sleep 1; done' || exit 1
python3 benchmarks/benchmark_serving.py \
--backend openai \
--dataset ./ShareGPT_V3_unfiltered_cleaned_split.json \
--model meta-llama/Llama-2-7b-chat-hf \
--num-prompts 20 \
--endpoint /v1/completions \
--tokenizer meta-llama/Llama-2-7b-chat-hf \
--save-result \
2>&1 | tee benchmark_serving.txt
bench_serving_exit_code=$?
kill $server_pid

# write the results into a markdown file
echo "### Latency Benchmarks" >> benchmark_results.md
sed -n '1p' benchmark_latency.txt >> benchmark_results.md # first line
echo "" >> benchmark_results.md
sed -n '$p' benchmark_latency.txt >> benchmark_results.md # last line

echo "### Throughput Benchmarks" >> benchmark_results.md
sed -n '1p' benchmark_throughput.txt >> benchmark_results.md # first line
echo "" >> benchmark_results.md
sed -n '$p' benchmark_throughput.txt >> benchmark_results.md # last line

echo "### Serving Benchmarks" >> benchmark_results.md
sed -n '1p' benchmark_serving.txt >> benchmark_results.md # first line
echo "" >> benchmark_results.md
tail -n 13 benchmark_serving.txt >> benchmark_results.md # last 13 lines

# upload the results to buildkite
/workspace/buildkite-agent annotate --style "info" --context "benchmark-results" < benchmark_results.md

# exit with the exit code of the benchmarks
if [ $bench_latency_exit_code -ne 0 ]; then
exit $bench_latency_exit_code
fi

if [ $bench_throughput_exit_code -ne 0 ]; then
exit $bench_throughput_exit_code
fi

if [ $bench_serving_exit_code -ne 0 ]; then
exit $bench_serving_exit_code
fi

/workspace/buildkite-agent artifact upload openai-*.json
75 changes: 75 additions & 0 deletions vllm_fuse/.buildkite/test-pipeline.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# In this file, you can add more tests to run either by adding a new step or
# adding a new command to an existing step. See different options here for examples.
# This script will be feed into Jinja template in `test-template.j2` to generate
# the final pipeline yaml file.

steps:
- label: Regression Test
command: pytest -v -s test_regression.py
working_dir: "/vllm-workspace/tests" # optional

- label: AsyncEngine Test
command: pytest -v -s async_engine

- label: Basic Correctness Test
command: pytest -v -s --forked basic_correctness

- label: Core Test
command: pytest -v -s core

- label: Distributed Comm Ops Test
command: pytest -v -s --forked test_comm_ops.py
working_dir: "/vllm-workspace/tests/distributed"
num_gpus: 2 # only support 1 or 2 for now.

- label: Distributed Correctness Test
command: pytest -v -s --forked test_basic_distributed_correctness.py
working_dir: "/vllm-workspace/tests/distributed"
num_gpus: 2 # only support 1 or 2 for now.

- label: Engine Test
command: pytest -v -s engine test_sequence.py

- label: Entrypoints Test
command: pytest -v -s entrypoints

- label: Kernels Test
command: pytest -v -s kernels
soft_fail: true

- label: Models Test
commands:
- pytest -v -s models --forked
soft_fail: true

- label: Prefix Caching Test
commands:
- pytest -v -s prefix_caching

- label: Samplers Test
command: pytest -v -s samplers --forked

- label: Worker Test
command: pytest -v -s worker

- label: Speculative decoding tests
command: pytest -v -s spec_decode

- label: LoRA Test
command: pytest -v -s lora --forked

- label: Metrics Test
command: pytest -v -s metrics

- label: Benchmarks
working_dir: "/vllm-workspace/.buildkite"
commands:
- pip install aiohttp
- bash run-benchmarks.sh

- label: Documentation Build
working_dir: "/vllm-workspace/docs"
no_gpu: True
commands:
- pip install -r requirements-docs.txt
- SPHINXOPTS=\"-W\" make html
56 changes: 56 additions & 0 deletions vllm_fuse/.buildkite/test-template.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
{% set docker_image = "us-central1-docker.pkg.dev/vllm-405802/vllm-ci-test-repo/vllm-test:$BUILDKITE_COMMIT" %}
{% set default_num_gpu = 1 %}
{% set default_working_dir = "/vllm-workspace/tests" %}

steps:
- label: ":docker: build image"
commands:
- "docker build --build-arg max_jobs=16 --tag {{ docker_image }} --target test --progress plain ."
- "docker push {{ docker_image }}"
env:
DOCKER_BUILDKIT: "1"
retry:
automatic:
- exit_status: -1 # Agent was lost
limit: 5
- wait

{% for step in steps %}
- label: "{{ step.label }}"
agents:
queue: kubernetes
soft_fail: {{ step.soft_fail or false }}
retry:
automatic:
- exit_status: -1 # Agent was lost
limit: 5
plugins:
- kubernetes:
podSpec:
volumes:
- name: dshm
emptyDir:
medium: Memory
containers:
- image: "{{ docker_image }}"
command: ["bash"]
args:
- '-c'
- "'cd {{ (step.working_dir or default_working_dir) | safe }} && {{ step.command or (step.commands | join(' && ')) | safe }}'"
{% if not step.no_gpu %}
resources:
requests:
nvidia.com/gpu: "{{ step.num_gpus or default_num_gpu }}"
limits:
nvidia.com/gpu: "{{ step.num_gpus or default_num_gpu }}"
{% endif %}
env:
- name: HF_TOKEN
valueFrom:
secretKeyRef:
name: hf-token-secret
key: token
volumeMounts:
- mountPath: /dev/shm
name: dshm
{% endfor %}
1 change: 1 addition & 0 deletions vllm_fuse/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
vllm/*.so
102 changes: 102 additions & 0 deletions vllm_fuse/.github/workflows/publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# This workflow will upload a Python Package to Release asset
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions

name: Create Release

on:
push:
tags:
- v*

# Needed to create release and upload assets
permissions:
contents: write

jobs:
release:
# Retrieve tag and create release
name: Create Release
runs-on: ubuntu-latest
outputs:
upload_url: ${{ steps.create_release.outputs.upload_url }}
steps:
- name: Checkout
uses: actions/checkout@v3

- name: Extract branch info
shell: bash
run: |
echo "release_tag=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV
- name: Create Release
id: create_release
uses: "actions/github-script@v6"
env:
RELEASE_TAG: ${{ env.release_tag }}
with:
github-token: "${{ secrets.GITHUB_TOKEN }}"
script: |
const script = require('.github/workflows/scripts/create_release.js')
await script(github, context, core)
wheel:
name: Build Wheel
runs-on: ${{ matrix.os }}
needs: release

strategy:
fail-fast: false
matrix:
os: ['ubuntu-20.04']
python-version: ['3.8', '3.9', '3.10', '3.11']
pytorch-version: ['2.1.2'] # Must be the most recent version that meets requirements.txt.
cuda-version: ['11.8', '12.1']

steps:
- name: Checkout
uses: actions/checkout@v3

- name: Set up Linux Env
if: ${{ runner.os == 'Linux' }}
run: |
bash -x .github/workflows/scripts/env.sh
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

- name: Install CUDA ${{ matrix.cuda-version }}
run: |
bash -x .github/workflows/scripts/cuda-install.sh ${{ matrix.cuda-version }} ${{ matrix.os }}
- name: Install PyTorch ${{ matrix.pytorch-version }} with CUDA ${{ matrix.cuda-version }}
run: |
bash -x .github/workflows/scripts/pytorch-install.sh ${{ matrix.python-version }} ${{ matrix.pytorch-version }} ${{ matrix.cuda-version }}
- name: Build wheel
shell: bash
run: |
bash -x .github/workflows/scripts/build.sh ${{ matrix.python-version }} ${{ matrix.cuda-version }}
wheel_name=$(ls dist/*whl | xargs -n 1 basename)
asset_name=${wheel_name//"linux"/"manylinux1"}
echo "wheel_name=${wheel_name}" >> $GITHUB_ENV
echo "asset_name=${asset_name}" >> $GITHUB_ENV
- name: Upload Release Asset
uses: actions/upload-release-asset@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ needs.release.outputs.upload_url }}
asset_path: ./dist/${{ env.wheel_name }}
asset_name: ${{ env.asset_name }}
asset_content_type: application/*

# (Danielkinz): This last step will publish the .whl to pypi. Warning: untested
# - name: Publish package
# uses: pypa/gh-action-pypi-publish@release/v1.8
# with:
# repository-url: https://test.pypi.org/legacy/
# password: ${{ secrets.PYPI_API_TOKEN }}
# skip-existing: true
34 changes: 34 additions & 0 deletions vllm_fuse/.github/workflows/ruff.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
name: ruff

on:
# Trigger the workflow on push or pull request,
# but only for the main branch
push:
branches:
- main
pull_request:
branches:
- main

jobs:
ruff:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.10"]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install ruff==0.1.5 codespell==2.2.6 tomli==2.0.1
- name: Analysing the code with ruff
run: |
ruff vllm tests
- name: Spelling check with codespell
run: |
codespell --toml pyproject.toml
20 changes: 20 additions & 0 deletions vllm_fuse/.github/workflows/scripts/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash

python_executable=python$1
cuda_home=/usr/local/cuda-$2

# Update paths
PATH=${cuda_home}/bin:$PATH
LD_LIBRARY_PATH=${cuda_home}/lib64:$LD_LIBRARY_PATH

# Install requirements
$python_executable -m pip install wheel packaging
$python_executable -m pip install -r requirements.txt

# Limit the number of parallel jobs to avoid OOM
export MAX_JOBS=1
# Make sure punica is built for the release (for LoRA)
export VLLM_INSTALL_PUNICA_KERNELS=1

# Build
$python_executable setup.py bdist_wheel --dist-dir=dist
Loading

0 comments on commit 2000655

Please sign in to comment.