-
Notifications
You must be signed in to change notification settings - Fork 27
133 lines (118 loc) · 4.39 KB
/
vllm-ci-test.yml
File metadata and controls
133 lines (118 loc) · 4.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
name: Run vLLM tests

# Triggers: a 4-hourly schedule, manual dispatch (optionally pinning the vLLM
# branch and commit), and pull requests that touch this workflow or the test
# script it runs.
on:
  schedule:
    # Run every 4 hours
    - cron: '0 */4 * * *'
  workflow_dispatch:
    inputs:
      vllm_branch:
        description: >-
          vLLM branch (main, releases/vERSION for release validation, or
          refs/pull/PR_NUMBER/head for pre-merge check on pull request)
        required: true
        type: string
        default: main
      vllm_commit:
        # When left empty, the job walks back through the latest 100 commits
        # of the branch and uses the first one with a Docker image available.
        description: >-
          vLLM commit (optional, defaults to the most recent of the latest 100
          commits in the branch that has a Docker image available)
        required: false
        type: string
  pull_request:
    paths:
      - .github/workflows/vllm-ci-test.yml
      - .github/scripts/run_vllm_tests.sh
# Runs are grouped by workflow name plus the pull request number (or the
# commit SHA when there is no pull request). The two trailing boolean segments
# record whether the run was manually dispatched or scheduled, so those runs
# land in their own groups. A newer run cancels an in-progress run of the same
# group.
concurrency:
  group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}"
  cancel-in-progress: true
jobs:
  # Single job: check out this repository and vLLM, resolve the CI Docker
  # image for the selected vLLM commit, then run
  # .github/scripts/run_vllm_tests.sh inside that image.
  test:
    name: Run vLLM tests
    # Skip pull requests coming from forks and any run outside the pytorch org
    if: ${{ !github.event.pull_request.head.repo.fork && github.repository_owner == 'pytorch' }}
    strategy:
      fail-fast: false
      matrix:
        include:
          # TODO (huydhn): Figure out later if we need to scale this up to multiple runners
          - runs-on: linux.aws.h100.4
            device-name: cuda
    permissions:
      id-token: write
      contents: read
    runs-on: ${{ matrix.runs-on }}
    environment: pytorch-x-vllm
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6

      # Full history (fetch-depth: 0) is needed because a later step walks
      # back through HEAD~0..HEAD~99 of this checkout.
      - name: Checkout vLLM repository
        uses: actions/checkout@v6
        with:
          repository: vllm-project/vllm
          path: vllm
          ref: ${{ inputs.vllm_branch || 'main' }}
          fetch-depth: 0

      # Exports DOCKER_IMAGE_PREFIX and DOCKER_IMAGE_SUFFIX for later steps.
      - name: Set Docker registry
        shell: bash
        env:
          HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
          DEVICE_NAME: ${{ matrix.device-name }}
        run: |
          set -eux

          # Mimic the logic from vllm ci-infra test template
          if [[ "${HEAD_BRANCH}" == "main" ]]; then
            DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo
          else
            DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-test-repo
          fi

          DOCKER_IMAGE_SUFFIX=""
          if [[ "${DEVICE_NAME}" == "rocm" ]]; then
            DOCKER_IMAGE_PREFIX=docker.io/rocm/vllm-ci
          elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
            DOCKER_IMAGE_SUFFIX=-cpu
          fi

          echo "DOCKER_IMAGE_PREFIX=${DOCKER_IMAGE_PREFIX}" >> "${GITHUB_ENV}"
          echo "DOCKER_IMAGE_SUFFIX=${DOCKER_IMAGE_SUFFIX}" >> "${GITHUB_ENV}"

      # Exports HEAD_SHA: either the requested commit, or the most recent
      # commit (within the latest 100) that has a Docker image available.
      - name: Check for available Docker image
        working-directory: vllm
        env:
          HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
          HEAD_SHA: ${{ inputs.vllm_commit || '' }}
        run: |
          set -eux

          if [[ -z "${HEAD_SHA}" ]]; then
            IMAGE_FOUND=false

            # Looking back the latest 100 commits is enough
            for i in {0..99}
            do
              # Stop early when the history is shorter than the lookback window
              if ! HEAD_SHA=$(git rev-parse --verify --quiet "HEAD~${i}"); then
                break
              fi

              # Check if the image is there, if it isn't then check an older
              # commit because this one is too recent
              DOCKER_IMAGE="${DOCKER_IMAGE_PREFIX}:${HEAD_SHA}${DOCKER_IMAGE_SUFFIX}"
              if docker manifest inspect "${DOCKER_IMAGE}"; then
                IMAGE_FOUND=true
                break
              fi
            done

            # Fail here with a clear message instead of exporting an arbitrary
            # old commit whose image pull would fail in a later step
            if [[ "${IMAGE_FOUND}" != "true" ]]; then
              echo "::error::No Docker image found under ${DOCKER_IMAGE_PREFIX} for the latest 100 commits of ${HEAD_BRANCH}"
              exit 1
            fi
          fi

          echo "HEAD_SHA=${HEAD_SHA}" >> "${GITHUB_ENV}"

      - name: Setup CUDA GPU_FLAG for docker run
        if: matrix.device-name == 'cuda'
        run: |
          echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"

      - name: Setup ROCm
        if: matrix.device-name == 'rocm'
        uses: pytorch/pytorch/./.github/actions/setup-rocm@main

      - name: Run vLLM tests
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          DOCKER_IMAGE: ${{ env.DOCKER_IMAGE_PREFIX }}:${{ env.HEAD_SHA }}${{ env.DOCKER_IMAGE_SUFFIX }}
        run: |
          set -eux

          # GPU_FLAG is intentionally unquoted so that it splits into
          # separate docker arguments; it is empty when no earlier step set it
          container_name=$(docker run \
            ${GPU_FLAG:-} \
            -e HF_TOKEN \
            --ipc=host \
            --tty \
            --detach \
            --security-opt seccomp=unconfined \
            --shm-size=4g \
            -v "${GITHUB_WORKSPACE}:/tmp/workspace" \
            -w /tmp/workspace \
            "${DOCKER_IMAGE}"
          )

          # The container was started detached, so remove it when this step
          # exits (pass or fail) instead of leaving it running on the runner
          trap 'docker rm -f "${container_name}" > /dev/null || true' EXIT

          docker exec -t "${container_name}" bash -c "bash .github/scripts/run_vllm_tests.sh"