# Source: pytorch/pytorch-integration-testing, .github/workflows/vllm-ci-test.yml
# at commit d7a991c72d29eba89db4c59dcecd3551a04d3cfb

name: Run vLLM tests

on:
  # Periodic run; no inputs are supplied, so the steps fall back to main.
  schedule:
    # Run every 4 hours
    - cron: '0 */4 * * *'
  # Manual run against a chosen vLLM branch and, optionally, a pinned commit.
  workflow_dispatch:
    inputs:
      vllm_branch:
        description: vLLM branch (main, releases/vERSION for release validation, or refs/pull/PR_NUMBER/head for pre-merge check on pull request)
        required: true
        type: string
        default: main
      vllm_commit:
        description: vLLM commit (optional, default to the latest commit in the branch that has a CI Docker image available)
        required: false
        type: string
  # Re-run whenever this workflow or the test script it executes changes.
  pull_request:
    paths:
      - .github/workflows/vllm-ci-test.yml
      - .github/scripts/run_vllm_tests.sh

# One group per workflow and per pull request number (or commit SHA when the
# run is not a pull request); the two trailing booleans keep manual and
# scheduled runs in groups of their own. A newer run in the same group cancels
# the one that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

jobs:
  test:
    name: Run vLLM tests
    # Skip pull requests opened from forks, and skip any copy of this
    # repository whose owner is not pytorch.
    if: ${{ !github.event.pull_request.head.repo.fork && github.repository_owner == 'pytorch' }}
    strategy:
      # Let every matrix entry run to completion even if another one fails.
      fail-fast: false
      matrix:
        include:
          # TODO (huydhn): Figure out later if we need to scale this up to multiple runners
          # device-name drives the image selection and the GPU setup steps.
          - runs-on: linux.aws.h100.4
            device-name: cuda
    permissions:
      id-token: write
      contents: read
    runs-on: ${{ matrix.runs-on }}
    # NOTE(review): presumably the environment that provides the HF_TOKEN
    # secret used by the last step - confirm in the repository settings.
    environment: pytorch-x-vllm
    steps:
      # Check out this repository; the last step mounts the workspace into the
      # container and runs .github/scripts/run_vllm_tests.sh from it.
      - name: Checkout repository
        uses: actions/checkout@v6

      - name: Checkout vLLM repository
        uses: actions/checkout@v6
        with:
          repository: vllm-project/vllm
          # Cloned into the vllm/ subdirectory of the workspace.
          path: vllm
          # Falls back to main when no vllm_branch input is supplied.
          ref: ${{ inputs.vllm_branch || 'main' }}
          # Full history: a later step walks back through HEAD~0..HEAD~99.
          fetch-depth: 0

      # Export DOCKER_IMAGE_PREFIX and DOCKER_IMAGE_SUFFIX for the later steps.
      - name: Set Docker registry
        shell: bash
        env:
          HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
          DEVICE_NAME: ${{ matrix.device-name }}
        run: |
          set -eux

          # Mimic the logic from vllm ci-infra test template
          if [[ "${HEAD_BRANCH}" == "main" ]]; then
            DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-postmerge-repo
          else
            DOCKER_IMAGE_PREFIX=public.ecr.aws/q9t5s3a7/vllm-ci-test-repo
          fi

          # Device-specific overrides: ROCm replaces the whole repository,
          # CPU only appends a suffix to the image tag
          DOCKER_IMAGE_SUFFIX=""
          if [[ "${DEVICE_NAME}" == "rocm" ]]; then
            DOCKER_IMAGE_PREFIX=docker.io/rocm/vllm-ci
          elif [[ "${DEVICE_NAME}" == "cpu" ]]; then
            DOCKER_IMAGE_SUFFIX=-cpu
          fi

          # Quote the target file, as the GPU_FLAG step does, so the
          # redirection never undergoes word splitting
          {
            echo "DOCKER_IMAGE_PREFIX=${DOCKER_IMAGE_PREFIX}"
            echo "DOCKER_IMAGE_SUFFIX=${DOCKER_IMAGE_SUFFIX}"
          } >> "${GITHUB_ENV}"

      # Resolve HEAD_SHA, the vLLM commit whose Docker image will be tested,
      # and export it for the later steps.
      - name: Check for available Docker image
        working-directory: vllm
        env:
          HEAD_BRANCH: ${{ inputs.vllm_branch || 'main' }}
          HEAD_SHA: ${{ inputs.vllm_commit || '' }}
        run: |
          set -eux

          # An explicitly requested commit is used as is, without probing
          # the registry
          if [[ -z "${HEAD_SHA}" ]]; then
            # Looking back the latest 100 commits is enough
            for i in {0..99}
            do
              # Check if the image is there, if it isn't then check an older
              # commit because this one is too recent to have an image yet
              CANDIDATE_SHA=$(git rev-parse --verify "HEAD~${i}")
              DOCKER_IMAGE="${DOCKER_IMAGE_PREFIX}:${CANDIDATE_SHA}${DOCKER_IMAGE_SUFFIX}"

              if docker manifest inspect "${DOCKER_IMAGE}"; then
                HEAD_SHA="${CANDIDATE_SHA}"
                break
              fi
            done

            # Fail here instead of exporting a commit that has no image and
            # letting the test step die on an opaque pull error
            if [[ -z "${HEAD_SHA}" ]]; then
              echo "::error::No Docker image found for the latest 100 commits of ${HEAD_BRANCH}"
              exit 1
            fi
          fi

          echo "HEAD_SHA=${HEAD_SHA}" >> "${GITHUB_ENV}"

      # CUDA runners only: export the extra `docker run` arguments that expose
      # the GPUs to the container started by the test step.
      - name: Setup CUDA GPU_FLAG for docker run
        if: ${{ matrix.device-name == 'cuda' }}
        run: |
          printf '%s\n' "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"

      # ROCm runners only; never selected by the current matrix, which lists
      # a single cuda entry.
      # NOTE(review): the `/./` segment in the action path is unusual -
      # confirm it resolves the same as pytorch/pytorch/.github/actions/setup-rocm.
      - name: Setup ROCm
        if: matrix.device-name == 'rocm'
        uses: pytorch/pytorch/./.github/actions/setup-rocm@main

      # Start the selected image in the background, run the test script inside
      # it, and always remove the container afterwards.
      - name: Run vLLM tests
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          DOCKER_IMAGE: ${{ env.DOCKER_IMAGE_PREFIX }}:${{ env.HEAD_SHA }}${{ env.DOCKER_IMAGE_SUFFIX }}
        run: |
          set -eux

          # GPU_FLAG is left unquoted on purpose so that it splits into
          # separate docker arguments; `:-` keeps `set -u` from failing when
          # it is not set. `-e HF_TOKEN` forwards the value from this step's
          # environment without putting the secret on the command line.
          # `docker run --detach` prints the ID of the new container.
          container_id=$(docker run \
            ${GPU_FLAG:-} \
            -e HF_TOKEN \
            --ipc=host \
            --tty \
            --detach \
            --security-opt seccomp=unconfined \
            --shm-size=4g \
            -v "${GITHUB_WORKSPACE}:/tmp/workspace" \
            -w /tmp/workspace \
            "${DOCKER_IMAGE}"
          )
          # Without this the detached container keeps running on the runner
          # after the step ends, whether the tests passed or failed
          trap 'docker rm --force "${container_id}" > /dev/null || true' EXIT

          docker exec -t "${container_id}" bash -c "bash .github/scripts/run_vllm_tests.sh"