tilegym-ci #307
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: MIT
name: tilegym-ci
on:
  push:
    branches:
      # Copybara-style mirrored PR branches, e.g. pull-request/123
      - "pull-request/[0-9]+"
  schedule:
    # Run nightly at 12 PM UTC
    - cron: '0 12 * * *'
  workflow_dispatch: # Allow manual trigger
permissions:
  contents: read
  packages: write # push images to GHCR
  pull-requests: read # read PR body for CI config
  checks: write # publish test-result checks
env:
  # PR images go to a temp repo, main/nightly go to main repo
  IMAGE_NAME_PR: tilegym-pr
  IMAGE_NAME_MAIN: tilegym
jobs:
  # Decide what this run does: detect PR vs. main/nightly context, resolve the
  # PR body (for optional CI config), and emit build/test flags plus the image
  # name and tag every downstream job consumes.
  config:
    name: parse-ci-config
    runs-on: ubuntu-latest
    outputs:
      build: ${{ steps.parse.outputs.build }}
      run_ops: ${{ steps.parse.outputs.run_ops }}
      run_benchmark: ${{ steps.parse.outputs.run_benchmark }}
      run_sanity: ${{ steps.parse.outputs.run_sanity }}
      image_tag: ${{ steps.parse.outputs.image_tag }}
      image_name: ${{ steps.parse.outputs.image_name }}
      is_pr: ${{ steps.context.outputs.is_pr }}
    steps:
      - name: Determine context
        id: context
        run: |
          # Pushes to main and the nightly schedule count as "main" context;
          # everything else (pull-request/* branches, manual dispatch) is PR.
          if [[ "${{ github.ref }}" == "refs/heads/main" ]] || [[ "${{ github.event_name }}" == "schedule" ]]; then
            echo "is_pr=false" >> $GITHUB_OUTPUT
            echo "image_name=${{ env.IMAGE_NAME_MAIN }}" >> $GITHUB_OUTPUT
            echo "Running in main/nightly context"
          else
            echo "is_pr=true" >> $GITHUB_OUTPUT
            echo "image_name=${{ env.IMAGE_NAME_PR }}" >> $GITHUB_OUTPUT
            echo "Running in PR context"
          fi
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Get PR info
        id: pr
        uses: actions/github-script@v7
        with:
          script: |
            // Resolve the PR number and body for this push. Returned object is
            // consumed by the parse step via fromJSON(steps.pr.outputs.result).
            let prBody = '';
            let prNumber = '';
            const branchName = context.ref.replace('refs/heads/', '');
            core.info(`Looking for PR for branch: ${branchName}`);
            // Try method 1: Extract PR number from branch name
            const branchMatch = branchName.match(/^pull-request\/(\d+)/);
            if (branchMatch) {
              prNumber = branchMatch[1];
              core.info(`Extracted PR #${prNumber} from branch name`);
              // Fetch PR body by number
              try {
                const { data: pr } = await github.rest.pulls.get({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  pull_number: parseInt(prNumber),
                });
                prBody = pr.body || '';
                core.info(`Fetched PR body (${prBody.length} characters)`);
              } catch (error) {
                // Best-effort: a missing/closed PR just means no CI config.
                core.warning(`Failed to fetch PR #${prNumber}: ${error.message}`);
              }
            } else {
              // Try method 2: Search by branch name
              try {
                const { data: prs } = await github.rest.pulls.list({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  state: 'open',
                  head: `${context.repo.owner}:${branchName}`,
                });
                if (prs.length > 0) {
                  prBody = prs[0].body || '';
                  prNumber = prs[0].number.toString();
                  core.info(`Found PR #${prNumber} via API search`);
                  core.info(`PR body length: ${prBody.length} characters`);
                } else {
                  core.info(`No open PR found for branch ${branchName}`);
                }
              } catch (error) {
                core.warning(`Error searching for PR: ${error.message}`);
              }
            }
            return { prBody, prNumber };
      - name: Parse config and set image tag
        id: parse
        env:
          PR_BODY: ${{ fromJSON(steps.pr.outputs.result).prBody }}
          PR_NUMBER: ${{ fromJSON(steps.pr.outputs.result).prNumber }}
          IS_PR: ${{ steps.context.outputs.is_pr }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Parse CI config from PR body (only for PRs)
          if [[ "$IS_PR" == "true" ]]; then
            # NOTE(review): parse_pr_config.py presumably writes the
            # build/run_* outputs to $GITHUB_OUTPUT — confirm in the script.
            pip install pyyaml --quiet
            python3 .github/scripts/parse_pr_config.py
            # Set PR-specific image tag
            if [ -n "$PR_NUMBER" ]; then
              echo "image_tag=pr-${PR_NUMBER}" >> $GITHUB_OUTPUT
              echo "Using image tag: pr-${PR_NUMBER}"
            else
              echo "image_tag=latest" >> $GITHUB_OUTPUT
              echo "Using image tag: latest (PR without number)"
            fi
          else
            # Main/nightly: check if image already exists before building
            echo "image_tag=${{ github.sha }}" >> $GITHUB_OUTPUT
            # Check if 'latest' already points to current SHA (tests passed previously)
            OWNER_LOWER=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]')
            export REGISTRY_IMAGE="ghcr.io/${OWNER_LOWER}/${{ steps.context.outputs.image_name }}"
            export IMAGE_TAG="${{ github.sha }}"
            export IS_PR="false"
            python3 .github/scripts/check_image_exists.py
            # Read the skipped output from check_image_exists.py
            # (fragile: greps the step's own $GITHUB_OUTPUT file for the line
            # the helper script appended)
            if [ -f "$GITHUB_OUTPUT" ] && grep -q "skipped=true" "$GITHUB_OUTPUT"; then
              echo "✅ Image already exists and tests passed, skipping build"
              echo "build=false" >> $GITHUB_OUTPUT
              echo "run_ops=false" >> $GITHUB_OUTPUT
              echo "run_benchmark=false" >> $GITHUB_OUTPUT
              echo "run_sanity=false" >> $GITHUB_OUTPUT
            else
              echo "🔨 Building new image and running tests"
              echo "build=true" >> $GITHUB_OUTPUT
              echo "run_ops=true" >> $GITHUB_OUTPUT
              echo "run_benchmark=true" >> $GITHUB_OUTPUT
              echo "run_sanity=true" >> $GITHUB_OUTPUT
            fi
          fi
          # Pass through image name from context
          echo "image_name=${{ steps.context.outputs.image_name }}" >> $GITHUB_OUTPUT
  # Build Python wheels for all supported interpreter/arch combinations via a
  # reusable workflow. Only py310/x86_64 is exercised by later Docker tests.
  build-wheel:
    name: build-python-wheels
    needs: config
    if: needs.config.outputs.build == 'true'
    uses: ./.github/workflows/build-wheel.yml
    with:
      package-name: tilegym
      # Artifact naming: PR builds -> tilegym-pr-wheel-{sha}, Main -> tilegym-wheel-{sha}
      artifact-suffix: ${{ needs.config.outputs.is_pr == 'true' && '-pr' || '' }}
      python-versions: '["3.10", "3.11", "3.12"]' # Build for multiple Python versions
      architectures: '["x86_64", "arm64"]' # Build for both architectures (6 wheels total)
      retention-days: 7 # All wheels kept for 7 days; only tested wheel gets -verified (30 days)
      skip-import-test: true # TileGym requires CUDA, test in Docker instead
      run-pip-audit: true
      check-wheel-contents-ignore: "W002" # Ignore duplicate files from symlinks
      # Optional: Override default runners (ubuntu-latest for x86_64, ubuntu-24.04-arm for arm64)
      # runner-x86-64: [self-hosted, linux, x64, gpu]
| build: | |
| name: build-tilegym-image | |
| needs: [config, build-wheel] | |
| if: needs.config.outputs.build == 'true' | |
| runs-on: ubuntu-latest | |
| # Note: Wheels are built for Python 3.10/3.11/3.12 and x86_64/arm64 (6 wheels total) | |
| # However, Docker build and tests only use Python 3.10 x86_64 wheel | |
| # Only the tested wheel (py310-x86_64) gets marked as "-verified" after tests pass | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Download TileGym wheel (Python 3.10, x86_64) | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: ${{ needs.build-wheel.outputs.artifact-name }}-py310-x86_64 | |
| path: ./wheel | |
| - name: Set image variables | |
| id: vars | |
| run: | | |
| OWNER_LOWER=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]') | |
| REGISTRY_IMAGE="ghcr.io/${OWNER_LOWER}/${{ needs.config.outputs.image_name }}" | |
| echo "owner_lower=${OWNER_LOWER}" >> $GITHUB_OUTPUT | |
| echo "registry_image=${REGISTRY_IMAGE}" >> $GITHUB_OUTPUT | |
| - name: Free up disk space | |
| run: | | |
| sudo rm -rf /usr/share/dotnet | |
| sudo rm -rf /usr/local/lib/android | |
| sudo rm -rf /opt/ghc | |
| sudo rm -rf /opt/hostedtoolcache/CodeQL | |
| docker system prune -af | |
| df -h | |
| - name: Set up Docker Buildx | |
| uses: docker/setup-buildx-action@v3 | |
| - name: Login to GitHub Container Registry | |
| uses: docker/login-action@v3 | |
| with: | |
| registry: ghcr.io | |
| username: ${{ github.actor }} | |
| password: ${{ secrets.GITHUB_TOKEN }} | |
| - name: Generate tags | |
| id: tags | |
| run: | | |
| TAGS="${{ steps.vars.outputs.registry_image }}:${{ needs.config.outputs.image_tag }}" | |
| TAGS="${TAGS},${{ steps.vars.outputs.registry_image }}:${{ github.sha }}" | |
| # Add datetime tag for nightly builds | |
| if [[ "${{ needs.config.outputs.is_pr }}" == "false" ]]; then | |
| DATETIME=$(date -u +%Y%m%d-%H%M%S) | |
| TAGS="${TAGS},${{ steps.vars.outputs.registry_image }}:nightly-${DATETIME}" | |
| fi | |
| echo "tags=${TAGS}" >> $GITHUB_OUTPUT | |
| - name: Build and push Docker image to GHCR | |
| if: steps.check-existing.outputs.skipped != 'true' | |
| uses: docker/build-push-action@v5 | |
| with: | |
| context: . | |
| file: ./modeling/transformers/Dockerfile | |
| target: wheel # Use wheel target for CI builds | |
| tags: ${{ steps.tags.outputs.tags }} | |
| push: true | |
| provenance: false | |
| outputs: type=image,push=true,compression=zstd,compression-level=3 | |
| cache-from: | | |
| type=gha | |
| type=registry,ref=${{ steps.vars.outputs.registry_image }}:latest | |
| type=registry,ref=${{ steps.vars.outputs.registry_image }}:${{ needs.config.outputs.image_tag }} | |
| type=registry,ref=ghcr.io/${{ steps.vars.outputs.owner_lower }}/tilegym:latest | |
| cache-to: type=gha,mode=max | |
  # Wheel sanity: install the tested wheel into a clean venv and run
  # `pip check` for dependency consistency. CPU-only, hosted runner.
  sanity-check:
    name: sanity-check
    needs: [config, build-wheel]
    if: |
      always() &&
      needs.config.outputs.run_sanity == 'true' &&
      (needs.build-wheel.result == 'success' || needs.build-wheel.result == 'skipped')
    runs-on: ubuntu-latest
    steps:
      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"
      - name: Download wheel (Python 3.10, x86_64)
        # NOTE(review): if build-wheel was 'skipped' there is no artifact and
        # this download fails — presumably run_sanity is only 'true' when the
        # build also ran; confirm against parse_pr_config.py.
        uses: actions/download-artifact@v4
        with:
          name: ${{ needs.build-wheel.outputs.artifact-name }}-py310-x86_64
          path: ./wheel
      - name: pip check
        run: |
          python -m venv /tmp/sanity-env
          /tmp/sanity-env/bin/pip install --quiet ./wheel/*.whl
          /tmp/sanity-env/bin/pip check
  # Run the ops test suite inside the freshly built image on a GPU runner,
  # then upload/publish JUnit and HTML reports.
  test-ops:
    name: test-ops
    needs: [config, build]
    timeout-minutes: 40
    if: |
      always() &&
      needs.config.outputs.run_ops == 'true' &&
      (needs.build.result == 'success' || needs.build.result == 'skipped')
    runs-on: linux-amd64-gpu-rtxpro6000-latest-1
    steps:
      - name: Checkout code (sparse - need ops tests and shared utilities)
        uses: actions/checkout@v4
        with:
          sparse-checkout: |
            tests
          sparse-checkout-cone-mode: false
      - name: Create test results directory
        run: mkdir -p ${{ github.workspace }}/test-results
      - name: Login to GHCR
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Pull and run ops tests
        timeout-minutes: 35
        run: |
          OWNER_LOWER=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]')
          IMAGE="ghcr.io/${OWNER_LOWER}/${{ needs.config.outputs.image_name }}:${{ needs.config.outputs.image_tag }}"
          docker pull ${IMAGE}
          # Mount the checked-out tests over the image's copy so the PR's test
          # changes are exercised; results land in the mounted test-results dir.
          docker run --rm \
            --gpus all \
            -e DISABLE_AUTOTUNE=1 \
            -v ${{ github.workspace }}/tests:/workspace/tilegym/tests \
            -v ${{ github.workspace }}/test-results:/test-results \
            -w /workspace/tilegym \
            ${IMAGE} \
            bash -c "pip install --no-cache-dir pytest-xdist pytest-html && \
              pytest -s tests/ops tests/suites -v -k test_op \
              -n 12 \
              --junitxml=/test-results/ops-results.xml \
              --html=/test-results/ops-report.html \
              --self-contained-html"
      - name: Upload test results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: ops-test-results
          path: test-results/ops-*
          retention-days: 30
      - name: Publish test results
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
          files: test-results/ops-results.xml
          check_name: Ops Test Results
          comment_mode: off
  # Run benchmarks in the built image, compare against the most recent
  # main-branch baseline, and (nightly only) selectively update the baseline.
  test-benchmark:
    name: test-benchmark
    needs: [config, build]
    timeout-minutes: 40
    if: |
      always() &&
      needs.config.outputs.run_benchmark == 'true' &&
      (needs.build.result == 'success' || needs.build.result == 'skipped')
    runs-on: linux-amd64-gpu-rtxpro6000-latest-1
    steps:
      - name: Checkout code (sparse - need scripts and benchmarks)
        uses: actions/checkout@v4
        with:
          sparse-checkout: |
            .github/scripts
            tests/benchmark
          sparse-checkout-cone-mode: false
      - name: Create test results directory
        run: mkdir -p ${{ github.workspace }}/test-results
      # Download previous baseline for regression detection
      # Uses GitHub CLI instead of dawidd6 action (more reliable, no third-party dependencies)
      # This runs for ALL builds (PRs and nightly) to catch regressions early
      - name: Download baseline benchmark results
        timeout-minutes: 5
        continue-on-error: true
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          echo "Attempting to download baseline benchmark results..."
          # Find the most recent successful workflow run on main with baseline artifact
          RUN_ID=$(gh api \
            -H "Accept: application/vnd.github+json" \
            -H "X-GitHub-Api-Version: 2022-11-28" \
            "/repos/${{ github.repository }}/actions/workflows/tilegym-ci.yml/runs?branch=main&status=success&per_page=10" \
            --jq '.workflow_runs[].id' | head -1)
          if [ -z "$RUN_ID" ]; then
            echo "⚠️ No successful workflow runs found on main branch"
            exit 0
          fi
          echo "Found workflow run: $RUN_ID"
          # Download baseline artifact from that run
          mkdir -p ${{ github.workspace }}/baseline-results
          if gh run download "$RUN_ID" \
            --name benchmark-baseline \
            --dir ${{ github.workspace }}/baseline-results 2>/dev/null; then
            echo "✅ Downloaded baseline benchmark results"
            ls -lh ${{ github.workspace }}/baseline-results
          else
            echo "⚠️ No baseline artifact found (this may be the first run)"
          fi
      - name: Login to GHCR
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Pull and run benchmarks
        timeout-minutes: 35
        run: |
          OWNER_LOWER=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]')
          IMAGE="ghcr.io/${OWNER_LOWER}/${{ needs.config.outputs.image_name }}:${{ needs.config.outputs.image_tag }}"
          docker pull ${IMAGE}
          # run_all.sh writes one JSON result file per benchmark to /test-results
          docker run --rm \
            --gpus all \
            -v ${{ github.workspace }}/tests/benchmark:/workspace/tilegym/tests/benchmark \
            -v ${{ github.workspace }}/test-results:/test-results \
            -w /workspace/tilegym/tests/benchmark \
            ${IMAGE} \
            ./run_all.sh /test-results --json
      # Compare current results against baseline with three zones:
      # - Regression zone (< -5%): Build fails
      # - Neutral zone (-5% to +5%): Build passes, baseline NOT updated
      # - Improvement zone (> +5%): Build passes, baseline updated
      # Runs on ALL builds (PRs and nightly) to catch regressions early
      # But only nightly builds can update the baseline
      # Outputs: has_baseline, passed, should_update_baseline
      - name: Check for performance regressions
        id: regression_check
        continue-on-error: false
        run: |
          if [ -d "${{ github.workspace }}/baseline-results" ] && [ "$(ls -A ${{ github.workspace }}/baseline-results/*.json 2>/dev/null)" ]; then
            echo "Baseline results found, checking for regressions..."
            echo "has_baseline=true" >> $GITHUB_OUTPUT
            if python3 .github/scripts/check_benchmark_regression.py \
              --current test-results \
              --baseline baseline-results \
              --threshold 5.0 \
              --improvement-threshold 5.0 \
              --output test-results/regression_report.json \
              --fail-on-regression; then
              echo "✅ No regressions detected"
              echo "passed=true" >> $GITHUB_OUTPUT
              # Check if we should update baseline (only if significant improvements)
              # Note: Python prints the boolean as "True"/"False" (capitalized);
              # downstream comparisons deliberately match "True".
              SHOULD_UPDATE=$(python3 -c "import json; print(json.load(open('test-results/regression_report.json'))['summary']['should_update_baseline'])" 2>/dev/null || echo "false")
              echo "should_update_baseline=${SHOULD_UPDATE}" >> $GITHUB_OUTPUT
              if [ "$SHOULD_UPDATE" == "True" ]; then
                echo "🎉 Significant improvements detected - will update baseline (nightly only)"
              else
                echo "🟡 Performance within neutral zone - baseline will not be updated"
              fi
            else
              echo "❌ Performance regressions detected!"
              echo "passed=false" >> $GITHUB_OUTPUT
              echo "should_update_baseline=false" >> $GITHUB_OUTPUT
              # Temporary disable build failure due to inconsistent benchmark results
              # exit 1
            fi
          else
            echo "No baseline results found - this will become the first baseline (nightly only)"
            echo "has_baseline=false" >> $GITHUB_OUTPUT
            echo "passed=true" >> $GITHUB_OUTPUT
            echo "should_update_baseline=true" >> $GITHUB_OUTPUT
          fi
          # Note: PR builds check for regressions but cannot update the baseline
          if [ "${{ needs.config.outputs.is_pr }}" == "true" ]; then
            echo ""
            echo "ℹ️ This is a PR build - regression check performed but baseline will not be updated"
            echo "   Baseline updates only happen on nightly builds after merge to main"
          fi
      - name: Debug - List test results directory
        if: always()
        run: |
          echo "Contents of test-results directory:"
          ls -lah ${{ github.workspace }}/test-results/ || echo "Directory does not exist"
          echo ""
          echo "JSON files:"
          ls -lh ${{ github.workspace }}/test-results/*.json 2>/dev/null || echo "No JSON files found"
      - name: Format benchmark summary
        if: always()
        run: python3 .github/scripts/format_benchmark_summary.py test-results
      - name: Upload benchmark results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results
          path: test-results/*.json
          retention-days: 30
      # SELECTIVE BASELINE UPDATE STRATEGY:
      # Instead of all-or-nothing, we update per-benchmark:
      # - Benchmarks that improved/stayed neutral → update to new baseline
      # - Benchmarks that regressed → keep old baseline (forces fix)
      # - Build still FAILS if any regression exists
      # This preserves progress on non-regressing benchmarks while catching issues
      - name: Merge baseline selectively (nightly only)
        if: |
          needs.config.outputs.is_pr == 'false' &&
          steps.regression_check.outputs.has_baseline == 'true'
        run: |
          mkdir -p ${{ github.workspace }}/merged-baseline
          python3 .github/scripts/merge_baseline_selective.py \
            --old-baseline baseline-results \
            --new-results test-results \
            --regression-report test-results/regression_report.json \
            --output merged-baseline
      - name: Update baseline (nightly only - selective or full)
        if: needs.config.outputs.is_pr == 'false'
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-baseline
          # Use merged baseline if it exists (partial update), otherwise use all new results (first run or all improved)
          path: ${{ steps.regression_check.outputs.has_baseline == 'true' && 'merged-baseline/*.json' || 'test-results/*.json' }}
          retention-days: 90
      - name: Log baseline decision
        if: always()
        run: |
          if [ "${{ needs.config.outputs.is_pr }}" == "true" ]; then
            echo "📊 PR Build - Regression check completed"
            if [ "${{ steps.regression_check.outputs.passed }}" == "true" ]; then
              echo "✅ Performance check passed"
            else
              echo "❌ Performance regressions detected - fix before merging"
            fi
            echo "ℹ️ Note: Baseline will be updated after merge (on nightly build)"
          else
            # Nightly build - check if we have regression report for details
            if [ -f "test-results/regression_report.json" ]; then
              TOTAL_FILES=$(python3 -c "import json; print(json.load(open('test-results/regression_report.json'))['summary']['total_benchmark_files'])" 2>/dev/null || echo "0")
              FILES_WITH_REGRESSIONS=$(python3 -c "import json; print(json.load(open('test-results/regression_report.json'))['summary']['files_with_regressions'])" 2>/dev/null || echo "0")
              FILES_SAFE_TO_UPDATE=$(python3 -c "import json; print(json.load(open('test-results/regression_report.json'))['summary']['files_safe_to_update'])" 2>/dev/null || echo "0")
              echo "📊 Baseline Update Summary:"
              echo "  Total benchmark files: $TOTAL_FILES"
              echo "  Files with regressions: $FILES_WITH_REGRESSIONS"
              echo "  Files updated: $FILES_SAFE_TO_UPDATE"
              echo ""
              if [ "$FILES_WITH_REGRESSIONS" -gt 0 ]; then
                echo "⚠️ SELECTIVE UPDATE: Some benchmarks regressed, keeping old baseline for those"
                echo "  ✅ Updated baseline for $FILES_SAFE_TO_UPDATE non-regressing benchmarks"
                echo "  ❌ Kept old baseline for $FILES_WITH_REGRESSIONS regressing benchmarks"
                echo "  🚨 Build FAILED - regressions must be fixed"
              elif [ "${{ steps.regression_check.outputs.has_baseline }}" == "false" ]; then
                echo "✅ FIRST RUN: Created initial baseline with all $TOTAL_FILES benchmarks"
              elif [ "${{ steps.regression_check.outputs.should_update_baseline }}" == "True" ]; then
                echo "✅ FULL UPDATE: All benchmarks improved, updated entire baseline"
              else
                echo "🟡 NO UPDATE: All benchmarks within neutral zone (±5%)"
              fi
            fi
          fi
  # Re-publish the tested wheel with a "-verified" marker once all test jobs
  # succeed. always() is required because the test jobs themselves use
  # always(); the explicit success checks below keep the gate strict.
  publish-wheel:
    name: publish-verified-wheel
    needs: [config, build-wheel, sanity-check, test-ops, test-benchmark]
    if: |
      always() &&
      needs.build-wheel.result == 'success' &&
      needs.sanity-check.result == 'success' &&
      needs.test-ops.result == 'success' &&
      needs.test-benchmark.result == 'success'
    # Note: Only marks the py310-x86_64 wheel as "verified" because that's the wheel
    # actually tested in Docker. Other wheels (py311, py312, arm64) are available but unverified.
    uses: ./.github/workflows/publish-wheel.yml
    with:
      artifact-name: ${{ needs.build-wheel.outputs.artifact-name }}
      python-versions: '["3.10"]' # Only verify the wheel actually tested in Docker
      architectures: '["x86_64"]' # Only verify the x86_64 wheel used in Docker tests
      retention-days: 30 # Verified wheel kept longer than initial builds (7 days)
      publish-to-pypi: false # Set to true and add PYPI_TOKEN secret when ready
  # Main/nightly only: once every test job passes, retag the SHA image as
  # :latest and :<sha>-verified without rebuilding (manifest-only operation).
  promote-to-latest:
    name: promote-to-latest
    needs: [config, build, sanity-check, test-ops, test-benchmark]
    if: |
      always() &&
      needs.config.outputs.is_pr == 'false' &&
      needs.build.result == 'success' &&
      needs.sanity-check.result == 'success' &&
      needs.test-ops.result == 'success' &&
      needs.test-benchmark.result == 'success'
    runs-on: ubuntu-latest
    steps:
      - name: Login to GHCR
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Promote SHA to latest and mark as verified
        run: |
          OWNER_LOWER=$(echo '${{ github.repository_owner }}' | tr '[:upper:]' '[:lower:]')
          IMAGE="ghcr.io/${OWNER_LOWER}/${{ needs.config.outputs.image_name }}"
          SHA="${{ github.sha }}"
          echo "Promoting ${IMAGE}:${SHA} to latest and adding verified tags (tests passed)"
          # imagetools create adds tags to the existing manifest; no image pull/push needed.
          docker buildx imagetools create \
            -t ${IMAGE}:latest \
            -t ${IMAGE}:${SHA}-verified \
            ${IMAGE}:${SHA}