ROCm · micmelesse · Mar 9, 2026
diff --git a/.github/workflows/flash_attention_integration.yaml b/.github/workflows/flash_attention_integration.yaml
@@ -29,7 +29,6 @@ env:
   # TODO: Switch to Dao-AILab/flash-attention main
   FA_BRANCH: micmelesse/aiter_migration
   FA_REPOSITORY_URL: https://github.com/ROCm/flash-attention.git
-  GPU_ARCH: gfx950
   BASE_IMAGE: rocm/pytorch:latest@sha256:683765a52c61341e1674fe730ab3be861a444a45a36c0a8caae7653a08a0e208
   AITER_SUBMODULE_PATH: third_party/aiter
 
@@ -90,9 +89,17 @@ jobs:
   # =============================================================================
   flash_attention_triton:
     if: ${{ needs.prechecks.outputs.run_triton == 'true' }}
-    name: Flash Attention - Triton (1 GPU)
+    name: Flash Attention - Triton / ${{ matrix.label }} (1 GPU)
     needs: [check-signal, prechecks]
-    runs-on: linux-aiter-mi355-1
+    runs-on: ${{ matrix.runner }}
+    strategy:  # runs this job once per runner/label pair listed under include
+      fail-fast: false  # a failure on one runner must not cancel the other
+      matrix:
+        include:
+          - runner: linux-aiter-mi355-1  # consumed by runs-on
+            label: MI355  # used in the job name, log file and artifact name
+          - runner: aiter-gfx1100
+            label: RDNA3
 
     steps:
       - name: Checkout aiter repo
@@ -187,14 +194,14 @@ jobs:
             cd /flash-attention
             FLASH_ATTENTION_TRITON_AMD_ENABLE=TRUE \
             python benchmarks/benchmark_flash_attention.py
-          " |& tee benchmark_triton.log
+          " |& tee benchmark_triton_${{ matrix.label }}.log
 
       - name: Upload benchmark results
         if: success()
         uses: actions/upload-artifact@v4
         with:
-          name: flash-attention-triton-benchmark
-          path: benchmark_triton.log
+          name: flash-attention-triton-benchmark-${{ matrix.label }}  # distinct artifact per matrix job
+          path: benchmark_triton_${{ matrix.label }}.log  # log file written by tee in the benchmark step
 
       - name: Clean Up
         if: always()