Skip to content

Commit 3f8fe9b

Browse files
authored
Merge branch 'ROCm:main' into main
2 parents 5f0e3b4 + 0e0a378 commit 3f8fe9b

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

51 files changed

+12376
-318
lines changed

.github/workflows/aiter-test.yaml

Lines changed: 44 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -28,46 +28,51 @@ jobs:
2828
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
2929
GITHUB_SHA: ${{ github.sha }}
3030

31-
define-runners:
32-
runs-on: ubuntu-latest
33-
needs: [check-signal]
34-
outputs:
35-
standard_runners: ${{ steps.machines.outputs.standard_runners }}
36-
multigpu_runners: ${{ steps.machines.outputs.multigpu_runners }}
31+
# define-runners:
32+
# runs-on: ubuntu-latest
33+
# needs: [check-signal]
34+
# outputs:
35+
# standard_runners: ${{ steps.machines.outputs.standard_runners }}
36+
# multigpu_runners: ${{ steps.machines.outputs.multigpu_runners }}
3737

38-
steps:
39-
- name: Define whether runs on MI35X
40-
env:
41-
PR_TITLE: ${{ github.event.pull_request.title }}
42-
id: machines
43-
run: |
44-
set -euo pipefail
45-
if [[ "${{ github.ref }}" == "refs/heads/main" ]]; then
46-
echo "It's main branch, running tests on MI325 and MI35X..."
47-
echo 'standard_runners=["aiter-mi355-1gpu", "aiter-1gpu-runner"]' >> "$GITHUB_OUTPUT"
48-
echo 'multigpu_runners=["aiter-mi355-8gpu", "aiter-8gpu-runner"]' >> "$GITHUB_OUTPUT"
49-
#elif echo "${PR_TITLE}" | grep -qi "mi325"; then
50-
# echo "PR title contains 'MI325', running tests on MI325 and MI35X..."
51-
# echo 'standard_runners=["aiter-mi355-1gpu", "aiter-1gpu-runner"]' >> "$GITHUB_OUTPUT"
52-
# echo 'multigpu_runners=["aiter-mi355-8gpu", "aiter-8gpu-runner"]' >> "$GITHUB_OUTPUT"
53-
else
54-
# echo "Not main branch and PR title does not contain mi325, only running on MI35X..."
55-
echo 'standard_runners=["aiter-mi355-1gpu", "aiter-1gpu-runner"]' >> "$GITHUB_OUTPUT"
56-
echo 'multigpu_runners=["aiter-mi355-8gpu", "aiter-8gpu-runner"]' >> "$GITHUB_OUTPUT"
57-
fi
58-
echo "$GITHUB_OUTPUT"
59-
60-
- name: Show output variable
61-
run: |
62-
echo "Standard: ${{ steps.machines.outputs.standard_runners }}"
63-
echo "Multipe: ${{ steps.machines.outputs.multigpu_runners }}"
38+
# steps:
39+
# - name: Define whether runs on MI35X
40+
# env:
41+
# PR_TITLE: ${{ github.event.pull_request.title }}
42+
# id: machines
43+
# run: |
44+
# set -euo pipefail
45+
# if [[ "${{ github.ref }}" == "refs/heads/main" ]]; then
46+
# echo "It's main branch, running tests on MI325 and MI35X..."
47+
# echo 'standard_runners=["aiter-mi355-1gpu", "aiter-1gpu-runner"]' >> "$GITHUB_OUTPUT"
48+
# echo 'multigpu_runners=["aiter-mi355-8gpu", "aiter-8gpu-runner"]' >> "$GITHUB_OUTPUT"
49+
# #elif echo "${PR_TITLE}" | grep -qi "mi325"; then
50+
# # echo "PR title contains 'MI325', running tests on MI325 and MI35X..."
51+
# # echo 'standard_runners=["aiter-mi355-1gpu", "aiter-1gpu-runner"]' >> "$GITHUB_OUTPUT"
52+
# # echo 'multigpu_runners=["aiter-mi355-8gpu", "aiter-8gpu-runner"]' >> "$GITHUB_OUTPUT"
53+
# else
54+
# # echo "Not main branch and PR title does not contain mi325, only running on MI35X..."
55+
# echo 'standard_runners=["aiter-mi355-1gpu", "aiter-1gpu-runner"]' >> "$GITHUB_OUTPUT"
56+
# echo 'multigpu_runners=["aiter-mi355-8gpu", "aiter-8gpu-runner"]' >> "$GITHUB_OUTPUT"
57+
# fi
58+
# echo "$GITHUB_OUTPUT"
59+
#
60+
# - name: Show output variable
61+
# run: |
62+
# echo "Standard: ${{ steps.machines.outputs.standard_runners }}"
63+
# echo "Multigpu: ${{ steps.machines.outputs.multigpu_runners }}"
6464

6565
standard:
66-
needs: define-runners
66+
name: Standard Tests (1 GPU)
67+
needs: check-signal
6768
strategy:
6869
fail-fast: false
6970
matrix:
70-
runner: ${{ fromJSON(needs.define-runners.outputs.standard_runners) }}
71+
include:
72+
- runner: aiter-mi355-1gpu
73+
label: MI355
74+
- runner: aiter-1gpu-runner
75+
label: MI325
7176
runs-on: ${{ matrix.runner }}
7277

7378
steps:
@@ -160,13 +165,16 @@ jobs:
160165
./.github/scripts/clean_up_rocm.sh
161166
162167
multi-gpu:
163-
needs: define-runners
168+
name: Multi-GPU Tests (8 GPU)
169+
needs: check-signal
164170
# only run multi-gpu tests on main branch due to limited multi-gpu resources
165171
if: github.ref == 'refs/heads/main'
166172
strategy:
167173
fail-fast: false
168174
matrix:
169-
runner: ${{ fromJSON(needs.define-runners.outputs.multigpu_runners) }}
175+
include:
176+
- runner: aiter-mi355-8gpu
177+
- runner: aiter-8gpu-runner
170178
runs-on: ${{ matrix.runner }}
171179

172180
steps:

.github/workflows/sglang_downstream.yaml

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,21 @@ jobs:
2525
GITHUB_SHA: ${{ github.sha }}
2626

2727
sglang:
28-
name: sglang integration
28+
name: Sglang Integration Test (1 GPU)
2929
needs: [check-signal]
30-
runs-on: aiter-1gpu-runner
30+
runs-on: ${{ matrix.runner }}
31+
strategy:
32+
fail-fast: false
33+
matrix:
34+
include:
35+
- runner: aiter-1gpu-runner
36+
label: MI325
37+
3138
env:
32-
SGL_BRANCH: v0.5.6
39+
SGL_BRANCH: v0.5.8
3340
GPU_ARCH: gfx942
3441
GPU_ARCH_CI: mi300 # used in sglang ci scripts
35-
SGL_IMAGE: rocm/sgl-dev:v0.5.6-rocm700-mi30x-20251208
42+
SGL_IMAGE: rocm/sgl-dev:v0.5.8-rocm700-mi30x-20260127
3643
GITHUB_REPO_URL: ${{ github.event.pull_request.head.repo.clone_url || 'https://github.com/ROCm/aiter.git' }}
3744
GITHUB_COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.event.head_commit.id }}
3845

@@ -115,12 +122,13 @@ jobs:
115122
- name: Evaluate Accuracy
116123
timeout-minutes: 120
117124
run: |
125+
set -ex
118126
cd sglang
119127
sed -i 's/ci_sglang/sglang_aiter_test/g' scripts/ci/amd_ci_exec.sh
120128
bash scripts/ci/amd_ci_exec.sh printenv | grep GPU_ARCH || true
121-
bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_eval_accuracy_large.py
122-
bash scripts/ci/amd_ci_exec.sh python3 test_eval_fp8_accuracy.py
123-
bash scripts/ci/amd_ci_exec.sh python3 models/test_qwen_models.py
129+
bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 ../registered/eval/test_eval_accuracy_large.py
130+
bash scripts/ci/amd_ci_exec.sh python3 ../registered/quant/test_eval_fp8_accuracy.py
131+
bash scripts/ci/amd_ci_exec.sh python3 ../registered/models/test_qwen_models.py
124132
125133
# TODO: This clean-up is needed because some dependencies are installed as the root user and cannot be removed by the runner; these dependencies should be installed as a non-root user instead.
126134
- name: Clean Up

.github/workflows/triton-test.yaml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,15 @@ jobs:
3131
GITHUB_SHA: ${{ github.sha }}
3232

3333
triton:
34-
runs-on: aiter-1gpu-runner
34+
name: Triton Tests (1 GPU)
35+
runs-on: ${{ matrix.runner }}
3536
needs: [check-signal]
37+
strategy:
38+
fail-fast: false
39+
matrix:
40+
include:
41+
- runner: aiter-1gpu-runner
42+
label: MI325
3643
env:
3744
DOCKER_IMAGE: "rocm/pytorch:latest"
3845
TRITON_TEST: "op_tests/triton_tests/"

.github/workflows/vllm_benchmark.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ jobs:
3232
GITHUB_SHA: ${{ github.sha }}
3333

3434
build_vllm_image:
35+
name: Build vLLM Image
3536
if: ${{ !github.event.pull_request.head.repo.fork }}
3637
needs: [check-signal]
3738
runs-on: aiter-k8s-build
@@ -95,9 +96,10 @@ jobs:
9596
echo "Successfully prepared image: rocm/aiter-ci:${{ env.GITHUB_COMMIT_SHA }}"
9697
9798
vllm_benchmark:
99+
name: vLLM Benchmark (8 GPU)
98100
if: ${{ !github.event.pull_request.head.repo.fork }}
99-
runs-on: aiter-8gpu-runner
100101
needs: build_vllm_image
102+
runs-on: aiter-8gpu-runner
101103
strategy:
102104
fail-fast: false
103105
matrix:

0 commit comments

Comments
 (0)