Third party benchmarks #261
name: Third party benchmarks
on:
  workflow_dispatch:
    inputs:
      runner_label:
        description: Runner label, keep empty for default
        type: string
        default: ""
      tag:
        description: Tag for benchmark results
        type: string
        default: "test"
      benchmarks:
        description: JSON list of benchmarks to run. Leave empty to run all benchmarks.
        type: string
        default: ""
      use_pyenv_python:
        description: Use Python built with pyenv
        type: boolean
        default: false
  schedule:
    # About 2 AM PST (UTC-8), 10:05 UTC
    - cron: "5 10 * * *"
permissions: read-all
env:
  PYTHON_VERSION: "3.10"
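  # Resolves to the manual "tag" input when given, "ci" for scheduled runs, and
  # "test" otherwise; the pull_request branch only takes effect if such a
  # trigger is added to this workflow.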
  TAG: ${{ inputs.tag || (github.event_name == 'pull_request' && format('pr-{0}', github.event.number)) || (github.event_name == 'schedule' && 'ci') || 'test' }}
  HF_TOKEN: ${{ secrets.HF_TOKEN || '' }}
jobs:
  build:
    name: Third party benchmarks
    runs-on:
      - linux
      - ${{ inputs.runner_label || 'max1550' }}
    timeout-minutes: 720
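    # Every "run:" step below goes through this custom shell, which sources the
    # oneAPI environment (setvars.sh) before executing the step script.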
    defaults:
      run:
        shell: bash --noprofile --norc -eo pipefail -c "source /opt/intel/oneapi/setvars.sh > /dev/null; source {0}"
    steps:
      - name: Print inputs
        run: |
          cat <<EOF
          ${{ toJSON(inputs) }}
          EOF
      - name: Checkout repository
        uses: actions/checkout@v5
      - name: Install Python
        if: ${{ !(inputs.use_pyenv_python || false) }}
        uses: actions/setup-python@v6
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Install Python (from pyenv) ${{ env.PYTHON_VERSION }}
        if: ${{ inputs.use_pyenv_python }}
        uses: ./.github/actions/setup-pyenv-python
        with:
          python-version: ${{ env.PYTHON_VERSION }}
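      # Re-export the version reported by the interpreter itself through
      # GITHUB_ENV, presumably so later steps work with the resolved value
      # rather than only the requested "3.10".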
      - name: Identify Python version
        run: |
          PYTHON_VERSION="$(python -c 'import sys; print(f"{sys.version_info[0]}.{sys.version_info[1]}")')"
          echo "PYTHON_VERSION=$PYTHON_VERSION" | tee -a $GITHUB_ENV
      - name: Install Python build dependencies
        run: |
          pip install wheel cmake
      - name: Setup PyTorch
        uses: ./.github/actions/setup-pytorch
      - name: Setup Triton
        uses: ./.github/actions/setup-triton
      - name: Install benchmark dependencies
        id: install
        run: |
          pip install transformers pandas pytest
      - name: Create reports dir
        run: |
          mkdir reports
          echo "REPORTS=$PWD/reports" >> $GITHUB_ENV
      - name: Install benchmarks
        id: install-benchmarks
        run: |
          cd benchmarks
          pip install .
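      # Each benchmark step below runs only if its install step succeeded, the
      # job has not been cancelled, and the "benchmarks" input is either empty
      # or lists that benchmark's name.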
      - name: Run vllm benchmarks bf16
        if: ${{ steps.install-benchmarks.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'vllm')) }}
        run: |
          source ./scripts/capture-hw-details.sh
          ./scripts/test-triton.sh --install-vllm --skip-pip-install --skip-pytorch-install
          cp -r vllm/tests benchmarks/third_party/vllm/tests
          cd benchmarks/third_party/vllm
          python batched_moe_benchmark.py --reports $REPORTS
          python transform_results.py $REPORTS/moe-gemm-performance.csv $REPORTS/moe-gemm-report.csv --tag $TAG --benchmark moe-bf16-benchmark
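      # Same MoE GEMM benchmark as above; setting FP8="1" presumably switches
      # batched_moe_benchmark.py to its fp8 code path.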
      - name: Run vllm benchmarks fp8
        if: ${{ steps.install-benchmarks.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'vllm')) }}
        run: |
          source ./scripts/capture-hw-details.sh
          cd benchmarks/third_party/vllm
          FP8="1" python batched_moe_benchmark.py --reports $REPORTS
          python transform_results.py $REPORTS/moe-gemm-performance.csv $REPORTS/moe-gemm-report.csv --tag $TAG --benchmark moe-fp8-benchmark
      - name: Run Liger-Kernel benchmarks
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'liger-kernel')) }}
        run: |
          source ./scripts/capture-hw-details.sh
          cd benchmarks/third_party/liger_kernels
          git clone https://github.com/linkedin/Liger-Kernel
          pip install -e Liger-Kernel
          # Capture the return code, but still copy the results before exiting
          RET_CODE=0
          bash ./run_benchmarks.sh || RET_CODE=$?
          cp Liger-Kernel/benchmark/data/all_benchmark_data.csv $REPORTS/liger-raw.csv
          python transform.py $REPORTS/liger-raw.csv $REPORTS/liger-report.csv --tag $TAG
          # Return the captured return code at the end
          exit "$RET_CODE"
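      # meta-llama/Llama-3.1-8B is a gated Hugging Face checkpoint, so the
      # HF_TOKEN defined at the top of this workflow is presumably what
      # authorizes the model download in the step below.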
      - name: Run e2e Llama 3.1 flex attention performance benchmark
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'llama3-1')) }}
        run: |
          source ./scripts/capture-hw-details.sh
          git clone https://github.com/huggingface/transformers.git
          cd transformers
          git checkout $(<../benchmarks/third_party/e2e-flex_attention/transformers-commit.txt)
          git apply ../benchmarks/third_party/e2e-flex_attention/transformers-patch-for-timing.diff
          git submodule sync
          git submodule update --init --recursive
          pip install -v -e .
          cd ../benchmarks/third_party/e2e-flex_attention
          MODEL_NAME="meta-llama/Llama-3.1-8B"
          MAX_NEW_TOKENS=128
          INPUT_TOKENS=1024
          BATCH_SIZE=1
          python run_llm_inductor_greedy.py -m $MODEL_NAME --max-new-tokens $MAX_NEW_TOKENS --input-tokens $INPUT_TOKENS --num-warmup 2 --num-iter 7 --compile --profile | tee llm.compile.xpu.profile.log
          echo "LLM profiling log is stored in $PWD/llm.compile.xpu.profile.log"
          cp llm.compile.xpu.profile.log $REPORTS/llm.compile.xpu.profile.log
          python transform_results.py $REPORTS/llm.compile.xpu.profile.log $REPORTS/llm-triton-report.csv \
            --tag $TAG \
            --model "$MODEL_NAME" \
            --max-new-tokens $MAX_NEW_TOKENS \
            --batch-size $BATCH_SIZE
      - name: Upload benchmark reports
        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-reports
          path: reports