Third party benchmarks #261
name: Third party benchmarks

on:
  workflow_dispatch:
    inputs:
      runner_label:
        description: Runner label, keep empty for default
        type: string
        default: ""
      tag:
        description: Tag for benchmark results
        type: string
        default: "test"
      benchmarks:
        description: JSON list of benchmarks to run. Leave empty to run all benchmarks.
        type: string
        default: ""
      use_pyenv_python:
        description: Use Python built with pyenv
        type: boolean
        default: false
  schedule:
    # 02:05 PST (UTC-8), daily
    - cron: "5 10 * * *"

permissions: read-all

env:
  PYTHON_VERSION: "3.10"
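  # Tag precedence: explicit dispatch input, then pr-<number> for pull request
  # runs, 'ci' for scheduled runs, and 'test' as the fallback.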
  TAG: ${{ inputs.tag || (github.event_name == 'pull_request' && format('pr-{0}', github.event.number)) || (github.event_name == 'schedule' && 'ci') || 'test' }}
  HF_TOKEN: ${{ secrets.HF_TOKEN || '' }}

jobs:
  build:
    name: Third party benchmarks
    runs-on:
      - linux
      - ${{ inputs.runner_label || 'max1550' }}
    timeout-minutes: 720
    defaults:
      run:
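        # Wrap every `run` step so the oneAPI environment is sourced before the script body executes.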
        shell: bash --noprofile --norc -eo pipefail -c "source /opt/intel/oneapi/setvars.sh > /dev/null; source {0}"
    steps:
      - name: Print inputs
        run: |
          cat <<EOF
          ${{ toJSON(inputs) }}
          EOF
      - name: Checkout repository
        uses: actions/checkout@v5
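      # Exactly one of the next two steps runs, selected by the use_pyenv_python input.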
      - name: Install Python
        if: ${{ !(inputs.use_pyenv_python || false) }}
        uses: actions/setup-python@v6
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Install Python (from pyenv) ${{ env.PYTHON_VERSION }}
        if: ${{ inputs.use_pyenv_python }}
        uses: ./.github/actions/setup-pyenv-python
        with:
          python-version: ${{ env.PYTHON_VERSION }}
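      # Re-export the actual interpreter version so later steps see the version that was installed.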
      - name: Identify Python version
        run: |
          PYTHON_VERSION="$(python -c 'import sys; print(f"{sys.version_info[0]}.{sys.version_info[1]}")')"
          echo "PYTHON_VERSION=$PYTHON_VERSION" | tee -a $GITHUB_ENV
      - name: Install Python build dependencies
        run: |
          pip install wheel cmake
      - name: Setup PyTorch
        uses: ./.github/actions/setup-pytorch
      - name: Setup Triton
        uses: ./.github/actions/setup-triton
      - name: Install benchmark dependencies
        id: install
        run: |
          pip install transformers pandas pytest
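      # All benchmark steps below write their CSV and log output here via $REPORTS.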
      - name: Create reports dir
        run: |
          mkdir reports
          echo "REPORTS=$PWD/reports" >> $GITHUB_ENV
      - name: Install benchmarks
        id: install-benchmarks
        run: |
          cd benchmarks
          pip install .
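      # Each benchmark runs when the benchmarks input is empty or lists its name;
      # !cancelled() lets later benchmarks run even if an earlier step failed.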
      - name: Run vllm benchmarks bf16
        if: ${{ steps.install-benchmarks.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'vllm')) }}
        run: |
          source ./scripts/capture-hw-details.sh
          ./scripts/test-triton.sh --install-vllm --skip-pip-install --skip-pytorch-install
          cp -r vllm/tests benchmarks/third_party/vllm/tests
          cd benchmarks/third_party/vllm
          python batched_moe_benchmark.py --reports $REPORTS
          python transform_results.py $REPORTS/moe-gemm-performance.csv $REPORTS/moe-gemm-report.csv --tag $TAG --benchmark moe-bf16-benchmark
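      # Same MoE GEMM benchmark as above, switched to fp8 via the FP8 env var.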
      - name: Run vllm benchmarks fp8
        if: ${{ steps.install-benchmarks.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'vllm')) }}
        run: |
          source ./scripts/capture-hw-details.sh
          cd benchmarks/third_party/vllm
          FP8="1" python batched_moe_benchmark.py --reports $REPORTS
          python transform_results.py $REPORTS/moe-gemm-performance.csv $REPORTS/moe-gemm-report.csv --tag $TAG --benchmark moe-fp8-benchmark
      - name: Run Liger-Kernel benchmarks
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'liger-kernel')) }}
        run: |
          source ./scripts/capture-hw-details.sh
          cd benchmarks/third_party/liger_kernels
          git clone https://github.com/linkedin/Liger-Kernel
          pip install -e Liger-Kernel
          # Remember the return code, but still copy results
          RET_CODE=0
          bash ./run_benchmarks.sh || RET_CODE=$?
          cp Liger-Kernel/benchmark/data/all_benchmark_data.csv $REPORTS/liger-raw.csv
          python transform.py $REPORTS/liger-raw.csv $REPORTS/liger-report.csv --tag $TAG
          # Propagate the captured return code at the end
          exit "$RET_CODE"
      - name: Run e2e Llama 3.1 flex attention performance benchmark
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'llama3-1')) }}
        run: |
          source ./scripts/capture-hw-details.sh
          git clone https://github.com/huggingface/transformers.git
          cd transformers
          git checkout $(<../benchmarks/third_party/e2e-flex_attention/transformers-commit.txt)
          git apply ../benchmarks/third_party/e2e-flex_attention/transformers-patch-for-timing.diff
          git submodule sync
          git submodule update --init --recursive
          pip install -v -e .
          cd ../benchmarks/third_party/e2e-flex_attention
          MODEL_NAME="meta-llama/Llama-3.1-8B"
          MAX_NEW_TOKENS=128
          INPUT_TOKENS=1024
          BATCH_SIZE=1
          python run_llm_inductor_greedy.py -m $MODEL_NAME --max-new-tokens $MAX_NEW_TOKENS --input-tokens $INPUT_TOKENS --num-warmup 2 --num-iter 7 --compile --profile | tee llm.compile.xpu.profile.log
          echo "LLM profiling log is stored in $PWD/llm.compile.xpu.profile.log"
          cp llm.compile.xpu.profile.log $REPORTS/llm.compile.xpu.profile.log
          python transform_results.py $REPORTS/llm.compile.xpu.profile.log $REPORTS/llm-triton-report.csv \
            --tag $TAG \
            --model "$MODEL_NAME" \
            --max-new-tokens $MAX_NEW_TOKENS \
            --batch-size $BATCH_SIZE
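      # Upload whatever reports were produced, even if individual benchmark steps failed.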
      - name: Upload benchmark reports
        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-reports
          path: reports