Third party benchmarks #261
name: Third party benchmarks

on:
  workflow_dispatch:
    inputs:
      runner_label:
        description: Runner label, keep empty for default
        type: string
        default: ""
      tag:
        description: Tag for benchmark results
        type: string
        default: "test"
      benchmarks:
        description: JSON list of benchmarks to run. Leave empty to run all benchmarks.
        type: string
        default: ""
      use_pyenv_python:
        description: Use Python built with pyenv
        type: boolean
        default: false
  schedule:
    # 02:05 PST (UTC-8), daily
    - cron: "5 10 * * *"

permissions: read-all

env:
  PYTHON_VERSION: "3.10"
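  # Tag precedence: explicit dispatch input, then pr-<number> for pull request
  # runs, 'ci' for scheduled runs, and 'test' as the fallback.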
  TAG: ${{ inputs.tag || (github.event_name == 'pull_request' && format('pr-{0}', github.event.number)) || (github.event_name == 'schedule' && 'ci') || 'test' }}
  HF_TOKEN: ${{ secrets.HF_TOKEN || '' }}

jobs:
  build:
    name: Third party benchmarks
    runs-on:
      - linux
      - ${{ inputs.runner_label || 'max1550' }}
    timeout-minutes: 720
    defaults:
      run:
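        # Wrap every `run` step so the oneAPI environment is sourced before the script body executes.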
        shell: bash --noprofile --norc -eo pipefail -c "source /opt/intel/oneapi/setvars.sh > /dev/null; source {0}"
    steps:
      - name: Print inputs
        run: |
          cat <<EOF
          ${{ toJSON(inputs) }}
          EOF
      - name: Checkout repository
        uses: actions/checkout@v5
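      # Exactly one of the next two steps runs, selected by the use_pyenv_python input.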
      - name: Install Python
        if: ${{ !(inputs.use_pyenv_python || false) }}
        uses: actions/setup-python@v6
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Install Python (from pyenv) ${{ env.PYTHON_VERSION }}
        if: ${{ inputs.use_pyenv_python }}
        uses: ./.github/actions/setup-pyenv-python
        with:
          python-version: ${{ env.PYTHON_VERSION }}
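      # Re-export the actual interpreter version so later steps see the version that was installed.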
      - name: Identify Python version
        run: |
          PYTHON_VERSION="$(python -c 'import sys; print(f"{sys.version_info[0]}.{sys.version_info[1]}")')"
          echo "PYTHON_VERSION=$PYTHON_VERSION" | tee -a $GITHUB_ENV
      - name: Install Python build dependencies
        run: |
          pip install wheel cmake
      - name: Setup PyTorch
        uses: ./.github/actions/setup-pytorch
      - name: Setup Triton
        uses: ./.github/actions/setup-triton
      - name: Install benchmark dependencies
        id: install
        run: |
          pip install transformers pandas pytest
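      # All benchmark steps below write their CSV and log output here via $REPORTS.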
      - name: Create reports dir
        run: |
          mkdir reports
          echo "REPORTS=$PWD/reports" >> $GITHUB_ENV
      - name: Install benchmarks
        id: install-benchmarks
        run: |
          cd benchmarks
          pip install .
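      # Each benchmark runs when the benchmarks input is empty or lists its name;
      # !cancelled() lets later benchmarks run even if an earlier step failed.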
      - name: Run vllm benchmarks bf16
        if: ${{ steps.install-benchmarks.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'vllm')) }}
        run: |
          source ./scripts/capture-hw-details.sh
          ./scripts/test-triton.sh --install-vllm --skip-pip-install --skip-pytorch-install
          cp -r vllm/tests benchmarks/third_party/vllm/tests
          cd benchmarks/third_party/vllm
          python batched_moe_benchmark.py --reports $REPORTS
          python transform_results.py $REPORTS/moe-gemm-performance.csv $REPORTS/moe-gemm-report.csv --tag $TAG --benchmark moe-bf16-benchmark
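      # Same MoE GEMM benchmark as above, switched to fp8 via the FP8 env var.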
      - name: Run vllm benchmarks fp8
        if: ${{ steps.install-benchmarks.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'vllm')) }}
        run: |
          source ./scripts/capture-hw-details.sh
          cd benchmarks/third_party/vllm
          FP8="1" python batched_moe_benchmark.py --reports $REPORTS
          python transform_results.py $REPORTS/moe-gemm-performance.csv $REPORTS/moe-gemm-report.csv --tag $TAG --benchmark moe-fp8-benchmark
      - name: Run Liger-Kernel benchmarks
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'liger-kernel')) }}
        run: |
          source ./scripts/capture-hw-details.sh
          cd benchmarks/third_party/liger_kernels
          git clone https://github.com/linkedin/Liger-Kernel
          pip install -e Liger-Kernel
          # Remember the return code, but still copy results
          RET_CODE=0
          bash ./run_benchmarks.sh || RET_CODE=$?
          cp Liger-Kernel/benchmark/data/all_benchmark_data.csv $REPORTS/liger-raw.csv
          python transform.py $REPORTS/liger-raw.csv $REPORTS/liger-report.csv --tag $TAG
          # Propagate the captured return code at the end
          exit "$RET_CODE"
      - name: Run e2e Llama 3.1 flex attention performance benchmark
        if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'llama3-1')) }}
        run: |
          source ./scripts/capture-hw-details.sh
          git clone https://github.com/huggingface/transformers.git
          cd transformers
          git checkout $(<../benchmarks/third_party/e2e-flex_attention/transformers-commit.txt)
          git apply ../benchmarks/third_party/e2e-flex_attention/transformers-patch-for-timing.diff
          git submodule sync
          git submodule update --init --recursive
          pip install -v -e .
          cd ../benchmarks/third_party/e2e-flex_attention
          MODEL_NAME="meta-llama/Llama-3.1-8B"
          MAX_NEW_TOKENS=128
          INPUT_TOKENS=1024
          BATCH_SIZE=1
          python run_llm_inductor_greedy.py -m $MODEL_NAME --max-new-tokens $MAX_NEW_TOKENS --input-tokens $INPUT_TOKENS --num-warmup 2 --num-iter 7 --compile --profile | tee llm.compile.xpu.profile.log
          echo "LLM profiling log is stored in $PWD/llm.compile.xpu.profile.log"
          cp llm.compile.xpu.profile.log $REPORTS/llm.compile.xpu.profile.log
          python transform_results.py $REPORTS/llm.compile.xpu.profile.log $REPORTS/llm-triton-report.csv \
            --tag $TAG \
            --model "$MODEL_NAME" \
            --max-new-tokens $MAX_NEW_TOKENS \
            --batch-size $BATCH_SIZE
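      # Upload whatever reports were produced, even if individual benchmark steps failed.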
      - name: Upload benchmark reports
        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-reports
          path: reports