Skip to content

Commit 7f02627

Browse files
Merge pull request #6 from huggingface/fix-onnx-openvino-for-cpu
fix onnx + openvino for cpu
2 parents: a3561fd + 6f3d74f · commit 7f02627

30 files changed

+451
-124
lines changed

.github/workflows/update_llm_perf_cpu_pytorch.yaml renamed to .github/workflows/benchmark_cpu_onnxruntime.yaml

Lines changed: 20 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,12 @@
1-
name: Update LLM Perf Benchmarks - Intel PyTorch
1+
name: Benchmark CPU Onnxruntime
22

33
on:
44
workflow_dispatch:
55
schedule:
66
- cron: "0 0 * * *"
77
push:
88
branches:
9-
- main
9+
- '*'
1010
pull_request:
1111

1212
concurrency:
@@ -18,6 +18,18 @@ env:
1818

1919
jobs:
2020
run_benchmarks:
21+
if: >-
22+
(github.event_name == 'push' && contains(github.event.head_commit.message, 'cpu_onnxruntime')) ||
23+
(github.event_name == 'push' && contains(github.event.head_commit.message, 'all_benchmarks')) ||
24+
(github.event_name == 'push' && github.ref == 'refs/heads/main') ||
25+
github.event_name == 'workflow_dispatch' ||
26+
(github.event_name == 'pull_request' && (
27+
contains(github.event.pull_request.labels.*.name, 'leaderboard') ||
28+
contains(github.event.pull_request.labels.*.name, 'cpu') ||
29+
contains(github.event.pull_request.labels.*.name, 'onnxruntime') ||
30+
contains(github.event.pull_request.labels.*.name, 'cpu_onnxruntime')
31+
))
32+
2133
strategy:
2234
fail-fast: false
2335
matrix:
@@ -37,6 +49,8 @@ jobs:
3749
SUBSET: ${{ matrix.subset }}
3850
MACHINE: ${{ matrix.machine.name }}
3951
HF_TOKEN: ${{ secrets.HF_TOKEN }}
52+
DISABLE_WARNINGS: 1
53+
BENCHMARK_TOP_N: 3
4054
with:
4155
image: ${{ env.IMAGE }}
4256
options: |
@@ -47,11 +61,10 @@ jobs:
4761
--env HF_TOKEN
4862
--env MKL_THREADING_LAYER=GNU
4963
--env HF_HUB_ENABLE_HF_TRANSFER=1
64+
--env DISABLE_WARNINGS
65+
--env BENCHMARK_TOP_N
5066
--volume ${{ github.workspace }}:/workspace
5167
--workdir /workspace
5268
run: |
53-
pip install packaging && pip install einops scipy optimum codecarbon
54-
pip install -U transformers huggingface_hub[hf_transfer]
55-
pip install git+https://github.com/huggingface/optimum-benchmark.git
56-
pip install -e .
57-
python src/benchmark_runners/cpu/update_llm_perf_cpu_pytorch.py
69+
pip install -e .[onnxruntime]
70+
llm-perf run-benchmark --hardware cpu --backend onnxruntime

.github/workflows/update_llm_perf_cpu_openvino.yaml renamed to .github/workflows/benchmark_cpu_openvino.yaml

Lines changed: 20 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,12 @@
1-
name: Update LLM Perf Benchmarks - Intel OpenVINO
1+
name: Benchmark CPU OpenVINO
22

33
on:
44
workflow_dispatch:
55
schedule:
66
- cron: "0 0 * * *"
77
push:
88
branches:
9-
- main
9+
- '*'
1010
pull_request:
1111

1212
concurrency:
@@ -18,6 +18,18 @@ env:
1818

1919
jobs:
2020
run_benchmarks:
21+
if: >-
22+
(github.event_name == 'push' && contains(github.event.head_commit.message, 'cpu_openvino')) ||
23+
(github.event_name == 'push' && contains(github.event.head_commit.message, 'all_benchmarks')) ||
24+
(github.event_name == 'push' && github.ref == 'refs/heads/main') ||
25+
github.event_name == 'workflow_dispatch' ||
26+
(github.event_name == 'pull_request' && (
27+
contains(github.event.pull_request.labels.*.name, 'leaderboard') ||
28+
contains(github.event.pull_request.labels.*.name, 'cpu') ||
29+
contains(github.event.pull_request.labels.*.name, 'openvino') ||
30+
contains(github.event.pull_request.labels.*.name, 'cpu_openvino')
31+
))
32+
2133
strategy:
2234
fail-fast: false
2335
matrix:
@@ -37,6 +49,8 @@ jobs:
3749
SUBSET: ${{ matrix.subset }}
3850
MACHINE: ${{ matrix.machine.name }}
3951
HF_TOKEN: ${{ secrets.HF_TOKEN }}
52+
DISABLE_WARNINGS: 1
53+
BENCHMARK_TOP_N: 3
4054
with:
4155
image: ${{ env.IMAGE }}
4256
options: |
@@ -47,11 +61,10 @@ jobs:
4761
--env HF_TOKEN
4862
--env MKL_THREADING_LAYER=GNU
4963
--env HF_HUB_ENABLE_HF_TRANSFER=1
64+
--env DISABLE_WARNINGS
65+
--env BENCHMARK_TOP_N
5066
--volume ${{ github.workspace }}:/workspace
5167
--workdir /workspace
5268
run: |
53-
pip install packaging && pip install einops scipy optimum codecarbon
54-
pip install -U transformers huggingface_hub[hf_transfer]
55-
pip install -e git+https://github.com/huggingface/optimum-benchmark.git#egg=optimum-benchmark[openvino]
56-
pip install -e .
57-
python src/benchmark_runners/cpu/update_llm_perf_cpu_openvino.py
69+
pip install -e .[openvino]
70+
llm-perf run-benchmark --hardware cpu --backend openvino

.github/workflows/update_llm_perf_cpu_onnxruntime.yaml renamed to .github/workflows/benchmark_cpu_pytorch.yaml

Lines changed: 17 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,12 @@
1-
name: Update LLM Perf Benchmarks - Intel Onnxruntime
1+
name: Benchmark CPU PyTorch
22

33
on:
44
workflow_dispatch:
55
schedule:
66
- cron: "0 0 * * *"
77
push:
88
branches:
9-
- main
9+
- '*'
1010
pull_request:
1111

1212
concurrency:
@@ -18,6 +18,18 @@ env:
1818

1919
jobs:
2020
run_benchmarks:
21+
if: >-
22+
(github.event_name == 'push' && contains(github.event.head_commit.message, 'cpu_pytorch')) ||
23+
(github.event_name == 'push' && contains(github.event.head_commit.message, 'all_benchmarks')) ||
24+
(github.event_name == 'push' && github.ref == 'refs/heads/main') ||
25+
github.event_name == 'workflow_dispatch' ||
26+
(github.event_name == 'pull_request' && (
27+
contains(github.event.pull_request.labels.*.name, 'leaderboard') ||
28+
contains(github.event.pull_request.labels.*.name, 'cpu') ||
29+
contains(github.event.pull_request.labels.*.name, 'pytorch') ||
30+
contains(github.event.pull_request.labels.*.name, 'cpu_pytorch')
31+
))
32+
2133
strategy:
2234
fail-fast: false
2335
matrix:
@@ -37,6 +49,7 @@ jobs:
3749
SUBSET: ${{ matrix.subset }}
3850
MACHINE: ${{ matrix.machine.name }}
3951
HF_TOKEN: ${{ secrets.HF_TOKEN }}
52+
BENCHMARK_TOP_N: 3
4053
with:
4154
image: ${{ env.IMAGE }}
4255
options: |
@@ -47,13 +60,9 @@ jobs:
4760
--env HF_TOKEN
4861
--env MKL_THREADING_LAYER=GNU
4962
--env HF_HUB_ENABLE_HF_TRANSFER=1
63+
--env BENCHMARK_TOP_N
5064
--volume ${{ github.workspace }}:/workspace
5165
--workdir /workspace
5266
run: |
53-
pip install packaging && pip install einops scipy optimum codecarbon
54-
pip install -U transformers huggingface_hub[hf_transfer]
55-
pip install -e git+https://github.com/huggingface/optimum-benchmark.git#egg=optimum-benchmark[onnxruntime]
5667
pip install -e .
57-
python src/benchmark_runners/cpu/update_llm_perf_cpu_onnxruntime.py
58-
59-
68+
llm-perf run-benchmark --hardware cpu --backend pytorch

.github/workflows/update_llm_perf_cuda_pytorch.yaml renamed to .github/workflows/benchmark_cuda_pytorch.yaml

Lines changed: 18 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,12 @@
1-
name: Update LLM Perf Benchmarks - CUDA PyTorch
1+
name: Benchmark CUDA PyTorch
22

33
on:
44
workflow_dispatch:
55
schedule:
66
- cron: "0 0 * * *"
77
push:
88
branches:
9-
- main
9+
- '*'
1010
pull_request:
1111

1212
concurrency:
@@ -18,6 +18,18 @@ env:
1818

1919
jobs:
2020
run_benchmarks:
21+
if: >-
22+
(github.event_name == 'push' && contains(github.event.head_commit.message, 'cuda_pytorch')) ||
23+
(github.event_name == 'push' && contains(github.event.head_commit.message, 'all_benchmarks')) ||
24+
(github.event_name == 'push' && github.ref == 'refs/heads/main') ||
25+
github.event_name == 'workflow_dispatch' ||
26+
(github.event_name == 'pull_request' && (
27+
contains(github.event.pull_request.labels.*.name, 'leaderboard') ||
28+
contains(github.event.pull_request.labels.*.name, 'cuda') ||
29+
contains(github.event.pull_request.labels.*.name, 'pytorch') ||
30+
contains(github.event.pull_request.labels.*.name, 'cuda_pytorch')
31+
))
32+
2133
strategy:
2234
fail-fast: false
2335
matrix:
@@ -41,6 +53,7 @@ jobs:
4153
SUBSET: ${{ matrix.subset }}
4254
MACHINE: ${{ matrix.machine.name }}
4355
HF_TOKEN: ${{ secrets.HF_TOKEN }}
56+
BENCHMARK_TOP_N: 3
4457
with:
4558
image: ${{ env.IMAGE }}
4659
options: |
@@ -52,11 +65,9 @@ jobs:
5265
--env HF_TOKEN
5366
--env MKL_THREADING_LAYER=GNU
5467
--env HF_HUB_ENABLE_HF_TRANSFER=1
68+
--env BENCHMARK_TOP_N
5569
--volume ${{ github.workspace }}:/workspace
5670
--workdir /workspace
5771
run: |
58-
pip install packaging && pip install flash-attn einops scipy auto-gptq optimum bitsandbytes autoawq codecarbon
59-
pip install -U transformers huggingface_hub[hf_transfer]
60-
pip install git+https://github.com/huggingface/optimum-benchmark.git
61-
pip install -e .
62-
python src/benchmark_runners/cuda/update_llm_perf_cuda_pytorch.py
72+
pip install -e .[cuda]
73+
llm-perf run-benchmark --hardware cuda --backend pytorch

.github/workflows/style.yaml

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -20,14 +20,15 @@ jobs:
2020
- name: Checkout
2121
uses: actions/checkout@v4
2222

23-
- name: Install a specific version
24-
uses: eifinger/setup-rye@v4
23+
- name: Set up Python 3.10
24+
uses: actions/setup-python@v5
2525
with:
26-
version: 'latest'
26+
python-version: "3.10"
2727

28-
- name: Set up Python and dependencies
28+
- name: Install quality requirements
2929
run: |
30-
rye sync
30+
pip install --upgrade pip
31+
pip install ruff
3132
32-
- name: Check quality
33-
run: rye run quality
33+
- name: Check style
34+
run: make quality

.github/workflows/update_llm_perf_leaderboard.yaml

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,13 @@ concurrency:
1515

1616
jobs:
1717
update_llm_perf_leaderboard:
18+
if: ${{
19+
(github.event_name == 'push') ||
20+
(github.event_name == 'workflow_dispatch') ||
21+
contains( github.event.pull_request.labels.*.name, 'leaderboard') ||
22+
contains( github.event.pull_request.labels.*.name, 'update_leaderboard')
23+
}}
24+
1825
runs-on: ubuntu-latest
1926
steps:
2027
- name: Checkout
@@ -39,4 +46,5 @@ jobs:
3946
HF_TOKEN: ${{ secrets.HF_TOKEN }}
4047
HF_HUB_ENABLE_HF_TRANSFER: 1
4148
run: |
42-
python src/update_llm_perf_leaderboard.py
49+
pip install -e .
50+
llm-perf update-leaderboard

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -185,3 +185,5 @@ external_repos/
185185
outputs/
186186
.env
187187
wip/
188+
189+
*.csv

.python-version

Lines changed: 0 additions & 1 deletion
This file was deleted.

Makefile

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,35 @@
1+
# Style and Quality checks
2+
.PHONY: style quality
3+
4+
quality:
5+
ruff check .
6+
ruff format --check .
7+
8+
style:
9+
ruff format .
10+
ruff check --fix .
11+
12+
.PHONY: install
13+
14+
install:
15+
pip install .
16+
17+
install-dev:
18+
DEBUG=1 uv pip install -e .
19+
20+
# Running containers
21+
.PHONY: run_cpu_container run_cuda_container run_rocm_container
22+
23+
run_cpu_container:
24+
docker run -it --rm --pid host --volume .:/llm-perf-backend --workdir /llm-perf-backend ghcr.io/huggingface/optimum-benchmark:latest-cpu
25+
26+
run_cuda_container:
27+
docker run -it --rm --pid host --gpus all --shm-size 64G --volume .:/llm-perf-backend --workdir /llm-perf-backend ghcr.io/huggingface/optimum-benchmark:latest-cuda
28+
29+
run_rocm_container:
30+
docker run -it --rm --shm-size 64G --device /dev/kfd --device /dev/dri --volume .:/llm-perf-backend --workdir /llm-perf-backend ghcr.io/huggingface/optimum-benchmark:latest-rocm
31+
32+
cpu-pytorch-container:
33+
docker build -t cpu-pytorch -f docker/cpu-pytorch/Dockerfile .
34+
# docker run -it --rm --pid host cpu-pytorch /bin/bash
35+
docker run -it --rm --pid host cpu-pytorch

README.md

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,4 +2,14 @@
22
The backend of [the LLM-perf leaderboard](https://huggingface.co/spaces/optimum/llm-perf-leaderboard)
33

44
## Why
5-
this runs all the benchmarks to get results for the leaderboard
5+
this runs all the benchmarks to get results for the leaderboard
6+
7+
## How to install
8+
git clone
9+
pip install -e .[openvino]
10+
11+
## How to use the cli
12+
llm-perf run-benchmark --hardware cpu --backend openvino
13+
llm-perf run-benchmark --hardware cpu --backend pytorch
14+
15+
https://huggingface.co/datasets/optimum-benchmark/llm-perf-leaderboard

docker/cpu-pytorch/Dockerfile

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
FROM ghcr.io/huggingface/optimum-benchmark:latest-cpu
2+
3+
WORKDIR /workspace
4+
5+
COPY . .
6+
7+
RUN pip install -e .
8+
9+
CMD ["llm-perf", "run-benchmark", "--hardware", "cpu", "--backend", "pytorch"]
File renamed without changes.

src/benchmark_runners/cpu/update_llm_perf_cpu_onnxruntime.py renamed to llm_perf/benchmark_runners/cpu/update_llm_perf_cpu_onnxruntime.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,8 @@
66
from optimum_benchmark.launchers.process.config import ProcessConfig
77
from optimum_benchmark.scenarios.inference.config import InferenceConfig
88

9-
from src.common.benchmark_runner import LLMPerfBenchmarkManager
10-
from src.common.utils import (
9+
from llm_perf.common.benchmark_runner import LLMPerfBenchmarkManager
10+
from llm_perf.common.utils import (
1111
CANONICAL_PRETRAINED_OPEN_LLM_LIST,
1212
GENERATE_KWARGS,
1313
INPUT_SHAPES,
@@ -19,7 +19,9 @@ def __init__(self):
1919
super().__init__(backend="onnxruntime", device="cpu")
2020

2121
self.attention_configs = self._get_attention_configs()
22-
assert self.subset is not None, "SUBSET environment variable must be set for benchmarking"
22+
assert (
23+
self.subset is not None
24+
), "SUBSET environment variable must be set for benchmarking"
2325
self.weights_configs = self._get_weights_configs(self.subset)
2426

2527
def get_list_of_benchmarks_to_run(self) -> List[Dict[str, Any]]:

0 commit comments

Comments
 (0)