huggingface
diff --git a/‎.github/workflows/update_llm_perf_cpu_pytorch.yaml renamed to ‎.github/workflows/benchmark_cpu_onnxruntime.yaml
Lines changed: 20 additions & 7 deletions b/‎.github/workflows/update_llm_perf_cpu_pytorch.yaml renamed to ‎.github/workflows/benchmark_cpu_onnxruntime.yaml
Lines changed: 20 additions & 7 deletions
diff --git a/‎.github/workflows/update_llm_perf_cpu_openvino.yaml renamed to ‎.github/workflows/benchmark_cpu_openvino.yaml
Lines changed: 20 additions & 7 deletions b/‎.github/workflows/update_llm_perf_cpu_openvino.yaml renamed to ‎.github/workflows/benchmark_cpu_openvino.yaml
Lines changed: 20 additions & 7 deletions
diff --git a/‎.github/workflows/update_llm_perf_cpu_onnxruntime.yaml renamed to ‎.github/workflows/benchmark_cpu_pytorch.yaml
Lines changed: 17 additions & 8 deletions b/‎.github/workflows/update_llm_perf_cpu_onnxruntime.yaml renamed to ‎.github/workflows/benchmark_cpu_pytorch.yaml
Lines changed: 17 additions & 8 deletions
diff --git a/‎.github/workflows/update_llm_perf_cuda_pytorch.yaml renamed to ‎.github/workflows/benchmark_cuda_pytorch.yaml
Lines changed: 18 additions & 7 deletions b/‎.github/workflows/update_llm_perf_cuda_pytorch.yaml renamed to ‎.github/workflows/benchmark_cuda_pytorch.yaml
Lines changed: 18 additions & 7 deletions
diff --git a/‎.github/workflows/style.yaml
Lines changed: 8 additions & 7 deletions b/‎.github/workflows/style.yaml
Lines changed: 8 additions & 7 deletions
diff --git a/‎.github/workflows/update_llm_perf_leaderboard.yaml
Lines changed: 9 additions & 1 deletion b/‎.github/workflows/update_llm_perf_leaderboard.yaml
Lines changed: 9 additions & 1 deletion
diff --git a/‎.gitignore
Lines changed: 2 additions & 0 deletions b/‎.gitignore
Lines changed: 2 additions & 0 deletions
diff --git a/‎.python-version
Lines changed: 0 additions & 1 deletion b/‎.python-version
Lines changed: 0 additions & 1 deletion
diff --git a/‎Makefile
Lines changed: 35 additions & 0 deletions b/‎Makefile
Lines changed: 35 additions & 0 deletions
diff --git a/‎README.md
Lines changed: 11 additions & 1 deletion b/‎README.md
Lines changed: 11 additions & 1 deletion
diff --git a/‎docker/cpu-pytorch/Dockerfile
Lines changed: 9 additions & 0 deletions b/‎docker/cpu-pytorch/Dockerfile
Lines changed: 9 additions & 0 deletions
diff --git a/‎src/__init__.py renamed to ‎llm_perf/__init__.py b/‎src/__init__.py renamed to ‎llm_perf/__init__.py
diff --git a/‎src/benchmark_runners/cpu/update_llm_perf_cpu_onnxruntime.py renamed to ‎llm_perf/benchmark_runners/cpu/update_llm_perf_cpu_onnxruntime.py
Lines changed: 5 additions & 3 deletions b/‎src/benchmark_runners/cpu/update_llm_perf_cpu_onnxruntime.py renamed to ‎llm_perf/benchmark_runners/cpu/update_llm_perf_cpu_onnxruntime.py
Lines changed: 5 additions & 3 deletions
@@ -1,12 +1,13 @@
-name: Update LLM Perf Benchmarks - Intel PyTorch
+name: Benchmark CPU Onnxruntime
 
 on:
   workflow_dispatch:
   schedule:
     - cron: "0 0 * * *"
   push:
     branches:
-      - main
+      # '**' also matches branch names containing '/'; a lone '*' does not.
+      - '**'
   pull_request:
 
 concurrency:
@@ -18,6 +18,21 @@ env:
 
 jobs:
   run_benchmarks:
+    # Nightly cron runs report github.event_name == 'schedule'; without that
+    # clause this job would be skipped on every scheduled trigger.
+    if: >-
+      (github.event_name == 'push' && contains(github.event.head_commit.message, 'cpu_onnxruntime')) ||
+      (github.event_name == 'push' && contains(github.event.head_commit.message, 'all_benchmarks')) ||
+      (github.event_name == 'push' && github.ref == 'refs/heads/main') ||
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      (github.event_name == 'pull_request' && (
+        contains(github.event.pull_request.labels.*.name, 'leaderboard') ||
+        contains(github.event.pull_request.labels.*.name, 'cpu') ||
+        contains(github.event.pull_request.labels.*.name, 'onnxruntime') ||
+        contains(github.event.pull_request.labels.*.name, 'cpu_onnxruntime')
+      ))
+
     strategy:
       fail-fast: false
       matrix:
@@ -37,6 +49,8 @@ jobs:
           SUBSET: ${{ matrix.subset }}
           MACHINE: ${{ matrix.machine.name }}
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+          DISABLE_WARNINGS: 1
+          BENCHMARK_TOP_N: 3
         with:
           image: ${{ env.IMAGE }}
           options: |
@@ -47,11 +61,10 @@ jobs:
             --env HF_TOKEN
             --env MKL_THREADING_LAYER=GNU
             --env HF_HUB_ENABLE_HF_TRANSFER=1
+            --env DISABLE_WARNINGS
+            --env BENCHMARK_TOP_N
             --volume ${{ github.workspace }}:/workspace
             --workdir /workspace
           run: |
-            pip install packaging && pip install einops scipy optimum codecarbon
-            pip install -U transformers huggingface_hub[hf_transfer]
-            pip install git+https://github.com/huggingface/optimum-benchmark.git
-            pip install -e .
-            python src/benchmark_runners/cpu/update_llm_perf_cpu_pytorch.py
+            pip install -e .[onnxruntime]
+            llm-perf run-benchmark --hardware cpu --backend onnxruntime
@@ -1,12 +1,13 @@
-name: Update LLM Perf Benchmarks - Intel OpenVINO
+name: Benchmark CPU OpenVINO
 
 on:
   workflow_dispatch:
   schedule:
     - cron: "0 0 * * *"
   push:
     branches:
-      - main
+      # '**' also matches branch names containing '/'; a lone '*' does not.
+      - '**'
   pull_request:
 
 concurrency:
@@ -18,6 +18,21 @@ env:
 
 jobs:
   run_benchmarks:
+    # Nightly cron runs report github.event_name == 'schedule'; without that
+    # clause this job would be skipped on every scheduled trigger.
+    if: >-
+      (github.event_name == 'push' && contains(github.event.head_commit.message, 'cpu_openvino')) ||
+      (github.event_name == 'push' && contains(github.event.head_commit.message, 'all_benchmarks')) ||
+      (github.event_name == 'push' && github.ref == 'refs/heads/main') ||
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      (github.event_name == 'pull_request' && (
+        contains(github.event.pull_request.labels.*.name, 'leaderboard') ||
+        contains(github.event.pull_request.labels.*.name, 'cpu') ||
+        contains(github.event.pull_request.labels.*.name, 'openvino') ||
+        contains(github.event.pull_request.labels.*.name, 'cpu_openvino')
+      ))
+
     strategy:
       fail-fast: false
       matrix:
@@ -37,6 +49,8 @@ jobs:
           SUBSET: ${{ matrix.subset }}
           MACHINE: ${{ matrix.machine.name }}
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+          DISABLE_WARNINGS: 1
+          BENCHMARK_TOP_N: 3
         with:
           image: ${{ env.IMAGE }}
           options: |
@@ -47,11 +61,10 @@ jobs:
             --env HF_TOKEN
             --env MKL_THREADING_LAYER=GNU
             --env HF_HUB_ENABLE_HF_TRANSFER=1
+            --env DISABLE_WARNINGS
+            --env BENCHMARK_TOP_N
             --volume ${{ github.workspace }}:/workspace
             --workdir /workspace
           run: |
-            pip install packaging && pip install einops scipy optimum codecarbon
-            pip install -U transformers huggingface_hub[hf_transfer]
-            pip install -e git+https://github.com/huggingface/optimum-benchmark.git#egg=optimum-benchmark[openvino]
-            pip install -e .
-            python src/benchmark_runners/cpu/update_llm_perf_cpu_openvino.py
+            pip install -e .[openvino]
+            llm-perf run-benchmark --hardware cpu --backend openvino
@@ -1,12 +1,13 @@
-name: Update LLM Perf Benchmarks - Intel Onnxruntime
+name: Benchmark CPU PyTorch
 
 on:
   workflow_dispatch:
   schedule:
     - cron: "0 0 * * *"
   push:
     branches:
-      - main
+      # '**' also matches branch names containing '/'; a lone '*' does not.
+      - '**'
   pull_request:
 
 concurrency:
@@ -18,6 +18,21 @@ env:
 
 jobs:
   run_benchmarks:
+    # Nightly cron runs report github.event_name == 'schedule'; without that
+    # clause this job would be skipped on every scheduled trigger.
+    if: >-
+      (github.event_name == 'push' && contains(github.event.head_commit.message, 'cpu_pytorch')) ||
+      (github.event_name == 'push' && contains(github.event.head_commit.message, 'all_benchmarks')) ||
+      (github.event_name == 'push' && github.ref == 'refs/heads/main') ||
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      (github.event_name == 'pull_request' && (
+        contains(github.event.pull_request.labels.*.name, 'leaderboard') ||
+        contains(github.event.pull_request.labels.*.name, 'cpu') ||
+        contains(github.event.pull_request.labels.*.name, 'pytorch') ||
+        contains(github.event.pull_request.labels.*.name, 'cpu_pytorch')
+      ))
+
     strategy:
       fail-fast: false
       matrix:
@@ -37,6 +49,7 @@ jobs:
           SUBSET: ${{ matrix.subset }}
           MACHINE: ${{ matrix.machine.name }}
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+          BENCHMARK_TOP_N: 3
         with:
           image: ${{ env.IMAGE }}
           options: |
@@ -47,13 +60,9 @@ jobs:
             --env HF_TOKEN
             --env MKL_THREADING_LAYER=GNU
             --env HF_HUB_ENABLE_HF_TRANSFER=1
+            --env BENCHMARK_TOP_N
             --volume ${{ github.workspace }}:/workspace
             --workdir /workspace
           run: |
-            pip install packaging && pip install einops scipy optimum codecarbon
-            pip install -U transformers huggingface_hub[hf_transfer]
-            pip install -e git+https://github.com/huggingface/optimum-benchmark.git#egg=optimum-benchmark[onnxruntime]
             pip install -e .
-            python src/benchmark_runners/cpu/update_llm_perf_cpu_onnxruntime.py
-            
-
+            llm-perf run-benchmark --hardware cpu --backend pytorch
@@ -1,12 +1,13 @@
-name: Update LLM Perf Benchmarks - CUDA PyTorch
+name: Benchmark CUDA PyTorch
 
 on:
   workflow_dispatch:
   schedule:
     - cron: "0 0 * * *"
   push:
     branches:
-      - main
+      # '**' also matches branch names containing '/'; a lone '*' does not.
+      - '**'
   pull_request:
 
 concurrency:
@@ -18,6 +18,21 @@ env:
 
 jobs:
   run_benchmarks:
+    # Nightly cron runs report github.event_name == 'schedule'; without that
+    # clause this job would be skipped on every scheduled trigger.
+    if: >-
+      (github.event_name == 'push' && contains(github.event.head_commit.message, 'cuda_pytorch')) ||
+      (github.event_name == 'push' && contains(github.event.head_commit.message, 'all_benchmarks')) ||
+      (github.event_name == 'push' && github.ref == 'refs/heads/main') ||
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      (github.event_name == 'pull_request' && (
+        contains(github.event.pull_request.labels.*.name, 'leaderboard') ||
+        contains(github.event.pull_request.labels.*.name, 'cuda') ||
+        contains(github.event.pull_request.labels.*.name, 'pytorch') ||
+        contains(github.event.pull_request.labels.*.name, 'cuda_pytorch')
+      ))
+
     strategy:
       fail-fast: false
       matrix:
@@ -41,6 +53,7 @@ jobs:
           SUBSET: ${{ matrix.subset }}
           MACHINE: ${{ matrix.machine.name }}
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+          BENCHMARK_TOP_N: 3
         with:
           image: ${{ env.IMAGE }}
           options: |
@@ -52,11 +65,9 @@ jobs:
             --env HF_TOKEN
             --env MKL_THREADING_LAYER=GNU
             --env HF_HUB_ENABLE_HF_TRANSFER=1
+            --env BENCHMARK_TOP_N
             --volume ${{ github.workspace }}:/workspace
             --workdir /workspace
           run: |
-            pip install packaging && pip install flash-attn einops scipy auto-gptq optimum bitsandbytes autoawq codecarbon
-            pip install -U transformers huggingface_hub[hf_transfer]
-            pip install git+https://github.com/huggingface/optimum-benchmark.git
-            pip install -e .
-            python src/benchmark_runners/cuda/update_llm_perf_cuda_pytorch.py
+            pip install -e .[cuda]
+            llm-perf run-benchmark --hardware cuda --backend pytorch
@@ -20,14 +20,15 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v4
 
-      - name: Install a specific version
-        uses: eifinger/setup-rye@v4
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v5
         with:
-          version: 'latest'
+          python-version: "3.10"
 
-      - name: Set up Python and dependencies
+      - name: Install quality requirements
         run: |
-          rye sync
+          pip install --upgrade pip
+          pip install ruff
 
-      - name: Check quality
-        run: rye run quality
+      - name: Check style
+        run: make quality
@@ -15,6 +15,13 @@ concurrency:
 
 jobs:
   update_llm_perf_leaderboard:
+    # Runs on push and manual dispatch; any other event needs a 'leaderboard' or 'update_leaderboard' PR label.
+    if: >-
+      github.event_name == 'push' ||
+      github.event_name == 'workflow_dispatch' ||
+      contains(github.event.pull_request.labels.*.name, 'leaderboard') ||
+      contains(github.event.pull_request.labels.*.name, 'update_leaderboard')
+
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
@@ -39,4 +46,5 @@ jobs:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
           HF_HUB_ENABLE_HF_TRANSFER: 1
         run: |
-          python src/update_llm_perf_leaderboard.py
+          pip install -e .
+          llm-perf update-leaderboard
@@ -185,3 +185,5 @@ external_repos/
 outputs/
 .env
 wip/
+
+*.csv
@@ -0,0 +1,42 @@
+# Style and Quality checks
+.PHONY: style quality
+
+# Lint and verify formatting without modifying any file.
+quality:
+	ruff check .
+	ruff format --check .
+
+# Reformat the code, then apply ruff's automatic fixes.
+style:
+	ruff format .
+	ruff check --fix .
+
+# Installation
+.PHONY: install install-dev
+
+install:
+	pip install .
+
+# Editable install through uv.
+install-dev:
+	DEBUG=1 uv pip install -e .
+
+# Running containers
+# "$(CURDIR)" gives docker an absolute bind-mount source; a bare "." is only
+# accepted by Docker releases that support relative paths in --volume.
+.PHONY: run_cpu_container run_cuda_container run_rocm_container cpu-pytorch-container
+
+run_cpu_container:
+	docker run -it --rm --pid host --volume "$(CURDIR)":/llm-perf-backend --workdir /llm-perf-backend ghcr.io/huggingface/optimum-benchmark:latest-cpu
+
+run_cuda_container:
+	docker run -it --rm --pid host --gpus all --shm-size 64G --volume "$(CURDIR)":/llm-perf-backend --workdir /llm-perf-backend ghcr.io/huggingface/optimum-benchmark:latest-cuda
+
+run_rocm_container:
+	docker run -it --rm --shm-size 64G --device /dev/kfd --device /dev/dri --volume "$(CURDIR)":/llm-perf-backend --workdir /llm-perf-backend ghcr.io/huggingface/optimum-benchmark:latest-rocm
+
+# Build the image from docker/cpu-pytorch/Dockerfile, then run it.
+cpu-pytorch-container:
+	docker build -t cpu-pytorch -f docker/cpu-pytorch/Dockerfile .
+	# docker run -it --rm --pid host cpu-pytorch /bin/bash
+	docker run -it --rm --pid host cpu-pytorch
@@ -2,4 +2,21 @@
 The backend of [the LLM-perf leaderboard](https://huggingface.co/spaces/optimum/llm-perf-leaderboard)
 
 ## Why
-this runs all the benchmarks to get results for the leaderboard
+This runs all the benchmarks to get results for the leaderboard.
+
+## How to install
+Clone the repository, then install it in editable mode with the extra for the backend you want to benchmark (`openvino` in this example):
+
+```bash
+git clone <repository-url>
+cd <repository-directory>
+pip install -e .[openvino]
+```
+
+## How to use the CLI
+```bash
+llm-perf run-benchmark --hardware cpu --backend openvino
+llm-perf run-benchmark --hardware cpu --backend pytorch
+```
+
+Leaderboard dataset: https://huggingface.co/datasets/optimum-benchmark/llm-perf-leaderboard
@@ -0,0 +1,12 @@
+# Image that runs the CPU PyTorch benchmark through the llm-perf CLI.
+FROM ghcr.io/huggingface/optimum-benchmark:latest-cpu
+
+WORKDIR /workspace
+
+# Copy the build context into /workspace.
+COPY . .
+
+# Editable install; --no-cache-dir keeps pip's download cache out of the image layer.
+RUN pip install --no-cache-dir -e .
+
+CMD ["llm-perf", "run-benchmark", "--hardware", "cpu", "--backend", "pytorch"]
@@ -6,8 +6,8 @@
 from optimum_benchmark.launchers.process.config import ProcessConfig
 from optimum_benchmark.scenarios.inference.config import InferenceConfig
 
-from src.common.benchmark_runner import LLMPerfBenchmarkManager
-from src.common.utils import (
+from llm_perf.common.benchmark_runner import LLMPerfBenchmarkManager
+from llm_perf.common.utils import (
     CANONICAL_PRETRAINED_OPEN_LLM_LIST,
     GENERATE_KWARGS,
     INPUT_SHAPES,
@@ -19,7 +19,9 @@ def __init__(self):
         super().__init__(backend="onnxruntime", device="cpu")
 
         self.attention_configs = self._get_attention_configs()
-        assert self.subset is not None, "SUBSET environment variable must be set for benchmarking"
+        assert (
+            self.subset is not None
+        ), "SUBSET environment variable must be set for benchmarking"
         self.weights_configs = self._get_weights_configs(self.subset)
 
     def get_list_of_benchmarks_to_run(self) -> List[Dict[str, Any]]:
-Original file line number
+Diff line change
 outputs/
 .env
 wip/
++
 +*.csv