Skip to content

⚙️ dashboard for observability + rerun option #10

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 21 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions .github/workflows/benchmark_cpu_onnxruntime.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,14 @@ name: Benchmark CPU Onnxruntime

on:
workflow_dispatch:
inputs:
rerun_already_conducted_benchmarks:
description: 'Rerun benchmarks that were already conducted'
required: true
type: boolean
default: false
schedule:
- cron: "0 12 * * 3"
- cron: "0 12 * * *"
pull_request:

concurrency:
Expand All @@ -16,15 +22,18 @@ env:
jobs:
run_benchmarks:
if: >-
(github.event_name == 'push' && contains(github.event.head_commit.message, 'cpu_onnxruntime')) ||
(github.event_name == 'push' && contains(github.event.head_commit.message, 'all_benchmarks')) ||
(github.event_name == 'push' && (
contains(github.event.head_commit.message, 'cpu_onnxruntime') ||
contains(github.event.head_commit.message, 'all_benchmarks')
)) ||
(github.event_name == 'push' && github.ref == 'refs/heads/main') ||
github.event_name == 'workflow_dispatch' ||
(github.event_name == 'pull_request' && (
contains(github.event.pull_request.labels.*.name, 'leaderboard') ||
contains(github.event.pull_request.labels.*.name, 'cpu') ||
contains(github.event.pull_request.labels.*.name, 'onnxruntime') ||
contains(github.event.pull_request.labels.*.name, 'cpu_onnxruntime')
contains(github.event.pull_request.labels.*.name, 'cpu_onnxruntime') ||
contains(github.event.pull_request.labels.*.name, 'all_benchmarks')
))

strategy:
Expand All @@ -48,6 +57,7 @@ jobs:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
DISABLE_WARNINGS: 1
BENCHMARK_TOP_N: 3
RERUN_ALREADY_CONDUCTED_BENCHMARKS: ${{ github.event.inputs.rerun_already_conducted_benchmarks }}
with:
image: ${{ env.IMAGE }}
options: |
Expand All @@ -60,6 +70,7 @@ jobs:
--env HF_HUB_ENABLE_HF_TRANSFER=1
--env DISABLE_WARNINGS
--env BENCHMARK_TOP_N
--env RERUN_ALREADY_CONDUCTED_BENCHMARKS
--volume ${{ github.workspace }}:/workspace
--workdir /workspace
run: |
Expand Down
19 changes: 15 additions & 4 deletions .github/workflows/benchmark_cpu_openvino.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,14 @@ name: Benchmark CPU OpenVINO

on:
workflow_dispatch:
inputs:
rerun_already_conducted_benchmarks:
description: 'Rerun benchmarks that were already conducted'
required: true
type: boolean
default: false
schedule:
- cron: "0 0 * * *"
- cron: "0 6 * * *"
pull_request:

concurrency:
Expand All @@ -16,15 +22,18 @@ env:
jobs:
run_benchmarks:
if: >-
(github.event_name == 'push' && contains(github.event.head_commit.message, 'cpu_openvino')) ||
(github.event_name == 'push' && contains(github.event.head_commit.message, 'all_benchmarks')) ||
(github.event_name == 'push' && (
contains(github.event.head_commit.message, 'cpu_openvino') ||
contains(github.event.head_commit.message, 'all_benchmarks')
)) ||
(github.event_name == 'push' && github.ref == 'refs/heads/main') ||
github.event_name == 'workflow_dispatch' ||
(github.event_name == 'pull_request' && (
contains(github.event.pull_request.labels.*.name, 'leaderboard') ||
contains(github.event.pull_request.labels.*.name, 'cpu') ||
contains(github.event.pull_request.labels.*.name, 'openvino') ||
contains(github.event.pull_request.labels.*.name, 'cpu_openvino')
contains(github.event.pull_request.labels.*.name, 'cpu_openvino') ||
contains(github.event.pull_request.labels.*.name, 'all_benchmarks')
))

strategy:
Expand All @@ -48,6 +57,7 @@ jobs:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
DISABLE_WARNINGS: 1
BENCHMARK_TOP_N: 50
RERUN_ALREADY_CONDUCTED_BENCHMARKS: ${{ github.event.inputs.rerun_already_conducted_benchmarks }}
with:
image: ${{ env.IMAGE }}
options: |
Expand All @@ -60,6 +70,7 @@ jobs:
--env HF_HUB_ENABLE_HF_TRANSFER=1
--env DISABLE_WARNINGS
--env BENCHMARK_TOP_N
--env RERUN_ALREADY_CONDUCTED_BENCHMARKS
--volume ${{ github.workspace }}:/workspace
--workdir /workspace
run: |
Expand Down
17 changes: 14 additions & 3 deletions .github/workflows/benchmark_cpu_pytorch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@ name: Benchmark CPU PyTorch

on:
workflow_dispatch:
inputs:
rerun_already_conducted_benchmarks:
description: 'Rerun benchmarks that were already conducted'
required: true
type: boolean
default: false
schedule:
- cron: "0 0 * * *"
pull_request:
Expand All @@ -16,15 +22,18 @@ env:
jobs:
run_benchmarks:
if: >-
(github.event_name == 'push' && contains(github.event.head_commit.message, 'cpu_pytorch')) ||
(github.event_name == 'push' && contains(github.event.head_commit.message, 'all_benchmarks')) ||
(github.event_name == 'push' && (
contains(github.event.head_commit.message, 'cpu_pytorch') ||
contains(github.event.head_commit.message, 'all_benchmarks')
)) ||
(github.event_name == 'push' && github.ref == 'refs/heads/main') ||
github.event_name == 'workflow_dispatch' ||
(github.event_name == 'pull_request' && (
contains(github.event.pull_request.labels.*.name, 'leaderboard') ||
contains(github.event.pull_request.labels.*.name, 'cpu') ||
contains(github.event.pull_request.labels.*.name, 'pytorch') ||
contains(github.event.pull_request.labels.*.name, 'cpu_pytorch')
contains(github.event.pull_request.labels.*.name, 'cpu_pytorch') ||
contains(github.event.pull_request.labels.*.name, 'all_benchmarks')
))

strategy:
Expand All @@ -47,6 +56,7 @@ jobs:
MACHINE: ${{ matrix.machine.name }}
HF_TOKEN: ${{ secrets.HF_TOKEN }}
BENCHMARK_TOP_N: 50
RERUN_ALREADY_CONDUCTED_BENCHMARKS: ${{ github.event.inputs.rerun_already_conducted_benchmarks }}
with:
image: ${{ env.IMAGE }}
options: |
Expand All @@ -58,6 +68,7 @@ jobs:
--env MKL_THREADING_LAYER=GNU
--env HF_HUB_ENABLE_HF_TRANSFER=1
--env BENCHMARK_TOP_N
--env RERUN_ALREADY_CONDUCTED_BENCHMARKS
--volume ${{ github.workspace }}:/workspace
--workdir /workspace
run: |
Expand Down
19 changes: 15 additions & 4 deletions .github/workflows/benchmark_cuda_pytorch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,14 @@ name: Benchmark CUDA PyTorch

on:
workflow_dispatch:
inputs:
rerun_already_conducted_benchmarks:
description: 'Rerun benchmarks that were already conducted'
required: true
type: boolean
default: false
schedule:
- cron: "0 3 * * 0"
- cron: "0 0 * * *"
pull_request:

concurrency:
Expand All @@ -16,15 +22,18 @@ env:
jobs:
run_benchmarks:
if: >-
(github.event_name == 'push' && contains(github.event.head_commit.message, 'cuda_pytorch')) ||
(github.event_name == 'push' && contains(github.event.head_commit.message, 'all_benchmarks')) ||
(github.event_name == 'push' && (
contains(github.event.head_commit.message, 'cuda_pytorch') ||
contains(github.event.head_commit.message, 'all_benchmarks')
)) ||
(github.event_name == 'push' && github.ref == 'refs/heads/main') ||
github.event_name == 'workflow_dispatch' ||
(github.event_name == 'pull_request' && (
contains(github.event.pull_request.labels.*.name, 'leaderboard') ||
contains(github.event.pull_request.labels.*.name, 'cuda') ||
contains(github.event.pull_request.labels.*.name, 'pytorch') ||
contains(github.event.pull_request.labels.*.name, 'cuda_pytorch')
contains(github.event.pull_request.labels.*.name, 'cuda_pytorch') ||
contains(github.event.pull_request.labels.*.name, 'all_benchmarks')
))

strategy:
Expand All @@ -51,6 +60,7 @@ jobs:
MACHINE: ${{ matrix.machine.name }}
HF_TOKEN: ${{ secrets.HF_TOKEN }}
BENCHMARK_TOP_N: 50
RERUN_ALREADY_CONDUCTED_BENCHMARKS: ${{ github.event.inputs.rerun_already_conducted_benchmarks }}
with:
image: ${{ env.IMAGE }}
options: |
Expand All @@ -63,6 +73,7 @@ jobs:
--env MKL_THREADING_LAYER=GNU
--env HF_HUB_ENABLE_HF_TRANSFER=1
--env BENCHMARK_TOP_N
--env RERUN_ALREADY_CONDUCTED_BENCHMARKS
--volume ${{ github.workspace }}:/workspace
--workdir /workspace
run: |
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -191,3 +191,4 @@ optimum-benchmark/

*.egg-info/
data/
load_model_codecarbon.json
63 changes: 42 additions & 21 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Load environment variables
ifneq (,$(wildcard .env))
include .env
export
endif

# Style and Quality checks
.PHONY: style quality install install-dev run_cpu_container run_cuda_container run_rocm_container cpu-pytorch-container cpu-openvino-container collector-container
.PHONY: style quality run_cpu_container run_cuda_container run_rocm_container cpu-pytorch-container cpu-openvino-container collector-container help

quality:
ruff check .
Expand All @@ -9,31 +15,46 @@ style:
ruff format .
ruff check --fix .

install:
pip install .

install-dev:
DEBUG=1 uv pip install -e .

# Running optimum-benchmark containers
run_cpu_container:
run-optimum-benchmark-cpu-container:
docker run -it --rm --pid host --volume .:/llm-perf-backend --workdir /llm-perf-backend ghcr.io/huggingface/optimum-benchmark:latest-cpu

run_cuda_container:
run-optimum-benchmark-cuda-container:
docker run -it --rm --pid host --gpus all --shm-size 64G --volume .:/llm-perf-backend --workdir /llm-perf-backend ghcr.io/huggingface/optimum-benchmark:latest-cuda

run_rocm_container:
run-optimum-benchmark-rocm-container:
docker run -it --rm --shm-size 64G --device /dev/kfd --device /dev/dri --volume .:/llm-perf-backend --workdir /llm-perf-backend ghcr.io/huggingface/optimum-benchmark:latest-rocm

# Running llm-perf backend containers
cpu-pytorch-container:
docker build -t cpu-pytorch -f docker/cpu-pytorch/Dockerfile .
docker run -it --rm --pid host cpu-pytorch

cpu-openvino-container:
docker build -t cpu-openvino -f docker/cpu-openvino/Dockerfile .
docker run -it --rm --pid host cpu-openvino
# Running llm-perf-leaderboard benchmarks
run-llm-perf-benchmark-cpu-pytorch:
docker build -t llm-perf-backend-cpu-pytorch -f docker/cpu-pytorch/Dockerfile .
docker run -it --rm --pid host llm-perf-backend-cpu-pytorch

run-llm-perf-benchmark-cpu-openvino:
docker build -t llm-perf-backend-cpu-openvino -f docker/cpu-openvino/Dockerfile .
docker run -it --rm --pid host llm-perf-backend-cpu-openvino

run-llm-perf-benchmark-cuda-pytorch:
docker build -t llm-perf-backend-cuda-pytorch -f docker/gpu-cuda/Dockerfile .
docker run -it --rm --pid host --gpus all --shm-size 64G --volume .:/llm-perf-backend --workdir /llm-perf-backend llm-perf-backend-cuda-pytorch

run-llm-perf-benchmark-collector:
docker build -t llm-perf-backend-collector -f docker/collector/Dockerfile .
docker run -it --rm --pid host llm-perf-backend-collector

help:
@echo "Commands:"
@echo " style - Format code and fix style issues"
@echo " quality - Run style checks without fixing"
@echo ""
@echo "Optimum Benchmark Containers:"
@echo " run-optimum-benchmark-cpu-container - Run CPU container"
@echo " run-optimum-benchmark-cuda-container - Run CUDA container"
@echo " run-optimum-benchmark-rocm-container - Run ROCm container"
@echo ""
@echo "LLM Performance Backend Containers:"
@echo " run-llm-perf-benchmark-cpu-pytorch - Run the llm-perf-leaderboard Benchmark CPU PyTorch"
@echo " run-llm-perf-benchmark-cpu-openvino - Run the llm-perf-leaderboard Benchmark CPU OpenVINO"
@echo " run-llm-perf-benchmark-cuda-pytorch - Run the llm-perf-leaderboard Benchmark CUDA PyTorch"
@echo " run-llm-perf-benchmark-collector - Run the llm-perf-leaderboard Collector container"

collector-container:
docker build -t collector -f docker/collector/Dockerfile .
docker run -it --rm --pid host collector
19 changes: 15 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ LLM-perf Backend is designed to:

## Installation 🛠️

1. Clone the repository:
```bash
1. Clone the repository:
```bash
git clone https://github.com/huggingface/llm-perf-backend
cd llm-perf-backend
```
Expand Down Expand Up @@ -53,7 +52,6 @@ llm-perf run-benchmark --hardware cpu --backend pytorch
```

### Configuration Options

View all the options with
```bash
llm-perf run-benchmark --help
Expand All @@ -62,6 +60,18 @@ llm-perf run-benchmark --help
- `--hardware`: Target hardware platform (cpu, cuda)
- `--backend`: Backend framework to use (pytorch, onnxruntime, etc.)

### (Optional) Running Benchmarks via Docker

You can run the benchmarks using the following make commands:

```bash
# CPU Benchmarks
make run-llm-perf-benchmark-cpu-pytorch # Run PyTorch CPU benchmark
make run-llm-perf-benchmark-cpu-openvino # Run OpenVINO CPU benchmark

# GPU Benchmarks
make run-llm-perf-benchmark-cuda-pytorch # Run PyTorch CUDA benchmark
```

## Benchmark Dataset 📊

Results are published to the official dataset:
Expand All @@ -75,4 +85,5 @@ All benchmarks follow these standardized settings:
- Memory tracking:
- Maximum allocated memory
- Maximum reserved memory
- Maximum used memory (via PyNVML for GPU)
- Maximum used memory (via PyNVML for GPU)

4 changes: 4 additions & 0 deletions dashboard/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# -> need to view on the individual runs to get details
# -> get stats about the latest runs for all the hardware.yml
# -> get stats on the latest github actions
# -> get the stats on the top 50 models
Empty file added dashboard/requirements.txt
Empty file.
11 changes: 11 additions & 0 deletions docker/cpu-onnxruntime/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Benchmark image for llm-perf on CPU using the ONNX Runtime backend.
FROM ghcr.io/huggingface/optimum-benchmark:latest-cpu

WORKDIR /workspace

# Copy setup.py alone first so the dependency-install layer below is cached
# until setup.py itself changes, independent of the rest of the source tree.
COPY setup.py .

# NOTE(review): editable install with only setup.py present — assumes the
# install step does not need the package sources at build time; confirm.
RUN pip install -e .[onnxruntime]

# Bring in the full source tree after dependencies are installed.
COPY . .

# Default command: run the CPU ONNX Runtime benchmark; additional CLI
# arguments can be appended via `docker run ... <args>`.
ENTRYPOINT ["llm-perf", "run-benchmark", "--hardware", "cpu", "--backend", "onnxruntime"]
3 changes: 1 addition & 2 deletions docker/cpu-openvino/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@ FROM ghcr.io/huggingface/optimum-benchmark:latest-cpu
WORKDIR /workspace

COPY setup.py .
# COPY pyproject.toml .

RUN pip install -e .[openvino]

COPY . .

CMD ["llm-perf", "run-benchmark", "--hardware", "cpu", "--backend", "openvino"]
ENTRYPOINT ["llm-perf", "run-benchmark", "--hardware", "cpu", "--backend", "openvino"]
6 changes: 4 additions & 2 deletions docker/cpu-pytorch/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ FROM ghcr.io/huggingface/optimum-benchmark:latest-cpu

WORKDIR /workspace

COPY . .
COPY setup.py .

RUN pip install -e .

CMD ["llm-perf", "run-benchmark", "--hardware", "cpu", "--backend", "pytorch"]
COPY . .

ENTRYPOINT ["llm-perf", "run-benchmark", "--hardware", "cpu", "--backend", "pytorch"]
12 changes: 12 additions & 0 deletions docker/gpu-cuda/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Benchmark image for llm-perf on CUDA using the PyTorch backend.
FROM ghcr.io/huggingface/optimum-benchmark:latest-cuda

WORKDIR /workspace

# Copy setup.py alone first so the dependency-install layer below is cached
# until setup.py itself changes, independent of the rest of the source tree.
COPY setup.py .

# flash-attn is installed with --no-build-isolation so it compiles against
# the torch already present in the base image rather than a fresh build env.
RUN pip install -e .[cuda] \
&& pip install flash-attn --no-build-isolation

# Bring in the full source tree after dependencies are installed.
COPY . .

# Default command: run the CUDA PyTorch benchmark; additional CLI
# arguments can be appended via `docker run ... <args>`.
ENTRYPOINT ["llm-perf", "run-benchmark", "--hardware", "cuda", "--backend", "pytorch"]
Loading
Loading