Skip to content

⚙️ dashboard for observability + rerun option #10

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 21 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions .github/workflows/benchmark_cpu_onnxruntime.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,14 @@ name: Benchmark CPU Onnxruntime

on:
workflow_dispatch:
inputs:
rerun_already_conducted_benchmarks:
description: 'Rerun benchmarks that were already conducted'
required: true
type: boolean
default: false
schedule:
- cron: "0 12 * * 3"
- cron: "0 12 * * *"
pull_request:

concurrency:
Expand All @@ -16,15 +22,18 @@ env:
jobs:
run_benchmarks:
if: >-
(github.event_name == 'push' && contains(github.event.head_commit.message, 'cpu_onnxruntime')) ||
(github.event_name == 'push' && contains(github.event.head_commit.message, 'all_benchmarks')) ||
(github.event_name == 'push' && (
contains(github.event.head_commit.message, 'cpu_onnxruntime') ||
contains(github.event.head_commit.message, 'all_benchmarks')
)) ||
(github.event_name == 'push' && github.ref == 'refs/heads/main') ||
github.event_name == 'workflow_dispatch' ||
(github.event_name == 'pull_request' && (
contains(github.event.pull_request.labels.*.name, 'leaderboard') ||
contains(github.event.pull_request.labels.*.name, 'cpu') ||
contains(github.event.pull_request.labels.*.name, 'onnxruntime') ||
contains(github.event.pull_request.labels.*.name, 'cpu_onnxruntime')
contains(github.event.pull_request.labels.*.name, 'cpu_onnxruntime') ||
contains(github.event.pull_request.labels.*.name, 'all_benchmarks')
))

strategy:
Expand All @@ -48,6 +57,7 @@ jobs:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
DISABLE_WARNINGS: 1
BENCHMARK_TOP_N: 3
RERUN_ALREADY_CONDUCTED_BENCHMARKS: ${{ github.event.inputs.rerun_already_conducted_benchmarks }}
with:
image: ${{ env.IMAGE }}
options: |
Expand All @@ -60,6 +70,7 @@ jobs:
--env HF_HUB_ENABLE_HF_TRANSFER=1
--env DISABLE_WARNINGS
--env BENCHMARK_TOP_N
--env RERUN_ALREADY_CONDUCTED_BENCHMARKS
--volume ${{ github.workspace }}:/workspace
--workdir /workspace
run: |
Expand Down
19 changes: 15 additions & 4 deletions .github/workflows/benchmark_cpu_openvino.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,14 @@ name: Benchmark CPU OpenVINO

on:
workflow_dispatch:
inputs:
rerun_already_conducted_benchmarks:
description: 'Rerun benchmarks that were already conducted'
required: true
type: boolean
default: false
schedule:
- cron: "0 0 * * *"
- cron: "0 6 * * *"
pull_request:

concurrency:
Expand All @@ -16,15 +22,18 @@ env:
jobs:
run_benchmarks:
if: >-
(github.event_name == 'push' && contains(github.event.head_commit.message, 'cpu_openvino')) ||
(github.event_name == 'push' && contains(github.event.head_commit.message, 'all_benchmarks')) ||
(github.event_name == 'push' && (
contains(github.event.head_commit.message, 'cpu_openvino') ||
contains(github.event.head_commit.message, 'all_benchmarks')
)) ||
(github.event_name == 'push' && github.ref == 'refs/heads/main') ||
github.event_name == 'workflow_dispatch' ||
(github.event_name == 'pull_request' && (
contains(github.event.pull_request.labels.*.name, 'leaderboard') ||
contains(github.event.pull_request.labels.*.name, 'cpu') ||
contains(github.event.pull_request.labels.*.name, 'openvino') ||
contains(github.event.pull_request.labels.*.name, 'cpu_openvino')
contains(github.event.pull_request.labels.*.name, 'cpu_openvino') ||
contains(github.event.pull_request.labels.*.name, 'all_benchmarks')
))

strategy:
Expand All @@ -48,6 +57,7 @@ jobs:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
DISABLE_WARNINGS: 1
BENCHMARK_TOP_N: 50
RERUN_ALREADY_CONDUCTED_BENCHMARKS: ${{ github.event.inputs.rerun_already_conducted_benchmarks }}
with:
image: ${{ env.IMAGE }}
options: |
Expand All @@ -60,6 +70,7 @@ jobs:
--env HF_HUB_ENABLE_HF_TRANSFER=1
--env DISABLE_WARNINGS
--env BENCHMARK_TOP_N
--env RERUN_ALREADY_CONDUCTED_BENCHMARKS
--volume ${{ github.workspace }}:/workspace
--workdir /workspace
run: |
Expand Down
17 changes: 14 additions & 3 deletions .github/workflows/benchmark_cpu_pytorch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@ name: Benchmark CPU PyTorch

on:
workflow_dispatch:
inputs:
rerun_already_conducted_benchmarks:
description: 'Rerun benchmarks that were already conducted'
required: true
type: boolean
default: false
schedule:
- cron: "0 0 * * *"
pull_request:
Expand All @@ -16,15 +22,18 @@ env:
jobs:
run_benchmarks:
if: >-
(github.event_name == 'push' && contains(github.event.head_commit.message, 'cpu_pytorch')) ||
(github.event_name == 'push' && contains(github.event.head_commit.message, 'all_benchmarks')) ||
(github.event_name == 'push' && (
contains(github.event.head_commit.message, 'cpu_pytorch') ||
contains(github.event.head_commit.message, 'all_benchmarks')
)) ||
(github.event_name == 'push' && github.ref == 'refs/heads/main') ||
github.event_name == 'workflow_dispatch' ||
(github.event_name == 'pull_request' && (
contains(github.event.pull_request.labels.*.name, 'leaderboard') ||
contains(github.event.pull_request.labels.*.name, 'cpu') ||
contains(github.event.pull_request.labels.*.name, 'pytorch') ||
contains(github.event.pull_request.labels.*.name, 'cpu_pytorch')
contains(github.event.pull_request.labels.*.name, 'cpu_pytorch') ||
contains(github.event.pull_request.labels.*.name, 'all_benchmarks')
))

strategy:
Expand All @@ -47,6 +56,7 @@ jobs:
MACHINE: ${{ matrix.machine.name }}
HF_TOKEN: ${{ secrets.HF_TOKEN }}
BENCHMARK_TOP_N: 50
RERUN_ALREADY_CONDUCTED_BENCHMARKS: ${{ github.event.inputs.rerun_already_conducted_benchmarks }}
with:
image: ${{ env.IMAGE }}
options: |
Expand All @@ -58,6 +68,7 @@ jobs:
--env MKL_THREADING_LAYER=GNU
--env HF_HUB_ENABLE_HF_TRANSFER=1
--env BENCHMARK_TOP_N
--env RERUN_ALREADY_CONDUCTED_BENCHMARKS
--volume ${{ github.workspace }}:/workspace
--workdir /workspace
run: |
Expand Down
19 changes: 15 additions & 4 deletions .github/workflows/benchmark_cuda_pytorch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,14 @@ name: Benchmark CUDA PyTorch

on:
workflow_dispatch:
inputs:
rerun_already_conducted_benchmarks:
description: 'Rerun benchmarks that were already conducted'
required: true
type: boolean
default: false
schedule:
- cron: "0 3 * * 0"
- cron: "0 0 * * *"
pull_request:

concurrency:
Expand All @@ -16,15 +22,18 @@ env:
jobs:
run_benchmarks:
if: >-
(github.event_name == 'push' && contains(github.event.head_commit.message, 'cuda_pytorch')) ||
(github.event_name == 'push' && contains(github.event.head_commit.message, 'all_benchmarks')) ||
(github.event_name == 'push' && (
contains(github.event.head_commit.message, 'cuda_pytorch') ||
contains(github.event.head_commit.message, 'all_benchmarks')
)) ||
(github.event_name == 'push' && github.ref == 'refs/heads/main') ||
github.event_name == 'workflow_dispatch' ||
(github.event_name == 'pull_request' && (
contains(github.event.pull_request.labels.*.name, 'leaderboard') ||
contains(github.event.pull_request.labels.*.name, 'cuda') ||
contains(github.event.pull_request.labels.*.name, 'pytorch') ||
contains(github.event.pull_request.labels.*.name, 'cuda_pytorch')
contains(github.event.pull_request.labels.*.name, 'cuda_pytorch') ||
contains(github.event.pull_request.labels.*.name, 'all_benchmarks')
))

strategy:
Expand All @@ -51,6 +60,7 @@ jobs:
MACHINE: ${{ matrix.machine.name }}
HF_TOKEN: ${{ secrets.HF_TOKEN }}
BENCHMARK_TOP_N: 50
RERUN_ALREADY_CONDUCTED_BENCHMARKS: ${{ github.event.inputs.rerun_already_conducted_benchmarks }}
with:
image: ${{ env.IMAGE }}
options: |
Expand All @@ -63,6 +73,7 @@ jobs:
--env MKL_THREADING_LAYER=GNU
--env HF_HUB_ENABLE_HF_TRANSFER=1
--env BENCHMARK_TOP_N
--env RERUN_ALREADY_CONDUCTED_BENCHMARKS
--volume ${{ github.workspace }}:/workspace
--workdir /workspace
run: |
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -191,3 +191,4 @@ optimum-benchmark/

*.egg-info/
data/
load_model_codecarbon.json
63 changes: 42 additions & 21 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Load environment variables
ifneq (,$(wildcard .env))
include .env
export
endif

# Style and Quality checks
.PHONY: style quality install install-dev run_cpu_container run_cuda_container run_rocm_container cpu-pytorch-container cpu-openvino-container collector-container
.PHONY: style quality run_cpu_container run_cuda_container run_rocm_container cpu-pytorch-container cpu-openvino-container collector-container help

quality:
ruff check .
Expand All @@ -9,31 +15,46 @@ style:
ruff format .
ruff check --fix .

install:
pip install .

install-dev:
DEBUG=1 uv pip install -e .

# Running optimum-benchmark containers
run_cpu_container:
run-optimum-benchmark-cpu-container:
docker run -it --rm --pid host --volume .:/llm-perf-backend --workdir /llm-perf-backend ghcr.io/huggingface/optimum-benchmark:latest-cpu

run_cuda_container:
run-optimum-benchmark-cuda-container:
docker run -it --rm --pid host --gpus all --shm-size 64G --volume .:/llm-perf-backend --workdir /llm-perf-backend ghcr.io/huggingface/optimum-benchmark:latest-cuda

run_rocm_container:
run-optimum-benchmark-rocm-container:
docker run -it --rm --shm-size 64G --device /dev/kfd --device /dev/dri --volume .:/llm-perf-backend --workdir /llm-perf-backend ghcr.io/huggingface/optimum-benchmark:latest-rocm

# Running llm-perf backend containers
cpu-pytorch-container:
docker build -t cpu-pytorch -f docker/cpu-pytorch/Dockerfile .
docker run -it --rm --pid host cpu-pytorch

cpu-openvino-container:
docker build -t cpu-openvino -f docker/cpu-openvino/Dockerfile .
docker run -it --rm --pid host cpu-openvino
# Running llm-perf-leaderboard benchmarks
run-llm-perf-benchmark-cpu-pytorch:
docker build -t llm-perf-backend-cpu-pytorch -f docker/cpu-pytorch/Dockerfile .
docker run -it --rm --pid host llm-perf-backend-cpu-pytorch

run-llm-perf-benchmark-cpu-openvino:
docker build -t llm-perf-backend-cpu-openvino -f docker/cpu-openvino/Dockerfile .
docker run -it --rm --pid host llm-perf-backend-cpu-openvino

run-llm-perf-benchmark-cuda-pytorch:
docker build -t llm-perf-backend-cuda-pytorch -f docker/gpu-cuda/Dockerfile .
docker run -it --rm --pid host --gpus all --shm-size 64G --volume .:/llm-perf-backend --workdir /llm-perf-backend llm-perf-backend-cuda-pytorch

run-llm-perf-benchmark-collector:
docker build -t llm-perf-backend-collector -f docker/collector/Dockerfile .
docker run -it --rm --pid host llm-perf-backend-collector

help:
@echo "Commands:"
@echo " style - Format code and fix style issues"
@echo " quality - Run style checks without fixing"
@echo ""
@echo "Optimum Benchmark Containers:"
@echo " run-optimum-benchmark-cpu-container - Run CPU container"
@echo " run-optimum-benchmark-cuda-container - Run CUDA container"
@echo " run-optimum-benchmark-rocm-container - Run ROCm container"
@echo ""
@echo "LLM Performance Backend Containers:"
@echo " run-llm-perf-benchmark-cpu-pytorch - Run the llm-perf-leaderboard Benchmark CPU PyTorch"
@echo " run-llm-perf-benchmark-cpu-openvino - Run the llm-perf-leaderboard Benchmark CPU OpenVINO"
@echo " run-llm-perf-benchmark-cuda-pytorch - Run the llm-perf-leaderboard Benchmark CUDA PyTorch"
@echo " run-llm-perf-benchmark-collector - Run the llm-perf-leaderboard Collector container"

collector-container:
docker build -t collector -f docker/collector/Dockerfile .
docker run -it --rm --pid host collector
19 changes: 15 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ LLM-perf Backend is designed to:

## Installation 🛠️

1. Clone the repository:
```bash
1. Clone the repository:
```bash
git clone https://github.com/huggingface/llm-perf-backend
cd llm-perf-backend
```
Expand Down Expand Up @@ -53,7 +52,6 @@ llm-perf run-benchmark --hardware cpu --backend pytorch
```

### Configuration Options

View all the options with
```bash
llm-perf run-benchmark --help
Expand All @@ -62,6 +60,18 @@ llm-perf run-benchmark --help
- `--hardware`: Target hardware platform (cpu, cuda)
- `--backend`: Backend framework to use (pytorch, onnxruntime, etc.)

### (Optional) Running Benchmarks via Docker

You can run the benchmarks using the following make commands:

```bash
# CPU Benchmarks
make run-llm-perf-benchmark-cpu-pytorch # Run PyTorch CPU benchmark
make run-llm-perf-benchmark-cpu-openvino # Run OpenVINO CPU benchmark

# GPU Benchmarks
make run-llm-perf-benchmark-cuda-pytorch # Run PyTorch CUDA benchmark
```

## Benchmark Dataset 📊

Results are published to the official dataset:
Expand All @@ -75,4 +85,5 @@ All benchmarks follow these standardized settings:
- Memory tracking:
- Maximum allocated memory
- Maximum reserved memory
- Maximum used memory (via PyNVML for GPU)
- Maximum used memory (via PyNVML for GPU)

4 changes: 4 additions & 0 deletions dashboard/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# -> need to view on the individual runs to get details
# -> get stats about the latest runs for all the hardware.yml
# -> get stats on the latest github actions
# -> get the stats on the top 50 models
Empty file added dashboard/requirements.txt
Empty file.
11 changes: 11 additions & 0 deletions docker/cpu-onnxruntime/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Benchmark image for llm-perf on CPU using the ONNX Runtime backend.
FROM ghcr.io/huggingface/optimum-benchmark:latest-cpu

WORKDIR /workspace

# Copy setup.py alone first so the dependency-install layer below is cached
# until setup.py itself changes, independent of the rest of the source tree.
COPY setup.py .

# NOTE(review): editable install with only setup.py present — assumes the
# install step does not need the package sources at build time; confirm.
RUN pip install -e .[onnxruntime]

# Bring in the full source tree after dependencies are installed.
COPY . .

# Default command: run the CPU ONNX Runtime benchmark; additional CLI
# arguments can be appended via `docker run ... <args>`.
ENTRYPOINT ["llm-perf", "run-benchmark", "--hardware", "cpu", "--backend", "onnxruntime"]
3 changes: 1 addition & 2 deletions docker/cpu-openvino/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@ FROM ghcr.io/huggingface/optimum-benchmark:latest-cpu
WORKDIR /workspace

COPY setup.py .
# COPY pyproject.toml .

RUN pip install -e .[openvino]

COPY . .

CMD ["llm-perf", "run-benchmark", "--hardware", "cpu", "--backend", "openvino"]
ENTRYPOINT ["llm-perf", "run-benchmark", "--hardware", "cpu", "--backend", "openvino"]
6 changes: 4 additions & 2 deletions docker/cpu-pytorch/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ FROM ghcr.io/huggingface/optimum-benchmark:latest-cpu

WORKDIR /workspace

COPY . .
COPY setup.py .

RUN pip install -e .

CMD ["llm-perf", "run-benchmark", "--hardware", "cpu", "--backend", "pytorch"]
COPY . .

ENTRYPOINT ["llm-perf", "run-benchmark", "--hardware", "cpu", "--backend", "pytorch"]
12 changes: 12 additions & 0 deletions docker/gpu-cuda/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Benchmark image for llm-perf on CUDA using the PyTorch backend.
FROM ghcr.io/huggingface/optimum-benchmark:latest-cuda

WORKDIR /workspace

# Copy setup.py alone first so the dependency-install layer below is cached
# until setup.py itself changes, independent of the rest of the source tree.
COPY setup.py .

# flash-attn is installed with --no-build-isolation so it compiles against
# the torch already present in the base image rather than a fresh build env.
RUN pip install -e .[cuda] \
&& pip install flash-attn --no-build-isolation

# Bring in the full source tree after dependencies are installed.
COPY . .

# Default command: run the CUDA PyTorch benchmark; additional CLI
# arguments can be appended via `docker run ... <args>`.
ENTRYPOINT ["llm-perf", "run-benchmark", "--hardware", "cuda", "--backend", "pytorch"]
Loading
Loading