# LLM-perf Backend 🏋️

The official backend system powering the [LLM-perf Leaderboard](https://huggingface.co/spaces/optimum/llm-perf-leaderboard). This repository contains the infrastructure and tools needed to run standardized benchmarks for Large Language Models (LLMs) across different hardware configurations and optimization backends.

## About 📝

LLM-perf Backend is designed to:
- Run automated benchmarks for the LLM-perf leaderboard
- Ensure consistent and reproducible performance measurements
- Support multiple hardware configurations and optimization backends
- Generate standardized performance metrics for latency, throughput, memory usage, and energy consumption

## Key Features 🔑

- Standardized benchmarking pipeline using [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark)
- Support for multiple hardware configurations (CPU, GPU)
- Multiple backend implementations (PyTorch, ONNX Runtime, etc.)
- Automated metric collection:
  - Latency and throughput measurements
  - Memory usage tracking
  - Energy consumption monitoring
  - Quality metrics integration with the Open LLM Leaderboard

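To illustrate the kind of latency and throughput figures collected, here is a minimal sketch (not the repository's actual code) of how such metrics can be derived from raw per-request timings:

```python
import statistics

def summarize_latencies(latencies_s, tokens_per_request):
    """Derive common benchmark metrics from per-request latencies (seconds)."""
    sorted_lat = sorted(latencies_s)
    p50 = statistics.median(sorted_lat)
    # Simple nearest-rank p90 over the sorted samples.
    p90 = sorted_lat[int(0.9 * (len(sorted_lat) - 1))]
    total_time = sum(latencies_s)
    # Throughput in generated tokens per second over the whole run.
    throughput = tokens_per_request * len(latencies_s) / total_time
    return {"p50_s": p50, "p90_s": p90, "throughput_tok_s": throughput}

metrics = summarize_latencies([0.5, 0.4, 0.6, 0.5], tokens_per_request=128)
print(metrics)  # → {'p50_s': 0.5, 'p90_s': 0.5, 'throughput_tok_s': 256.0}
```

The real pipeline delegates this to Optimum-Benchmark; the sketch only shows what the reported numbers mean.
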
## Installation 🛠️

1. Clone the repository:
```bash
git clone https://github.com/huggingface/llm-perf-backend
cd llm-perf-backend
```

2. Create and activate a Python virtual environment:
```bash
python -m venv .venv
source .venv/bin/activate
```

3. Install the package with the required dependencies:
```bash
pip install -e .
# or
pip install -e ".[all]"  # to install optional dependencies such as ONNX Runtime
```

## Usage 📋

### Command Line Interface

Run benchmarks using the CLI tool:

```bash
llm-perf run-benchmark --hardware cpu --backend pytorch
```

### Configuration Options

View all available options with:
```bash
llm-perf run-benchmark --help
```

- `--hardware`: Target hardware platform (cpu, cuda)
- `--backend`: Backend framework to use (pytorch, onnxruntime, etc.)
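
The two flags can also be combined to sweep several configurations in one go. The snippet below is a hedged sketch that only builds the corresponding shell commands; which combinations actually run depends on the hardware and optional dependencies you have installed:

```python
import itertools

# Hypothetical sweep; adjust to the hardware and backends you actually have.
hardware_options = ["cpu", "cuda"]
backend_options = ["pytorch", "onnxruntime"]

commands = [
    f"llm-perf run-benchmark --hardware {hw} --backend {be}"
    for hw, be in itertools.product(hardware_options, backend_options)
]
for cmd in commands:
    print(cmd)
```

Each printed line can then be executed directly in the shell, or dispatched through a scheduler if you run benchmarks on several machines.
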

## Benchmark Dataset 📊

Results are published to the official dataset:
[optimum-benchmark/llm-perf-leaderboard](https://huggingface.co/datasets/optimum-benchmark/llm-perf-leaderboard)

## Benchmark Specifications 📑

All benchmarks follow these standardized settings:
- Single GPU usage to avoid communication-dependent results
- Energy monitoring via CodeCarbon
- Memory tracking:
  - Maximum allocated memory
  - Maximum reserved memory
  - Maximum used memory (via PyNVML for GPU)
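
As a rough CPU-side analogue of this peak-memory tracking (the backend itself relies on PyTorch allocator statistics and PyNVML for GPU memory, not this module), Python's standard `tracemalloc` can report peak allocated memory around a workload:

```python
import tracemalloc

tracemalloc.start()
buffer = [0] * 1_000_000  # stand-in for a model inference workload
current, peak = tracemalloc.get_traced_memory()  # bytes (current, peak)
tracemalloc.stop()

print(f"current: {current / 1e6:.1f} MB, peak: {peak / 1e6:.1f} MB")
```

"Peak" here plays the same role as "maximum used memory" in the specifications above: the high-water mark over the whole measurement window, not the value at the end of the run.
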