diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..36d3817
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,23 @@
+optimum-benchmark
+
+**/*.pyc
+**/*.pyo
+**/*.mo
+.gitignore
+.git/
+**/__pycache__/
+Dockerfile
+db.sqlite3
+**/*.md
+build
+.venv
+ruff_cache
+*.egg-info
+*.egg
+*.dist-info
+*.whl
+*.tar.gz
+*.zip
+*.rar
+*.7z
+*.iso
diff --git a/.github/workflows/benchmark_cpu_openvino.yaml b/.github/workflows/benchmark_cpu_openvino.yaml
index 012fa69..afafb96 100644
--- a/.github/workflows/benchmark_cpu_openvino.yaml
+++ b/.github/workflows/benchmark_cpu_openvino.yaml
@@ -50,7 +50,7 @@ jobs:
           MACHINE: ${{ matrix.machine.name }}
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
           DISABLE_WARNINGS: 1
-          BENCHMARK_TOP_N: 3
+          BENCHMARK_TOP_N: 50
         with:
          image: ${{ env.IMAGE }}
          options: |
diff --git a/.github/workflows/benchmark_cpu_pytorch.yaml b/.github/workflows/benchmark_cpu_pytorch.yaml
index 0c25540..b2aa7e0 100644
--- a/.github/workflows/benchmark_cpu_pytorch.yaml
+++ b/.github/workflows/benchmark_cpu_pytorch.yaml
@@ -49,7 +49,7 @@ jobs:
           SUBSET: ${{ matrix.subset }}
           MACHINE: ${{ matrix.machine.name }}
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
-          BENCHMARK_TOP_N: 3
+          BENCHMARK_TOP_N: 50
         with:
          image: ${{ env.IMAGE }}
          options: |
diff --git a/.github/workflows/benchmark_cuda_pytorch.yaml b/.github/workflows/benchmark_cuda_pytorch.yaml
index 9d169b5..624adef 100644
--- a/.github/workflows/benchmark_cuda_pytorch.yaml
+++ b/.github/workflows/benchmark_cuda_pytorch.yaml
@@ -53,7 +53,7 @@ jobs:
           SUBSET: ${{ matrix.subset }}
           MACHINE: ${{ matrix.machine.name }}
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
-          BENCHMARK_TOP_N: 3
+          BENCHMARK_TOP_N: 50
         with:
          image: ${{ env.IMAGE }}
          options: |
diff --git a/.gitignore b/.gitignore
index 5a97b64..e6b5c39 100644
--- a/.gitignore
+++ b/.gitignore
@@ -186,4 +186,5 @@
 outputs/
 .env
 wip/
-*.csv
\ No newline at end of file
+*.csv
+optimum-benchmark/
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 0d17649..aa80fe1 100644
--- a/Makefile
+++ b/Makefile
@@ -33,3 +33,11 @@ cpu-pytorch-container:
 	docker build -t cpu-pytorch -f docker/cpu-pytorch/Dockerfile .
 	# docker run -it --rm --pid host cpu-pytorch /bin/bash
 	docker run -it --rm --pid host cpu-pytorch
+
+collector-container:
+	docker build -t collector -f docker/collector/Dockerfile .
+	docker run -it --rm --pid host collector
+
+cpu-openvino-container:
+	docker build -t cpu-openvino -f docker/cpu-openvino/Dockerfile .
+	docker run -it --rm --pid host cpu-openvino
diff --git a/docker/collector/Dockerfile b/docker/collector/Dockerfile
new file mode 100644
index 0000000..6e14e44
--- /dev/null
+++ b/docker/collector/Dockerfile
@@ -0,0 +1,15 @@
+FROM python:3.12-slim
+
+WORKDIR /workspace
+
+# Install git
+RUN apt-get update && apt-get install -y git && apt-get clean && rm -rf /var/lib/apt/lists/*
+
+COPY setup.py .
+COPY pyproject.toml .
+
+RUN pip install -e .
+
+COPY . .
+
+CMD ["llm-perf", "update-leaderboard"]
diff --git a/docker/cpu-openvino/Dockerfile b/docker/cpu-openvino/Dockerfile
new file mode 100644
index 0000000..2f88e1e
--- /dev/null
+++ b/docker/cpu-openvino/Dockerfile
@@ -0,0 +1,12 @@
+FROM ghcr.io/huggingface/optimum-benchmark:latest-cpu
+
+WORKDIR /workspace
+
+COPY setup.py .
+# COPY pyproject.toml .
+
+RUN pip install -e .[openvino]
+
+COPY . .
+
+CMD ["llm-perf", "run-benchmark", "--hardware", "cpu", "--backend", "openvino"]
diff --git a/llm_perf/common/utils.py b/llm_perf/common/utils.py
index c9186ca..bf67fe9 100644
--- a/llm_perf/common/utils.py
+++ b/llm_perf/common/utils.py
@@ -52,7 +52,7 @@ def get_top_llm_list(n: int = 10) -> list[str]:
 
 
 if is_debug_mode():
-    CANONICAL_PRETRAINED_OPEN_LLM_LIST = ["gpt2"]
+    CANONICAL_PRETRAINED_OPEN_LLM_LIST = ["bigscience/bloomz-560m"]
 else:
     CANONICAL_PRETRAINED_OPEN_LLM_LIST = get_top_llm_list(n=get_benchmark_top_n())
 print(
diff --git a/llm_perf/hardware.yaml b/llm_perf/hardware.yaml
index 1a351b6..c3cd754 100644
--- a/llm_perf/hardware.yaml
+++ b/llm_perf/hardware.yaml
@@ -33,4 +33,6 @@
   subsets:
     - unquantized
   backends:
-    - pytorch
\ No newline at end of file
+    - pytorch
+    - openvino
+    - onnxruntime
diff --git a/llm_perf/update_llm_perf_leaderboard.py b/llm_perf/update_llm_perf_leaderboard.py
index 5486b6e..91d8592 100644
--- a/llm_perf/update_llm_perf_leaderboard.py
+++ b/llm_perf/update_llm_perf_leaderboard.py
@@ -2,10 +2,15 @@ from glob import glob
 
 import pandas as pd
-from huggingface_hub import create_repo, snapshot_download, upload_file
+from huggingface_hub import create_repo, snapshot_download, upload_file, repo_exists
 from optimum_benchmark import Benchmark
+import requests
+import json
 
 from llm_perf.common.hardware_config import load_hardware_configs
+from huggingface_hub.utils import disable_progress_bars
+
+disable_progress_bars()
 
 
 REPO_TYPE = "dataset"
 MAIN_REPO_ID = "optimum-benchmark/llm-perf-leaderboard"
@@ -14,6 +19,43 @@
 PERF_DF = "perf-df-{backend}-{hardware}-{subset}-{machine}.csv"
 LLM_DF = "llm-df.csv"
 
+def patch_json(file):
+    """
+    Patch a JSON file by adding a 'stdev_' key with the same value as 'stdev' for all occurrences,
+    but only if 'stdev_' doesn't already exist at the same level.
+    This is to make the old optimum benchmark compatible with the new one.
+
+    This function reads a JSON file, recursively traverses the data structure,
+    and for each dictionary that contains a 'stdev' key without a corresponding 'stdev_' key,
+    it adds a 'stdev_' key with the same value. The modified data is then written back to the file.
+
+    Args:
+        file (str): The path to the JSON file to be patched.
+
+    Returns:
+        None
+    """
+    with open(file, "r") as f:
+        data = json.load(f)
+
+    def add_stdev_(obj):
+        if isinstance(obj, dict):
+            new_items = []
+            for key, value in obj.items():
+                if key == "stdev" and "stdev_" not in obj:
+                    new_items.append(("stdev_", value))
+                if isinstance(value, (dict, list)):
+                    add_stdev_(value)
+            for key, value in new_items:
+                obj[key] = value
+        elif isinstance(obj, list):
+            for item in obj:
+                add_stdev_(item)
+
+    add_stdev_(data)
+
+    with open(file, "w") as f:
+        json.dump(data, f, indent=4)
 
 def gather_benchmarks(subset: str, machine: str, backend: str, hardware: str):
     """
@@ -23,11 +65,14 @@ def gather_benchmarks(subset: str, machine: str, backend: str, hardware: str):
         subset=subset, machine=machine, backend=backend, hardware=hardware
     )
     snapshot = snapshot_download(
-        repo_type=REPO_TYPE, repo_id=perf_repo_id, allow_patterns=["**/benchmark.json"]
+        repo_type=REPO_TYPE,
+        repo_id=perf_repo_id,
+        allow_patterns=["**/benchmark.json"],
     )
 
     dfs = []
     for file in glob(f"{snapshot}/**/benchmark.json", recursive=True):
+        patch_json(file)
         dfs.append(Benchmark.from_json(file).to_dataframe())
 
     benchmarks = pd.concat(dfs, ignore_index=True)
@@ -42,6 +87,17 @@
         path_in_repo=perf_df,
         path_or_fileobj=perf_df,
     )
+    print(f"Uploaded {perf_df} to {MAIN_REPO_ID}")
+
+
+# def check_if_url_exists(url: str):
+#     """
+#     Check if a URL exists
+#     """
+#     repo_exists
+#     print(f"response: {response}")
+#     return response.status_code == 200
+
 
 
 def update_perf_dfs():
@@ -67,15 +123,17 @@ def update_perf_dfs():
                    print(f" • Machine: {hardware_config.machine}")
                    print(f" • Hardware Type: {hardware_config.hardware}")
                    url = f"{PERF_REPO_ID.format(subset=subset, machine=hardware_config.machine, backend=backend, hardware=hardware_config.hardware)}"
-                    print(
-                        f"Check that URL exists: https://huggingface.co/datasets/{url}"
-                    )
-                    raise e
+
+                    does_exist = repo_exists(url, repo_type="dataset")
+
+                    if does_exist:
+                        print(f"Dataset exists: {url} but could not be processed")
+
 
 
 scrapping_script = """
 git clone https://github.com/Weyaxi/scrape-open-llm-leaderboard.git
-pip install -r scrape-open-llm-leaderboard/requirements.txt
+pip install -r scrape-open-llm-leaderboard/requirements.txt -q
 python scrape-open-llm-leaderboard/main.py
 rm -rf scrape-open-llm-leaderboard
 """