fix number of models #7

Merged
merged 1 commit into from
Oct 23, 2024
23 changes: 23 additions & 0 deletions .dockerignore
@@ -0,0 +1,23 @@
optimum-benchmark

**/*.pyc
**/*.pyo
**/*.mo
.gitignore
.git/
**/__pycache__/
Dockerfile
db.sqlite3
**/*.md
build
.venv
ruff_cache
*.egg-info
*.egg
*.dist-info
*.whl
*.tar.gz
*.zip
*.rar
*.7z
*.iso
2 changes: 1 addition & 1 deletion .github/workflows/benchmark_cpu_openvino.yaml
@@ -50,7 +50,7 @@ jobs:
MACHINE: ${{ matrix.machine.name }}
HF_TOKEN: ${{ secrets.HF_TOKEN }}
DISABLE_WARNINGS: 1
-BENCHMARK_TOP_N: 3
+BENCHMARK_TOP_N: 50
with:
image: ${{ env.IMAGE }}
options: |
2 changes: 1 addition & 1 deletion .github/workflows/benchmark_cpu_pytorch.yaml
@@ -49,7 +49,7 @@ jobs:
SUBSET: ${{ matrix.subset }}
MACHINE: ${{ matrix.machine.name }}
HF_TOKEN: ${{ secrets.HF_TOKEN }}
-BENCHMARK_TOP_N: 3
+BENCHMARK_TOP_N: 50
with:
image: ${{ env.IMAGE }}
options: |
2 changes: 1 addition & 1 deletion .github/workflows/benchmark_cuda_pytorch.yaml
@@ -53,7 +53,7 @@ jobs:
SUBSET: ${{ matrix.subset }}
MACHINE: ${{ matrix.machine.name }}
HF_TOKEN: ${{ secrets.HF_TOKEN }}
-BENCHMARK_TOP_N: 3
+BENCHMARK_TOP_N: 50
with:
image: ${{ env.IMAGE }}
options: |
3 changes: 2 additions & 1 deletion .gitignore
@@ -186,4 +186,5 @@ outputs/
.env
wip/

-*.csv
+*.csv
+optimum-benchmark/
8 changes: 8 additions & 0 deletions Makefile
@@ -33,3 +33,11 @@ cpu-pytorch-container:
docker build -t cpu-pytorch -f docker/cpu-pytorch/Dockerfile .
# docker run -it --rm --pid host cpu-pytorch /bin/bash
docker run -it --rm --pid host cpu-pytorch

collector-container:
docker build -t collector -f docker/collector/Dockerfile .
docker run -it --rm --pid host collector

cpu-openvino-container:
docker build -t cpu-openvino -f docker/cpu-openvino/Dockerfile .
docker run -it --rm --pid host cpu-openvino
15 changes: 15 additions & 0 deletions docker/collector/Dockerfile
@@ -0,0 +1,15 @@
FROM python:3.12-slim

WORKDIR /workspace

# Install git
RUN apt-get update && apt-get install -y git && apt-get clean && rm -rf /var/lib/apt/lists/*

COPY setup.py .
COPY pyproject.toml .

RUN pip install -e .

COPY . .

CMD ["llm-perf", "update-leaderboard"]
12 changes: 12 additions & 0 deletions docker/cpu-openvino/Dockerfile
@@ -0,0 +1,12 @@
FROM ghcr.io/huggingface/optimum-benchmark:latest-cpu

WORKDIR /workspace

COPY setup.py .
# COPY pyproject.toml .

RUN pip install -e .[openvino]

COPY . .

CMD ["llm-perf", "run-benchmark", "--hardware", "cpu", "--backend", "openvino"]
2 changes: 1 addition & 1 deletion llm_perf/common/utils.py
@@ -52,7 +52,7 @@ def get_top_llm_list(n: int = 10) -> list[str]:


if is_debug_mode():
CANONICAL_PRETRAINED_OPEN_LLM_LIST = ["gpt2"]
CANONICAL_PRETRAINED_OPEN_LLM_LIST = ["bigscience/bloomz-560m"]
else:
CANONICAL_PRETRAINED_OPEN_LLM_LIST = get_top_llm_list(n=get_benchmark_top_n())
print(
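For context, the `BENCHMARK_TOP_N` value raised from 3 to 50 in the workflows above is presumably what `get_benchmark_top_n()` returns here. A minimal sketch of such a helper, assuming the env-var name carries over and a fallback default (the helper's body is not shown in this diff):

```python
import os

def get_benchmark_top_n() -> int:
    # Assumed implementation: read the BENCHMARK_TOP_N env var set in the
    # workflow files above; the default of 10 is a guess, not from the diff.
    return int(os.environ.get("BENCHMARK_TOP_N", "10"))
```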
4 changes: 3 additions & 1 deletion llm_perf/hardware.yaml
@@ -33,4 +33,6 @@
  subsets:
    - unquantized
  backends:
-    - pytorch
+    - pytorch
+    - openvino
+    - onnxruntime
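For orientation, these entries are consumed via `load_hardware_configs` in `update_llm_perf_leaderboard.py` below. A rough usage sketch, where the loader's path argument and any fields beyond `machine`, `hardware`, `subsets`, and `backends` are assumptions:

```python
from llm_perf.common.hardware_config import load_hardware_configs

# Assumed call shape: the attribute names mirror how the config
# objects are used later in update_llm_perf_leaderboard.py.
hardware_configs = load_hardware_configs("llm_perf/hardware.yaml")
for hc in hardware_configs:
    print(hc.machine, hc.hardware, hc.subsets, hc.backends)
```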
72 changes: 65 additions & 7 deletions llm_perf/update_llm_perf_leaderboard.py
@@ -2,10 +2,15 @@
from glob import glob

import pandas as pd
-from huggingface_hub import create_repo, snapshot_download, upload_file
+from huggingface_hub import create_repo, snapshot_download, upload_file, repo_exists
from optimum_benchmark import Benchmark
import requests
import json

from llm_perf.common.hardware_config import load_hardware_configs
from huggingface_hub.utils import disable_progress_bars

disable_progress_bars()

REPO_TYPE = "dataset"
MAIN_REPO_ID = "optimum-benchmark/llm-perf-leaderboard"
@@ -14,6 +19,43 @@
PERF_DF = "perf-df-{backend}-{hardware}-{subset}-{machine}.csv"
LLM_DF = "llm-df.csv"

def patch_json(file):
"""
Patch a JSON file by adding a 'stdev_' key with the same value as 'stdev' for all occurrences,
but only if 'stdev_' doesn't already exist at the same level.
This keeps benchmark files produced by older optimum-benchmark versions compatible with the new format.

This function reads a JSON file, recursively traverses the data structure,
and for each dictionary that contains a 'stdev' key without a corresponding 'stdev_' key,
it adds a 'stdev_' key with the same value. The modified data is then written back to the file.

Args:
file (str): The path to the JSON file to be patched.

Returns:
None
"""
with open(file, "r") as f:
data = json.load(f)

def add_stdev_(obj):
if isinstance(obj, dict):
new_items = []
for key, value in obj.items():
if key == "stdev" and "stdev_" not in obj:
new_items.append(("stdev_", value))
if isinstance(value, (dict, list)):
add_stdev_(value)
for key, value in new_items:
obj[key] = value
elif isinstance(obj, list):
for item in obj:
add_stdev_(item)

add_stdev_(data)

with open(file, "w") as f:
json.dump(data, f, indent=4)
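A quick illustration of the patch in action, using made-up benchmark data rather than a real result file:

```python
import json
import tempfile

# Write a tiny fake benchmark file (illustrative data, not from the repo).
with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
    json.dump({"latency": {"mean": 0.5, "stdev": 0.01}}, f)

patch_json(f.name)
with open(f.name) as patched:
    print(patched.read())  # "latency" now also carries "stdev_": 0.01
```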

def gather_benchmarks(subset: str, machine: str, backend: str, hardware: str):
"""
@@ -23,11 +65,14 @@ def gather_benchmarks(subset: str, machine: str, backend: str, hardware: str):
subset=subset, machine=machine, backend=backend, hardware=hardware
)
snapshot = snapshot_download(
repo_type=REPO_TYPE, repo_id=perf_repo_id, allow_patterns=["**/benchmark.json"]
repo_type=REPO_TYPE,
repo_id=perf_repo_id,
allow_patterns=["**/benchmark.json"],
)

dfs = []
for file in glob(f"{snapshot}/**/benchmark.json", recursive=True):
patch_json(file)
dfs.append(Benchmark.from_json(file).to_dataframe())
benchmarks = pd.concat(dfs, ignore_index=True)

@@ -42,6 +87,17 @@ def gather_benchmarks(subset: str, machine: str, backend: str, hardware: str):
path_in_repo=perf_df,
path_or_fileobj=perf_df,
)
print(f"Uploaded {perf_df} to {MAIN_REPO_ID}")


# def check_if_url_exists(url: str) -> bool:
#     """
#     Check if a dataset repo exists on the Hub
#     """
#     return repo_exists(url, repo_type="dataset")



def update_perf_dfs():
@@ -67,15 +123,17 @@ def update_perf_dfs():
print(f" • Machine: {hardware_config.machine}")
print(f" • Hardware Type: {hardware_config.hardware}")
url = f"{PERF_REPO_ID.format(subset=subset, machine=hardware_config.machine, backend=backend, hardware=hardware_config.hardware)}"
-print(
-    f"Check that URL exists: https://huggingface.co/datasets/{url}"
-)
-raise e
+does_exist = repo_exists(url, repo_type="dataset")
+
+if does_exist:
+    print(f"Dataset exists: {url} but could not be processed")



scrapping_script = """
git clone https://github.com/Weyaxi/scrape-open-llm-leaderboard.git
-pip install -r scrape-open-llm-leaderboard/requirements.txt
+pip install -r scrape-open-llm-leaderboard/requirements.txt -q
python scrape-open-llm-leaderboard/main.py
rm -rf scrape-open-llm-leaderboard
"""