Merge pull request #9 from huggingface/make-backend-public

baptistecolle · web-flow · commit 0370855e4058 · 2024-12-13T10:22:03.000+01:00
fix(open-llm): disable broken open-llm scraper
diff --git a/.github/workflows/benchmark_cpu_onnxruntime.yaml b/.github/workflows/benchmark_cpu_onnxruntime.yaml
@@ -3,7 +3,7 @@ name: Benchmark CPU Onnxruntime
 on:
   workflow_dispatch:
   schedule:
-    - cron: "0 12 * * *"
+    - cron: "0 12 * * 3"
   pull_request:
 
 concurrency:
diff --git a/.github/workflows/benchmark_cuda_pytorch.yaml b/.github/workflows/benchmark_cuda_pytorch.yaml
@@ -3,7 +3,7 @@ name: Benchmark CUDA PyTorch
 on:
   workflow_dispatch:
   schedule:
-    - cron: "0 3 * * *"
+    - cron: "0 3 * * 0"
   pull_request:
 
 concurrency:
diff --git a/.gitignore b/.gitignore
@@ -189,4 +189,5 @@ wip/
 *.csv
 optimum-benchmark/
 
-*.egg-info/
+*.egg-info/
+data/
diff --git a/llm_perf/update_llm_perf_leaderboard.py b/llm_perf/update_llm_perf_leaderboard.py
@@ -1,5 +1,6 @@
 import subprocess
 from glob import glob
+import os
 
 import pandas as pd
 from huggingface_hub import create_repo, snapshot_download, upload_file, repo_exists
@@ -15,8 +16,12 @@
 MAIN_REPO_ID = "optimum-benchmark/llm-perf-leaderboard"
 PERF_REPO_ID = "optimum-benchmark/llm-perf-{backend}-{hardware}-{subset}-{machine}"
 
-PERF_DF = "perf-df-{backend}-{hardware}-{subset}-{machine}.csv"
-LLM_DF = "llm-df.csv"
+DATA_DIR = "data"
+PERF_DF = os.path.join(DATA_DIR, "perf-df-{backend}-{hardware}-{subset}-{machine}.csv")
+LLM_DF = os.path.join(DATA_DIR, "llm-df.csv")
+
+# Create data directory if it doesn't exist
+os.makedirs(DATA_DIR, exist_ok=True)
 
 
 def patch_json(file):
@@ -104,6 +109,7 @@ def update_perf_dfs():
     """
     Update the performance dataframes for all machines
     """
+
     hardware_configs = load_hardware_configs("llm_perf/hardware.yaml")
 
     for hardware_config in hardware_configs:
@@ -130,18 +136,18 @@ def update_perf_dfs():
                         print(f"Dataset exists: {url} but could not be processed")
 
 
-scrapping_script = """
-git clone https://github.com/Weyaxi/scrape-open-llm-leaderboard.git
-pip install -r scrape-open-llm-leaderboard/requirements.txt -q
-python scrape-open-llm-leaderboard/main.py
-rm -rf scrape-open-llm-leaderboard
-"""
-
-
 def update_llm_df():
     """
     Scrape the open-llm-leaderboard and update the leaderboard dataframe
     """
+    
+    scrapping_script = """
+    git clone https://github.com/Weyaxi/scrape-open-llm-leaderboard.git
+    pip install -r scrape-open-llm-leaderboard/requirements.txt -q
+    python scrape-open-llm-leaderboard/main.py
+    rm -rf scrape-open-llm-leaderboard
+    """
+    
     subprocess.run(scrapping_script, shell=True)
     create_repo(repo_id=MAIN_REPO_ID, repo_type=REPO_TYPE, exist_ok=True, private=False)
     upload_file(
@@ -153,7 +159,7 @@ def update_llm_df():
 
 
 def update_llm_perf_leaderboard():
-    update_llm_df()
+    # update_llm_df()  # TODO: the open-llm scraper is broken; fix it, or use https://huggingface.co/datasets/open-llm-leaderboard/contents directly instead
     update_perf_dfs()