Skip to content

Commit 0370855

Browse files
Merge pull request #9 from huggingface/make-backend-public
fix(open-llm): disable broken open-llm scraper
2 parents fa884a5 + 985e712 commit 0370855

File tree

4 files changed

+21
-14
lines changed

4 files changed

+21
-14
lines changed

.github/workflows/benchmark_cpu_onnxruntime.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ name: Benchmark CPU Onnxruntime
33
on:
44
workflow_dispatch:
55
schedule:
6-
- cron: "0 12 * * *"
6+
- cron: "0 12 * * 3"
77
pull_request:
88

99
concurrency:

.github/workflows/benchmark_cuda_pytorch.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ name: Benchmark CUDA PyTorch
33
on:
44
workflow_dispatch:
55
schedule:
6-
- cron: "0 3 * * *"
6+
- cron: "0 3 * * 0"
77
pull_request:
88

99
concurrency:

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,4 +189,5 @@ wip/
189189
*.csv
190190
optimum-benchmark/
191191

192-
*.egg-info/
192+
*.egg-info/
193+
data/

llm_perf/update_llm_perf_leaderboard.py

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import subprocess
22
from glob import glob
3+
import os
34

45
import pandas as pd
56
from huggingface_hub import create_repo, snapshot_download, upload_file, repo_exists
@@ -15,8 +16,12 @@
1516
MAIN_REPO_ID = "optimum-benchmark/llm-perf-leaderboard"
1617
PERF_REPO_ID = "optimum-benchmark/llm-perf-{backend}-{hardware}-{subset}-{machine}"
1718

18-
PERF_DF = "perf-df-{backend}-{hardware}-{subset}-{machine}.csv"
19-
LLM_DF = "llm-df.csv"
19+
DATA_DIR = "data"
20+
PERF_DF = os.path.join(DATA_DIR, "perf-df-{backend}-{hardware}-{subset}-{machine}.csv")
21+
LLM_DF = os.path.join(DATA_DIR, "llm-df.csv")
22+
23+
# Create data directory if it doesn't exist
24+
os.makedirs(DATA_DIR, exist_ok=True)
2025

2126

2227
def patch_json(file):
@@ -104,6 +109,7 @@ def update_perf_dfs():
104109
"""
105110
Update the performance dataframes for all machines
106111
"""
112+
107113
hardware_configs = load_hardware_configs("llm_perf/hardware.yaml")
108114

109115
for hardware_config in hardware_configs:
@@ -130,18 +136,18 @@ def update_perf_dfs():
130136
print(f"Dataset exists: {url} but could not be processed")
131137

132138

133-
scrapping_script = """
134-
git clone https://github.com/Weyaxi/scrape-open-llm-leaderboard.git
135-
pip install -r scrape-open-llm-leaderboard/requirements.txt -q
136-
python scrape-open-llm-leaderboard/main.py
137-
rm -rf scrape-open-llm-leaderboard
138-
"""
139-
140-
141139
def update_llm_df():
142140
"""
143141
Scrape the open-llm-leaderboard and update the leaderboard dataframe
144142
"""
143+
144+
scrapping_script = """
145+
git clone https://github.com/Weyaxi/scrape-open-llm-leaderboard.git
146+
pip install -r scrape-open-llm-leaderboard/requirements.txt -q
147+
python scrape-open-llm-leaderboard/main.py
148+
rm -rf scrape-open-llm-leaderboard
149+
"""
150+
145151
subprocess.run(scrapping_script, shell=True)
146152
create_repo(repo_id=MAIN_REPO_ID, repo_type=REPO_TYPE, exist_ok=True, private=False)
147153
upload_file(
@@ -153,7 +159,7 @@ def update_llm_df():
153159

154160

155161
def update_llm_perf_leaderboard():
156-
update_llm_df()
162+
# update_llm_df()  # TODO: the open-llm scraper is broken; fix it, or use https://huggingface.co/datasets/open-llm-leaderboard/contents directly
157163
update_perf_dfs()
158164

159165

0 commit comments

Comments
 (0)