1
1
import subprocess
2
2
from glob import glob
3
+ import os
3
4
4
5
import pandas as pd
5
6
from huggingface_hub import create_repo , snapshot_download , upload_file , repo_exists
15
16
MAIN_REPO_ID = "optimum-benchmark/llm-perf-leaderboard"
16
17
PERF_REPO_ID = "optimum-benchmark/llm-perf-{backend}-{hardware}-{subset}-{machine}"
17
18
18
- PERF_DF = "perf-df-{backend}-{hardware}-{subset}-{machine}.csv"
19
- LLM_DF = "llm-df.csv"
19
+ DATA_DIR = "data"
20
+ PERF_DF = os .path .join (DATA_DIR , "perf-df-{backend}-{hardware}-{subset}-{machine}.csv" )
21
+ LLM_DF = os .path .join (DATA_DIR , "llm-df.csv" )
22
+
23
+ # Create data directory if it doesn't exist
24
+ os .makedirs (DATA_DIR , exist_ok = True )
20
25
21
26
22
27
def patch_json (file ):
@@ -104,6 +109,7 @@ def update_perf_dfs():
104
109
"""
105
110
Update the performance dataframes for all machines
106
111
"""
112
+
107
113
hardware_configs = load_hardware_configs ("llm_perf/hardware.yaml" )
108
114
109
115
for hardware_config in hardware_configs :
@@ -130,18 +136,18 @@ def update_perf_dfs():
130
136
print (f"Dataset exists: { url } but could not be processed" )
131
137
132
138
133
- scrapping_script = """
134
- git clone https://github.com/Weyaxi/scrape-open-llm-leaderboard.git
135
- pip install -r scrape-open-llm-leaderboard/requirements.txt -q
136
- python scrape-open-llm-leaderboard/main.py
137
- rm -rf scrape-open-llm-leaderboard
138
- """
139
-
140
-
141
139
def update_llm_df ():
142
140
"""
143
141
Scrape the open-llm-leaderboard and update the leaderboard dataframe
144
142
"""
143
+
144
+ scrapping_script = """
145
+ git clone https://github.com/Weyaxi/scrape-open-llm-leaderboard.git
146
+ pip install -r scrape-open-llm-leaderboard/requirements.txt -q
147
+ python scrape-open-llm-leaderboard/main.py
148
+ rm -rf scrape-open-llm-leaderboard
149
+ """
150
+
145
151
subprocess .run (scrapping_script , shell = True )
146
152
create_repo (repo_id = MAIN_REPO_ID , repo_type = REPO_TYPE , exist_ok = True , private = False )
147
153
upload_file (
@@ -153,7 +159,7 @@ def update_llm_df():
153
159
154
160
155
161
def update_llm_perf_leaderboard ():
156
- update_llm_df ()
162
+ # update_llm_df()  # TODO: the open-llm-leaderboard scraper is broken; fix it, or read https://huggingface.co/datasets/open-llm-leaderboard/contents directly instead
157
163
update_perf_dfs ()
158
164
159
165
0 commit comments