Commit 8e00573

add dashboard
1 parent 9aabcf9 commit 8e00573

File tree

7 files changed: +251 −34 lines changed


dashboard/clustering.py

Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
import torch
from sentence_transformers import SentenceTransformer

# Each query needs to be accompanied by a corresponding instruction describing the task.
task_name_to_instruct = {"example": "Given a question, retrieve passages that answer the question"}

query_prefix = "Instruct: " + task_name_to_instruct["example"] + "\nQuery: "
queries = [
    'are judo throws allowed in wrestling?',
    'how to become a radiology technician in michigan?'
]

# No instruction needed for retrieval passages
passages = [
    "Since you're reading this, you are probably someone from a judo background or someone who is just wondering how judo techniques can be applied under wrestling rules. So without further ado, let's get to the question. Are Judo throws allowed in wrestling? Yes, judo throws are allowed in freestyle and folkstyle wrestling. You only need to be careful to follow the slam rules when executing judo throws. In wrestling, a slam is lifting and returning an opponent to the mat with unnecessary force.",
    "Below are the basic steps to becoming a radiologic technologist in Michigan:Earn a high school diploma. As with most careers in health care, a high school education is the first step to finding entry-level employment. Taking classes in math and science, such as anatomy, biology, chemistry, physiology, and physics, can help prepare students for their college studies and future careers.Earn an associate degree. Entry-level radiologic positions typically require at least an Associate of Applied Science. Before enrolling in one of these degree programs, students should make sure it has been properly accredited by the Joint Review Committee on Education in Radiologic Technology (JRCERT).Get licensed or certified in the state of Michigan."
]

# Load the model with its tokenizer
model = SentenceTransformer('nvidia/NV-Embed-v2', trust_remote_code=True)
model.max_seq_length = 32768
model.tokenizer.padding_side = "right"

def add_eos(input_examples):
    # Append the tokenizer's EOS token to every input, as NV-Embed expects.
    return [input_example + model.tokenizer.eos_token for input_example in input_examples]

# Get the embeddings
batch_size = 2
query_embeddings = model.encode(add_eos(queries), batch_size=batch_size, prompt=query_prefix, normalize_embeddings=True)
passage_embeddings = model.encode(add_eos(passages), batch_size=batch_size, normalize_embeddings=True)

scores = (query_embeddings @ passage_embeddings.T) * 100
print(scores.tolist())
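
The script stops at similarity scores, but the file name suggests clustering is the goal. A minimal sketch of that next step, assuming scikit-learn is available (it is not among the dependencies this commit adds): because the embeddings are L2-normalized, KMeans on them approximates cosine-distance clustering.

# Hypothetical follow-up, not part of this commit: group the passage embeddings.
from sklearn.cluster import KMeans

n_clusters = 2  # illustrative value; choose based on the corpus
labels = KMeans(n_clusters=n_clusters, n_init="auto", random_state=0).fit_predict(passage_embeddings)
for passage, label in zip(passages, labels):
    print(label, passage[:60])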

dashboard/dashboard.py

Lines changed: 212 additions & 0 deletions
@@ -0,0 +1,212 @@
import json
from glob import glob

import gradio as gr
import pandas as pd
from huggingface_hub import repo_exists, snapshot_download
from huggingface_hub.errors import RepositoryNotFoundError
from optimum_benchmark import Benchmark

from llm_perf.common.hardware_config import load_hardware_configs
from llm_perf.update_llm_perf_leaderboard import patch_json

# Template for the per-configuration benchmark dataset repos on the Hugging Face Hub.
PERF_REPO_ID = "optimum-benchmark/llm-perf-{backend}-{hardware}-{subset}-{machine}"


def create_status_df():
    """Build a DataFrame marking which benchmark dataset repos exist on the Hub."""
    hardware_configs = load_hardware_configs("llm_perf/hardware.yaml")

    rows = []
    for hardware_config in hardware_configs:
        for subset in hardware_config.subsets:
            for backend in hardware_config.backends:
                repo_id = PERF_REPO_ID.format(
                    subset=subset,
                    machine=hardware_config.machine,
                    backend=backend,
                    hardware=hardware_config.hardware,
                )

                exists = repo_exists(repo_id, repo_type="dataset")
                status = "✅" if exists else "⛔️"

                rows.append({
                    "Backend": backend,
                    "Hardware": hardware_config.hardware,
                    "Subset": subset,
                    "Machine": hardware_config.machine,
                    "Status": status,
                })

    return pd.DataFrame(rows)


def create_benchmark_status_df():
    """Build a DataFrame with the per-run status of every downloaded benchmark."""
    hardware_configs = load_hardware_configs("llm_perf/hardware.yaml")

    rows = []
    for hardware_config in hardware_configs:
        for subset in hardware_config.subsets:
            for backend in hardware_config.backends:
                repo_id = PERF_REPO_ID.format(
                    subset=subset,
                    machine=hardware_config.machine,
                    backend=backend,
                    hardware=hardware_config.hardware,
                )

                try:
                    snapshot = snapshot_download(
                        repo_type="dataset",
                        repo_id=repo_id,
                        allow_patterns=["**/benchmark.json"],
                    )
                except RepositoryNotFoundError:
                    print(f"Repository {repo_id} not found")
                    continue

                for file in glob(f"{snapshot}/**/benchmark.json", recursive=True):
                    patch_json(file)

                    benchmark = Benchmark.from_json(file)
                    df = benchmark.to_dataframe()

                    for _, row in df.iterrows():
                        # A non-empty traceback means the run failed.
                        traceback = row["report.traceback"] if "report.traceback" in row else ""
                        rows.append({
                            "Backend": backend,
                            "Hardware": hardware_config.hardware,
                            "Subset": subset,
                            "Machine": hardware_config.machine,
                            "Status": "✅" if traceback == "" else "⛔️",
                            "Model": row["config.backend.model"],
                            "Experiment": row["config.name"],
                            "Traceback": traceback,
                            "Full Data": json.dumps(row.to_dict()),
                        })

    return pd.DataFrame(rows)


def create_status_table():
    df = create_status_df()
    return gr.DataFrame(
        value=df,
        headers=["Backend", "Hardware", "Subset", "Machine", "Status"],
        row_count=(len(df), "fixed"),
        col_count=(5, "fixed"),
        wrap=True,
    )


def create_benchmark_table(df_benchmark_status):
    return gr.DataFrame(
        value=df_benchmark_status,
        headers=["Backend", "Hardware", "Subset", "Machine", "Status", "Model", "Experiment", "Traceback", "Full Data"],
        row_count=(len(df_benchmark_status), "fixed"),
        col_count=(9, "fixed"),
        column_widths=[100, 100, 100, 100, 100, 200, 100, 100, 100],
    )


def compute_machine_stats(df_benchmark_status):
    """
    Compute statistics about failed benchmarks per machine.

    Args:
        df_benchmark_status (pd.DataFrame): DataFrame containing benchmark status information
    Returns:
        gr.DataFrame: Gradio DataFrame with machine failure statistics
    """
    # Stats per machine
    stats_by_machine = df_benchmark_status.groupby(['Machine']).agg(
        Total_Benchmarks=('Status', 'count'),
        Failed_Benchmarks=('Status', lambda x: (x == '⛔️').sum()),
    ).reset_index()

    stats_by_machine['Success_Rate'] = ((stats_by_machine['Total_Benchmarks'] - stats_by_machine['Failed_Benchmarks']) /
                                        stats_by_machine['Total_Benchmarks'] * 100).round(2)
    stats_by_machine['Success_Rate'] = stats_by_machine['Success_Rate'].astype(str) + '%'

    return gr.DataFrame(
        value=stats_by_machine,
        headers=["Machine", "Total_Benchmarks", "Failed_Benchmarks", "Success_Rate"],
        row_count=(len(stats_by_machine), "fixed"),
        col_count=(4, "fixed"),
        wrap=True,
    )


def compute_config_stats(df_benchmark_status):
    """
    Compute statistics about failed benchmarks per configuration.

    Args:
        df_benchmark_status (pd.DataFrame): DataFrame containing benchmark status information
    Returns:
        gr.DataFrame: Gradio DataFrame with configuration failure statistics
    """
    # Stats per (backend, hardware, subset, machine) configuration
    stats_by_config = df_benchmark_status.groupby(['Backend', 'Hardware', 'Subset', 'Machine']).agg(
        Total_Benchmarks=('Status', 'count'),
        Failed_Benchmarks=('Status', lambda x: (x == '⛔️').sum()),
    ).reset_index()

    stats_by_config['Success_Rate'] = ((stats_by_config['Total_Benchmarks'] - stats_by_config['Failed_Benchmarks']) /
                                       stats_by_config['Total_Benchmarks'] * 100).round(2)
    stats_by_config['Success_Rate'] = stats_by_config['Success_Rate'].astype(str) + '%'

    return gr.DataFrame(
        value=stats_by_config,
        headers=["Backend", "Hardware", "Subset", "Machine", "Total_Benchmarks", "Failed_Benchmarks", "Success_Rate"],
        row_count=(len(stats_by_config), "fixed"),
        col_count=(7, "fixed"),
        wrap=True,
    )


def main():
    df_benchmark_status = create_benchmark_status_df()

    with gr.Blocks() as demo:
        with gr.Tab("Hardware status"):
            gr.Markdown("# LLM Performance Dashboard")
            gr.Markdown("Status of benchmark results across different configurations")
            create_status_table()
        with gr.Tab("Benchmark status"):
            gr.Markdown("# Benchmark Results Status")
            gr.Markdown("Status of individual benchmark runs with model and experiment details")
            create_benchmark_table(df_benchmark_status)
        with gr.Tab("Stats"):
            gr.Markdown("# Stats")
            gr.Markdown("## Stats by Machine")
            gr.Markdown("Overall statistics per machine")
            compute_machine_stats(df_benchmark_status)
            gr.Markdown("## Stats by Configuration")
            gr.Markdown("Detailed statistics for each configuration")
            compute_config_stats(df_benchmark_status)
        with gr.Tab("Trends"):
            gr.Markdown("## Trends")
            gr.Markdown("Trends in benchmark results")
            gr.Markdown("TODO")

    demo.launch()


if __name__ == "__main__":
    main()
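
The success-rate aggregation above is easy to sanity-check without touching the Hub. A minimal, self-contained sketch of the same groupby/agg logic with made-up machine names and statuses (purely illustrative):

# Toy check of the success-rate aggregation; the data is made up.
import pandas as pd

toy = pd.DataFrame({
    "Machine": ["machine-a", "machine-a", "machine-b"],
    "Status": ["✅", "⛔️", "✅"],
})
stats = toy.groupby("Machine").agg(
    Total_Benchmarks=("Status", "count"),
    Failed_Benchmarks=("Status", lambda x: (x == "⛔️").sum()),
).reset_index()
stats["Success_Rate"] = ((stats["Total_Benchmarks"] - stats["Failed_Benchmarks"])
                         / stats["Total_Benchmarks"] * 100).round(2)
print(stats)  # machine-a: 50.0, machine-b: 100.0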

optimum-benchmark

Lines changed: 0 additions & 1 deletion
This file was deleted.

pyproject.toml.bak

Lines changed: 0 additions & 29 deletions
This file was deleted.

setup.py

Lines changed: 5 additions & 0 deletions
@@ -14,6 +14,7 @@
     "huggingface_hub[hf_transfer]",
     "datasets>=2.14.6",
     "beautifulsoup4",
+
     "optimum-benchmark @ git+https://github.com/huggingface/optimum-benchmark.git",
 ]

@@ -33,6 +34,10 @@
     "bitsandbytes",
     "autoawq",
 ],
+    "dashboard": [
+        "gradio>=5.0.0",
+        "sentence-transformers",
+    ]
 }

 setup(
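
With the new extra in place, the dashboard dependencies can presumably be installed from the repository root with pip install -e ".[dashboard]" (an assumption about the intended workflow; the commit itself does not show an install command).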

test.py

Whitespace-only changes.

test.sh

Lines changed: 0 additions & 4 deletions
This file was deleted.
