Commit 3d68df3

Merge branch 'main' into main
2 parents: b5d8972 + 5178114 · commit 3d68df3

13 files changed: +378, -51 lines

.github/workflows/nightly_benchmarks.yaml

Lines changed: 41 additions & 22 deletions
@@ -15,21 +15,16 @@
 # limitations under the License.
 #
 
-name: 'run benchmarks main'
+name: 'Benchmarks / Performance'
+# This workflow runs nightly benchmarks for vllm-ascend.
 
 on:
   schedule:
-    - cron: '00 16 * * *'
-  workflow_dispatch:
-
-  # after merged, secrets will be available
-  # pull_request:
-  #   branches:
-  #     - 'main'
-  #     - '*-dev'
-  #   paths:
-  #     - '.github/workflows/nightly_benchmarks.yaml'
+    # Run at 02:00 everyday
+    - cron: '00 18 * * *'
 
+  pull_request:
+    types: [ labeled ]
 
 # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
 # declared as "shell: bash -el {0}" on steps that need to be properly activated.

@@ -38,9 +33,15 @@ defaults:
   run:
     shell: bash -el {0}
 
+concurrency:
+  group: pr-${{ github.event.pull_request.number }}
+  cancel-in-progress: true
+
 jobs:
   test:
-    name: run benchmarks main
+    if: ${{ contains(github.event.pull_request.labels.*.name, 'performance-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'schedule' }}
+
+    name: Benchmarks/vLLM=${{ matrix.vllm_branch }}, vLLM-Ascend=${{ matrix.vllm_ascend_branch }}
     runs-on: 'linux-arm64-npu-static-8'
     strategy:
       matrix:

@@ -85,13 +86,12 @@ jobs:
         run: |
           git config --global --add safe.directory "$GITHUB_WORKSPACE"
           git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
-
 
       - name: Checkout vllm-project/vllm-ascend repo
         uses: actions/checkout@v4
         with:
-          ref: ${{ matrix.vllm_ascend_branch }}
-
+          fetch-depth: 0
+
       - name: Checkout vllm-project/vllm repo
         uses: actions/checkout@v4
         with:

@@ -109,25 +109,44 @@ jobs:
           pip install -e .
           pip install -r benchmarks/requirements-bench.txt
 
-      - name: Checkout cosdt/elastic-tool
-        uses: actions/checkout@v4
+      - name: Run current commit benchmarks
+        if: github.event_name != 'schedule'
+        run: |
+          # Sometimes we only want to run benchmarks on the current commit
+          # This is useful for debugging or a release benchmark
+          bash benchmarks/scripts/run-performance-benchmarks.sh
+          # Convert the benchmark results to markdown format
+          python3 benchmarks/scripts/convert_json_to_markdown.py
+
+      - name: Generate step summary
+        if: github.event_name != 'schedule'
+        run: |
+          cat ./benchmarks/results/benchmark_results.md >> $GITHUB_STEP_SUMMARY
+
+      - name: Upload benchmark artifacts
+        if: github.event_name != 'schedule'
+        uses: actions/upload-artifact@v4
         with:
-          repository: cosdt/elastic-tool
-          path: ./elastic_tool
-          ref: 0.1.0-dev
+          name: "benchmark-performance-${{ matrix.vllm_branch }}-${{ matrix.vllm_ascend_branch }}-report"
+          path: ./benchmarks/results/benchmark_results.md
+          if-no-files-found: warn
+          retention-days: 90
+          overwrite: true
 
       - name: Install elastic_tool
-        working-directory: ./elastic_tool
+        if: github.event_name == 'schedule'
         run: |
-          pip install -e .
+          pip install escli-tool==0.2.1
 
       - name: Collect pr info from vllm-project/vllm-ascend
+        if: github.event_name == 'schedule'
         run: |
           # Only get the pull request which may influences performance
           git log --pretty=format:"%H %s" -- '**/*.py' ':!docs/*' ':!tests/*' ':!examples/*' > commit_log.txt
           escli check commit_log.txt
 
       - name: Run benchmark iteration
+        if: github.event_name == 'schedule'
         run: |
           while IFS= read -r line || [[ -n "$line" ]]; do
             commit_id=${line%% *}

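With the trigger change above, the benchmark job runs either on the nightly schedule or on pull requests that carry both the performance-test and ready-for-test labels. The sketch below restates that gating in plain Python for readability only; the function name and inputs are hypothetical, and the real check is the workflow's if: expression shown in the diff.

def should_run_benchmarks(event_name, labels):
    # Mirrors the workflow condition: run when the PR carries both labels,
    # or unconditionally for the nightly schedule trigger.
    labeled_for_perf = "performance-test" in labels and "ready-for-test" in labels
    return labeled_for_perf or event_name == "schedule"

assert should_run_benchmarks("schedule", [])
assert not should_run_benchmarks("pull_request", ["performance-test"])
assert should_run_benchmarks("pull_request", ["performance-test", "ready-for-test"])
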
.github/workflows/vllm_ascend_test.yaml

Lines changed: 6 additions & 1 deletion
@@ -127,7 +127,12 @@ jobs:
           pytest -sv tests/singlecard/test_scheduler.py
           # guided decoding doesn't work, fix it later
           # pytest -sv tests/singlecard/test_guided_decoding.py.py
-          pytest -sv tests/singlecard/ --ignore=tests/singlecard/test_offline_inference.py --ignore=tests/singlecard/test_scheduler.py --ignore=tests/singlecard/test_guided_decoding.py
+          pytest -sv tests/singlecard/test_camem.py
+          pytest -sv tests/singlecard/ \
+            --ignore=tests/singlecard/test_offline_inference.py \
+            --ignore=tests/singlecard/test_scheduler.py \
+            --ignore=tests/singlecard/test_guided_decoding.py \
+            --ignore=tests/singlecard/test_camem.py
         else
           pytest -sv tests/multicard/test_ilama_lora_tp2.py
           # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py will raise error.

benchmarks/requirements-bench.txt

Lines changed: 3 additions & 1 deletion
@@ -1,3 +1,5 @@
 pandas
 datasets
-modelscope
+modelscope
+libcst
+tabulate
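
The two dependencies added here serve the new benchmark scripts that follow: tabulate renders pandas DataFrames as pipe-style markdown tables, and libcst powers the AST patch script. A small illustration of the tabulate output format used by the converter below; the sample data is made up and exact column widths may differ.

import pandas as pd
from tabulate import tabulate

# A made-up one-row result, just to show what tablefmt='pipe' produces.
df = pd.DataFrame([{"Test name": "latency_demo", "Mean latency (ms)": 420.0}])
print(tabulate(df, headers="keys", tablefmt="pipe", showindex=False))
# Prints a GitHub-flavored markdown table roughly like:
# | Test name    |   Mean latency (ms) |
# |:-------------|--------------------:|
# | latency_demo |                 420 |
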
benchmarks/scripts/convert_json_to_markdown.py

Lines changed: 183 additions & 0 deletions
@@ -0,0 +1,183 @@
import argparse
import json
import os
from pathlib import Path

import pandas as pd
from tabulate import tabulate

CUR_PATH = Path(__file__).parent.resolve()
# latency results and the keys that will be printed into markdown
latency_results = []
latency_column_mapping = {
    "test_name": "Test name",
    "avg_latency": "Mean latency (ms)",
    "P50": "Median latency (ms)",
    "P99": "P99 latency (ms)",
}

# throughput tests and the keys that will be printed into markdown
throughput_results = []
throughput_results_column_mapping = {
    "test_name": "Test name",
    "num_requests": "Num of reqs",
    "total_num_tokens": "Total num of tokens",
    "elapsed_time": "Elapsed time (s)",
    "requests_per_second": "Tput (req/s)",
    "tokens_per_second": "Tput (tok/s)",
}

# serving results and the keys that will be printed into markdown
serving_results = []
serving_column_mapping = {
    "test_name": "Test name",
    "request_rate": "Request rate (req/s)",
    "request_throughput": "Tput (req/s)",
    "output_throughput": "Output Tput (tok/s)",
    "median_ttft_ms": "TTFT (ms)",
    "median_tpot_ms": "TPOT (ms)",
    "median_itl_ms": "ITL (ms)",
}


def read_markdown(file):
    if os.path.exists(file):
        with open(file) as f:
            return f.read() + "\n"
    else:
        return f"{file} not found.\n"


def results_to_json(latency, throughput, serving):
    return json.dumps({
        'latency': latency.to_dict(),
        'throughput': throughput.to_dict(),
        'serving': serving.to_dict()
    })


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Process the results of the benchmark tests.")
    parser.add_argument(
        "--results_folder",
        type=str,
        default="../results/",
        help="The folder where the benchmark results are stored.")
    parser.add_argument(
        "--output_folder",
        type=str,
        default="../results/",
        help="The folder where the benchmark results are stored.")
    parser.add_argument("--markdown_template",
                        type=str,
                        default="./perf_result_template.md",
                        help="The template file for the markdown report.")
    parser.add_argument("--tag",
                        default="main",
                        help="Tag to be used for release message.")
    parser.add_argument("--commit_id",
                        default="",
                        help="Commit ID to be used for release message.")

    args = parser.parse_args()
    results_folder = (CUR_PATH / args.results_folder).resolve()
    output_folder = (CUR_PATH / args.output_folder).resolve()
    markdown_template = (CUR_PATH / args.markdown_template).resolve()

    # collect results
    for test_file in results_folder.glob("*.json"):

        with open(test_file) as f:
            raw_result = json.loads(f.read())

        if "serving" in str(test_file):
            # this result is generated via `benchmark_serving.py`

            # update the test name of this result
            raw_result.update({"test_name": test_file.stem})

            # add the result to raw_result
            serving_results.append(raw_result)
            continue

        elif "latency" in f.name:
            # this result is generated via `benchmark_latency.py`

            # update the test name of this result
            raw_result.update({"test_name": test_file.stem})

            # get different percentiles
            for perc in [10, 25, 50, 75, 90, 99]:
                # Multiply 1000 to convert the time unit from s to ms
                raw_result.update(
                    {f"P{perc}": 1000 * raw_result["percentiles"][str(perc)]})
            raw_result["avg_latency"] = raw_result["avg_latency"] * 1000

            # add the result to raw_result
            latency_results.append(raw_result)
            continue

        elif "throughput" in f.name:
            # this result is generated via `benchmark_throughput.py`

            # update the test name of this result
            raw_result.update({"test_name": test_file.stem})

            # add the result to raw_result
            throughput_results.append(raw_result)
            continue

        print(f"Skipping {test_file}")
    serving_results.sort(key=lambda x: (len(x['test_name']), x['test_name']))

    latency_results = pd.DataFrame.from_dict(latency_results)
    serving_results = pd.DataFrame.from_dict(serving_results)
    throughput_results = pd.DataFrame.from_dict(throughput_results)

    raw_results_json = results_to_json(latency_results, throughput_results,
                                       serving_results)

    # remapping the key, for visualization purpose
    if not latency_results.empty:
        latency_results = latency_results[list(
            latency_column_mapping.keys())].rename(
                columns=latency_column_mapping)
    if not serving_results.empty:
        serving_results = serving_results[list(
            serving_column_mapping.keys())].rename(
                columns=serving_column_mapping)
    if not throughput_results.empty:
        throughput_results = throughput_results[list(
            throughput_results_column_mapping.keys())].rename(
                columns=throughput_results_column_mapping)

    processed_results_json = results_to_json(latency_results,
                                             throughput_results,
                                             serving_results)

    # get markdown tables
    latency_md_table = tabulate(latency_results,
                                headers='keys',
                                tablefmt='pipe',
                                showindex=False)
    serving_md_table = tabulate(serving_results,
                                headers='keys',
                                tablefmt='pipe',
                                showindex=False)
    throughput_md_table = tabulate(throughput_results,
                                   headers='keys',
                                   tablefmt='pipe',
                                   showindex=False)

    # document the result
    print(output_folder)
    with open(output_folder / "benchmark_results.md", "w") as f:

        results = read_markdown(markdown_template)
        results = results.format(
            latency_tests_markdown_table=latency_md_table,
            throughput_tests_markdown_table=throughput_md_table,
            serving_tests_markdown_table=serving_md_table,
            benchmarking_results_in_json_string=processed_results_json)
        f.write(results)
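
A hedged sketch of the input this converter expects, inferred only from the fields it reads: a *.json result file whose name contains "latency", with avg_latency in seconds and a "percentiles" dict keyed by the percentile as a string. The folder and file names below are hypothetical.

import json
from pathlib import Path

results = Path("results")          # hypothetical results folder
results.mkdir(exist_ok=True)
fake_latency_result = {
    "avg_latency": 0.42,           # seconds; the converter multiplies by 1000
    "percentiles": {str(p): 0.40 + p / 1000 for p in [10, 25, 50, 75, 90, 99]},
}
(results / "latency_demo.json").write_text(json.dumps(fake_latency_result))
# Pointing --results_folder at this directory (and providing the markdown
# template) would yield a latency table with P50/P99 reported in milliseconds.
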
Lines changed: 68 additions & 0 deletions
@@ -0,0 +1,68 @@
from argparse import ArgumentParser

import libcst as cst
import libcst.matchers as m

# Patch the benchmark_dataset.py file to set streaming=False in load_dataset calls


# TDOO(Potabk): Remove this patch when the issue is fixed in the upstream
class StreamingFalseTransformer(cst.CSTTransformer):

    def __init__(self):
        self.in_target_class = False
        self.in_target_func = False

    def visit_ClassDef(self, node):
        if node.name.value == "HuggingFaceDataset":
            self.in_target_class = True

    def leave_ClassDef(self, original_node, updated_node):
        self.in_target_class = False
        return updated_node

    def visit_FunctionDef(self, node):
        if self.in_target_class and node.name.value == "load_data":
            self.in_target_func = True

    def leave_FunctionDef(self, original_node, updated_node):
        self.in_target_func = False
        return updated_node

    def leave_Call(self, original_node, updated_node):
        if self.in_target_class and self.in_target_func:
            if m.matches(updated_node.func, m.Name("load_dataset")):
                new_args = []
                for arg in updated_node.args:
                    if arg.keyword and arg.keyword.value == "streaming":
                        new_arg = arg.with_changes(value=cst.Name("False"))
                        new_args.append(new_arg)
                    else:
                        new_args.append(arg)
                return updated_node.with_changes(args=new_args)
        return updated_node


def patch_file(path):
    with open(path, "r", encoding="utf-8") as f:
        source = f.read()

    module = cst.parse_module(source)
    modified = module.visit(StreamingFalseTransformer())

    with open(path, "w", encoding="utf-8") as f:
        f.write(modified.code)

    print(f"Patched: {path}")


if __name__ == '__main__':
    parser = ArgumentParser(
        description=
        "Patch benchmark_dataset.py to set streaming=False in load_dataset calls"
    )
    parser.add_argument("--path",
                        type=str,
                        help="Path to the benchmark_dataset.py file")
    args = parser.parse_args()
    patch_file(args.path)

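To show the effect of the transformer above, here is a self-contained sketch that applies the same streaming=False rewrite to an inline snippet. The demo class trims the class/function gating for brevity and is not part of the commit; the SOURCE string is a fabricated stand-in for benchmark_dataset.py.

import libcst as cst
import libcst.matchers as m

SOURCE = (
    "class HuggingFaceDataset:\n"
    "    def load_data(self):\n"
    "        return load_dataset('wikitext', streaming=True)\n"
)

class DemoTransformer(cst.CSTTransformer):
    # Same rewrite as StreamingFalseTransformer, minus the class/function checks:
    # force streaming=False on every load_dataset(...) call.
    def leave_Call(self, original_node, updated_node):
        if m.matches(updated_node.func, m.Name("load_dataset")):
            new_args = [
                arg.with_changes(value=cst.Name("False"))
                if arg.keyword and arg.keyword.value == "streaming" else arg
                for arg in updated_node.args
            ]
            return updated_node.with_changes(args=new_args)
        return updated_node

print(cst.parse_module(SOURCE).visit(DemoTransformer()).code)
# The printed module ends with: return load_dataset('wikitext', streaming=False)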