From 474d08fddbbac8f2e3840ef6b07b772375fd9a04 Mon Sep 17 00:00:00 2001 From: wangli Date: Sat, 31 May 2025 17:39:27 +0800 Subject: [PATCH 01/31] cache dataset for speed Signed-off-by: wangli --- benchmarks/tests/serving-tests.json | 2 +- benchmarks/tests/throughput-tests.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmarks/tests/serving-tests.json b/benchmarks/tests/serving-tests.json index bf28799dcbb..3817a13ee5f 100644 --- a/benchmarks/tests/serving-tests.json +++ b/benchmarks/tests/serving-tests.json @@ -46,7 +46,7 @@ "model": "Qwen/Qwen3-8B", "backend": "vllm", "dataset_name": "sharegpt", - "dataset_path": "/root/.cache/datasets/sharegpt/ShareGPT_V3_unfiltered_cleaned_split.json", + "dataset_path": "/github/home/.cache/datasets/sharegpt/ShareGPT_V3_unfiltered_cleaned_split.json", "num_prompts": 200 } } diff --git a/benchmarks/tests/throughput-tests.json b/benchmarks/tests/throughput-tests.json index 58b02966a05..6ee837a25eb 100644 --- a/benchmarks/tests/throughput-tests.json +++ b/benchmarks/tests/throughput-tests.json @@ -5,7 +5,7 @@ "model": "Qwen/Qwen3-8B", "tensor_parallel_size": 1, "load_format": "dummy", - "dataset_path": "/root/.cache/datasets/sharegpt/ShareGPT_V3_unfiltered_cleaned_split.json", + "dataset_path": "/github/home/.cache/datasets/sharegpt/ShareGPT_V3_unfiltered_cleaned_split.json", "num_prompts": 200, "backend": "vllm" } From aa92f4ead1ab7b83cff0c8c2b72d512a690a0fbf Mon Sep 17 00:00:00 2001 From: wangli Date: Sat, 31 May 2025 17:39:55 +0800 Subject: [PATCH 02/31] pr trigger for test Signed-off-by: wangli --- .github/workflows/nightly_benchmarks.yaml | 14 ++++++-------- benchmarks/tests/serving-tests.json | 2 +- benchmarks/tests/throughput-tests.json | 2 +- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml index fc2dc2cbba0..02a5a764cbd 100644 --- a/.github/workflows/nightly_benchmarks.yaml +++ b/.github/workflows/nightly_benchmarks.yaml @@ -21,14 +21,12 @@ on: schedule: - cron: '00 16 * * *' workflow_dispatch: - - # after merged, secrets will be available - # pull_request: - # branches: - # - 'main' - # - '*-dev' - # paths: - # - '.github/workflows/nightly_benchmarks.yaml' + pull_request: + branches: + - 'main' + - '*-dev' + paths: + - '.github/workflows/nightly_benchmarks.yaml' # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly diff --git a/benchmarks/tests/serving-tests.json b/benchmarks/tests/serving-tests.json index 3817a13ee5f..d8ad2be2b59 100644 --- a/benchmarks/tests/serving-tests.json +++ b/benchmarks/tests/serving-tests.json @@ -46,7 +46,7 @@ "model": "Qwen/Qwen3-8B", "backend": "vllm", "dataset_name": "sharegpt", - "dataset_path": "/github/home/.cache/datasets/sharegpt/ShareGPT_V3_unfiltered_cleaned_split.json", + "dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json", "num_prompts": 200 } } diff --git a/benchmarks/tests/throughput-tests.json b/benchmarks/tests/throughput-tests.json index 6ee837a25eb..551d23882e6 100644 --- a/benchmarks/tests/throughput-tests.json +++ b/benchmarks/tests/throughput-tests.json @@ -5,7 +5,7 @@ "model": "Qwen/Qwen3-8B", "tensor_parallel_size": 1, "load_format": "dummy", - "dataset_path": "/github/home/.cache/datasets/sharegpt/ShareGPT_V3_unfiltered_cleaned_split.json", + "dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json", "num_prompts": 200, "backend": "vllm" } From b740cce4c6ecbdb9f9861726d00c155cabf793d1 Mon Sep 17 00:00:00 2001 From: wangli Date: Sat, 31 May 2025 19:45:21 +0800 Subject: [PATCH 03/31] fix dataset path Signed-off-by: wangli --- benchmarks/scripts/run-performance-benchmarks.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/scripts/run-performance-benchmarks.sh b/benchmarks/scripts/run-performance-benchmarks.sh index 9e7c85c8399..eab10014430 100644 --- a/benchmarks/scripts/run-performance-benchmarks.sh +++ b/benchmarks/scripts/run-performance-benchmarks.sh @@ -19,7 +19,7 @@ check_npus() { } ensure_sharegpt_downloaded() { - local FILE=ShareGPT_V3_unfiltered_cleaned_split.json + local FILE=/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json if [ ! -f "$FILE" ]; then echo "$FILE not found, downloading from hf-mirror ..." wget https://hf-mirror.com/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/$FILE From dee9f54783ba9da8aa7f5dad0901a67ef64ca31a Mon Sep 17 00:00:00 2001 From: wangli Date: Sat, 31 May 2025 20:05:17 +0800 Subject: [PATCH 04/31] rename job Signed-off-by: wangli --- .github/workflows/nightly_benchmarks.yaml | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml index 02a5a764cbd..75524222c8f 100644 --- a/.github/workflows/nightly_benchmarks.yaml +++ b/.github/workflows/nightly_benchmarks.yaml @@ -15,18 +15,14 @@ # limitations under the License. # -name: 'run benchmarks main' +name: 'Nightly Benchmarks' +# This workflow runs nightly benchmarks for vllm-ascend. on: schedule: + # Run at 24:00 everyday - cron: '00 16 * * *' workflow_dispatch: - pull_request: - branches: - - 'main' - - '*-dev' - paths: - - '.github/workflows/nightly_benchmarks.yaml' # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly @@ -38,7 +34,7 @@ defaults: jobs: test: - name: run benchmarks main + name: Benchmarks/vLLM=${{ matrix.vllm_branch }}, vLLM-Ascend=${{ matrix.vllm_ascend_branch }} runs-on: 'linux-arm64-npu-static-8' strategy: matrix: From 47ee5ad5eff0d811fab6265b0317528f4ab56e5f Mon Sep 17 00:00:00 2001 From: wangli Date: Sat, 31 May 2025 20:07:12 +0800 Subject: [PATCH 05/31] add hf_home Signed-off-by: wangli --- .github/workflows/nightly_benchmarks.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml index 75524222c8f..19dd48c04a1 100644 --- a/.github/workflows/nightly_benchmarks.yaml +++ b/.github/workflows/nightly_benchmarks.yaml @@ -58,6 +58,7 @@ jobs: env: HF_ENDPOINT: https://hf-mirror.com HF_TOKEN: ${{ secrets.HF_TOKEN }} + HF_HOME: /github/home/.cache/huggingface ES_OM_DOMAIN: ${{ secrets.ES_OM_DOMAIN }} ES_OM_AUTHORIZATION: ${{ secrets.ES_OM_AUTHORIZATION }} steps: From 00eda3c3be746834304817badb0d98d685455993 Mon Sep 17 00:00:00 2001 From: wangli Date: Sat, 31 May 2025 20:26:38 +0800 Subject: [PATCH 06/31] fix Signed-off-by: wangli --- benchmarks/scripts/run-performance-benchmarks.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/benchmarks/scripts/run-performance-benchmarks.sh b/benchmarks/scripts/run-performance-benchmarks.sh index eab10014430..68fb40bb32e 100644 --- a/benchmarks/scripts/run-performance-benchmarks.sh +++ b/benchmarks/scripts/run-performance-benchmarks.sh @@ -1,6 +1,5 @@ #!/bin/bash -set -e check_npus() { # shellcheck disable=SC2155 From 579ef0c59e04a419a3d92aa421e6c5305915f132 Mon Sep 17 00:00:00 2001 From: wangli Date: Sat, 31 May 2025 21:14:32 +0800 Subject: [PATCH 07/31] fix curl Signed-off-by: wangli --- benchmarks/scripts/run-performance-benchmarks.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/benchmarks/scripts/run-performance-benchmarks.sh b/benchmarks/scripts/run-performance-benchmarks.sh index 68fb40bb32e..584fb34cb80 100644 --- a/benchmarks/scripts/run-performance-benchmarks.sh +++ b/benchmarks/scripts/run-performance-benchmarks.sh @@ -48,7 +48,8 @@ wait_for_server() { # wait for vllm server to start # return 1 if vllm server crashes timeout 1200 bash -c ' - until curl -s -X POST localhost:8000/v1/completions || curl -s -X POST localhost:8000/v1/chat/completions; do + until curl -s -X GET localhost:8000/health; do + echo "Waiting for vllm server to start..." sleep 1 done' && return 0 || return 1 } From bdf87e54790b5baf09fa4996b1855ae0551f14ab Mon Sep 17 00:00:00 2001 From: wangli Date: Sat, 31 May 2025 21:32:23 +0800 Subject: [PATCH 08/31] fake testing Signed-off-by: wangli --- .github/workflows/nightly_benchmarks.yaml | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml index 19dd48c04a1..7a6627a29e7 100644 --- a/.github/workflows/nightly_benchmarks.yaml +++ b/.github/workflows/nightly_benchmarks.yaml @@ -23,6 +23,12 @@ on: # Run at 24:00 everyday - cron: '00 16 * * *' workflow_dispatch: + pull_request: + branches: + - 'main' + - '*-dev' + paths: + - '.github/workflows/nightly_benchmarks.yaml' # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly @@ -119,8 +125,15 @@ jobs: - name: Collect pr info from vllm-project/vllm-ascend run: | # Only get the pull request which may influences performance - git log --pretty=format:"%H %s" -- '**/*.py' ':!docs/*' ':!tests/*' ':!examples/*' > commit_log.txt - escli check commit_log.txt + # git log --pretty=format:"%H %s" -- '**/*.py' ':!docs/*' ':!tests/*' ':!examples/*' > commit_log.txt + # escli check commit_log.txt + + # make a fake commit log for testing + printf "%s\n" \ + "507ae627cad68d93c62adf3c2409f4ff10a25536 feat: support compile torchair graph while warming up (#839)" \ + "5a1689fc648c8afa04ee040bf1b3526a6fe3d75e [Fix] Fix update_aclgraph_sizes when running MoE models (#913)" \ + "3442fbdb235b4c6d72c2bc64a49707a7bd89958e [1/N][UT][v1 MTP] add basic v1 mtp features (#890)" \ + > commit_log.txt - name: Run benchmark iteration run: | From 94cc7faf77803bea2b3e8d1f994fe95875dd2671 Mon Sep 17 00:00:00 2001 From: wangli Date: Sat, 31 May 2025 21:46:56 +0800 Subject: [PATCH 09/31] test Signed-off-by: wangli --- .github/workflows/nightly_benchmarks.yaml | 22 +++++++++---------- .../scripts/run-performance-benchmarks.sh | 4 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml index 7a6627a29e7..c881ff186ef 100644 --- a/.github/workflows/nightly_benchmarks.yaml +++ b/.github/workflows/nightly_benchmarks.yaml @@ -110,17 +110,17 @@ jobs: pip install -e . pip install -r benchmarks/requirements-bench.txt - - name: Checkout cosdt/elastic-tool - uses: actions/checkout@v4 - with: - repository: cosdt/elastic-tool - path: ./elastic_tool - ref: 0.1.0-dev - - - name: Install elastic_tool - working-directory: ./elastic_tool - run: | - pip install -e . + # - name: Checkout cosdt/elastic-tool + # uses: actions/checkout@v4 + # with: + # repository: cosdt/elastic-tool + # path: ./elastic_tool + # ref: 0.1.0-dev + + # - name: Install elastic_tool + # working-directory: ./elastic_tool + # run: | + # pip install -e . - name: Collect pr info from vllm-project/vllm-ascend run: | diff --git a/benchmarks/scripts/run-performance-benchmarks.sh b/benchmarks/scripts/run-performance-benchmarks.sh index 584fb34cb80..bc0b9a7d19e 100644 --- a/benchmarks/scripts/run-performance-benchmarks.sh +++ b/benchmarks/scripts/run-performance-benchmarks.sh @@ -18,10 +18,10 @@ check_npus() { } ensure_sharegpt_downloaded() { - local FILE=/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json + local FILE="/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json" if [ ! -f "$FILE" ]; then echo "$FILE not found, downloading from hf-mirror ..." - wget https://hf-mirror.com/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/$FILE + wget https://hf-mirror.com/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json else echo "$FILE already exists." fi From 35e93e3d9390a0c7009a6b308d55c89513347544 Mon Sep 17 00:00:00 2001 From: wangli Date: Sat, 31 May 2025 23:26:54 +0800 Subject: [PATCH 10/31] add benchmark patch Signed-off-by: wangli --- .github/workflows/nightly_benchmarks.yaml | 2 +- benchmarks/requirements-bench.txt | 3 +- benchmarks/scripts/patch_benchmark_dataset.py | 68 +++++++++++++++++++ 3 files changed, 71 insertions(+), 2 deletions(-) create mode 100644 benchmarks/scripts/patch_benchmark_dataset.py diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml index c881ff186ef..166c08c1c60 100644 --- a/.github/workflows/nightly_benchmarks.yaml +++ b/.github/workflows/nightly_benchmarks.yaml @@ -15,7 +15,7 @@ # limitations under the License. # -name: 'Nightly Benchmarks' +name: 'Benchmarks / Performance' # This workflow runs nightly benchmarks for vllm-ascend. on: diff --git a/benchmarks/requirements-bench.txt b/benchmarks/requirements-bench.txt index b3f3c06aad0..fa52169f74f 100644 --- a/benchmarks/requirements-bench.txt +++ b/benchmarks/requirements-bench.txt @@ -1,3 +1,4 @@ pandas datasets -modelscope \ No newline at end of file +modelscope +libcst \ No newline at end of file diff --git a/benchmarks/scripts/patch_benchmark_dataset.py b/benchmarks/scripts/patch_benchmark_dataset.py new file mode 100644 index 00000000000..1e5aeabb96f --- /dev/null +++ b/benchmarks/scripts/patch_benchmark_dataset.py @@ -0,0 +1,68 @@ +from argparse import ArgumentParser + +import libcst as cst +import libcst.matchers as m + +# Patch the benchmark_dataset.py file to set streaming=False in load_dataset calls + + + +class StreamingFalseTransformer(cst.CSTTransformer): + + def __init__(self): + self.in_target_class = False + self.in_target_func = False + + def visit_ClassDef(self, node): + if node.name.value == "HuggingFaceDataset": + self.in_target_class = True + + def leave_ClassDef(self, original_node, updated_node): + self.in_target_class = False + return updated_node + + def visit_FunctionDef(self, node): + if self.in_target_class and node.name.value == "load_data": + self.in_target_func = True + + def leave_FunctionDef(self, original_node, updated_node): + self.in_target_func = False + return updated_node + + def leave_Call(self, original_node, updated_node): + if self.in_target_class and self.in_target_func: + if m.matches(updated_node.func, m.Name("load_dataset")): + new_args = [] + for arg in updated_node.args: + if arg.keyword and arg.keyword.value == "streaming": + new_arg = arg.with_changes(value=cst.Name("False")) + new_args.append(new_arg) + else: + new_args.append(arg) + return updated_node.with_changes(args=new_args) + return updated_node + + +def patch_file(path): + with open(path, "r", encoding="utf-8") as f: + source = f.read() + + module = cst.parse_module(source) + modified = module.visit(StreamingFalseTransformer()) + + with open(path, "w", encoding="utf-8") as f: + f.write(modified.code) + + print(f"Patched: {path}") + + +if __name__ == '__main__': + parser = ArgumentParser( + description= + "Patch benchmark_dataset.py to set streaming=False in load_dataset calls" + ) + parser.add_argument("--path", + type=str, + help="Path to the benchmark_dataset.py file") + args = parser.parse_args() + patch_file(args.path) From 546d383b6e01d0a3b0c9bb2489ad9a1caa00ac75 Mon Sep 17 00:00:00 2001 From: wangli Date: Sat, 31 May 2025 23:28:33 +0800 Subject: [PATCH 11/31] add patch Signed-off-by: wangli --- benchmarks/scripts/run-performance-benchmarks.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmarks/scripts/run-performance-benchmarks.sh b/benchmarks/scripts/run-performance-benchmarks.sh index bc0b9a7d19e..caabc5fe35b 100644 --- a/benchmarks/scripts/run-performance-benchmarks.sh +++ b/benchmarks/scripts/run-performance-benchmarks.sh @@ -290,6 +290,7 @@ main() { # prepare for benchmarking cd benchmarks || exit 1 get_benchmarks_scripts + python3 scripts/patch_benchmark_dataset.py trap cleanup EXIT QUICK_BENCHMARK_ROOT=./ From 5979e305b3736a100941196109a1ca9d7112308d Mon Sep 17 00:00:00 2001 From: wangli Date: Sat, 31 May 2025 23:29:19 +0800 Subject: [PATCH 12/31] fix path Signed-off-by: wangli --- benchmarks/scripts/run-performance-benchmarks.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/scripts/run-performance-benchmarks.sh b/benchmarks/scripts/run-performance-benchmarks.sh index caabc5fe35b..17950e48038 100644 --- a/benchmarks/scripts/run-performance-benchmarks.sh +++ b/benchmarks/scripts/run-performance-benchmarks.sh @@ -290,7 +290,7 @@ main() { # prepare for benchmarking cd benchmarks || exit 1 get_benchmarks_scripts - python3 scripts/patch_benchmark_dataset.py + python3 scripts/patch_benchmark_dataset.pyc--path vllm_benchmarks/benchmark_dataset.py trap cleanup EXIT QUICK_BENCHMARK_ROOT=./ From 19de361f74f0d0cc3c67d5120fdeff3304d5af8d Mon Sep 17 00:00:00 2001 From: wangli Date: Sat, 31 May 2025 23:34:47 +0800 Subject: [PATCH 13/31] fix format Signed-off-by: wangli --- benchmarks/scripts/patch_benchmark_dataset.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/benchmarks/scripts/patch_benchmark_dataset.py b/benchmarks/scripts/patch_benchmark_dataset.py index 1e5aeabb96f..81fbea1934e 100644 --- a/benchmarks/scripts/patch_benchmark_dataset.py +++ b/benchmarks/scripts/patch_benchmark_dataset.py @@ -2,11 +2,9 @@ import libcst as cst import libcst.matchers as m - # Patch the benchmark_dataset.py file to set streaming=False in load_dataset calls - class StreamingFalseTransformer(cst.CSTTransformer): def __init__(self): From 53bb071b796faa632eb88ace652654fbd6831ed9 Mon Sep 17 00:00:00 2001 From: wangli Date: Sat, 31 May 2025 23:36:22 +0800 Subject: [PATCH 14/31] fix Signed-off-by: wangli --- benchmarks/scripts/run-performance-benchmarks.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/scripts/run-performance-benchmarks.sh b/benchmarks/scripts/run-performance-benchmarks.sh index 17950e48038..32d59b7ecac 100644 --- a/benchmarks/scripts/run-performance-benchmarks.sh +++ b/benchmarks/scripts/run-performance-benchmarks.sh @@ -290,7 +290,7 @@ main() { # prepare for benchmarking cd benchmarks || exit 1 get_benchmarks_scripts - python3 scripts/patch_benchmark_dataset.pyc--path vllm_benchmarks/benchmark_dataset.py + python3 scripts/patch_benchmark_dataset.pyc --path vllm_benchmarks/benchmark_dataset.py trap cleanup EXIT QUICK_BENCHMARK_ROOT=./ From edf44aca8ade1b026e1dfed2b3fb2b6e12dec1a4 Mon Sep 17 00:00:00 2001 From: wangli Date: Sat, 31 May 2025 23:50:50 +0800 Subject: [PATCH 15/31] fix isort Signed-off-by: wangli --- benchmarks/scripts/patch_benchmark_dataset.py | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmarks/scripts/patch_benchmark_dataset.py b/benchmarks/scripts/patch_benchmark_dataset.py index 81fbea1934e..a435d207073 100644 --- a/benchmarks/scripts/patch_benchmark_dataset.py +++ b/benchmarks/scripts/patch_benchmark_dataset.py @@ -2,6 +2,7 @@ import libcst as cst import libcst.matchers as m + # Patch the benchmark_dataset.py file to set streaming=False in load_dataset calls From df3b385f39f42aef74f3cd42a1e4f59b8a691f60 Mon Sep 17 00:00:00 2001 From: wangli Date: Sun, 1 Jun 2025 23:43:44 +0800 Subject: [PATCH 16/31] fix bug Signed-off-by: wangli --- .github/workflows/nightly_benchmarks.yaml | 4 ++++ benchmarks/scripts/run-performance-benchmarks.sh | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml index 166c08c1c60..4c7c77c16df 100644 --- a/.github/workflows/nightly_benchmarks.yaml +++ b/.github/workflows/nightly_benchmarks.yaml @@ -38,6 +38,10 @@ defaults: run: shell: bash -el {0} +concurrency: + group: pr-${{ github.event.pull_request.number }} + cancel-in-progress: true + jobs: test: name: Benchmarks/vLLM=${{ matrix.vllm_branch }}, vLLM-Ascend=${{ matrix.vllm_ascend_branch }} diff --git a/benchmarks/scripts/run-performance-benchmarks.sh b/benchmarks/scripts/run-performance-benchmarks.sh index 32d59b7ecac..22152e626af 100644 --- a/benchmarks/scripts/run-performance-benchmarks.sh +++ b/benchmarks/scripts/run-performance-benchmarks.sh @@ -290,7 +290,7 @@ main() { # prepare for benchmarking cd benchmarks || exit 1 get_benchmarks_scripts - python3 scripts/patch_benchmark_dataset.pyc --path vllm_benchmarks/benchmark_dataset.py + python3 scripts/patch_benchmark_dataset.py --path vllm_benchmarks/benchmark_dataset.py trap cleanup EXIT QUICK_BENCHMARK_ROOT=./ From efc0c1b5db81fa54e87cbb534eaa3dd716f82be2 Mon Sep 17 00:00:00 2001 From: wangli Date: Sun, 1 Jun 2025 23:45:50 +0800 Subject: [PATCH 17/31] testing Signed-off-by: wangli --- benchmarks/scripts/run-performance-benchmarks.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/scripts/run-performance-benchmarks.sh b/benchmarks/scripts/run-performance-benchmarks.sh index 22152e626af..94c16ace12b 100644 --- a/benchmarks/scripts/run-performance-benchmarks.sh +++ b/benchmarks/scripts/run-performance-benchmarks.sh @@ -299,7 +299,7 @@ main() { mkdir -p $RESULTS_FOLDER trap cleanup_on_error ERR - ensure_sharegpt_downloaded + # ensure_sharegpt_downloaded # benchmarks run_serving_tests $QUICK_BENCHMARK_ROOT/tests/serving-tests.json run_latency_tests $QUICK_BENCHMARK_ROOT/tests/latency-tests.json From c1431194b8f3d1b9a4f93c417a7cca2af4ed6f49 Mon Sep 17 00:00:00 2001 From: wangli Date: Mon, 2 Jun 2025 00:01:01 +0800 Subject: [PATCH 18/31] test Signed-off-by: wangli --- .github/workflows/nightly_benchmarks.yaml | 4 ++-- benchmarks/scripts/run-performance-benchmarks.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml index 4c7c77c16df..0b83c504622 100644 --- a/.github/workflows/nightly_benchmarks.yaml +++ b/.github/workflows/nightly_benchmarks.yaml @@ -94,8 +94,8 @@ jobs: - name: Checkout vllm-project/vllm-ascend repo uses: actions/checkout@v4 - with: - ref: ${{ matrix.vllm_ascend_branch }} + # with: + # ref: ${{ matrix.vllm_ascend_branch }} - name: Checkout vllm-project/vllm repo uses: actions/checkout@v4 diff --git a/benchmarks/scripts/run-performance-benchmarks.sh b/benchmarks/scripts/run-performance-benchmarks.sh index 94c16ace12b..22152e626af 100644 --- a/benchmarks/scripts/run-performance-benchmarks.sh +++ b/benchmarks/scripts/run-performance-benchmarks.sh @@ -299,7 +299,7 @@ main() { mkdir -p $RESULTS_FOLDER trap cleanup_on_error ERR - # ensure_sharegpt_downloaded + ensure_sharegpt_downloaded # benchmarks run_serving_tests $QUICK_BENCHMARK_ROOT/tests/serving-tests.json run_latency_tests $QUICK_BENCHMARK_ROOT/tests/latency-tests.json From 7890585c588ed3d76510701a25c9494086f3071f Mon Sep 17 00:00:00 2001 From: wangli Date: Mon, 2 Jun 2025 00:38:29 +0800 Subject: [PATCH 19/31] test Signed-off-by: wangli --- .github/workflows/nightly_benchmarks.yaml | 28 +++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml index 0b83c504622..517ec7cd167 100644 --- a/.github/workflows/nightly_benchmarks.yaml +++ b/.github/workflows/nightly_benchmarks.yaml @@ -29,6 +29,8 @@ on: - '*-dev' paths: - '.github/workflows/nightly_benchmarks.yaml' + pull_request_target: + types: [labeled] # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly @@ -94,8 +96,9 @@ jobs: - name: Checkout vllm-project/vllm-ascend repo uses: actions/checkout@v4 - # with: - # ref: ${{ matrix.vllm_ascend_branch }} + with: + ref: dev-bench + repository: Potabk/vllm-ascend - name: Checkout vllm-project/vllm repo uses: actions/checkout@v4 @@ -114,6 +117,25 @@ jobs: pip install -e . pip install -r benchmarks/requirements-bench.txt + - name: Run current commit benchmarks + if: github.event_name != 'schedule' && github.event_name != 'workflow_dispatch' + run: | + # Sometimes we only want to run benchmarks on the current commit + # This is useful for debugging or a release benchmark + commit_id=$(git log -1 --pretty=format:"%H") + commit_title=$(git log -1 --pretty=format:"%s") + commit_time=$(git show -s --format=%cd $commit_hash --date=iso-strict) + commit_time_no_tz=${commit_time::19} + + echo "------------------------" + echo "commit_id: $commit_id" + echo "commit_title: $commit_title" + echo "commit_time: $commit_time_no_tz" + echo "vllm branch: ${{ matrix.vllm_branch }}" + echo "vllm-ascend branch: ${{ matrix.vllm_ascend_branch }}" + echo "------------------------" + bash benchmarks/scripts/run-performance-benchmarks.sh + # - name: Checkout cosdt/elastic-tool # uses: actions/checkout@v4 # with: @@ -127,6 +149,7 @@ jobs: # pip install -e . - name: Collect pr info from vllm-project/vllm-ascend + if: github.event_name == 'schedule' run: | # Only get the pull request which may influences performance # git log --pretty=format:"%H %s" -- '**/*.py' ':!docs/*' ':!tests/*' ':!examples/*' > commit_log.txt @@ -140,6 +163,7 @@ jobs: > commit_log.txt - name: Run benchmark iteration + if: github.event_name == 'schedule' run: | while IFS= read -r line || [[ -n "$line" ]]; do commit_id=${line%% *} From acc5c3f9b4c2ff5885f6f38c58883ce346c25b4e Mon Sep 17 00:00:00 2001 From: wangli Date: Mon, 2 Jun 2025 01:09:14 +0800 Subject: [PATCH 20/31] testing Signed-off-by: wangli --- .github/workflows/nightly_benchmarks.yaml | 26 ++++++++-------- benchmarks/requirements-bench.txt | 3 +- .../scripts/convert_json_to_markdown.py | 0 benchmarks/scripts/perf_result_template.md | 31 +++++++++++++++++++ benchmarks/tests/latency-tests.json | 10 ------ 5 files changed, 46 insertions(+), 24 deletions(-) create mode 100644 benchmarks/scripts/convert_json_to_markdown.py create mode 100644 benchmarks/scripts/perf_result_template.md diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml index 517ec7cd167..1fa53a68234 100644 --- a/.github/workflows/nightly_benchmarks.yaml +++ b/.github/workflows/nightly_benchmarks.yaml @@ -118,23 +118,23 @@ jobs: pip install -r benchmarks/requirements-bench.txt - name: Run current commit benchmarks - if: github.event_name != 'schedule' && github.event_name != 'workflow_dispatch' + if: github.event_name != 'schedule' run: | # Sometimes we only want to run benchmarks on the current commit # This is useful for debugging or a release benchmark - commit_id=$(git log -1 --pretty=format:"%H") - commit_title=$(git log -1 --pretty=format:"%s") - commit_time=$(git show -s --format=%cd $commit_hash --date=iso-strict) - commit_time_no_tz=${commit_time::19} - - echo "------------------------" - echo "commit_id: $commit_id" - echo "commit_title: $commit_title" - echo "commit_time: $commit_time_no_tz" - echo "vllm branch: ${{ matrix.vllm_branch }}" - echo "vllm-ascend branch: ${{ matrix.vllm_ascend_branch }}" - echo "------------------------" bash benchmarks/scripts/run-performance-benchmarks.sh + # Convert the benchmark results to markdown format + python3 benchmarks/scripts/convert-benchmark-results.py + + - name: Upload benchmark artifacts + if: github.event_name != 'schedule' + uses: actions/upload-artifact@v4 + with: + name: "benchmark-performance-${{ matrix.vllm_branch }}-${{ matrix.vllm_ascend_branch }}-report" + path: ./benchmarks/results/benchmark_results.md + if-no-files-found: warn + retention-days: 90 + overwrite: true # - name: Checkout cosdt/elastic-tool # uses: actions/checkout@v4 diff --git a/benchmarks/requirements-bench.txt b/benchmarks/requirements-bench.txt index fa52169f74f..54c28c84d1c 100644 --- a/benchmarks/requirements-bench.txt +++ b/benchmarks/requirements-bench.txt @@ -1,4 +1,5 @@ pandas datasets modelscope -libcst \ No newline at end of file +libcst +tabulate \ No newline at end of file diff --git a/benchmarks/scripts/convert_json_to_markdown.py b/benchmarks/scripts/convert_json_to_markdown.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/benchmarks/scripts/perf_result_template.md b/benchmarks/scripts/perf_result_template.md new file mode 100644 index 00000000000..2bf857a277b --- /dev/null +++ b/benchmarks/scripts/perf_result_template.md @@ -0,0 +1,31 @@ +## Online serving tests + +- Input length: randomly sample 200 prompts from [ShareGPT](https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/blob/main/ShareGPT_V3_unfiltered_cleaned_split.json) and [lmarena-ai/vision-arena-bench-v0.1](https://huggingface.co/datasets/lmarena-ai/vision-arena-bench-v0.1/tree/main)(multi-modal) dataset (with fixed random seed). +- Output length: the corresponding output length of these 200 prompts. +- Batch size: dynamically determined by vllm and the arrival pattern of the requests. +- **Average QPS (query per second)**: 1, 4, 16 and inf. QPS = inf means all requests come at once. For other QPS values, the arrival time of each query is determined using a random Poisson process (with fixed random seed). +- Models: Qwen/Qwen3-8B, Qwen/Qwen2.5-VL-7B-Instruct +- Evaluation metrics: throughput, TTFT (median time to the first token ), ITL (median inter-token latency) TPOT(median time per output token). + +{serving_tests_markdown_table} + +## Offline tests +### Latency tests + +- Input length: 32 tokens. +- Output length: 128 tokens. +- Batch size: fixed (8). +- Models: Qwen/Qwen3-8B, Qwen/Qwen2.5-VL-7B-Instruct +- Evaluation metrics: end-to-end latency. + +{latency_tests_markdown_table} + +### Throughput tests + +- Input length: randomly sample 200 prompts from [ShareGPT](https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/blob/main/ShareGPT_V3_unfiltered_cleaned_split.json) and [lmarena-ai/vision-arena-bench-v0.1](https://huggingface.co/datasets/lmarena-ai/vision-arena-bench-v0.1/tree/main)(multi-modal) dataset (with fixed random seed). +- Output length: the corresponding output length of these 200 prompts. +- Batch size: dynamically determined by vllm to achieve maximum throughput. +- Models: Qwen/Qwen3-8B, Qwen/Qwen2.5-VL-7B-Instruct +- Evaluation metrics: throughput. + +{throughput_tests_markdown_table} \ No newline at end of file diff --git a/benchmarks/tests/latency-tests.json b/benchmarks/tests/latency-tests.json index d7d86748bf6..576ced213a3 100644 --- a/benchmarks/tests/latency-tests.json +++ b/benchmarks/tests/latency-tests.json @@ -1,14 +1,4 @@ [ - { - "test_name": "latency_qwen2_5vl_7B_tp1", - "parameters": { - "model": "Qwen/Qwen2.5-VL-7B-Instruct", - "tensor_parallel_size": 1, - "max_model_len": 16384, - "num_iters_warmup": 5, - "num_iters": 15 - } - }, { "test_name": "latency_qwen3_8B_tp1", "parameters": { From 23bb2e51cd53c864e145d6c137d3ed7078f371a7 Mon Sep 17 00:00:00 2001 From: wangli Date: Mon, 2 Jun 2025 01:13:37 +0800 Subject: [PATCH 21/31] fix dataset path Signed-off-by: wangli --- benchmarks/scripts/run-performance-benchmarks.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/benchmarks/scripts/run-performance-benchmarks.sh b/benchmarks/scripts/run-performance-benchmarks.sh index 22152e626af..8997fc468d2 100644 --- a/benchmarks/scripts/run-performance-benchmarks.sh +++ b/benchmarks/scripts/run-performance-benchmarks.sh @@ -19,9 +19,18 @@ check_npus() { ensure_sharegpt_downloaded() { local FILE="/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json" + local DIR + DIR=$(dirname "$FILE") + if [ ! -f "$FILE" ]; then echo "$FILE not found, downloading from hf-mirror ..." - wget https://hf-mirror.com/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json + mkdir -p "$DIR" + wget -O "$FILE" https://hf-mirror.com/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json + if [ $? -ne 0 ]; then + echo "Download failed!" >&2 + return 1 + fi + echo "Download completed and saved to $FILE" else echo "$FILE already exists." fi From 4751457bebb3775c5ff533deccd88e879b58b0cf Mon Sep 17 00:00:00 2001 From: wangli Date: Tue, 3 Jun 2025 09:12:30 +0800 Subject: [PATCH 22/31] fix convert name Signed-off-by: wangli --- .github/workflows/nightly_benchmarks.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml index 1fa53a68234..3f49c4281aa 100644 --- a/.github/workflows/nightly_benchmarks.yaml +++ b/.github/workflows/nightly_benchmarks.yaml @@ -124,7 +124,7 @@ jobs: # This is useful for debugging or a release benchmark bash benchmarks/scripts/run-performance-benchmarks.sh # Convert the benchmark results to markdown format - python3 benchmarks/scripts/convert-benchmark-results.py + python3 benchmarks/scripts/convert_json_to_markdown.py - name: Upload benchmark artifacts if: github.event_name != 'schedule' From 3f77fa4e9c36cf5bfe9e0126385958dae52e1034 Mon Sep 17 00:00:00 2001 From: wangli Date: Tue, 3 Jun 2025 09:22:40 +0800 Subject: [PATCH 23/31] add convert script Signed-off-by: wangli --- .../scripts/convert_json_to_markdown.py | 183 ++++++++++++++++++ 1 file changed, 183 insertions(+) diff --git a/benchmarks/scripts/convert_json_to_markdown.py b/benchmarks/scripts/convert_json_to_markdown.py index e69de29bb2d..2ff6f15dea6 100644 --- a/benchmarks/scripts/convert_json_to_markdown.py +++ b/benchmarks/scripts/convert_json_to_markdown.py @@ -0,0 +1,183 @@ +import argparse +import json +import os +from pathlib import Path + +import pandas as pd +from tabulate import tabulate + +CUR_PATH = Path(__file__).parent.resolve() +# latency results and the keys that will be printed into markdown +latency_results = [] +latency_column_mapping = { + "test_name": "Test name", + "avg_latency": "Mean latency (ms)", + "P50": "Median latency (ms)", + "P99": "P99 latency (ms)", +} + +# throughput tests and the keys that will be printed into markdown +throughput_results = [] +throughput_results_column_mapping = { + "test_name": "Test name", + "num_requests": "Num of reqs", + "total_num_tokens": "Total num of tokens", + "elapsed_time": "Elapsed time (s)", + "requests_per_second": "Tput (req/s)", + "tokens_per_second": "Tput (tok/s)", +} + +# serving results and the keys that will be printed into markdown +serving_results = [] +serving_column_mapping = { + "test_name": "Test name", + "request_rate": "Request rate (req/s)", + "request_throughput": "Tput (req/s)", + "output_throughput": "Output Tput (tok/s)", + "median_ttft_ms": "TTFT (ms)", + "median_tpot_ms": "TPOT (ms)", + "median_itl_ms": "ITL (ms)", +} + + +def read_markdown(file): + if os.path.exists(file): + with open(file) as f: + return f.read() + "\n" + else: + return f"{file} not found.\n" + + +def results_to_json(latency, throughput, serving): + return json.dumps({ + 'latency': latency.to_dict(), + 'throughput': throughput.to_dict(), + 'serving': serving.to_dict() + }) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Process the results of the benchmark tests.") + parser.add_argument( + "--results_folder", + type=str, + default="../results/", + help="The folder where the benchmark results are stored.") + parser.add_argument( + "--output_folder", + type=str, + default="../results/", + help="The folder where the benchmark results are stored.") + parser.add_argument("--markdown_template", + type=str, + default="./perf_result_template.md", + help="The template file for the markdown report.") + parser.add_argument("--tag", + default="main", + help="Tag to be used for release message.") + parser.add_argument("--commit_id", + default="", + help="Commit ID to be used for release message.") + + args = parser.parse_args() + results_folder = (CUR_PATH / args.results_folder).resolve() + output_folder = (CUR_PATH / args.output_folder).resolve() + markdown_template = (CUR_PATH / args.markdown_template).resolve() + + # collect results + for test_file in results_folder.glob("*.json"): + + with open(test_file) as f: + raw_result = json.loads(f.read()) + + if "serving" in str(test_file): + # this result is generated via `benchmark_serving.py` + + # update the test name of this result + raw_result.update({"test_name": test_file.stem}) + + # add the result to raw_result + serving_results.append(raw_result) + continue + + elif "latency" in f.name: + # this result is generated via `benchmark_latency.py` + + # update the test name of this result + raw_result.update({"test_name": test_file.stem}) + + # get different percentiles + for perc in [10, 25, 50, 75, 90, 99]: + # Multiply 1000 to convert the time unit from s to ms + raw_result.update( + {f"P{perc}": 1000 * raw_result["percentiles"][str(perc)]}) + raw_result["avg_latency"] = raw_result["avg_latency"] * 1000 + + # add the result to raw_result + latency_results.append(raw_result) + continue + + elif "throughput" in f.name: + # this result is generated via `benchmark_throughput.py` + + # update the test name of this result + raw_result.update({"test_name": test_file.stem}) + + # add the result to raw_result + throughput_results.append(raw_result) + continue + + print(f"Skipping {test_file}") + serving_results.sort(key=lambda x: (len(x['test_name']), x['test_name'])) + + latency_results = pd.DataFrame.from_dict(latency_results) + serving_results = pd.DataFrame.from_dict(serving_results) + throughput_results = pd.DataFrame.from_dict(throughput_results) + + raw_results_json = results_to_json(latency_results, throughput_results, + serving_results) + + # remapping the key, for visualization purpose + if not latency_results.empty: + latency_results = latency_results[list( + latency_column_mapping.keys())].rename( + columns=latency_column_mapping) + if not serving_results.empty: + serving_results = serving_results[list( + serving_column_mapping.keys())].rename( + columns=serving_column_mapping) + if not throughput_results.empty: + throughput_results = throughput_results[list( + throughput_results_column_mapping.keys())].rename( + columns=throughput_results_column_mapping) + + processed_results_json = results_to_json(latency_results, + throughput_results, + serving_results) + + # get markdown tables + latency_md_table = tabulate(latency_results, + headers='keys', + tablefmt='pipe', + showindex=False) + serving_md_table = tabulate(serving_results, + headers='keys', + tablefmt='pipe', + showindex=False) + throughput_md_table = tabulate(throughput_results, + headers='keys', + tablefmt='pipe', + showindex=False) + + # document the result + print(output_folder) + with open(output_folder / "benchmark_results.md", "w") as f: + + results = read_markdown(markdown_template) + results = results.format( + latency_tests_markdown_table=latency_md_table, + throughput_tests_markdown_table=throughput_md_table, + serving_tests_markdown_table=serving_md_table, + benchmarking_results_in_json_string=processed_results_json) + f.write(results) \ No newline at end of file From d753de090a2c3e7de827c4767fcd0852c604f9b8 Mon Sep 17 00:00:00 2001 From: wangli Date: Tue, 3 Jun 2025 15:14:42 +0800 Subject: [PATCH 24/31] add step summary Signed-off-by: wangli --- .github/workflows/nightly_benchmarks.yaml | 38 +++++++++++------------ 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml index 3f49c4281aa..36701e99635 100644 --- a/.github/workflows/nightly_benchmarks.yaml +++ b/.github/workflows/nightly_benchmarks.yaml @@ -126,6 +126,11 @@ jobs: # Convert the benchmark results to markdown format python3 benchmarks/scripts/convert_json_to_markdown.py + - name: Generate step summary + if: github.event_name != 'schedule' + run: | + cat ./benchmarks/results/benchmark_results.md >> $GITHUB_STEP_SUMMARY + - name: Upload benchmark artifacts if: github.event_name != 'schedule' uses: actions/upload-artifact@v4 @@ -136,31 +141,24 @@ jobs: retention-days: 90 overwrite: true - # - name: Checkout cosdt/elastic-tool - # uses: actions/checkout@v4 - # with: - # repository: cosdt/elastic-tool - # path: ./elastic_tool - # ref: 0.1.0-dev - - # - name: Install elastic_tool - # working-directory: ./elastic_tool - # run: | - # pip install -e . + - name: Checkout cosdt/elastic-tool + uses: actions/checkout@v4 + with: + repository: cosdt/elastic-tool + path: ./elastic_tool + ref: 0.1.0-dev + + - name: Install elastic_tool + working-directory: ./elastic_tool + run: | + pip install -e . - name: Collect pr info from vllm-project/vllm-ascend if: github.event_name == 'schedule' run: | # Only get the pull request which may influences performance - # git log --pretty=format:"%H %s" -- '**/*.py' ':!docs/*' ':!tests/*' ':!examples/*' > commit_log.txt - # escli check commit_log.txt - - # make a fake commit log for testing - printf "%s\n" \ - "507ae627cad68d93c62adf3c2409f4ff10a25536 feat: support compile torchair graph while warming up (#839)" \ - "5a1689fc648c8afa04ee040bf1b3526a6fe3d75e [Fix] Fix update_aclgraph_sizes when running MoE models (#913)" \ - "3442fbdb235b4c6d72c2bc64a49707a7bd89958e [1/N][UT][v1 MTP] add basic v1 mtp features (#890)" \ - > commit_log.txt + git log --pretty=format:"%H %s" -- '**/*.py' ':!docs/*' ':!tests/*' ':!examples/*' > commit_log.txt + escli check commit_log.txt - name: Run benchmark iteration if: github.event_name == 'schedule' From 8b857b9b17d6a05356b2fbdeb2715db26d869657 Mon Sep 17 00:00:00 2001 From: wangli Date: Tue, 3 Jun 2025 15:58:40 +0800 Subject: [PATCH 25/31] use pypi install escli Signed-off-by: wangli --- .github/workflows/nightly_benchmarks.yaml | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml index 36701e99635..86d7e8d7eb6 100644 --- a/.github/workflows/nightly_benchmarks.yaml +++ b/.github/workflows/nightly_benchmarks.yaml @@ -141,17 +141,10 @@ jobs: retention-days: 90 overwrite: true - - name: Checkout cosdt/elastic-tool - uses: actions/checkout@v4 - with: - repository: cosdt/elastic-tool - path: ./elastic_tool - ref: 0.1.0-dev - - name: Install elastic_tool working-directory: ./elastic_tool run: | - pip install -e . + pip install escli-tool==0.2.0 - name: Collect pr info from vllm-project/vllm-ascend if: github.event_name == 'schedule' From 19543b291c90dfb9f395fab415cee6ae49ecdf68 Mon Sep 17 00:00:00 2001 From: wangli Date: Tue, 3 Jun 2025 17:28:09 +0800 Subject: [PATCH 26/31] fix path Signed-off-by: wangli --- .github/workflows/nightly_benchmarks.yaml | 1 - demo.py | 0 vllm_ascend/worker/model_runner_v1.py | 1 - 3 files changed, 2 deletions(-) create mode 100644 demo.py diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml index 86d7e8d7eb6..1ad147e5b03 100644 --- a/.github/workflows/nightly_benchmarks.yaml +++ b/.github/workflows/nightly_benchmarks.yaml @@ -142,7 +142,6 @@ jobs: overwrite: true - name: Install elastic_tool - working-directory: ./elastic_tool run: | pip install escli-tool==0.2.0 diff --git a/demo.py b/demo.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index a0bc2125935..109e0a9fd75 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -1016,7 +1016,6 @@ def _profile_multimodal(self) -> None: or self.max_num_encoder_input_tokens <= 0 or self.encoder_cache_size <= 0): return - max_tokens_by_modality_dict = ( MULTIMODAL_REGISTRY.get_max_tokens_per_item_by_nonzero_modality( self.model_config)) From 6a95c81ffb08be0c12bda131fbe6b891f41e5b46 Mon Sep 17 00:00:00 2001 From: wangli Date: Tue, 3 Jun 2025 18:51:03 +0800 Subject: [PATCH 27/31] remove redundant files Signed-off-by: wangli --- .github/workflows/nightly_benchmarks.yaml | 10 ++-------- benchmarks/scripts/convert_json_to_markdown.py | 2 +- benchmarks/scripts/patch_benchmark_dataset.py | 2 +- demo.py | 0 vllm_ascend/worker/model_runner_v1.py | 1 + 5 files changed, 5 insertions(+), 10 deletions(-) delete mode 100644 demo.py diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml index 1ad147e5b03..ea8ee5cc68c 100644 --- a/.github/workflows/nightly_benchmarks.yaml +++ b/.github/workflows/nightly_benchmarks.yaml @@ -23,12 +23,7 @@ on: # Run at 24:00 everyday - cron: '00 16 * * *' workflow_dispatch: - pull_request: - branches: - - 'main' - - '*-dev' - paths: - - '.github/workflows/nightly_benchmarks.yaml' + pull_request_target: types: [labeled] @@ -97,8 +92,7 @@ jobs: - name: Checkout vllm-project/vllm-ascend repo uses: actions/checkout@v4 with: - ref: dev-bench - repository: Potabk/vllm-ascend + ref: ${{ matrix.vllm_ascend_branch }} - name: Checkout vllm-project/vllm repo uses: actions/checkout@v4 diff --git a/benchmarks/scripts/convert_json_to_markdown.py b/benchmarks/scripts/convert_json_to_markdown.py index 2ff6f15dea6..7a1c5d9968a 100644 --- a/benchmarks/scripts/convert_json_to_markdown.py +++ b/benchmarks/scripts/convert_json_to_markdown.py @@ -180,4 +180,4 @@ def results_to_json(latency, throughput, serving): throughput_tests_markdown_table=throughput_md_table, serving_tests_markdown_table=serving_md_table, benchmarking_results_in_json_string=processed_results_json) - f.write(results) \ No newline at end of file + f.write(results) diff --git a/benchmarks/scripts/patch_benchmark_dataset.py b/benchmarks/scripts/patch_benchmark_dataset.py index a435d207073..073b283ccc7 100644 --- a/benchmarks/scripts/patch_benchmark_dataset.py +++ b/benchmarks/scripts/patch_benchmark_dataset.py @@ -5,7 +5,7 @@ # Patch the benchmark_dataset.py file to set streaming=False in load_dataset calls - +# TDOO(Potabk): Remove this patch when the issue is fixed in the upstream class StreamingFalseTransformer(cst.CSTTransformer): def __init__(self): diff --git a/demo.py b/demo.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index 109e0a9fd75..2d3afa3d1ec 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -1016,6 +1016,7 @@ def _profile_multimodal(self) -> None: or self.max_num_encoder_input_tokens <= 0 or self.encoder_cache_size <= 0): return + max_tokens_by_modality_dict = ( MULTIMODAL_REGISTRY.get_max_tokens_per_item_by_nonzero_modality( self.model_config)) From 34e0275280af91153500a565f3068dc380b62c5c Mon Sep 17 00:00:00 2001 From: wangli Date: Tue, 3 Jun 2025 18:53:43 +0800 Subject: [PATCH 28/31] fix yapf Signed-off-by: wangli --- benchmarks/scripts/patch_benchmark_dataset.py | 1 + 1 file changed, 1 insertion(+) diff --git a/benchmarks/scripts/patch_benchmark_dataset.py b/benchmarks/scripts/patch_benchmark_dataset.py index 073b283ccc7..d114a65a733 100644 --- a/benchmarks/scripts/patch_benchmark_dataset.py +++ b/benchmarks/scripts/patch_benchmark_dataset.py @@ -5,6 +5,7 @@ # Patch the benchmark_dataset.py file to set streaming=False in load_dataset calls + # TDOO(Potabk): Remove this patch when the issue is fixed in the upstream class StreamingFalseTransformer(cst.CSTTransformer): From a765910492b149352bb214bccb9f3b40ae439f69 Mon Sep 17 00:00:00 2001 From: wangli Date: Tue, 3 Jun 2025 19:03:30 +0800 Subject: [PATCH 29/31] fix Signed-off-by: wangli --- .github/workflows/nightly_benchmarks.yaml | 10 +++------- vllm_ascend/worker/model_runner_v1.py | 2 +- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml index ea8ee5cc68c..65e8f24766c 100644 --- a/.github/workflows/nightly_benchmarks.yaml +++ b/.github/workflows/nightly_benchmarks.yaml @@ -24,9 +24,8 @@ on: - cron: '00 16 * * *' workflow_dispatch: - pull_request_target: - types: [labeled] - + pull_request: + types: [ labeled ] # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly # declared as "shell: bash -el {0}" on steps that need to be properly activated. @@ -87,13 +86,10 @@ jobs: run: | git config --global --add safe.directory "$GITHUB_WORKSPACE" git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/ - - name: Checkout vllm-project/vllm-ascend repo uses: actions/checkout@v4 - with: - ref: ${{ matrix.vllm_ascend_branch }} - + - name: Checkout vllm-project/vllm repo uses: actions/checkout@v4 with: diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index 2d3afa3d1ec..a0bc2125935 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -1016,7 +1016,7 @@ def _profile_multimodal(self) -> None: or self.max_num_encoder_input_tokens <= 0 or self.encoder_cache_size <= 0): return - + max_tokens_by_modality_dict = ( MULTIMODAL_REGISTRY.get_max_tokens_per_item_by_nonzero_modality( self.model_config)) From 4de2996b3b93c95fd138792b765cc2c9644cec5b Mon Sep 17 00:00:00 2001 From: wangli Date: Tue, 3 Jun 2025 19:05:20 +0800 Subject: [PATCH 30/31] fix Signed-off-by: wangli --- .github/workflows/nightly_benchmarks.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml index 65e8f24766c..0c9d986a6d9 100644 --- a/.github/workflows/nightly_benchmarks.yaml +++ b/.github/workflows/nightly_benchmarks.yaml @@ -132,6 +132,7 @@ jobs: overwrite: true - name: Install elastic_tool + if: github.event_name == 'schedule' run: | pip install escli-tool==0.2.0 From 7f5bdb2bb875e8aec38c04e83d013520b0a9478c Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Tue, 3 Jun 2025 23:31:36 +0800 Subject: [PATCH 31/31] Apply suggestions from code review Signed-off-by: Yikun Jiang --- .github/workflows/nightly_benchmarks.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml index 0c9d986a6d9..fb82c8898a8 100644 --- a/.github/workflows/nightly_benchmarks.yaml +++ b/.github/workflows/nightly_benchmarks.yaml @@ -40,6 +40,8 @@ concurrency: jobs: test: + if: ${{ contains(github.event.pull_request.labels.*.name, 'performance-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'schedule' }} + name: Benchmarks/vLLM=${{ matrix.vllm_branch }}, vLLM-Ascend=${{ matrix.vllm_ascend_branch }} runs-on: 'linux-arm64-npu-static-8' strategy: @@ -64,7 +66,6 @@ jobs: env: HF_ENDPOINT: https://hf-mirror.com HF_TOKEN: ${{ secrets.HF_TOKEN }} - HF_HOME: /github/home/.cache/huggingface ES_OM_DOMAIN: ${{ secrets.ES_OM_DOMAIN }} ES_OM_AUTHORIZATION: ${{ secrets.ES_OM_AUTHORIZATION }} steps: