Skip to content

Commit 1ccff57

Browse files
洪炜杰hahazhky
authored and committed
add fix routing for performance test
Signed-off-by: zhky <[email protected]>
1 parent 31dd471 commit 1ccff57

File tree

2 files changed

+22
-5
lines changed

2 files changed

+22
-5
lines changed

tests/multicard/test_offline_inference_distributed.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
Run `pytest tests/test_offline_inference.py`.
2222
"""
2323
import os
24+
from unittest.mock import patch
2425

2526
import vllm # noqa: F401
2627

@@ -61,3 +62,20 @@ def test_models_distributed_DeepSeek():
6162
distributed_executor_backend="mp",
6263
) as vllm_model:
6364
vllm_model.generate_greedy(example_prompts, max_tokens)
65+
66+
@patch.dict(os.environ, {"VLLM_ENABLE_MC2": "1"})
def test_models_distributed_mc2_DeepSeek():
    """Greedy-generation smoke test for distributed DeepSeek-V2-Lite.

    The environment variable ``VLLM_ENABLE_MC2`` is patched to ``"1"`` for
    the duration of the test so the MC2 code path is exercised while the
    model runs under tensor parallelism with the multiprocessing executor.
    """
    prompts = [
        "vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs.",
        "Briefly describe the major milestones in the development of artificial intelligence from 1950 to 2020.",
        "Compare and contrast artificial intelligence with human intelligence in terms of processing information.",
    ]
    # Keep generation short: this is a smoke test, not a quality check.
    num_tokens = 5
    runner_dtype = "half"
    with VllmRunner(
        "deepseek-ai/DeepSeek-V2-Lite",
        dtype=runner_dtype,
        tensor_parallel_size=4,
        distributed_executor_backend="mp",
    ) as vllm_model:
        vllm_model.generate_greedy(prompts, num_tokens)

vllm_ascend/ops/fused_moe.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -88,15 +88,14 @@ def fused_experts_with_mc2(
8888
0:5]
8989

9090
w1 = w1.transpose(1, 2)
91-
expert_token_nums = torch.cumsum(expert_token_nums,
92-
dim=0,
93-
dtype=torch.int64)
91+
9492
group_list = expert_token_nums.to(torch.int64)
9593
gate_up_out_list = torch_npu.npu_grouped_matmul(
9694
x=[expand_x],
9795
weight=[w1],
9896
split_item=2,
99-
group_list_type=0,
97+
# 1 means count mode, to avoid cumulative operation of the group list
98+
group_list_type=1,
10099
group_type=0,
101100
group_list=group_list,
102101
)
@@ -110,7 +109,7 @@ def fused_experts_with_mc2(
110109
x=[gate_up_out],
111110
weight=[w2],
112111
split_item=2,
113-
group_list_type=0,
112+
group_list_type=1,
114113
group_type=0,
115114
group_list=group_list,
116115
)

0 commit comments

Comments
 (0)