2 files changed: +22 -5 lines changed
Original file line number | Diff line number | Diff line change
21
21
Run `pytest tests/test_offline_inference.py`.
22
22
"""
23
23
import os
24
+ from unittest .mock import patch
24
25
25
26
import vllm # noqa: F401
26
27
@@ -61,3 +62,20 @@ def test_models_distributed_DeepSeek():
61
62
distributed_executor_backend = "mp" ,
62
63
) as vllm_model :
63
64
vllm_model .generate_greedy (example_prompts , max_tokens )
65
+
66
+ # Smoke test: run greedy generation on DeepSeek-V2-Lite with
+ # tensor_parallel_size=4 and the "mp" distributed executor backend while
+ # VLLM_ENABLE_MC2="1" is patched into os.environ for the duration of the test.
+ # The test makes no assertions on the generated text; it passes when
+ # generation completes without raising.
+ @patch .dict (os .environ , {"VLLM_ENABLE_MC2" : "1" })
67
+ def test_models_distributed_mc2_DeepSeek ():
68
+ example_prompts = [
69
+ "vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs." ,
70
+ "Briefly describe the major milestones in the development of artificial intelligence from 1950 to 2020." ,
71
+ "Compare and contrast artificial intelligence with human intelligence in terms of processing information." ,
72
+ ]
73
+ dtype = "half"
74
+ max_tokens = 5
75
+ with VllmRunner (
76
+ "deepseek-ai/DeepSeek-V2-Lite" ,
77
+ dtype = dtype ,
78
+ tensor_parallel_size = 4 ,
79
+ distributed_executor_backend = "mp" ,
80
+ ) as vllm_model :
+ # Same prompts and token budget for every prompt; the return value is
+ # intentionally discarded (NOTE(review): consider asserting on it).
81
+ vllm_model .generate_greedy (example_prompts , max_tokens )
Original file line number Diff line number Diff line change @@ -88,15 +88,14 @@ def fused_experts_with_mc2(
88
88
0 :5 ]
89
89
90
90
w1 = w1 .transpose (1 , 2 )
91
- expert_token_nums = torch .cumsum (expert_token_nums ,
92
- dim = 0 ,
93
- dtype = torch .int64 )
91
+
94
92
group_list = expert_token_nums .to (torch .int64 )
95
93
gate_up_out_list = torch_npu .npu_grouped_matmul (
96
94
x = [expand_x ],
97
95
weight = [w1 ],
98
96
split_item = 2 ,
99
- group_list_type = 0 ,
97
+ # group_list_type=1 selects count mode: group_list is passed as per-group token counts, so no cumulative sum is needed
98
+ group_list_type = 1 ,
100
99
group_type = 0 ,
101
100
group_list = group_list ,
102
101
)
@@ -110,7 +109,7 @@ def fused_experts_with_mc2(
110
109
x = [gate_up_out ],
111
110
weight = [w2 ],
112
111
split_item = 2 ,
113
- group_list_type = 0 ,
112
+ group_list_type = 1 ,
114
113
group_type = 0 ,
115
114
group_list = group_list ,
116
115
)
You can’t perform that action at this time.
0 commit comments