File tree Expand file tree Collapse file tree 2 files changed +22
-5
lines changed Expand file tree Collapse file tree 2 files changed +22
-5
lines changed Original file line number Diff line number Diff line change @@ -61,3 +61,21 @@ def test_models_distributed_DeepSeek():
61
61
distributed_executor_backend = "mp" ,
62
62
) as vllm_model :
63
63
vllm_model .generate_greedy (example_prompts , max_tokens )
64
+
65
+
66
def test_models_distributed_ep_DeepSeek():
    """Smoke-test multi-NPU inference of DeepSeek-V2-Lite with expert
    parallelism enabled.

    Runs greedy generation over a few fixed prompts with
    ``tensor_parallel_size=8``, ``enable_expert_parallel=True`` and the
    multiprocessing distributed executor backend.  Only checks that
    generation completes without error; outputs are not asserted.
    """
    example_prompts = [
        "vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs.",
        "Briefly describe the major milestones in the development of artificial intelligence from 1950 to 2020.",
        "Compare and contrast artificial intelligence with human intelligence in terms of processing information.",
    ]
    dtype = "half"
    # Keep generation short: this is a smoke test, not a quality check.
    max_tokens = 5
    with VllmRunner(
            "deepseek-ai/DeepSeek-V2-Lite",
            dtype=dtype,
            tensor_parallel_size=8,
            enable_expert_parallel=True,
            distributed_executor_backend="mp",
    ) as vllm_model:
        vllm_model.generate_greedy(example_prompts, max_tokens)
Original file line number Diff line number Diff line change @@ -88,15 +88,14 @@ def fused_experts_with_mc2(
88
88
0 :5 ]
89
89
90
90
w1 = w1 .transpose (1 , 2 )
91
- expert_token_nums = torch .cumsum (expert_token_nums ,
92
- dim = 0 ,
93
- dtype = torch .int64 )
91
+
94
92
group_list = expert_token_nums .to (torch .int64 )
95
93
gate_up_out_list = torch_npu .npu_grouped_matmul (
96
94
x = [expand_x ],
97
95
weight = [w1 ],
98
96
split_item = 2 ,
99
- group_list_type = 0 ,
97
+ # 1 means count mode, which avoids a cumulative-sum operation over the group list
98
+ group_list_type = 1 ,
100
99
group_type = 0 ,
101
100
group_list = group_list ,
102
101
)
@@ -110,7 +109,7 @@ def fused_experts_with_mc2(
110
109
x = [gate_up_out ],
111
110
weight = [w2 ],
112
111
split_item = 2 ,
113
- group_list_type = 0 ,
112
+ group_list_type = 1 ,
114
113
group_type = 0 ,
115
114
group_list = group_list ,
116
115
)
You can’t perform that action at this time.
0 commit comments