Commit 2e71707

洪炜杰 (hahazhky) authored and committed
remove redundant cumsum
Signed-off-by: zhky <[email protected]>
1 parent 908a851 · commit 2e71707


vllm_ascend/ops/fused_moe.py

Lines changed: 4 additions & 5 deletions
@@ -89,15 +89,14 @@ def fused_experts_with_mc2(
         0:5]

     w1 = w1.transpose(1, 2)
-    expert_token_nums = torch.cumsum(expert_token_nums,
-                                     dim=0,
-                                     dtype=torch.int64)
+
     group_list = expert_token_nums.to(torch.int64)
     gate_up_out_list = torch_npu.npu_grouped_matmul(
         x=[expand_x],
         weight=[w1],
         split_item=2,
-        group_list_type=0,
+        # 1 means count mode, to avoid cumulative operation of the group list
+        group_list_type=1,
         group_type=0,
         group_list=group_list,
     )
@@ -111,7 +110,7 @@ def fused_experts_with_mc2(
         x=[gate_up_out],
         weight=[w2],
         split_item=2,
-        group_list_type=0,
+        group_list_type=1,
         group_type=0,
         group_list=group_list,
     )
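Why the cumsum is redundant: `expert_token_nums` already holds per-expert token counts. With `group_list_type=0` the grouped matmul expects `group_list` as cumulative offsets, which is why the old code ran `torch.cumsum` first; with `group_list_type=1` ("count mode", per the in-diff comment) the counts tensor can be passed directly after a dtype cast. The sketch below is illustrative only and is not part of the commit; the tensor values and variable names are made up, and it uses plain PyTorch to show the relationship between the two encodings.

```python
import torch

# Hypothetical per-expert token counts (illustrative values only).
expert_token_nums = torch.tensor([3, 0, 5, 2])

# Old path (group_list_type=0): build cumulative offsets with an explicit cumsum.
cumulative_group_list = torch.cumsum(expert_token_nums, dim=0, dtype=torch.int64)
# -> tensor([ 3,  3,  8, 10])

# New path (group_list_type=1, "count mode"): pass the raw counts directly,
# only casting to int64, as the patched code does.
count_group_list = expert_token_nums.to(torch.int64)
# -> tensor([3, 0, 5, 2])

# Both encodings describe the same row grouping; count mode simply lets the
# kernel handle the accumulation, so the Python-side cumsum can be dropped.
assert torch.equal(torch.cumsum(count_group_list, dim=0), cumulative_group_list)
```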
