Update ReduceSum (keepdims=0) for the attention mask subgraph as well

Honry · Honry · commit 435172cb14fd · 2025-11-24T13:03:53.000+08:00
diff --git a/src/python/py/models/builders/base.py b/src/python/py/models/builders/base.py
@@ -4258,6 +4258,7 @@ def make_attention_mask_reformatting_for_sparse_attn(self):
         #                attention_mask
         #               /              \
         #          ReduceSum          Shape
+        #         (keepdims=0)          |
         #              |                |
         #        Cast to int32        Gather
         #              |                |
@@ -4272,9 +4273,9 @@ def make_attention_mask_reformatting_for_sparse_attn(self):
         # Left path
         reduce_sum_name = f"{attn_mask_basename}/ReduceSum"
         reduce_sum_inputs = ["attention_mask", "/model/constants/INT64/[1]"]
-        self.make_reduce_sum(reduce_sum_name, reduce_sum_inputs, dtype=ir.DataType.INT64, shape=["batch_size", 1])
+        self.make_reduce_sum(reduce_sum_name, reduce_sum_inputs, dtype=ir.DataType.INT64, shape=["batch_size"], keepdims=False)
         cast_1_name = f"{attn_mask_basename}/ReduceSum/Cast"
-        self.make_cast(cast_1_name, f"{reduce_sum_name}/output_0", dtype=ir.DataType.INT32, shape=["batch_size", 1])
+        self.make_cast(cast_1_name, f"{reduce_sum_name}/output_0", dtype=ir.DataType.INT32, shape=["batch_size"])
 
         # Right path
         shape_name = f"{attn_mask_basename}/Shape"