Arm backend: Fix decompose_meandim_pass bug (#11141)

AdrianLundell · web-flow · commit d147a2ce6a55 · 2025-05-27T13:56:56.000+02:00
Previously this pass could insert avg_pool ops with configurations that are
not supported on Ethos-U55. Check this using the AvgPool2dSupported checker
and fall back to a full decomposition using sum for unsupported cases.

Also modifies the test cases to cover this and to use more varied shapes.

Signed-off-by: Adrian Lundell <adrian.lundell@arm.com>
diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
@@ -90,7 +90,9 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(ConvertMmToBmmPass())
         self.add_pass(DecomposeLinearPass())
         self.add_pass(DecomposeLinearVectorNormPass())
-        self.add_pass(DecomposeMeanDimPass())
+        self.add_pass(
+            DecomposeMeanDimPass(exported_program.graph_module, self.tosa_spec)
+        )
         self.add_pass(ConvertFullLikeToFullPass())
         self.add_pass(ConvertToClampPass())
         self.add_pass(ConvertMinMaxPass())
@@ -144,7 +146,9 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(DecomposeBatchNormPass())
         self.add_pass(DecomposeLayerNormPass())
         self.add_pass(DecomposeVarPass())
-        self.add_pass(DecomposeMeanDimPass())
+        self.add_pass(
+            DecomposeMeanDimPass(exported_program.graph_module, self.tosa_spec)
+        )
         self.add_pass(DecomposeNotEqualPass())
         self.add_pass(DecomposeDivPass())
         self.add_pass(DecomposeSoftmaxPass())
@@ -209,7 +213,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
         self.add_pass(ScalarsToAttributePass())
         self.add_pass(DecomposeLayerNormPass())
         self.add_pass(DecomposeVarPass())
-        self.add_pass(DecomposeMeanDimPass())
+        self.add_pass(DecomposeMeanDimPass(graph_module, self.tosa_spec))
         self.add_pass(DecomposeNotEqualPass())
         self.add_pass(DecomposeCosineSimilarityPass())
         self.add_pass(DecomposeDivPass())
diff --git a/backends/arm/_passes/decompose_meandim_pass.py b/backends/arm/_passes/decompose_meandim_pass.py
@@ -8,6 +8,8 @@
 import torch
 from executorch.backends.arm._passes import ArmPass
 from executorch.backends.arm._passes.arm_pass_utils import get_node_arg
+from executorch.backends.arm.operator_support.pool_2d_support import AvgPool2dSupported
+from executorch.exir.backend.utils import WhyNoPartitionReporter
 from executorch.exir.dialects._ops import ops as exir_ops
 
 
@@ -60,6 +62,14 @@ class DecomposeMeanDimPass(ArmPass):
         x = view_copy.default(x, new_shape=(h)) # Squeeze dims since keepdims = False
     """
 
+    def __init__(self, graph_module, tosa_spec):
+        super().__init__()
+        self._graph_module = graph_module
+        self._tosa_spec = tosa_spec
+        self._avg_pool_checker = AvgPool2dSupported(
+            self._tosa_spec, WhyNoPartitionReporter()
+        )
+
     def call_operator(self, op, args, kwargs, meta):
         if op not in (exir_ops.edge.aten.mean.dim, torch.ops.aten.mean.dim):
             return super().call_operator(op, args, kwargs, meta)
@@ -86,13 +96,11 @@ def call_operator(self, op, args, kwargs, meta):
 
             x = super().call_operator(view_op, (x, new_shape), {}, meta, True)
 
-        # Reduce (h,w) by avg pool
-        dims_to_reduce_by_avgpool = [dim for dim in dims_to_reduce if dim >= 2]
-        x = self._reduce_by_average_pool(op, x, dims_to_reduce_by_avgpool, meta)
+        # Reduce (h,w) dims by avg pool if possible
+        x, dims_to_reduce = self._reduce_by_average_pool(op, x, dims_to_reduce, meta)
 
-        # Reduce (n, c) by reduce sum
-        dims_to_reduce_by_sum = [dim for dim in dims_to_reduce if dim < 2]
-        x = self._reduce_by_sum(op, x, dims_to_reduce_by_sum, meta, dtype)
+        # Reduce remaining dims by sum
+        x = self._reduce_by_sum(op, x, dims_to_reduce, meta, dtype)
 
         # Reshape to correct output shape if necessary
         if x.data.size() != output_shape:
@@ -116,22 +124,41 @@ def _reduce_by_sum(self, op, input_node, dims, meta, dtype):
         return super().call_operator(mul_op, (sum, full), {}, meta, True)
 
     def _reduce_by_average_pool(self, op, input_node, dims, meta):
-        if len(dims) == 0:
-            return input_node
+        dims_to_reduce_by_avgpool = [dim for dim in dims if dim >= 2]
+        if len(dims_to_reduce_by_avgpool) == 0:
+            return input_node, dims
+
+        dims_to_reduce_by_sum = [dim for dim in dims if dim < 2]
 
         avgpool_op = get_avgpool(op)
         input_shape = input_node.data.size()
 
         stride = [1, 1]
-        if dims in ([2, 3], [3, 2]):
+        if dims_to_reduce_by_avgpool in ([2, 3], [3, 2]):
             kernel_size = [input_shape[2], input_shape[3]]
-        elif dims == [3]:
+        elif dims_to_reduce_by_avgpool == [3]:
             kernel_size = [1, input_shape[3]]
-        elif dims == [2]:
+        elif dims_to_reduce_by_avgpool == [2]:
             kernel_size = [input_shape[2], 1]
         else:
-            raise RuntimeError(f"Bad dims {dims} for {op} decomposition of mean_dim.")
+            raise RuntimeError(
+                f"Bad dims {dims_to_reduce_by_avgpool} for {op} decomposition of mean_dim."
+            )
 
-        return super().call_operator(
-            avgpool_op, (input_node, kernel_size, stride), {}, meta, True
+        args = (input_node, kernel_size, stride)
+
+        avg_pool_node = self._graph_module.graph.create_node(
+            "call_function", avgpool_op, args
+        )
+        is_supported = self._avg_pool_checker.is_node_tosa_supported(
+            avg_pool_node, self._tosa_spec
         )
+
+        if is_supported:
+            return (
+                super().call_operator(avgpool_op, args, {}, meta, True),
+                dims_to_reduce_by_sum,
+            )
+
+        else:
+            return input_node, dims
diff --git a/backends/arm/operator_support/pool_2d_support.py b/backends/arm/operator_support/pool_2d_support.py
@@ -7,6 +7,7 @@
 
 import torch
 import torch.fx as fx
+from executorch.backends.arm._passes.arm_pass_utils import get_first_fake_tensor
 from executorch.backends.arm.operator_support.tosa_supported_operators import (
     register_tosa_support_check,
     SupportedTOSAOperatorCheck,
@@ -50,7 +51,11 @@ def is_node_tosa_supported(self, node: fx.Node, tosa_spec: TosaSpecification):
             return True
 
         # U55 case, Vela 4.2.0 (25.02 release)
-        shape = cast(torch.Tensor, node.all_input_nodes[0].meta["val"]).shape
+        input_arg = node.args[0]
+        if isinstance(input_arg, torch.fx.Node):
+            input_arg = get_first_fake_tensor(input_arg)
+        shape = input_arg.data.shape  # type: ignore[union-attr]
+
         kernel = cast(tuple[int, int], node.args[1])
         stride = cast(tuple[int, int], node.args[2])
         if len(node.args) > 3:
diff --git a/backends/arm/test/ops/test_mean_dim.py b/backends/arm/test/ops/test_mean_dim.py
@@ -91,52 +91,52 @@ class MeanDim(torch.nn.Module):
             True,
         ),
         "rank_2_keepdim": lambda: (
-            torch.rand(7, 7),
+            torch.rand(7, 3),
             (0, 1),
             True,
         ),
         "rank_3_keepdim": lambda: (
-            torch.rand(7, 7, 7),
+            torch.rand(5, 7, 3),
             (0, 1, 2),
             True,
         ),
         "rand_1_keepdim": lambda: (
-            torch.rand(1, 7, 7, 7),
+            torch.rand(1, 5, 7, 3),
             (1),
             True,
         ),
         "rand_2_keepdim": lambda: (
-            torch.rand(1, 7, 7, 7),
+            torch.rand(1, 5, 7, 3),
             (2),
             True,
         ),
         "rand_3_keepdim": lambda: (
-            torch.rand(1, 7, 7, 7),
+            torch.rand(1, 5, 7, 3),
             (3),
             True,
         ),
         "rand_12_keepdim": lambda: (
-            torch.rand(1, 7, 7, 7),
+            torch.rand(1, 5, 7, 3),
             (1, 2),
             True,
         ),
         "rand_13_keepdim": lambda: (
-            torch.rand(1, 7, 7, 7),
+            torch.rand(1, 5, 7, 3),
             (1, 3),
             True,
         ),
         "rand_23_keepdim": lambda: (
-            torch.rand(1, 7, 7, 7),
+            torch.rand(1, 5, 7, 3),
             (2, 3),
             True,
         ),
         "rand_123_keepdim": lambda: (
-            torch.rand(1, 7, 7, 7),
+            torch.rand(1, 5, 7, 3),
             (1, 2, 3),
             True,
         ),
         "rand_0123_keepdim": lambda: (
-            torch.rand(1, 7, 7, 7),
+            torch.rand(1, 5, 7, 3),
             (0, 1, 2, 3),
             True,
         ),
@@ -146,55 +146,60 @@ class MeanDim(torch.nn.Module):
             False,
         ),
         "rank_2": lambda: (
-            torch.rand(7, 7),
+            torch.rand(5, 7),
             (-2, -1),
             False,
         ),
         "rank_3": lambda: (
-            torch.rand(7, 7, 7),
+            torch.rand(5, 7, 3),
             (-3, -2, -1),
             False,
         ),
         "rand_1": lambda: (
-            torch.rand(1, 7, 7, 7),
+            torch.rand(1, 5, 7, 3),
             (-3),
             False,
         ),
         "rand_2": lambda: (
-            torch.rand(1, 7, 7, 7),
+            torch.rand(1, 5, 7, 3),
             (-2),
             False,
         ),
         "rand_3": lambda: (
-            torch.rand(1, 7, 7, 7),
+            torch.rand(1, 5, 7, 3),
             (-1),
             False,
         ),
         "rand_12": lambda: (
-            torch.rand(1, 7, 7, 7),
+            torch.rand(1, 5, 7, 3),
             (-3, -2),
             False,
         ),
         "rand_13": lambda: (
-            torch.rand(1, 7, 7, 7),
+            torch.rand(1, 5, 7, 3),
             (-3, -1),
             False,
         ),
         "rand_23": lambda: (
-            torch.rand(1, 7, 7, 7),
+            torch.rand(1, 5, 7, 3),
             (-2, -1),
             False,
         ),
         "rand_123": lambda: (
-            torch.rand(1, 7, 7, 7),
+            torch.rand(1, 5, 7, 3),
             (-3, -2, -1),
             False,
         ),
         "rand_0123": lambda: (
-            torch.rand(1, 7, 7, 7),
+            torch.rand(1, 5, 7, 3),
             (-4, -3, -2, -1),
             False,
         ),
+        "u55_avg_pool_not_supported": lambda: (
+            torch.rand(1, 1, 1, 257),
+            (0, 1, 2, 3),
+            True,
+        ),
     }
     torch_op = "torch.ops.aten.mean.dim"
     exir_op = "executorch_exir_dialects_edge__ops_aten_mean_dim"
@@ -241,7 +246,13 @@ def test_mean_dim_u55_BI(test_data):
         [],  # Might be sum, avgpool, or both
         run_on_fvp=True,
         symmetric_io_quantization=True,
-    ).dump_artifact("export")
+    )
+    pipeline.add_stage_after(
+        "export",
+        pipeline.tester.check_not,
+        ["torch.ops.aten.adaptive_avg_pool2d.default"],
+        suffix="avg_pool",
+    )
     pipeline.run()
 
 
diff --git a/backends/arm/test/passes/test_decompose_meandim_pass.py b/backends/arm/test/passes/test_decompose_meandim_pass.py