Commit 577eac9

[QNN EP] MaxPool input rank-3 auto pad bug fix (#24827)
- Previously, padding for rank-3 MaxPool was only computed for auto_pad="NOTSET", using the final output shape.
- Identified a broader issue during auto_pad="VALID" implementation: padding must be derived from the recalculated output shape.
- Added unit tests to cover all use cases of auto_pad.
- Enabled the previously failing unit test in the CPU pool test.

### Description

This PR fixes an issue in the padding calculation logic for rank-3 MaxPool operations when using auto_pad. The bug stemmed from using the final output shape (rank-3) to compute padding, rather than the correct intermediate shape (rank-4) that MaxPool actually operates on. The logic has been updated to use the reshaped rank-4 output for accurate padding computation. Unit tests have been added to validate behavior across all auto_pad modes.

### Motivation and Context

While implementing support for auto_pad="VALID" in MaxPool, we discovered that padding for rank-3 MaxPool was being calculated from the final output shape, which is rank-3. However, MaxPool internally operates on a reshaped rank-4 tensor (via pre- and post-processing reshapes). As a result, the padding logic was misaligned with the actual shape used during pooling, leading to test failures.
1 parent 4f208b3 commit 577eac9
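
To make the shape mismatch concrete, here is a minimal, self-contained sketch (plain C++, no QNN or gsl dependencies) of the rank-4, floor-mode output-shape computation that the padding logic needs. The formula mirrors the `AmendOutputShapeForRank3Pool` helper added in the diff below; the function name `PooledShapeNHWC`, the concrete shapes in `main`, and the assumption that the rank-3 input is expanded to NHWC with H = 1 are illustrative only, not taken from the source.

```cpp
#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

// Floor-mode pooled output shape for an NHWC rank-4 input.
// Mirrors the formula in AmendOutputShapeForRank3Pool from this commit.
static std::vector<uint32_t> PooledShapeNHWC(const std::vector<uint32_t>& input_shape,   // {N, H, W, C}
                                             const std::vector<uint32_t>& kernel_shape,  // {k_h, k_w}
                                             const std::vector<uint32_t>& strides,       // {s_h, s_w}
                                             const std::vector<uint32_t>& pads) {        // {top, left, bottom, right}
  assert(input_shape.size() == 4 && kernel_shape.size() == 2 &&
         strides.size() == 2 && pads.size() == 4);
  const uint32_t padded_H = input_shape[1] + pads[0] + pads[2];
  const uint32_t padded_W = input_shape[2] + pads[1] + pads[3];
  const uint32_t out_H = (padded_H < kernel_shape[0]) ? 0 : (padded_H - kernel_shape[0]) / strides[0] + 1;
  const uint32_t out_W = (padded_W < kernel_shape[1]) ? 0 : (padded_W - kernel_shape[1]) / strides[1] + 1;
  return {input_shape[0], out_H, out_W, input_shape[3]};
}

int main() {
  // Hypothetical rank-3 MaxPool: assume the {N, W, C} input is expanded to
  // {N, 1, W, C} before pooling, so padding must be derived from this rank-4
  // shape, not from the final rank-3 output shape.
  std::vector<uint32_t> rank4_input = {1, 1, 8, 3};  // illustrative values
  std::vector<uint32_t> kernel = {1, 3};
  std::vector<uint32_t> strides = {1, 2};
  std::vector<uint32_t> pads = {0, 0, 0, 0};

  const auto pooled = PooledShapeNHWC(rank4_input, kernel, strides, pads);
  // Prints: 1 1 3 3  -> the rank-4 shape the pad computation should use.
  for (uint32_t d : pooled) std::cout << d << ' ';
  std::cout << '\n';
}
```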

3 files changed (+88, -36 lines)

onnxruntime/core/providers/qnn/builder/opbuilder/pool_op_builder.cc

Lines changed: 42 additions & 34 deletions
```diff
@@ -103,6 +103,36 @@ Status PoolOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
   return Status::OK();
 }
 
+static std::vector<uint32_t> AmendOutputShapeForRank3Pool(
+    gsl::span<const uint32_t> input_shape,   // {N, H, W, C}
+    gsl::span<const uint32_t> kernel_shape,  // {k_h, k_w}
+    gsl::span<const uint32_t> strides,       // {s_h, s_w}
+    gsl::span<const uint32_t> pads) {
+  assert(input_shape.size() == 4 &&
+         kernel_shape.size() == 2 &&
+         strides.size() == 2 &&
+         pads.size() == 4);
+
+  const uint32_t N = input_shape[0];
+  const uint32_t H = input_shape[1];
+  const uint32_t W = input_shape[2];
+  const uint32_t C = input_shape[3];
+
+  // pad the spatial dims
+  uint32_t padded_H = H + pads[0] + pads[2];
+  uint32_t padded_W = W + pads[1] + pads[3];
+
+  // floor-mode on NHWC
+  uint32_t out_H = (padded_H < kernel_shape[0])
+                       ? 0
+                       : (padded_H - kernel_shape[0]) / strides[0] + 1;
+  uint32_t out_W = (padded_W < kernel_shape[1])
+                       ? 0
+                       : (padded_W - kernel_shape[1]) / strides[1] + 1;
+
+  return {N, out_H, out_W, C};
+}
+
 Status PoolOpBuilder::SetCommonPoolParams(const NodeAttrHelper& node_helper,
                                           std::vector<uint32_t>& filter_size,
                                           std::vector<uint32_t>& pad_amount, std::vector<uint32_t>& strides,
@@ -153,6 +183,14 @@ Status PoolOpBuilder::SetCommonPoolParams(const NodeAttrHelper& node_helper,
     dilations = raw_dilations;
   }
 
+  // Max Pool rank 3 input
+  if (output_shape.size() == 3) {
+    // Calculate MaxPool output for rank-4 when input is rank 3
+    output_shape = AmendOutputShapeForRank3Pool(input_shape,
+                                                filter_size,
+                                                strides,
+                                                pad_amount);
+  }
   auto total_pads_0 = (output_shape[1] - 1) * strides[0] + (filter_size[0] - 1) * dilations[0] + 1 - input_shape[1];
   auto total_pads_1 = (output_shape[2] - 1) * strides[1] + (filter_size[1] - 1) * dilations[1] + 1 - input_shape[2];
   if (auto_pad.compare("SAME_LOWER") != 0) {
@@ -189,36 +227,6 @@ void SetPoolParam(const NodeUnit& node_unit,
   qnn_model_wrapper.AddParamWrapper(std::move(qnn_param));
 }
 
-std::vector<uint32_t> ComputePoolOutputShape(
-    const std::vector<uint32_t>& input_shape,   // {N, H, W, C}
-    const std::vector<uint32_t>& kernel_shape,  // {k_h, k_w}
-    const std::vector<uint32_t>& strides,       // {s_h, s_w}
-    const std::vector<uint32_t>& pads) {
-  assert(input_shape.size() == 4 &&
-         kernel_shape.size() == 2 &&
-         strides.size() == 2 &&
-         pads.size() == 4);
-
-  const uint32_t N = input_shape[0];
-  const uint32_t H = input_shape[1];
-  const uint32_t W = input_shape[2];
-  const uint32_t C = input_shape[3];
-
-  // pad the spatial dims
-  uint32_t padded_H = H + pads[0] + pads[2];
-  uint32_t padded_W = W + pads[1] + pads[3];
-
-  // floor-mode on NHWC
-  uint32_t out_H = (padded_H < kernel_shape[0])
-                       ? 0
-                       : (padded_H - kernel_shape[0]) / strides[0] + 1;
-  uint32_t out_W = (padded_W < kernel_shape[1])
-                       ? 0
-                       : (padded_W - kernel_shape[1]) / strides[1] + 1;
-
-  return {N, out_H, out_W, C};
-}
-
 Status PoolOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
                                                   const NodeUnit& node_unit,
                                                   std::vector<std::string>&& input_names,
@@ -316,10 +324,10 @@ Status PoolOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wra
   }
 
   // Calculate MaxPool output for rank-4 when input is rank 3
-  auto pooled_shape = ComputePoolOutputShape(onnx_in_shape,
-                                             filter_size,
-                                             stride,
-                                             pad_amount);
+  auto pooled_shape = AmendOutputShapeForRank3Pool(onnx_in_shape,
+                                                   filter_size,
+                                                   stride,
+                                                   pad_amount);
 
   SetPoolParam(node_unit, QNN_OP_POOL_MAX_2D_PARAM_FILTER_SIZE, std::move(filter_size_dim), std::move(filter_size), param_tensor_names, qnn_model_wrapper);
   SetPoolParam(node_unit, QNN_OP_POOL_MAX_2D_PARAM_PAD_AMOUNT, std::move(pad_amount_dim), std::move(pad_amount), param_tensor_names, qnn_model_wrapper);
```
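
In the auto_pad branch of SetCommonPoolParams above, the total padding per spatial dimension is derived from the (now rank-4) output shape and then split between the leading and trailing edge. The brief sketch below restates that derivation; `TotalPad` and `SplitPad` are hypothetical names, the formula is copied from the context lines above, and the begin/end split follows standard ONNX auto_pad semantics (SAME_UPPER puts the odd extra element at the end), which may differ in detail from the builder's exact code.

```cpp
#include <cstdint>
#include <iostream>
#include <string>
#include <utility>

// Total padding for one spatial dimension, per the formula in SetCommonPoolParams:
//   (out - 1) * stride + (kernel - 1) * dilation + 1 - in
int64_t TotalPad(int64_t out, int64_t stride, int64_t kernel, int64_t dilation, int64_t in) {
  return (out - 1) * stride + (kernel - 1) * dilation + 1 - in;
}

// Split total padding into {begin, end} following ONNX auto_pad semantics
// (SAME_UPPER puts the odd extra element at the end, SAME_LOWER at the beginning).
std::pair<int64_t, int64_t> SplitPad(int64_t total, const std::string& auto_pad) {
  if (total < 0) total = 0;
  const int64_t half = total / 2;
  if (auto_pad == "SAME_LOWER") {
    return {total - half, half};
  }
  return {half, total - half};  // SAME_UPPER / default
}

int main() {
  // Matches the attributes used by the new unit tests: W = 3, kernel = 3,
  // stride = 3, dilation = 1. With out_W = 1 the total pad is
  // (1-1)*3 + (3-1)*1 + 1 - 3 = 0, so every auto_pad mode leaves the width
  // dimension unpadded for this configuration.
  const int64_t total_w = TotalPad(/*out*/ 1, /*stride*/ 3, /*kernel*/ 3, /*dilation*/ 1, /*in*/ 3);
  const auto [begin, end] = SplitPad(total_w, "SAME_UPPER");
  std::cout << "total=" << total_w << " begin=" << begin << " end=" << end << '\n';  // total=0 begin=0 end=0
}
```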

onnxruntime/test/providers/cpu/nn/pool_op_test.cc

Lines changed: 1 addition & 2 deletions
```diff
@@ -230,8 +230,7 @@ TEST(PoolTest, MaxPool1D_case2) {
   test.AddInput<float>("X", x_dims, x_vals);
   test.AddOutput<float>("Y", expected_dims, expected_vals);
 
-  // QNN test failed. Caused by a combination of most recent changes, will fix it
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider, kQnnExecutionProvider});
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
 }
 
 TEST(PoolTest, MaxPool1D_case3) {
```

onnxruntime/test/providers/qnn/pool_op_test.cpp

Lines changed: 45 additions & 0 deletions
```diff
@@ -262,6 +262,51 @@ TEST_F(QnnHTPBackendTests, MaxPool_Rank3_Ceil_HTP_u8) {
                             ExpectedEPNodeAssignment::All);
 }
 
+// 1-D MaxPool HTP test for rank-3 with ceil_mode=1 and auto_pad='VALID'
+TEST_F(QnnHTPBackendTests, MaxPool_Rank3_Ceil_HTP_u8_auto_pad_VALID) {
+  RunQDQPoolOpTest<uint8_t>(
+      "MaxPool",
+      TestInputDef<float>({1, 3, 3}, false, -10.0f, 10.0f),
+      {utils::MakeAttribute("kernel_shape", std::vector<int64_t>{3}),
+       utils::MakeAttribute("strides", std::vector<int64_t>{3}),
+       utils::MakeAttribute("pads", std::vector<int64_t>{0, 0}),
+       utils::MakeAttribute("dilations", std::vector<int64_t>{1}),
+       utils::MakeAttribute("ceil_mode", static_cast<int64_t>(1)),
+       utils::MakeAttribute("storage_order", static_cast<int64_t>(0)),
+       utils::MakeAttribute("auto_pad", "VALID")},
+      ExpectedEPNodeAssignment::All);
+}
+
+// 1-D MaxPool HTP test for rank-3 with ceil_mode=1 and auto_pad='SAME_UPPER'
+TEST_F(QnnHTPBackendTests, MaxPool_Rank3_Ceil_HTP_u8_auto_pad_SAME_UPPER) {
+  RunQDQPoolOpTest<uint8_t>(
+      "MaxPool",
+      TestInputDef<float>({1, 3, 3}, false, -10.0f, 10.0f),
+      {utils::MakeAttribute("kernel_shape", std::vector<int64_t>{3}),
+       utils::MakeAttribute("strides", std::vector<int64_t>{3}),
+       utils::MakeAttribute("pads", std::vector<int64_t>{0, 0}),
+       utils::MakeAttribute("dilations", std::vector<int64_t>{1}),
+       utils::MakeAttribute("ceil_mode", static_cast<int64_t>(1)),
+       utils::MakeAttribute("storage_order", static_cast<int64_t>(0)),
+       utils::MakeAttribute("auto_pad", "SAME_UPPER")},
+      ExpectedEPNodeAssignment::All);
+}
+
+// 1-D MaxPool HTP test for rank-3 with ceil_mode=1 and auto_pad='SAME_LOWER'
+TEST_F(QnnHTPBackendTests, MaxPool_Rank3_Ceil_HTP_u8_auto_pad_SAME_LOWER) {
+  RunQDQPoolOpTest<uint8_t>(
+      "MaxPool",
+      TestInputDef<float>({1, 3, 3}, false, -10.0f, 10.0f),
+      {utils::MakeAttribute("kernel_shape", std::vector<int64_t>{3}),
+       utils::MakeAttribute("strides", std::vector<int64_t>{3}),
+       utils::MakeAttribute("pads", std::vector<int64_t>{0, 0}),
+       utils::MakeAttribute("dilations", std::vector<int64_t>{1}),
+       utils::MakeAttribute("ceil_mode", static_cast<int64_t>(1)),
+       utils::MakeAttribute("storage_order", static_cast<int64_t>(0)),
+       utils::MakeAttribute("auto_pad", "SAME_LOWER")},
+      ExpectedEPNodeAssignment::All);
+}
+
 TEST_F(QnnHTPBackendTests, MaxPool_Ceil_HTP_u8) {
   RunQDQPoolOpTest<uint8_t>("MaxPool",
                             TestInputDef<float>({1, 2, 3, 3}, false, -10.0f, 10.0f),  // Dynamic input with range [-10, 10]
```
