Add code to Bundleio to generate error stats (#12051)

zingo · web-flow · commit 59e0476dc4eb · 2025-07-02T16:20:58.000-07:00
Add a way to get error stats/metrics between actual and reference output. cc @digantdesai @freddan80 @per @oscarandersson8218 Signed-off-by: Zingo Andersen <zingo.andersen@arm.com>
diff --git a/devtools/bundled_program/bundled_program.cpp b/devtools/bundled_program/bundled_program.cpp
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
+ * Copyright 2025 Arm Limited and/or its affiliates.
  *
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
@@ -346,6 +347,116 @@ ET_NODISCARD Error load_bundled_input(
   return Error::Ok;
 }
 
+ET_NODISCARD ErrorStats compute_method_output_error_stats(
+    Method& method,
+    SerializedBundledProgram* bundled_program_ptr,
+    size_t testset_idx) {
+  if (!bundled_program_flatbuffer::BundledProgramBufferHasIdentifier(
+          bundled_program_ptr)) {
+    // The input buffer should be a bundled program.
+    return {Error::InvalidArgument, 0, 0, 0, 0};
+  }
+
+  auto method_test = get_method_test_suite(
+      bundled_program_flatbuffer::GetBundledProgram(bundled_program_ptr),
+      method);
+
+  if (!method_test.ok()) {
+    return {method_test.error(), 0, 0, 0, 0};
+  }
+
+  auto test_cases = method_test.get()->test_cases();
+
+  if (testset_idx >= test_cases->size()) {
+    return {Error::InvalidArgument, 0, 0, 0, 0};
+  }
+  auto bundled_expected_outputs =
+      test_cases->Get(static_cast<flatbuffers::uoffset_t>(testset_idx))
+          ->expected_outputs();
+
+  if (bundled_expected_outputs->size() == 0) {
+    ET_LOG(
+        Error,
+        "No bundled expected outputs, so we can't verify the method outputs.");
+    return {Error::InvalidArgument, 0, 0, 0, 0};
+  }
+
+  // abs_err = (a - b).abs()
+  // relative_err = (a - b).abs() / torch.maximum(torch.tensor(1e-8),
+  // torch.maximum(a.abs(), b.abs()))
+  double sum_abs = 0.0, max_abs = 0.0;
+  double sum_rel = 0.0, max_rel = 0.0;
+  // Make sure divider is bigger then eps=1e-8f to behave better around 0 values
+  const double eps = 1e-8f;
+
+  int64_t total_elems = 0;
+
+  for (size_t output_idx = 0; output_idx < method.outputs_size();
+       output_idx++) {
+    auto bundled_expected_output =
+        bundled_expected_outputs->GetMutableObject(output_idx);
+    auto method_output = method.get_output(output_idx);
+    switch (bundled_expected_output->val_type()) {
+      case bundled_program_flatbuffer::ValueUnion::Tensor: {
+        auto bundled_expected_output_tensor =
+            static_cast<bundled_program_flatbuffer::Tensor*>(
+                bundled_expected_output->mutable_val());
+        const auto method_output_tensor = method_output.toTensor();
+
+#ifdef USE_ATEN_LIB
+        Tensor expected = tensor_like(bundled_expected_output_tensor);
+#else // !USE_ATEN_LIB
+        TensorImpl impl = impl_like(bundled_expected_output_tensor);
+        Tensor expected = Tensor(&impl);
+#endif
+        // sanity check
+        int64_t nelem = expected.numel();
+        if (method_output_tensor.numel() != nelem) {
+          ET_LOG(Error, "Tensor size mismatch");
+          return {Error::InvalidArgument, 0, 0, 0, 0};
+        }
+
+        // we assume float32 here; adapt for other dtypes as needed
+        const float* e_data = expected.data_ptr<float>();
+        const float* a_data = method_output_tensor.data_ptr<float>();
+
+        for (int64_t k = 0; k < nelem; ++k) {
+          double abs_err = std::abs(a_data[k] - e_data[k]);
+          double relative_divider =
+              std::max(std::abs(a_data[k]), std::abs(e_data[k]));
+          relative_divider = std::max(relative_divider, eps);
+          double relative_err = abs_err / relative_divider;
+
+          sum_abs += abs_err;
+          max_abs = std::max(max_abs, abs_err);
+          sum_rel += relative_err;
+          max_rel = std::max(max_rel, relative_err);
+        }
+        total_elems += nelem;
+        break;
+      }
+      default: {
+        ET_LOG(
+            Error,
+            "Data type %hhd not supported",
+            static_cast<uint8_t>(bundled_expected_output->val_type()));
+        return {Error::NotSupported, 0, 0, 0, 0};
+        break; // Never reached
+      }
+    }
+  }
+
+  if (total_elems == 0) {
+    return {Error::Ok, 0, 0, 0, 0};
+  }
+  return {
+      Error::Ok,
+      sum_abs / total_elems,
+      max_abs,
+      sum_rel / total_elems,
+      max_rel};
+}
+
 ET_NODISCARD Error verify_method_outputs(
     Method& method,
     SerializedBundledProgram* bundled_program_ptr,
diff --git a/devtools/bundled_program/bundled_program.h b/devtools/bundled_program/bundled_program.h
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) Meta Platforms, Inc. and affiliates.
  * All rights reserved.
+ * Copyright 2025 Arm Limited and/or its affiliates.
  *
  * This source code is licensed under the BSD-style license found in the
  * LICENSE file in the root directory of this source tree.
@@ -40,6 +41,31 @@ ET_NODISCARD ::executorch::runtime::Error load_bundled_input(
     SerializedBundledProgram* bundled_program_ptr,
     size_t testset_idx);
 
+struct ErrorStats { // Output of compute_method_output_error_stats().
+  ::executorch::runtime::Error status; // Error::Ok when the stats are valid.
+  double mean_abs_error; // Mean of |actual - expected| over all elements.
+  double max_abs_error; // Largest |actual - expected| of any element.
+  double mean_relative_error; // Mean of abs_err / max(|a|, |e|, eps).
+  double max_relative_error; // Largest abs_err / max(|a|, |e|, eps).
+};
+
+/**
+ * Compute error stats between the Method's outputs (assumed to be float32
+ * tensors) and the bundled "expected_outputs" of test set testset_idx.
+ *
+ * @param[in] method The Method whose outputs are compared.
+ * @param[in] bundled_program_ptr The bundled program holding expected outputs.
+ * @param[in] testset_idx Index of the test set to compare against.
+ *
+ * @returns ErrorStats with status Error::Ok when the stats are filled in;
+ * otherwise status holds the error and every stat is zero.
+ */
+
+ET_NODISCARD ErrorStats compute_method_output_error_stats(
+    Method& method,
+    SerializedBundledProgram* bundled_program_ptr,
+    size_t testset_idx);
+
 /**
  * Compare the Method's output with testset_idx-th bundled expected
  * output in method_idx-th Method test.
diff --git a/examples/arm/executor_runner/arm_executor_runner.cpp b/examples/arm/executor_runner/arm_executor_runner.cpp
@@ -85,6 +85,11 @@ using executorch::runtime::Result;
 using executorch::runtime::Span;
 using executorch::runtime::Tag;
 using executorch::runtime::TensorInfo;
+#if defined(ET_BUNDLE_IO)
+using executorch::bundled_program::compute_method_output_error_stats;
+using executorch::bundled_program::ErrorStats;
+using executorch::bundled_program::verify_method_outputs;
+#endif
 #if defined(ET_EVENT_TRACER_ENABLED)
 using executorch::etdump::ETDumpGen;
 using executorch::etdump::ETDumpResult;
@@ -849,8 +854,25 @@ int main(int argc, const char* argv[]) {
 
 #if defined(ET_BUNDLE_IO)
   if (bundle_io) {
+    // Check result
+    ErrorStats stats =
+        compute_method_output_error_stats(*method, model_pte, testset_idx);
+    if (stats.status == Error::Ok) {
+      ET_LOG(Info, "=== Error stats for testset %d ===", testset_idx);
+      ET_LOG(Info, " mean_absolute_error: %f", stats.mean_abs_error);
+      ET_LOG(Info, " max_absolute_error:  %f", stats.max_abs_error);
+      ET_LOG(Info, " mean_relative_error: %f", stats.mean_relative_error);
+      ET_LOG(Info, " max_relative_error:  %f", stats.max_relative_error);
+    } else {
+      ET_LOG(
+          Info,
+          "=== Error calculating stats for testset %d ERROR:%d ===",
+          testset_idx,
+          stats.status);
+    }
+
     // Verify the result.
-    status = executorch::bundled_program::verify_method_outputs(
+    status = verify_method_outputs(
         *method, model_pte, testset_idx, et_rtol, et_atol);
     if (status == Error::Ok) {
       ET_LOG(Info, "Model output match expected BundleIO bpte ref data.");