Skip to content

Commit 59e0476

Browse files
authored
Add code to Bundleio to generate error stats (#12051)
Add a way to get error stats/metrics between actual and reference output. cc @digantdesai @freddan80 @per @oscarandersson8218 Signed-off-by: Zingo Andersen <[email protected]>
1 parent 02454eb commit 59e0476

File tree

3 files changed

+160
-1
lines changed

3 files changed

+160
-1
lines changed

devtools/bundled_program/bundled_program.cpp

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
/*
22
* Copyright (c) Meta Platforms, Inc. and affiliates.
33
* All rights reserved.
4+
* Copyright 2025 Arm Limited and/or its affiliates.
45
*
56
* This source code is licensed under the BSD-style license found in the
67
* LICENSE file in the root directory of this source tree.
@@ -346,6 +347,116 @@ ET_NODISCARD Error load_bundled_input(
346347
return Error::Ok;
347348
}
348349

350+
/**
 * Computes error statistics between the outputs currently held by `method`
 * and the expected outputs bundled for test case `testset_idx`.
 *
 * For every pair of elements (a = actual, b = expected) the following are
 * accumulated over all output tensors:
 *   abs_err      = |a - b|
 *   relative_err = |a - b| / max(1e-8, max(|a|, |b|))
 *
 * Only float32 tensor outputs are supported; any other value type or dtype
 * yields Error::NotSupported instead of reinterpreting the raw bytes.
 *
 * @param[in] method The Method whose outputs are compared.
 * @param[in] bundled_program_ptr Serialized bundled program holding the
 *     expected outputs.
 * @param[in] testset_idx Index of the bundled test case to compare against.
 *
 * @returns ErrorStats whose `status` is Error::Ok when the statistics are
 *     valid. On failure `status` carries the error and all metrics are 0.
 *     If the outputs contain no elements at all, Error::Ok with all-zero
 *     metrics is returned.
 */
ET_NODISCARD ErrorStats compute_method_output_error_stats(
    Method& method,
    SerializedBundledProgram* bundled_program_ptr,
    size_t testset_idx) {
  if (!bundled_program_flatbuffer::BundledProgramBufferHasIdentifier(
          bundled_program_ptr)) {
    // The input buffer should be a bundled program.
    return {Error::InvalidArgument, 0, 0, 0, 0};
  }

  auto method_test = get_method_test_suite(
      bundled_program_flatbuffer::GetBundledProgram(bundled_program_ptr),
      method);

  if (!method_test.ok()) {
    return {method_test.error(), 0, 0, 0, 0};
  }

  auto test_cases = method_test.get()->test_cases();

  // FlatBuffers vector accessors return nullptr when the field is absent.
  if (test_cases == nullptr || testset_idx >= test_cases->size()) {
    return {Error::InvalidArgument, 0, 0, 0, 0};
  }
  auto bundled_expected_outputs =
      test_cases->Get(static_cast<flatbuffers::uoffset_t>(testset_idx))
          ->expected_outputs();

  if (bundled_expected_outputs == nullptr ||
      bundled_expected_outputs->size() == 0) {
    ET_LOG(
        Error,
        "No bundled expected outputs, so we can't verify the method outputs.");
    return {Error::InvalidArgument, 0, 0, 0, 0};
  }

  // The loop below indexes the expected outputs by method output index, so
  // every method output needs a bundled counterpart.
  const size_t num_outputs = method.outputs_size();
  if (static_cast<size_t>(bundled_expected_outputs->size()) < num_outputs) {
    ET_LOG(
        Error,
        "Bundled program has %zu expected outputs but the method has %zu.",
        static_cast<size_t>(bundled_expected_outputs->size()),
        num_outputs);
    return {Error::InvalidArgument, 0, 0, 0, 0};
  }

  double sum_abs = 0.0, max_abs = 0.0;
  double sum_rel = 0.0, max_rel = 0.0;
  // Keep the divider at least eps=1e-8 so the relative error behaves well
  // around 0 values.
  const double eps = 1e-8;

  int64_t total_elems = 0;

  for (size_t output_idx = 0; output_idx < num_outputs; output_idx++) {
    auto bundled_expected_output = bundled_expected_outputs->GetMutableObject(
        static_cast<flatbuffers::uoffset_t>(output_idx));
    auto method_output = method.get_output(output_idx);
    switch (bundled_expected_output->val_type()) {
      case bundled_program_flatbuffer::ValueUnion::Tensor: {
        // toTensor() must only be called on a value that holds a tensor.
        if (!method_output.isTensor()) {
          ET_LOG(Error, "Method output %zu is not a tensor", output_idx);
          return {Error::InvalidArgument, 0, 0, 0, 0};
        }
        auto bundled_expected_output_tensor =
            static_cast<bundled_program_flatbuffer::Tensor*>(
                bundled_expected_output->mutable_val());
        const auto method_output_tensor = method_output.toTensor();

#ifdef USE_ATEN_LIB
        Tensor expected = tensor_like(bundled_expected_output_tensor);
#else // !USE_ATEN_LIB
        TensorImpl impl = impl_like(bundled_expected_output_tensor);
        Tensor expected = Tensor(&impl);
#endif
        // The element loop reads both buffers as float32; refuse any other
        // dtype rather than misinterpreting the underlying bytes.
        if (expected.scalar_type() != ::executorch::aten::ScalarType::Float ||
            method_output_tensor.scalar_type() !=
                ::executorch::aten::ScalarType::Float) {
          ET_LOG(
              Error,
              "Only float32 tensors are supported (output %zu: expected dtype %d, actual dtype %d)",
              output_idx,
              static_cast<int>(expected.scalar_type()),
              static_cast<int>(method_output_tensor.scalar_type()));
          return {Error::NotSupported, 0, 0, 0, 0};
        }

        // sanity check
        const int64_t nelem = expected.numel();
        if (method_output_tensor.numel() != nelem) {
          ET_LOG(Error, "Tensor size mismatch");
          return {Error::InvalidArgument, 0, 0, 0, 0};
        }

        const float* e_data = expected.data_ptr<float>();
        const float* a_data = method_output_tensor.data_ptr<float>();

        for (int64_t k = 0; k < nelem; ++k) {
          // Widen before subtracting so the difference is formed in double
          // precision instead of being rounded to float first.
          const double actual = static_cast<double>(a_data[k]);
          const double reference = static_cast<double>(e_data[k]);

          const double abs_err = std::abs(actual - reference);
          const double relative_divider = std::max(
              std::max(std::abs(actual), std::abs(reference)), eps);
          const double relative_err = abs_err / relative_divider;

          sum_abs += abs_err;
          max_abs = std::max(max_abs, abs_err);
          sum_rel += relative_err;
          max_rel = std::max(max_rel, relative_err);
        }
        total_elems += nelem;
        break;
      }
      default: {
        ET_LOG(
            Error,
            "Data type %hhd not supported",
            static_cast<uint8_t>(bundled_expected_output->val_type()));
        return {Error::NotSupported, 0, 0, 0, 0};
      }
    }
  }

  // Nothing was compared; avoid dividing by zero below.
  if (total_elems == 0) {
    return {Error::Ok, 0, 0, 0, 0};
  }

  const double count = static_cast<double>(total_elems);
  return {Error::Ok, sum_abs / count, max_abs, sum_rel / count, max_rel};
}
459+
349460
ET_NODISCARD Error verify_method_outputs(
350461
Method& method,
351462
SerializedBundledProgram* bundled_program_ptr,

devtools/bundled_program/bundled_program.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
/*
22
* Copyright (c) Meta Platforms, Inc. and affiliates.
33
* All rights reserved.
4+
* Copyright 2025 Arm Limited and/or its affiliates.
45
*
56
* This source code is licensed under the BSD-style license found in the
67
* LICENSE file in the root directory of this source tree.
@@ -40,6 +41,31 @@ ET_NODISCARD ::executorch::runtime::Error load_bundled_input(
4041
SerializedBundledProgram* bundled_program_ptr,
4142
size_t testset_idx);
4243

44+
struct ErrorStats {
45+
::executorch::runtime::Error status;
46+
double mean_abs_error;
47+
double max_abs_error;
48+
double mean_relative_error;
49+
double max_relative_error;
50+
};
51+
52+
/**
53+
* Compute error stats for method.outputs() vs. the bundled "expected_outputs"
54+
* for testset_idx.
55+
*
56+
* @param[in] method The Method to extract outputs from.
57+
* @param[in] bundled_program_ptr The bundled program contains expected output.
58+
* @param[in] testset_idx The index of expected output needs to be compared.
59+
*
60+
* @returns Return ErrorStats with status set to Error::Ok if stats are filled
61+
* in.
62+
*/
63+
64+
ET_NODISCARD ErrorStats compute_method_output_error_stats(
65+
Method& method,
66+
SerializedBundledProgram* bundled_program_ptr,
67+
size_t testset_idx);
68+
4369
/**
4470
* Compare the Method's output with testset_idx-th bundled expected
4571
* output in method_idx-th Method test.

examples/arm/executor_runner/arm_executor_runner.cpp

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,11 @@ using executorch::runtime::Result;
8585
using executorch::runtime::Span;
8686
using executorch::runtime::Tag;
8787
using executorch::runtime::TensorInfo;
88+
#if defined(ET_BUNDLE_IO)
89+
using executorch::bundled_program::compute_method_output_error_stats;
90+
using executorch::bundled_program::ErrorStats;
91+
using executorch::bundled_program::verify_method_outputs;
92+
#endif
8893
#if defined(ET_EVENT_TRACER_ENABLED)
8994
using executorch::etdump::ETDumpGen;
9095
using executorch::etdump::ETDumpResult;
@@ -849,8 +854,25 @@ int main(int argc, const char* argv[]) {
849854

850855
#if defined(ET_BUNDLE_IO)
851856
if (bundle_io) {
857+
// Check result
858+
ErrorStats stats =
859+
compute_method_output_error_stats(*method, model_pte, testset_idx);
860+
if (stats.status == Error::Ok) {
861+
ET_LOG(Info, "=== Error stats for testset %d ===", testset_idx);
862+
ET_LOG(Info, " mean_absolute_error: %f", stats.mean_abs_error);
863+
ET_LOG(Info, " max_absolute_error: %f", stats.max_abs_error);
864+
ET_LOG(Info, " mean_relative_error: %f", stats.mean_relative_error);
865+
ET_LOG(Info, " max_relative_error: %f", stats.max_relative_error);
866+
} else {
867+
ET_LOG(
868+
Info,
869+
"=== Error calculating stats for testset %d ERROR:%d ===",
870+
testset_idx,
871+
stats.status);
872+
}
873+
852874
// Verify the result.
853-
status = executorch::bundled_program::verify_method_outputs(
875+
status = verify_method_outputs(
854876
*method, model_pte, testset_idx, et_rtol, et_atol);
855877
if (status == Error::Ok) {
856878
ET_LOG(Info, "Model output match expected BundleIO bpte ref data.");

0 commit comments

Comments
 (0)