26 changes: 26 additions & 0 deletions onnxruntime/core/graph/graph.cc
@@ -2678,6 +2678,27 @@ class InferenceContextImpl : public ONNX_NAMESPACE::InferenceContext {
// only return data if it's for a constant initializer. checks for outer scope initializers
// if this is a subgraph and the name isn't found locally.
const TensorProto* initializer = graph_.GetConstantInitializer(def->Name(), true);
if (initializer != nullptr) {
// Check if this is in-memory external data (data stored in OrtValue)
// ONNX shape inference cannot handle external data, so we need to materialize it
if (utils::HasExternalDataInMemory(*initializer)) {
// Try to get the OrtValue for this initializer
OrtValue ort_value;
if (graph_.GetOrtValueInitializer(def->Name(), ort_value, true)) {
// Create a temporary TensorProto with the actual data from the OrtValue
// This allows ONNX shape inference to access the data
const Tensor& tensor = ort_value.Get<Tensor>();
auto temp_tensor_proto = utils::TensorToTensorProto(tensor, initializer->name(), /*use_tensor_buffer=*/false);
// Store the temporary proto so it outlives this call and its address stays stable
temp_tensor_protos_.push_back(std::make_unique<ONNX_NAMESPACE::TensorProto>(std::move(temp_tensor_proto)));
return temp_tensor_protos_.back().get();
} else {
// If we can't get the OrtValue, it is a bug
ORT_THROW("Initializer ", def->Name(),
" has in-memory external data but cannot get OrtValue during shape inference");
}
}
}
return initializer;
}

@@ -2717,6 +2738,11 @@ class InferenceContextImpl : public ONNX_NAMESPACE::InferenceContext {
std::vector<std::unique_ptr<GraphInferencerImpl>> graph_inferencers_;
const Graph& graph_;
const Graph::ResolveOptions& options_;
// Temporary TensorProtos created for in-memory external data during shape inference
// These need to outlive the shape inference call, so we store them here
// Inference is per node and the instance of this context is on the stack,
// so this is safe.
mutable InlinedVector<std::unique_ptr<ONNX_NAMESPACE::TensorProto>> temp_tensor_protos_;
};

Status Graph::InferAndVerifySubgraphTypes(const Node& node, Graph& subgraph,
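For context, the mechanism the new getInputData() branch implements, condensed into a sketch that uses only the calls visible in this diff (the 127-byte kSmallTensorExternalDataThreshold figure comes from the tests below; the scaffolding around the calls is illustrative, not part of the PR):

// An initializer above kSmallTensorExternalDataThreshold (127 bytes) is backed
// by an OrtValue; its TensorProto carries only an in-memory external-data
// reference, which ONNX shape inference cannot parse.
const TensorProto* proto = graph_.GetConstantInitializer(name, /*check_outer_scope=*/true);
if (proto != nullptr && utils::HasExternalDataInMemory(*proto)) {
  OrtValue v;
  // Fetch the OrtValue that owns the actual bytes...
  ORT_ENFORCE(graph_.GetOrtValueInitializer(name, v, /*check_outer_scope=*/true));
  // ...and materialize a self-contained proto that embeds the raw data, which
  // ONNX shape inference can then read like any ordinary initializer.
  auto full = utils::TensorToTensorProto(v.Get<Tensor>(), name, /*use_tensor_buffer=*/false);
}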
258 changes: 258 additions & 0 deletions onnxruntime/test/ir/graph_test.cc
@@ -2,13 +2,17 @@
// Licensed under the MIT License.

#include <iostream>
#include <fstream>
#include "core/common/inlined_containers.h"
#include "core/common/span_utils.h"
#include "core/framework/tensorprotoutils.h"
#include "core/graph/graph_viewer.h"
#include "core/graph/model.h"
#include "core/graph/op.h"
#include "core/session/inference_session.h"
#include "core/session/environment.h"
#include "test/providers/provider_test_utils.h"
#include "test/test_environment.h"
#include "gtest/gtest.h"
#include "gmock/gmock.h"
#include "onnx/defs/function.h"
@@ -2573,5 +2577,259 @@ TEST_F(GraphTest, GraphConstruction_MemoryEfficientTopologicalSort_SubgraphGener

#endif

// Test for shape inference with in-memory external data (issue #26261)
// This tests the fix for a regression where Constant nodes with large tensors (>127 bytes)
// stored as in-memory external data would cause shape inference to fail
TEST_F(GraphTest, ShapeInferenceWithInMemoryExternalData) {
// Create a model with a Constant node that produces a tensor larger than kSmallTensorExternalDataThreshold (127 bytes)
// This will trigger the in-memory externalization path
ModelProto model_proto;
model_proto.set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
auto* opset = model_proto.add_opset_import();
opset->set_version(17);

auto* graph_proto = model_proto.mutable_graph();
graph_proto->set_name("test_graph");

// Create a Constant node with a tensor of 16 INT64 values (128 bytes, just over the 127 threshold)
auto* constant_node = graph_proto->add_node();
constant_node->set_op_type("Constant");
constant_node->set_name("const_node");
constant_node->add_output("const_output");

// Add the value attribute with a tensor
auto* attr = constant_node->add_attribute();
attr->set_name("value");
attr->set_type(ONNX_NAMESPACE::AttributeProto_AttributeType_TENSOR);
auto* tensor = attr->mutable_t();
tensor->set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
tensor->add_dims(16); // 16 elements * 8 bytes = 128 bytes
// Each value is 1, so each split output will have size 1 (16 outputs total)
for (int64_t i = 0; i < 16; ++i) {
tensor->add_int64_data(1);
}

// Create a Split node that uses the constant as input
// Split requires constant input for the 'split' parameter, which triggers shape inference
auto* split_node = graph_proto->add_node();
split_node->set_op_type("Split");
split_node->set_name("split_node");
split_node->add_input("input_data");
split_node->add_input("const_output"); // Use constant as split sizes
for (int i = 0; i < 16; ++i) {
split_node->add_output("split_output_" + std::to_string(i));
}

// Add axis attribute
auto* axis_attr = split_node->add_attribute();
axis_attr->set_name("axis");
axis_attr->set_type(ONNX_NAMESPACE::AttributeProto_AttributeType_INT);
axis_attr->set_i(0);

// Add graph input
auto* input = graph_proto->add_input();
input->set_name("input_data");
auto* input_type = input->mutable_type()->mutable_tensor_type();
input_type->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
input_type->mutable_shape()->add_dim()->set_dim_value(16);
input_type->mutable_shape()->add_dim()->set_dim_value(10);

// Add graph outputs
for (int i = 0; i < 16; ++i) {
auto* output = graph_proto->add_output();
output->set_name("split_output_" + std::to_string(i));
}

// Load the model - this should succeed with the fix
// Before the fix, this would fail with:
// "Cannot parse data from external tensors. Please load external data into raw data for tensor"
std::shared_ptr<Model> model;
ASSERT_STATUS_OK(Model::Load(std::move(model_proto), model, nullptr, *logger_));

// Verify the graph was properly constructed
Graph& graph = model->MainGraph();
ASSERT_STATUS_OK(graph.Resolve());

// Verify the constant node was converted to an initializer
const ONNX_NAMESPACE::TensorProto* initializer = nullptr;
ASSERT_TRUE(graph.GetInitializedTensor("const_output", initializer));
ASSERT_NE(initializer, nullptr);

// Verify the Split node can access the constant data during shape inference
const Node* split_node_ptr = nullptr;
for (const auto& node : graph.Nodes()) {
if (node.Name() == "split_node") {
split_node_ptr = &node;
break;
}
}
ASSERT_NE(split_node_ptr, nullptr);

// Verify outputs are properly shaped
ASSERT_EQ(split_node_ptr->OutputDefs().size(), 16u);
}
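
// Note (reviewer sketch, not part of this PR): the assertion above only checks
// the output count. The inferred dimensions could also be verified, assuming
// shape inference populated each output NodeArg, e.g.:
//
//   for (const auto* out_def : split_node_ptr->OutputDefs()) {
//     const auto* shape = out_def->Shape();
//     ASSERT_NE(shape, nullptr);
//     EXPECT_EQ(shape->dim(0).dim_value(), 1);   // each split has size 1
//     EXPECT_EQ(shape->dim(1).dim_value(), 10);  // second dim is unchanged
//   }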

// Test for shape inference with in-memory external data using InferenceSession
// This test more accurately reproduces the issue by going through full session initialization,
// which includes the graph optimizations that trigger the in-memory externalization
TEST_F(GraphTest, ShapeInferenceWithInMemoryExternalDataViaSession) {
// Create the same model as above
ModelProto model_proto;
model_proto.set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
auto* opset = model_proto.add_opset_import();
opset->set_version(17);

auto* graph_proto = model_proto.mutable_graph();
graph_proto->set_name("test_graph");

// Create a Constant node with a tensor of 16 INT64 values (128 bytes)
auto* constant_node = graph_proto->add_node();
constant_node->set_op_type("Constant");
constant_node->set_name("const_node");
constant_node->add_output("const_output");

auto* attr = constant_node->add_attribute();
attr->set_name("value");
attr->set_type(ONNX_NAMESPACE::AttributeProto_AttributeType_TENSOR);
auto* tensor = attr->mutable_t();
tensor->set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
tensor->add_dims(16);
for (int64_t i = 0; i < 16; ++i) {
tensor->add_int64_data(1);
}

// Create a Split node
auto* split_node = graph_proto->add_node();
split_node->set_op_type("Split");
split_node->set_name("split_node");
split_node->add_input("input_data");
split_node->add_input("const_output");
for (int i = 0; i < 16; ++i) {
split_node->add_output("split_output_" + std::to_string(i));
}

auto* axis_attr = split_node->add_attribute();
axis_attr->set_name("axis");
axis_attr->set_type(ONNX_NAMESPACE::AttributeProto_AttributeType_INT);
axis_attr->set_i(0);

// Add graph input
auto* input = graph_proto->add_input();
input->set_name("input_data");
auto* input_type = input->mutable_type()->mutable_tensor_type();
input_type->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
input_type->mutable_shape()->add_dim()->set_dim_value(16);
input_type->mutable_shape()->add_dim()->set_dim_value(10);

// Add graph outputs
for (int i = 0; i < 16; ++i) {
auto* output = graph_proto->add_output();
output->set_name("split_output_" + std::to_string(i));
}

// Save to a temporary file
const std::string model_path = "test_in_memory_external_data.onnx";
{
std::ofstream file(model_path, std::ios::binary);
ASSERT_TRUE(file.is_open());
ASSERT_TRUE(model_proto.SerializeToOstream(&file));
}

// Use the default graph optimization level, which exercises the in-memory
// externalization path and, without the fix, reproduces the bug
SessionOptions so;
so.graph_optimization_level = TransformerLevel::Default; // This triggers the issue
so.session_logid = "GraphTest.ShapeInferenceWithInMemoryExternalDataViaSession";

InferenceSession session_object{so, GetEnvironment()};

// This should succeed with the fix, fail without it
ASSERT_STATUS_OK(session_object.Load(model_path));
ASSERT_STATUS_OK(session_object.Initialize());

// Clean up
std::remove(model_path.c_str());
}
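
// Note (reviewer sketch, not part of this PR): ASSERT_* returns from the test
// on failure, so the std::remove() cleanup above is skipped when an assertion
// fires, leaving the temp file behind. An RAII guard declared right after the
// file is written would make cleanup unconditional (names are illustrative):
//
//   struct ScopedFileDeleter {
//     std::string path;
//     ~ScopedFileDeleter() { std::remove(path.c_str()); }
//   };
//   ScopedFileDeleter cleanup{model_path};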

// Test that explicitly triggers the in-memory externalization and then shape inference
// This test directly reproduces the bug scenario
TEST_F(GraphTest, ShapeInferenceAfterInitializerExternalization) {
// Create a model with a Split node that depends on a constant initializer
ModelProto model_proto;
model_proto.set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
auto* opset = model_proto.add_opset_import();
opset->set_version(17);

auto* graph_proto = model_proto.mutable_graph();
graph_proto->set_name("test_graph");

// Create initializer directly (not as Constant node) with 128 bytes
auto* initializer = graph_proto->add_initializer();
initializer->set_name("split_sizes");
initializer->set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
initializer->add_dims(16); // 16 * 8 = 128 bytes
for (int64_t i = 0; i < 16; ++i) {
initializer->add_int64_data(1);
}

// Create a Split node that uses this initializer
auto* split_node = graph_proto->add_node();
split_node->set_op_type("Split");
split_node->set_name("split_node");
split_node->add_input("input_data");
split_node->add_input("split_sizes"); // Uses the large initializer
for (int i = 0; i < 16; ++i) {
split_node->add_output("split_output_" + std::to_string(i));
}

auto* axis_attr = split_node->add_attribute();
axis_attr->set_name("axis");
axis_attr->set_type(ONNX_NAMESPACE::AttributeProto_AttributeType_INT);
axis_attr->set_i(0);

// Add graph input
auto* input = graph_proto->add_input();
input->set_name("input_data");
auto* input_type = input->mutable_type()->mutable_tensor_type();
input_type->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
input_type->mutable_shape()->add_dim()->set_dim_value(16);
input_type->mutable_shape()->add_dim()->set_dim_value(10);

// Add graph outputs
for (int i = 0; i < 16; ++i) {
auto* output = graph_proto->add_output();
output->set_name("split_output_" + std::to_string(i));
}

// Load model
std::shared_ptr<Model> model;
ASSERT_STATUS_OK(Model::Load(std::move(model_proto), model, nullptr, *logger_));

Graph& graph = model->MainGraph();
// First resolve should succeed
ASSERT_STATUS_OK(graph.Resolve());

// Now trigger the in-memory externalization
// This converts initializers > 127 bytes to OrtValues with external data references
Status convert_status = graph.ConvertInitializersIntoOrtValues();
ASSERT_TRUE(convert_status.IsOK()) << "ConvertInitializersIntoOrtValues failed: " << convert_status.ErrorMessage();

// Check if the initializer was actually externalized
const ONNX_NAMESPACE::TensorProto* initializer_after = nullptr;
ASSERT_TRUE(graph.GetInitializedTensor("split_sizes", initializer_after));
ASSERT_NE(initializer_after, nullptr);
// Verify the initializer now carries an in-memory external-data reference
ASSERT_TRUE(utils::HasExternalDataInMemory(*initializer_after))
<< "Initializer was not externalized to in-memory external data";

// Mark the graph as needing resolve to force shape inference to run again
graph.SetGraphResolveNeeded();

// Resolve again - this should trigger shape inference with the externalized initializer
// Without the fix, this will fail with "Cannot parse data from external tensors"
// With the fix, getInputData() materializes the external data for shape inference
Status second_resolve = graph.Resolve();
ASSERT_TRUE(second_resolve.IsOK()) << "Second resolve failed: " << second_resolve.ErrorMessage();
}

} // namespace test
} // namespace onnxruntime