26 changes: 26 additions & 0 deletions onnxruntime/core/graph/graph.cc
@@ -2678,6 +2678,27 @@ class InferenceContextImpl : public ONNX_NAMESPACE::InferenceContext {
// only return data if it's for a constant initializer. checks for outer scope initializers
// if this is a subgraph and the name isn't found locally.
const TensorProto* initializer = graph_.GetConstantInitializer(def->Name(), true);
if (initializer != nullptr) {
// Check if this is in-memory external data (data stored in OrtValue)
// ONNX shape inference cannot handle external data, so we need to materialize it
if (utils::HasExternalDataInMemory(*initializer)) {
// Try to get the OrtValue for this initializer
OrtValue ort_value;
if (graph_.GetOrtValueInitializer(def->Name(), ort_value, true)) {
// Create a temporary TensorProto with the actual data from the OrtValue
// This allows ONNX shape inference to access the data
const Tensor& tensor = ort_value.Get<Tensor>();
auto temp_tensor_proto = utils::TensorToTensorProto(tensor, initializer->name(), /*use_tensor_buffer=*/false);
// Store the temporary proto so it outlives this call and its address stays stable
temp_tensor_protos_.push_back(std::make_unique<ONNX_NAMESPACE::TensorProto>(std::move(temp_tensor_proto)));
return temp_tensor_protos_.back().get();
} else {
// If we can't get the OrtValue, it is a bug
ORT_THROW("Initializer ", def->Name(),
" has in-memory external data but cannot get OrtValue during shape inference");
}
}
}
return initializer;
}

@@ -2717,6 +2738,11 @@ class InferenceContextImpl : public ONNX_NAMESPACE::InferenceContext {
std::vector<std::unique_ptr<GraphInferencerImpl>> graph_inferencers_;
const Graph& graph_;
const Graph::ResolveOptions& options_;
// Temporary TensorProtos created for in-memory external data during shape inference
// These need to outlive the shape inference call, so we store them here
// Inference is per node and the instance of this context is on the stack,
// so this is safe.
mutable InlinedVector<std::unique_ptr<ONNX_NAMESPACE::TensorProto>> temp_tensor_protos_;
};

Status Graph::InferAndVerifySubgraphTypes(const Node& node, Graph& subgraph,
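For context, the mechanism the new getInputData() branch implements, condensed into a sketch that uses only the calls visible in this diff (the 127-byte kSmallTensorExternalDataThreshold figure comes from the tests below; the scaffolding around the calls is illustrative, not part of the PR):

// An initializer above kSmallTensorExternalDataThreshold (127 bytes) is backed
// by an OrtValue; its TensorProto carries only an in-memory external-data
// reference, which ONNX shape inference cannot parse.
const TensorProto* proto = graph_.GetConstantInitializer(name, /*check_outer_scope=*/true);
if (proto != nullptr && utils::HasExternalDataInMemory(*proto)) {
  OrtValue v;
  // Fetch the OrtValue that owns the actual bytes...
  ORT_ENFORCE(graph_.GetOrtValueInitializer(name, v, /*check_outer_scope=*/true));
  // ...and materialize a self-contained proto that embeds the raw data, which
  // ONNX shape inference can then read like any ordinary initializer.
  auto full = utils::TensorToTensorProto(v.Get<Tensor>(), name, /*use_tensor_buffer=*/false);
}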
258 changes: 258 additions & 0 deletions onnxruntime/test/ir/graph_test.cc
@@ -2,13 +2,17 @@
// Licensed under the MIT License.

#include <iostream>
#include <fstream>
#include "core/common/inlined_containers.h"
#include "core/common/span_utils.h"
#include "core/framework/tensorprotoutils.h"
#include "core/graph/graph_viewer.h"
#include "core/graph/model.h"
#include "core/graph/op.h"
#include "core/session/inference_session.h"
#include "core/session/environment.h"
#include "test/providers/provider_test_utils.h"
#include "test/test_environment.h"
#include "gtest/gtest.h"
#include "gmock/gmock.h"
#include "onnx/defs/function.h"
@@ -2573,5 +2577,259 @@ TEST_F(GraphTest, GraphConstruction_MemoryEfficientTopologicalSort_SubgraphGener

#endif

// Test for shape inference with in-memory external data (issue #26261)
// This tests the fix for a regression where Constant nodes with large tensors (>127 bytes)
// stored as in-memory external data would cause shape inference to fail
TEST_F(GraphTest, ShapeInferenceWithInMemoryExternalData) {
// Create a model with a Constant node that produces a tensor larger than kSmallTensorExternalDataThreshold (127 bytes)
// This will trigger the in-memory externalization path
ModelProto model_proto;
model_proto.set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
auto* opset = model_proto.add_opset_import();
opset->set_version(17);

auto* graph_proto = model_proto.mutable_graph();
graph_proto->set_name("test_graph");

// Create a Constant node with a tensor of 16 INT64 values (128 bytes, just over the 127 threshold)
auto* constant_node = graph_proto->add_node();
constant_node->set_op_type("Constant");
constant_node->set_name("const_node");
constant_node->add_output("const_output");

// Add the value attribute with a tensor
auto* attr = constant_node->add_attribute();
attr->set_name("value");
attr->set_type(ONNX_NAMESPACE::AttributeProto_AttributeType_TENSOR);
auto* tensor = attr->mutable_t();
tensor->set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
tensor->add_dims(16); // 16 elements * 8 bytes = 128 bytes
// Each value is 1, so each split output will have size 1 (16 outputs total)
for (int64_t i = 0; i < 16; ++i) {
tensor->add_int64_data(1);
}

// Create a Split node that uses the constant as input
// Split requires constant input for the 'split' parameter, which triggers shape inference
auto* split_node = graph_proto->add_node();
split_node->set_op_type("Split");
split_node->set_name("split_node");
split_node->add_input("input_data");
split_node->add_input("const_output"); // Use constant as split sizes
for (int i = 0; i < 16; ++i) {
split_node->add_output("split_output_" + std::to_string(i));
}

// Add axis attribute
auto* axis_attr = split_node->add_attribute();
axis_attr->set_name("axis");
axis_attr->set_type(ONNX_NAMESPACE::AttributeProto_AttributeType_INT);
axis_attr->set_i(0);

// Add graph input
auto* input = graph_proto->add_input();
input->set_name("input_data");
auto* input_type = input->mutable_type()->mutable_tensor_type();
input_type->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
input_type->mutable_shape()->add_dim()->set_dim_value(16);
input_type->mutable_shape()->add_dim()->set_dim_value(10);

// Add graph outputs
for (int i = 0; i < 16; ++i) {
auto* output = graph_proto->add_output();
output->set_name("split_output_" + std::to_string(i));
}

// Load the model - this should succeed with the fix
// Before the fix, this would fail with:
// "Cannot parse data from external tensors. Please load external data into raw data for tensor"
std::shared_ptr<Model> model;
ASSERT_STATUS_OK(Model::Load(std::move(model_proto), model, nullptr, *logger_));

// Verify the graph was properly constructed
Graph& graph = model->MainGraph();
ASSERT_STATUS_OK(graph.Resolve());

// Verify the constant node was converted to an initializer
const ONNX_NAMESPACE::TensorProto* initializer = nullptr;
ASSERT_TRUE(graph.GetInitializedTensor("const_output", initializer));
ASSERT_NE(initializer, nullptr);

// Verify the Split node can access the constant data during shape inference
const Node* split_node_ptr = nullptr;
for (const auto& node : graph.Nodes()) {
if (node.Name() == "split_node") {
split_node_ptr = &node;
break;
}
}
ASSERT_NE(split_node_ptr, nullptr);

// Verify outputs are properly shaped
ASSERT_EQ(split_node_ptr->OutputDefs().size(), 16u);
}
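
// Note (reviewer sketch, not part of this PR): the assertion above only checks
// the output count. The inferred dimensions could also be verified, assuming
// shape inference populated each output NodeArg, e.g.:
//
//   for (const auto* out_def : split_node_ptr->OutputDefs()) {
//     const auto* shape = out_def->Shape();
//     ASSERT_NE(shape, nullptr);
//     EXPECT_EQ(shape->dim(0).dim_value(), 1);   // each split has size 1
//     EXPECT_EQ(shape->dim(1).dim_value(), 10);  // second dim is unchanged
//   }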

// Test for shape inference with in-memory external data using InferenceSession
// This test more accurately reproduces the issue by going through full session initialization,
// which includes the graph optimizations that trigger the in-memory externalization
TEST_F(GraphTest, ShapeInferenceWithInMemoryExternalDataViaSession) {
// Create the same model as above
ModelProto model_proto;
model_proto.set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
auto* opset = model_proto.add_opset_import();
opset->set_version(17);

auto* graph_proto = model_proto.mutable_graph();
graph_proto->set_name("test_graph");

// Create a Constant node with a tensor of 16 INT64 values (128 bytes)
auto* constant_node = graph_proto->add_node();
constant_node->set_op_type("Constant");
constant_node->set_name("const_node");
constant_node->add_output("const_output");

auto* attr = constant_node->add_attribute();
attr->set_name("value");
attr->set_type(ONNX_NAMESPACE::AttributeProto_AttributeType_TENSOR);
auto* tensor = attr->mutable_t();
tensor->set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
tensor->add_dims(16);
for (int64_t i = 0; i < 16; ++i) {
tensor->add_int64_data(1);
}

// Create a Split node
auto* split_node = graph_proto->add_node();
split_node->set_op_type("Split");
split_node->set_name("split_node");
split_node->add_input("input_data");
split_node->add_input("const_output");
for (int i = 0; i < 16; ++i) {
split_node->add_output("split_output_" + std::to_string(i));
}

auto* axis_attr = split_node->add_attribute();
axis_attr->set_name("axis");
axis_attr->set_type(ONNX_NAMESPACE::AttributeProto_AttributeType_INT);
axis_attr->set_i(0);

// Add graph input
auto* input = graph_proto->add_input();
input->set_name("input_data");
auto* input_type = input->mutable_type()->mutable_tensor_type();
input_type->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
input_type->mutable_shape()->add_dim()->set_dim_value(16);
input_type->mutable_shape()->add_dim()->set_dim_value(10);

// Add graph outputs
for (int i = 0; i < 16; ++i) {
auto* output = graph_proto->add_output();
output->set_name("split_output_" + std::to_string(i));
}

// Save to a temporary file
const std::string model_path = "test_in_memory_external_data.onnx";
{
std::ofstream file(model_path, std::ios::binary);
ASSERT_TRUE(file.is_open());
ASSERT_TRUE(model_proto.SerializeToOstream(&file));
}

// Use the default graph optimization level, which exercises the in-memory
// externalization path and, without the fix, reproduces the bug
SessionOptions so;
so.graph_optimization_level = TransformerLevel::Default; // This triggers the issue
so.session_logid = "GraphTest.ShapeInferenceWithInMemoryExternalDataViaSession";

InferenceSession session_object{so, GetEnvironment()};

// This should succeed with the fix, fail without it
ASSERT_STATUS_OK(session_object.Load(model_path));
ASSERT_STATUS_OK(session_object.Initialize());

// Clean up
std::remove(model_path.c_str());
}
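
// Note (reviewer sketch, not part of this PR): ASSERT_* returns from the test
// on failure, so the std::remove() cleanup above is skipped when an assertion
// fires, leaving the temp file behind. An RAII guard declared right after the
// file is written would make cleanup unconditional (names are illustrative):
//
//   struct ScopedFileDeleter {
//     std::string path;
//     ~ScopedFileDeleter() { std::remove(path.c_str()); }
//   };
//   ScopedFileDeleter cleanup{model_path};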

// Test that explicitly triggers the in-memory externalization and then shape inference
// This test directly reproduces the bug scenario
TEST_F(GraphTest, ShapeInferenceAfterInitializerExternalization) {
// Create a model with a Split node that depends on a constant initializer
ModelProto model_proto;
model_proto.set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
auto* opset = model_proto.add_opset_import();
opset->set_version(17);

auto* graph_proto = model_proto.mutable_graph();
graph_proto->set_name("test_graph");

// Create initializer directly (not as Constant node) with 128 bytes
auto* initializer = graph_proto->add_initializer();
initializer->set_name("split_sizes");
initializer->set_data_type(ONNX_NAMESPACE::TensorProto_DataType_INT64);
initializer->add_dims(16); // 16 * 8 = 128 bytes
for (int64_t i = 0; i < 16; ++i) {
initializer->add_int64_data(1);
}

// Create a Split node that uses this initializer
auto* split_node = graph_proto->add_node();
split_node->set_op_type("Split");
split_node->set_name("split_node");
split_node->add_input("input_data");
split_node->add_input("split_sizes"); // Uses the large initializer
for (int i = 0; i < 16; ++i) {
split_node->add_output("split_output_" + std::to_string(i));
}

auto* axis_attr = split_node->add_attribute();
axis_attr->set_name("axis");
axis_attr->set_type(ONNX_NAMESPACE::AttributeProto_AttributeType_INT);
axis_attr->set_i(0);

// Add graph input
auto* input = graph_proto->add_input();
input->set_name("input_data");
auto* input_type = input->mutable_type()->mutable_tensor_type();
input_type->set_elem_type(ONNX_NAMESPACE::TensorProto_DataType_FLOAT);
input_type->mutable_shape()->add_dim()->set_dim_value(16);
input_type->mutable_shape()->add_dim()->set_dim_value(10);

// Add graph outputs
for (int i = 0; i < 16; ++i) {
auto* output = graph_proto->add_output();
output->set_name("split_output_" + std::to_string(i));
}

// Load model
std::shared_ptr<Model> model;
ASSERT_STATUS_OK(Model::Load(std::move(model_proto), model, nullptr, *logger_));

Graph& graph = model->MainGraph();
// First resolve should succeed
ASSERT_STATUS_OK(graph.Resolve());

// Now trigger the in-memory externalization
// This converts initializers > 127 bytes to OrtValues with external data references
Status convert_status = graph.ConvertInitializersIntoOrtValues();
ASSERT_TRUE(convert_status.IsOK()) << "ConvertInitializersIntoOrtValues failed: " << convert_status.ErrorMessage();

// Check if the initializer was actually externalized
const ONNX_NAMESPACE::TensorProto* initializer_after = nullptr;
ASSERT_TRUE(graph.GetInitializedTensor("split_sizes", initializer_after));
ASSERT_NE(initializer_after, nullptr);
// Verify the initializer now carries an in-memory external-data reference
ASSERT_TRUE(utils::HasExternalDataInMemory(*initializer_after))
<< "Initializer was not externalized to in-memory external data";

// Mark the graph as needing resolve to force shape inference to run again
graph.SetGraphResolveNeeded();

// Resolve again - this should trigger shape inference with the externalized initializer
// Without the fix, this will fail with "Cannot parse data from external tensors"
// With the fix, getInputData() materializes the external data for shape inference
Status second_resolve = graph.Resolve();
ASSERT_TRUE(second_resolve.IsOK()) << "Second resolve failed: " << second_resolve.ErrorMessage();
}

} // namespace test
} // namespace onnxruntime