Skip to content
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
e4c8244
Adjust InlineIfNode
yuslepukhin Jul 2, 2025
5dc217e
Merge branch 'main' into yusleoukhin/ort_initializers_ii
yuslepukhin Jul 8, 2025
ffb9bbe
Fix a bug in Initializer::ToProtoWithOrtValue
yuslepukhin Jul 8, 2025
e19b938
Address type conversion
yuslepukhin Jul 8, 2025
564c29f
Adjust ToProto() handling of in external data in memory
yuslepukhin Jul 9, 2025
507eb56
Make CoreML accept external initializers
yuslepukhin Jul 9, 2025
5e89498
Fix handling of external data in ToProto*()
yuslepukhin Jul 10, 2025
fdcdc12
Fix compiler error
yuslepukhin Jul 10, 2025
6e29d73
Merge branch 'yuslepukhin/ort_initializers_mac' into yusleoukhin/ort_…
yuslepukhin Jul 10, 2025
a0635b7
Address compile error in Mac code
yuslepukhin Jul 10, 2025
e81b06c
Merge branch 'main' into yusleoukhin/ort_initializers_ii
yuslepukhin Jul 10, 2025
4cbddc9
Adjust in memory references when saving optimized model
yuslepukhin Jul 12, 2025
6ab9839
Merge branch 'main' into yusleoukhin/ort_initializers_ii
yuslepukhin Jul 14, 2025
582f27c
Address ToGraphProto() issues
yuslepukhin Jul 15, 2025
a636579
Fix ToGraphProto() and recreate test databases for test_embedlayer_fu…
yuslepukhin Jul 16, 2025
83c313f
GCC not happy about attr placement
yuslepukhin Jul 16, 2025
2c0f97e
Address build error
yuslepukhin Jul 16, 2025
5c15725
Address compiler error
yuslepukhin Jul 16, 2025
a6b2576
Adjust test data for fastgelu fusion
yuslepukhin Jul 17, 2025
bf4e81c
[EP ABI] Load external initializer OrtValues on demand
adrianlizarraga Jul 21, 2025
7077654
Add comment about unique_ptr<OrtValue>
adrianlizarraga Jul 21, 2025
f59c9c6
Address review comments
yuslepukhin Jul 21, 2025
64de315
Add comment and resolve compiler error
yuslepukhin Jul 21, 2025
ff49046
Update utility to only load OrtValue for external initializers
adrianlizarraga Jul 22, 2025
820400e
Add unit tests that load external initializers as OrtValues on deman
adrianlizarraga Jul 22, 2025
38de20f
Dont require non-minimal build to load an external initializer into a…
adrianlizarraga Jul 22, 2025
582d6ba
Add draft of APIs to get external initializer info
adrianlizarraga Jul 22, 2025
eb6a4a1
Address some review comments
adrianlizarraga Jul 22, 2025
b3b384c
Clean up before refactor to incrementally build ext_initializer_infos_
adrianlizarraga Jul 22, 2025
397fd13
Address review comments
yuslepukhin Jul 22, 2025
a9446ba
Test the new API functions to get external initializer information
adrianlizarraga Jul 22, 2025
74adc0c
Address ReplaceInitializedTensor
adrianlizarraga Jul 23, 2025
b6073aa
incrementally add elements to external_data_info_ as needed
adrianlizarraga Jul 23, 2025
93d5614
Address review comments
yuslepukhin Jul 23, 2025
5b2f994
Clean up
adrianlizarraga Jul 23, 2025
8fe5d11
Merge branch 'yusleoukhin/ort_initializers_ii' into adrianl/ep-plugin…
adrianlizarraga Jul 23, 2025
15e6d5f
Correct tensor size calculation
adrianlizarraga Jul 23, 2025
9fd06a0
Test and fix cases where we add an entry to external_data_infos_ for …
adrianlizarraga Jul 23, 2025
9a8778b
Use HasExternalDataInFile just to be more general
adrianlizarraga Jul 23, 2025
647e659
Merge main and fix conflicts
adrianlizarraga Jul 23, 2025
42aba9b
Apply suggestions from code review
adrianlizarraga Jul 23, 2025
786e6d7
Address more review comments
adrianlizarraga Jul 23, 2025
e65855d
Address review comment: do not cache OrtValue over external initializ…
adrianlizarraga Jul 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 51 additions & 1 deletion include/onnxruntime/core/graph/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "core/common/inlined_containers.h"
#endif
#include "core/common/span_utils.h"
#include "core/common/safeint.h"
#include "core/common/status.h"
#include "core/common/logging/logging.h"
#include "core/framework/ort_value.h"
Expand Down Expand Up @@ -688,6 +689,15 @@ class Node {
bool can_be_saved_;
};

// Stores information on an initializer (TensorProto) that is stored in an external file.
// Graph tracks this information because most initializers are now TensorProtos that refer to an
// OrtValue Tensor, which looks like an external initializer, but isn't.
struct ExternalInitializerInfo {
// Path of the external file that holds the initializer's data.
std::basic_string<ORTCHAR_T> file_path;
// Offset of the initializer's data within the external file.
// NOTE(review): presumably a byte offset from the start of the file - confirm against the loader.
int64_t file_offset = 0;
// Number of bytes of initializer data stored in the external file.
SafeInt<size_t> tensor_byte_size = 0;
};

/**
@class Graph
The Graph representation containing the graph inputs and outputs, the Node instances,
Expand Down Expand Up @@ -788,6 +798,26 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
*/
bool GetOrtValueInitializer(const std::string& name, OrtValue& value, bool check_outer_scope = false) const;

/// <summary>
/// Loads an initializer with data in an external file into an OrtValue.
/// </summary>
/// <param name="name">The name of the initializer.</param>
/// <param name="value">Output parameter set to the loaded OrtValue.
/// Is set to an existing OrtValue if already loaded.</param>
/// <returns>A status indicating an error or success. An error occurs if `name` is not an initializer
/// with external data.</returns>
Status LoadExternalInitializerAsOrtValue(const std::string& name, OrtValue& value);

/// <summary>
/// Gets information (external filepath, file offset, num bytes) for an initializer with data in an external file.
/// </summary>
/// <param name="name">The initializer's name.</param>
/// <param name="ext_info">Output parameter set to the location information of the external data.</param>
/// <param name="check_outer_scope">Set to true if parent graphs should be checked.</param>
/// <returns>True if `name` refers to an initializer with data in an external file. Otherwise, returns false</returns>
bool GetExternalInitializerInfo(const std::string& name, ExternalInitializerInfo& ext_info,
bool check_outer_scope = false) const;

/** Gets all the initializer tensors in this Graph. */
const InitializedTensorSet& GetAllInitializedTensors() const noexcept { return name_to_initial_tensor_; }

Expand Down Expand Up @@ -1198,8 +1228,16 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
#endif

#if !defined(ORT_MINIMAL_BUILD)
/** Gets the GraphProto representation of this Graph. */
/** Gets the GraphProto representation of this Graph only. */
const ONNX_NAMESPACE::GraphProto& ToGraphProto();

/// <summary>
/// This function recurses into subgraphs and examines each initializer.
/// If the initializer data points to an in-memory location, it is inlined;
/// otherwise, the initializer is copied as is, including any
/// external data references.
/// </summary>
/// <returns>GraphProto</returns>
ONNX_NAMESPACE::GraphProto ToGraphProto() const;

/** Gets the GraphProto representation of this Graph
Expand Down Expand Up @@ -1561,6 +1599,16 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
Status AddConstantProtoAsInitializer(const ONNX_NAMESPACE::NodeProto& constant_node_proto,
std::optional<std::string_view> new_name);

/// <summary>
/// This is a helper function to ::ToGraphProto()
/// It examines the subgraph initializers and converts in-memory data references to inline data.
/// Subgraph initializers are already copied using Node::ToProto()
/// </summary>
/// <param name="output_graph_proto"></param>
/// <param name="process_main">process main graph if true</param>
/// <returns></returns>
Status ProcessSubgraphsInmemoryData(ONNX_NAMESPACE::GraphProto& output_graph_proto, bool process_main) const;

/// <summary>
/// This function traverses the graph bottom up and externalizes
/// constant initializers along with their pre-packed blobs from different
Expand Down Expand Up @@ -1783,6 +1831,8 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
// in the Graph instance and retrieve during session state finalization.
std::unordered_map<std::string, OrtValue> ortvalue_initializers_;

std::unordered_map<std::string, ExternalInitializerInfo> ext_initializers_;

std::unordered_set<std::reference_wrapper<const std::string>,
std::hash<std::string>, std::equal_to<std::string>>
sparse_tensor_names_;
Expand Down
3 changes: 3 additions & 0 deletions include/onnxruntime/core/providers/utils/ort_graph_to_proto.h
Original file line number Diff line number Diff line change
Expand Up @@ -456,11 +456,14 @@ Ort::Status OrtGraphToProto(const OrtGraph& ort_graph,
auto* ext_data_entries = tensor_proto->mutable_external_data();
onnx::StringStringEntryProto* location_entry = ext_data_entries->Add();
onnx::StringStringEntryProto* offset_entry = ext_data_entries->Add();
onnx::StringStringEntryProto* length_entry = ext_data_entries->Add();

location_entry->set_key("location");
location_entry->set_value(ext_location);
offset_entry->set_key("offset");
offset_entry->set_value(std::to_string(ext_offset));
length_entry->set_key("length");
length_entry->set_value(std::to_string(data_bytes));
} else {
// User wants to store data inline the TensorProto's raw_data
tensor_proto->set_data_location(onnx::TensorProto_DataLocation_DEFAULT);
Expand Down
31 changes: 26 additions & 5 deletions onnxruntime/core/framework/tensorprotoutils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ DEFINE_INT4_UNPACK_TENSOR_WITH_RAW_DATA_IMPL(UInt4x2)
Status ReadExternalDataForTensor(const ONNX_NAMESPACE::TensorProto& tensor_proto,
const std::filesystem::path& tensor_proto_dir,
std::vector<uint8_t>& unpacked_tensor) {
ORT_RETURN_IF(utils::HasString(tensor_proto), "This function does not support string data");
PathString external_file_path;
onnxruntime::FileOffsetType file_offset;
SafeInt<size_t> tensor_byte_size;
Expand Down Expand Up @@ -264,12 +265,32 @@ Status TensorProtoWithExternalDataToTensorProto(
result.set_data_type(ten_proto.data_type());
result.mutable_dims()->CopyFrom(ten_proto.dims());

// Load the external data into memory
std::vector<uint8_t> unpacked_data;
ORT_RETURN_IF_ERROR(ReadExternalDataForTensor(ten_proto, model_path, unpacked_data));
// Strings can only be in memory
if (utils::HasString(ten_proto)) {
ORT_RETURN_IF_NOT(HasExternalDataInMemory(ten_proto),
"TensorProto with string data can only be in memory");

std::unique_ptr<onnxruntime::ExternalDataInfo> external_data_info;
ORT_RETURN_IF_ERROR(onnxruntime::ExternalDataInfo::Create(ten_proto.external_data(), external_data_info));

// For string data held in memory, the external data offset is the address of the first std::string.
if (utils::HasString(ten_proto)) {
auto tensor_shape = utils::GetTensorShapeFromTensorProto(ten_proto);
std::string* data = reinterpret_cast<std::string*>(external_data_info->GetOffset());
for (size_t i = 0, lim = narrow<size_t>(tensor_shape.Size()); i < lim; ++i) {
// Append a copy of this string to the result's string_data.
result.add_string_data(*data);
++data;
}
}
} else {
// Load the external data into memory
std::vector<uint8_t> unpacked_data;
ORT_RETURN_IF_ERROR(ReadExternalDataForTensor(ten_proto, model_path, unpacked_data));

// Set the raw data in the new tensor
result.set_raw_data(unpacked_data.data(), unpacked_data.size());
// Set the raw data in the new tensor
result.set_raw_data(unpacked_data.data(), unpacked_data.size());
}

new_tensor_proto = std::move(result);

Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/framework/tensorprotoutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,7 @@ inline bool HasName(const ONNX_NAMESPACE::TypeProto_Opaque& op_proto) {
/// </summary>
/// <param name="tensor_proto">tensor_proto</param>
/// <returns>true if ten_proto has external data and it is in memory</returns>
bool HasExternalDataInMemory(const ONNX_NAMESPACE::TensorProto& tensor_proto);
[[nodiscard]] bool HasExternalDataInMemory(const ONNX_NAMESPACE::TensorProto& tensor_proto);

/// <summary>
/// This function converts TensorProto with external data to TensorProto with inline data.
Expand Down
64 changes: 49 additions & 15 deletions onnxruntime/core/graph/ep_api_types.cc
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ Status EpValueInfo::GetInitializerValue(const OrtValue*& result) const {

// This gets an initializer value defined in this graph or in a parent graph (as long as the value
// is used in this graph).
result = graph_->GetInitializerValue(name_);
ORT_RETURN_IF_ERROR(graph_->GetInitializerValue(name_, result));
ORT_RETURN_IF(result == nullptr, "Unable to find initializer value named '", name_, "'.");
return Status::OK();
}
Expand Down Expand Up @@ -593,15 +593,22 @@ Status EpGraph::CreateImpl(std::unique_ptr<EpGraph> ep_graph, const GraphViewer&
initializer_value_infos.push_back(value_info);

// Initialize OrtValue for the initializer.
// Note: using std::unique_ptr<OrtValue> because we return an OrtValue* to the user and we want it to be stable.
auto initializer_value = std::make_unique<OrtValue>();
bool graph_has_ortvalue = graph_viewer.GetGraph().GetOrtValueInitializer(initializer_name, *initializer_value,
/*check_outer_scope*/ false);

if (!graph_has_ortvalue) {
// onnxruntime::Graph does not have an OrtValue for this initializer, so create one from the TensorProto.
// This should only happen for small initializers that are needed for ONNX shape inferencing.
ORT_RETURN_IF_ERROR(utils::TensorProtoToOrtValue(Env::Default(), graph_viewer.ModelPath(), *tensor_proto,
initializer_allocator, *initializer_value));
ExternalInitializerInfo ext_info = {};

if (graph_viewer.GetGraph().GetExternalInitializerInfo(initializer_name, ext_info, /*check_outer_scope*/ false)) {
// Do nothing for external initializers. Will load/mmap into an OrtValue on demand.
assert(!initializer_value->IsAllocated());
} else {
// Copy to OrtValue. This should only happen for small initializers.
ORT_RETURN_IF_ERROR(utils::TensorProtoToOrtValue(Env::Default(), graph_viewer.ModelPath(), *tensor_proto,
initializer_allocator, *initializer_value));
}
}

initializer_values.emplace(value_info->GetName(), std::move(initializer_value));
Expand Down Expand Up @@ -650,8 +657,10 @@ Status EpGraph::CreateImpl(std::unique_ptr<EpGraph> ep_graph, const GraphViewer&
}

EpValueInfo* outer_value_info = value_info_iter->second.get();
bool is_constant = false;

// Note: using std::unique_ptr<OrtValue> because we return an OrtValue* to the user and we want it to be stable.
auto outer_initializer_value = std::make_unique<OrtValue>();
bool is_constant = false;
const ONNX_NAMESPACE::TensorProto* outer_initializer = parent_graph->GetInitializer(implicit_name,
*outer_initializer_value,
is_constant,
Expand All @@ -665,11 +674,16 @@ Status EpGraph::CreateImpl(std::unique_ptr<EpGraph> ep_graph, const GraphViewer&
// Add the OrtValue if this is an initializer.
if (outer_initializer != nullptr) {
if (!outer_initializer_value->IsAllocated()) {
// onnxruntime::Graph does not have an OrtValue for this initializer, so create one from the TensorProto.
// This should only happen for small initializers that are needed for ONNX shape inferencing.
ORT_RETURN_IF_ERROR(utils::TensorProtoToOrtValue(Env::Default(), parent_graph->ModelPath(),
*outer_initializer, initializer_allocator,
*outer_initializer_value));
ExternalInitializerInfo ext_info = {};

if (parent_graph->GetExternalInitializerInfo(implicit_name, ext_info, /*check_outer_scope*/ true)) {
// Do nothing for external initializers. Will load/mmap into an OrtValue on demand.
} else {
// Copy to OrtValue. This should only happen for small initializers.
ORT_RETURN_IF_ERROR(utils::TensorProtoToOrtValue(Env::Default(), parent_graph->ModelPath(),
*outer_initializer, initializer_allocator,
*outer_initializer_value));
}
}
outer_scope_initializer_values.emplace(outer_value_info->GetName(), std::move(outer_initializer_value));
}
Expand Down Expand Up @@ -804,20 +818,40 @@ const EpNode* EpGraph::GetNode(NodeIndex node_index) const {
return index_to_ep_node_.GetEpNode(node_index);
}

const OrtValue* EpGraph::GetInitializerValue(std::string_view name) const {
/// Looks up the OrtValue of the initializer called `name`.
///
/// Initializers stored for this graph are searched first, then the outer-scope initializers
/// recorded for this graph. An OrtValue that is found but not yet allocated is loaded on demand
/// through Graph::LoadExternalInitializerAsOrtValue before it is returned.
///
/// @param name   Name of the initializer to look up.
/// @param result Set to the stored OrtValue on a hit, or to nullptr when `name` is unknown.
///               Left untouched if loading fails.
/// @return Status::OK() on a hit or a miss; the loader's error status if on-demand loading fails.
Status EpGraph::GetInitializerValue(std::string_view name, const OrtValue*& result) const {
  // Searches `values` for `name`. On a hit, makes sure the stored OrtValue holds data and publishes
  // its address through `result`. `found` tells the caller whether `values` contained `name`.
  auto find_and_load = [&](const auto& values, bool& found) -> Status {
    const auto entry = values.find(name);
    found = (entry != values.end());
    if (!found) {
      return Status::OK();
    }

    OrtValue& value = *entry->second;
    if (!value.IsAllocated()) {
      // Lazy load the OrtValue. This happens for external initializers.
      // The load goes through the Graph, which is only reachable as const from the viewer.
      Graph& mutable_graph = const_cast<Graph&>(graph_viewer_.GetGraph());
      ORT_RETURN_IF_ERROR(mutable_graph.LoadExternalInitializerAsOrtValue(std::string(name), value));
    }

    result = &value;
    return Status::OK();
  };

  bool found = false;

  // Initializers defined in this graph's own scope are checked first.
  ORT_RETURN_IF_ERROR(find_and_load(initializer_values_, found));
  if (found) {
    return Status::OK();
  }

  // Fall back to outer-scope initializers.
  // Only finds a value if the outer initializer value is used within this graph.
  ORT_RETURN_IF_ERROR(find_and_load(outer_scope_initializer_values_, found));
  if (found) {
    return Status::OK();
  }

  // Unknown name: not an error, but there is nothing to return.
  result = nullptr;
  return Status::OK();
}
} // namespace onnxruntime
2 changes: 1 addition & 1 deletion onnxruntime/core/graph/ep_api_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ struct EpGraph : public OrtGraph {
// Considers both constant and non-constant initializers.
// Supports initializers defined in an outer scope as long as that initializer is used
// within this graph.
const OrtValue* GetInitializerValue(std::string_view name) const;
Status GetInitializerValue(std::string_view name, const OrtValue*& value) const;

private:
/// <summary>
Expand Down
Loading
Loading