
Commit

Merge remote-tracking branch 'origin/snnn/retire_t4' into snnn/vcpkg
snnn committed Jan 2, 2025
2 parents 0d33d19 + 577e398 commit 08a0280
Showing 342 changed files with 3,475 additions and 2,526 deletions.
2 changes: 1 addition & 1 deletion cgmanifests/generated/cgmanifest.json
@@ -26,7 +26,7 @@
"component": {
"type": "git",
"git": {
"commitHash": "595228d99e3977ac27cb79d5963adda262af99ad",
"commitHash": "b8baa8446686496da4cc8fda09f2b6fe65c2a02c",
"repositoryUrl": "https://github.com/onnx/onnx.git"
},
"comments": "git submodule at cmake/external/onnx"
3 changes: 1 addition & 2 deletions cmake/CMakeLists.txt
@@ -130,8 +130,7 @@ option(onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS "Dump debug information about node
cmake_dependent_option(onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS_ENABLE_DUMP_TO_SQLDB "Build dump debug information about node inputs and outputs with support for sql database." OFF "onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS" OFF)

# When loading a delay loaded DLL, Windows searches the main EXE's folder first.
# In a Python process, it searches where python.exe lives, but it doesn't search the python package's installation folder. Therefore we cannot enable this flag when Python is enabled.
cmake_dependent_option(onnxruntime_ENABLE_DELAY_LOADING_WIN_DLLS "Delay load some of the dependent DLLs that are part of the OS" ON "WIN32;NOT GDK_PLATFORM;NOT onnxruntime_ENABLE_PYTHON" OFF)
cmake_dependent_option(onnxruntime_ENABLE_DELAY_LOADING_WIN_DLLS "Delay load some of the dependent DLLs that are part of the OS" ON "WIN32;NOT GDK_PLATFORM" OFF)
option(onnxruntime_USE_DML "Build with DirectML support" OFF)
option(onnxruntime_USE_MIGRAPHX "Build with AMDMIGraphX support" OFF)
option(onnxruntime_USE_WINML "Build with WinML support" OFF)
2 changes: 1 addition & 1 deletion cmake/deps.txt
@@ -35,7 +35,7 @@ microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip;cf36
microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.230629.1.zip;e4a542a323c070376f7c2d1973d0f7ddbc1d2fa5
mimalloc;https://github.com/microsoft/mimalloc/archive/refs/tags/v2.1.1.zip;d5ee7d34223d0567892db5179849939c8769dc41
mp11;https://github.com/boostorg/mp11/archive/refs/tags/boost-1.82.0.zip;9bc9e01dffb64d9e0773b2e44d2f22c51aace063
onnx;https://github.com/onnx/onnx/archive/refs/tags/v1.16.1.zip;2eb9198bb352757d5ff13977cbe0634898e0837c
onnx;https://github.com/onnx/onnx/archive/refs/tags/v1.17.0.zip;13a60ac5217c104139ce0fd024f48628e7bcf5bc
# Use the latest commit of 10.7-GA
onnx_tensorrt;https://github.com/onnx/onnx-tensorrt/archive/9c69a24bc2e20c8a511a4e6b06fd49639ec5300a.zip;ff1fe9af78eb129b4a4cdcb7450b7390b4436dd3
protobuf;https://github.com/protocolbuffers/protobuf/archive/refs/tags/v21.12.zip;7cf2733949036c7d52fda017badcab093fe73bfa
2 changes: 1 addition & 1 deletion cmake/external/onnx
Submodule onnx updated 908 files
3 changes: 3 additions & 0 deletions cmake/onnxruntime_python.cmake
@@ -70,6 +70,9 @@ endif()
onnxruntime_add_shared_library_module(onnxruntime_pybind11_state ${onnxruntime_pybind_srcs})

if(MSVC)
# The following source file is only needed for the EPs that use delay loading, namely DML and WebGPU.
target_sources(onnxruntime_pybind11_state PRIVATE "${ONNXRUNTIME_ROOT}/core/dll/delay_load_hook.cc")

target_compile_options(onnxruntime_pybind11_state PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /utf-8>" "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>")
target_compile_options(onnxruntime_pybind11_state PRIVATE "/bigobj")
endif()
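For context on the change above: delay_load_hook.cc plugs into MSVC's delay-load mechanism so that delay-loaded EP DLLs (DML, WebGPU) can be resolved from the Python package's folder rather than only from the folder of python.exe. The file itself is not shown in this diff; the sketch below only illustrates how a delayimp.h notification hook is typically wired up, and the directory-resolution helper is an assumption rather than ORT's actual logic.

// Minimal illustrative sketch of an MSVC delay-load notification hook.
// NOTE: this is NOT the contents of core/dll/delay_load_hook.cc; it only
// demonstrates the delayimp.h mechanism such a file builds on.
#include <Windows.h>
#include <delayimp.h>
#include <string>

namespace {

// Hypothetical helper: resolve a DLL name against the directory containing
// this module (e.g. the installed Python package folder) instead of the
// directory that holds python.exe.
HMODULE LoadFromThisModulesDirectory(const char* dll_name) {
  char path[MAX_PATH];
  HMODULE this_module = nullptr;
  ::GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
                           GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
                       reinterpret_cast<LPCSTR>(&LoadFromThisModulesDirectory),
                       &this_module);
  ::GetModuleFileNameA(this_module, path, MAX_PATH);
  std::string dir(path);
  dir.erase(dir.find_last_of("\\/") + 1);
  return ::LoadLibraryA((dir + dll_name).c_str());
}

// Called by the delay-load helper before it loads a delayed DLL.
FARPROC WINAPI DelayLoadNotifyHook(unsigned dliNotify, PDelayLoadInfo pdli) {
  if (dliNotify == dliNotePreLoadLibrary) {
    if (HMODULE handle = LoadFromThisModulesDirectory(pdli->szDll)) {
      return reinterpret_cast<FARPROC>(handle);
    }
  }
  return nullptr;  // fall back to the default search order
}

}  // namespace

// Register the hook with the delay-load helper.
extern "C" const PfnDliHook __pfnDliNotifyHook2 = DelayLoadNotifyHook;

With such a hook in place, the "NOT onnxruntime_ENABLE_PYTHON" restriction on onnxruntime_ENABLE_DELAY_LOADING_WIN_DLLS in cmake/CMakeLists.txt is no longer needed, which matches the option change earlier in this commit.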
941 changes: 0 additions & 941 deletions cmake/patches/onnx/onnx.patch

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/ContribOperators.md
@@ -1625,7 +1625,7 @@ This version of the operator has been available since version 1 of the 'com.micr
#### Type Constraints

<dl>
<dt><tt>T</tt> : tensor(int8), tensor(int16), tensor(int32), tensor(int64), tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(float16), tensor(float), tensor(double)</dt>
<dt><tt>T</tt> : tensor(int8), tensor(int16), tensor(int32), tensor(int64), tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(float16), tensor(float), tensor(double), tensor(bfloat16)</dt>
<dd>Constrain input and output types.</dd>
</dl>

1 change: 1 addition & 0 deletions include/onnxruntime/core/framework/op_kernel.h
@@ -7,6 +7,7 @@

// It is safe to include the below header even if SHARED_PROVIDER macro is enabled
// as it doesn't include any pb headers.
#include "core/framework/buffer_deleter.h"
#include "core/framework/prepacked_weights_container.h"

#ifndef SHARED_PROVIDER
92 changes: 58 additions & 34 deletions include/onnxruntime/core/graph/graph.h
@@ -3,14 +3,15 @@

#pragma once

#include <filesystem>
#include <functional>
#include <limits>
#include <memory>
#include <optional>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <filesystem>

#include "core/common/flatbuffers.h"

@@ -19,13 +20,14 @@
#include "core/common/common.h"
#include "core/common/path_string.h"
#include "core/common/const_pointer_container.h"
#include "core/common/inlined_containers_fwd.h"
#if !defined(ORT_MINIMAL_BUILD)
#include "core/common/inlined_containers.h"
#endif
#include "core/common/inlined_containers_fwd.h"
#include "core/common/span_utils.h"
#include "core/common/status.h"
#include "core/common/logging/logging.h"
#include "core/framework/prepacked_weights_container.h"
#include "core/graph/onnx_protobuf.h"
#include "core/graph/basic_types.h"
#include "core/graph/constants.h"
@@ -41,6 +43,7 @@ namespace onnxruntime {
class Graph;
struct IndexedSubGraph;
class Model;
struct ModelSavingOptions;
class OpSignature;

#if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
@@ -1153,29 +1156,6 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
const ONNX_NAMESPACE::GraphProto& ToGraphProto();
ONNX_NAMESPACE::GraphProto ToGraphProto() const;

// Options to align external initializer offset.
// For models running on CPU, ORT will try to use mmap to load external initializers.
// To use mmap, external initializer need to be offset aligned.
// ORT saves external initializers into signle data file, each initializer is accessed with
// offset(start position of initializer) and length(byte length of initializer) of the data file.
// To use mmap, each offset need to be aligned which means offset need to divisible by
// allocation granularity(64KB for windows and 4K for other OSes).
// With align_offset to true, ORT will align offset for large initializer when
// save ONNX model with external data file.
struct OffsetAlignmentInfo {
// Offset will always be page aligned and allocation granularity aligned for mmap support.
// This is done by padding previous tensor data with zeros keeping same length.
bool align_offset = false;
// Alignment threshold for size of data.
// Having a low threshold will waste file space for small initializers.
// Only when tensor's data size is > the page_align_threshold it will be force aligned.
// Default to 1MB.
int64_t align_threshold = 1048576;
// The allocation Granularity for mmap() support.
// Typically 64KB for Windows & 4KB for other OSes. Default to 64KB.
int64_t allocation_granularity = 65536;
};

/** Gets the GraphProto representation of this Graph
@param external_file_path File path of the binary file to use for initializers.
@param model_file_path path of the model file.
@@ -1186,15 +1166,7 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
*/
ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path,
const std::filesystem::path& model_file_path,
size_t initializer_size_threshold,
const OffsetAlignmentInfo& align_info) const;

ONNX_NAMESPACE::GraphProto ToGraphProtoWithExternalInitializers(const std::filesystem::path& external_file_path,
const std::filesystem::path& model_file_path,
size_t initializer_size_threshold) const {
OffsetAlignmentInfo default_options;
return ToGraphProtoWithExternalInitializers(external_file_path, model_file_path, initializer_size_threshold, default_options);
}
const ModelSavingOptions& model_saving_options) const;

/** Gets the ISchemaRegistry instances being used with this Graph. */
IOnnxRuntimeOpSchemaCollectionPtr GetSchemaRegistry() const;
@@ -1400,6 +1372,18 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi

#endif // !defined(ORT_MINIMAL_BUILD)

// This function constructs PrepackedSharedContainer in the root graph only
// and initializes a reference to it in all (sub)graphs
void ConstructPrepackedSharedContainerAndSetMode(bool saving_mode_on);

const PrepackedWeightsForGraph& GetPrepacked() const noexcept {
return *prepacked_weights_for_graph_;
}

PrepackedWeightsForGraph& GetPrepacked() noexcept {
return *prepacked_weights_for_graph_;
}

/** Returns the Node containing the GraphProto for this Graph instance if IsSubgraph is true */
const Node* ParentNode() const { return parent_node_; }

@@ -1519,6 +1503,31 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
Status AddConstantProtoAsInitializer(const ONNX_NAMESPACE::NodeProto& constant_node_proto,
std::optional<std::string_view> new_name);

/// <summary>
/// This function traverses the graph bottom-up and externalizes
/// constant initializers along with their pre-packed blobs from different
/// kernels. It writes the constant initializers to the external file with any pre-packed
/// blobs (if enabled and produced for a given initializer) and then modifies the
/// TensorProto entries with external data references.
/// </summary>
/// <param name="model_path">model file path from Model</param>
/// <param name="external_file_path">a binary file path for relative to the model file path
/// where the initializers data is written</param>
/// <param name="model_external_file_path">model file folder path with external file path appended</param>
/// <param name="model_saving_options">model saving options including alignment and pre-packs</param>
/// <param name="output_graph_proto">The graph proto to be modified</param>
/// <param name="external_stream">external file stream</param>
/// <param name="external_offset">current external file offset updated with each write</param>
/// <returns>Status instance</returns>
Status AddExternalInitializersToGraphProtoImpl(
const std::filesystem::path& model_path,
const std::filesystem::path& external_file_path,
const std::filesystem::path& model_external_file_path,
const ModelSavingOptions& model_saving_options,
ONNX_NAMESPACE::GraphProto& output_graph_proto,
std::ostream& external_stream,
int64_t& external_offset) const;

#endif

Version IrVersion() const noexcept {
@@ -1703,6 +1712,21 @@ class Graph { // NOLINT(clang-analyzer-optin.performance.Padding): preserve exi
std::hash<std::string>, std::equal_to<std::string>>
sparse_tensor_names_;

// Pre-packed blobs container that stores pre-packed initializer
// data that is:
// - mem-mapped from disk
// - shared within the session
// - shared across sessions by transferring the ownership of loaded data entries to
// SessionState::PrepackedWeightsContainer* if one is present.
// This container is optional because it is present only in the root graph.
std::optional<PrepackedKeyToBlobMap> prepacked_key_to_blobs_;

// This container contains a reference to the root prepacked_key_to_blobs_
// and also (in the save mode) records association between the initializer
// names and their pre-packed blobs (via keys).
// This is optional due to delayed construction.
std::optional<PrepackedWeightsForGraph> prepacked_weights_for_graph_;

#if !defined(ORT_MINIMAL_BUILD) || defined(ORT_EXTENDED_MINIMAL_BUILD)
// Runtime optimization storage.
// Note: runtime_optimizations_ == *runtime_optimizations_ptr_ and must be initialized
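To illustrate the graph.h API change above: the OffsetAlignmentInfo struct and the size_t-threshold overload of ToGraphProtoWithExternalInitializers are removed, and callers now pass a single ModelSavingOptions (declared in the new header shown next). A minimal sketch of the new call against ORT's internal Graph API (not the public inference API), assuming a valid onnxruntime::Graph and illustrative file paths:

// Sketch of calling the updated (internal) Graph API with ModelSavingOptions.
#include "core/graph/graph.h"
#include "core/graph/model_saving_options.h"

ONNX_NAMESPACE::GraphProto SaveGraphWithExternalData(const onnxruntime::Graph& graph) {
  // Externalize initializers larger than 1024 bytes into model.onnx.data.
  onnxruntime::ModelSavingOptions options(/*size_threshold=*/1024);
  options.align_offset = true;        // pad offsets so initializers can be mmap'd
  options.align_threshold = 1048576;  // only force-align initializers larger than 1MB
  return graph.ToGraphProtoWithExternalInitializers(
      /*external_file_path=*/"model.onnx.data",
      /*model_file_path=*/"model.onnx",
      options);
}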
44 changes: 44 additions & 0 deletions include/onnxruntime/core/graph/model_saving_options.h
@@ -0,0 +1,44 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

namespace onnxruntime {

class PrepackedWeightsForGraph;

// These options affect how model initializers are written to the external data file.
// This includes options to align external initializer offsets.
// For models running on CPU, ORT will try to use mmap to load external
// initializers. To use mmap, each external initializer needs to be offset-aligned.
// ORT saves external initializers into a single data file; each initializer is
// accessed with an offset (start position of the initializer) and a length (byte
// length of the initializer) within that file. To use mmap, each offset must be
// aligned, i.e. divisible by the allocation granularity (64KB on Windows and 4KB
// on other OSes). With align_offset set to true, ORT will align the offsets of
// large initializers when saving the ONNX model with an external data file.
struct ModelSavingOptions {
explicit ModelSavingOptions(size_t size_threshold)
: initializer_size_threshold(size_threshold) {}

// Minimal initializer size in bytes for it to be externalized on disk
size_t initializer_size_threshold;
// Offsets will always be page-aligned and allocation-granularity-aligned for
// mmap support. This is done by padding the previous tensor's data with zeros
// while keeping its recorded length the same.
bool align_offset = false;
// Alignment threshold for the data size.
// A low threshold wastes file space on small initializers.
// Only when a tensor's data size is greater than align_threshold will it be
// force-aligned. Defaults to 1MB.
int64_t align_threshold = 1048576;
// The allocation granularity for mmap() support.
// Typically 64KB on Windows and 4KB on other OSes; the default follows the build platform.
#ifdef _WIN32
int64_t allocation_granularity = 65536;
#else
int64_t allocation_granularity = 4096;
#endif
};

} // namespace onnxruntime
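The alignment rule spelled out in the comments above (pad the write offset up to the allocation granularity, but only for initializers larger than align_threshold, filling the gap with zeros) reduces to a small piece of arithmetic. A sketch under those assumptions, not code taken from the ORT sources:

// Sketch of the offset-alignment rule described by ModelSavingOptions.
// Given the current write offset in the external data file and the size of the
// next initializer, return the (possibly padded) offset at which to write it.
#include <cstdint>

int64_t AlignedExternalDataOffset(int64_t current_offset, int64_t tensor_byte_size,
                                  bool align_offset, int64_t align_threshold,
                                  int64_t allocation_granularity) {
  if (!align_offset || tensor_byte_size <= align_threshold) {
    return current_offset;  // small initializers are written back to back
  }
  // Round up to the next multiple of the allocation granularity; the gap is
  // filled with zero padding so earlier entries keep their recorded lengths.
  const int64_t remainder = current_offset % allocation_granularity;
  return remainder == 0 ? current_offset
                        : current_offset + (allocation_granularity - remainder);
}

// Example: with a 64KB granularity, a 2MB initializer that would start at
// offset 100000 is instead written at offset 131072 (2 * 65536).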
@@ -61,6 +61,22 @@ static const char* const kCoremlProviderOption_SpecializationStrategy = "Special
static const char* const kCoremlProviderOption_ProfileComputePlan = "ProfileComputePlan";
// please refer to https://developer.apple.com/documentation/coreml/mlmodelconfiguration/allowlowprecisionaccumulationongpu
static const char* const kCoremlProviderOption_AllowLowPrecisionAccumulationOnGPU = "AllowLowPrecisionAccumulationOnGPU";
// Specify the directory in which to cache any CoreML models created from the ONNX model.
// The CoreML EP converts an ONNX subgraph to a CoreML model and saves it to disk.
// If this path is not specified, the model is saved to a temp directory and deleted after the session is closed.
// Otherwise, the model is saved to the specified path and the user is responsible for deleting it.

// We do NOT detect whether the ONNX model has changed and no longer matches the cached model.
// The user should carefully manage the cache when modifying/replacing a model.
// The cache key is generated from:
// 1. A user-provided key in metadata_props, if found (preferred)
// 2. A hash of the model URL the inference session was created with
// 3. A hash of the input/output names of the model
// See the onnxruntime API documentation for how to set metadata_props: https://onnxruntime.ai/docs/execution-providers/CoreML-ExecutionProvider.html#configuration-options
static const char* const kCoremlProviderOption_ModelCacheDirectory = "ModelCacheDirectory";

// User provided cache-key in metadata_props.
static const char* const kCOREML_CACHE_KEY = "COREML_CACHE_KEY";

#ifdef __cplusplus
extern "C" {
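A hedged usage sketch for the new CoreML cache option, via the C++ API's AppendExecutionProvider overload that takes string provider options. It assumes an ORT build whose CoreML EP accepts these keys; the ModelFormat value and the cache directory path are illustrative, not taken from this diff.

// Sketch: enabling the CoreML model cache directory from the C++ API.
#include <onnxruntime_cxx_api.h>
#include <string>
#include <unordered_map>

Ort::SessionOptions MakeCoreMLSessionOptions() {
  Ort::SessionOptions so;
  std::unordered_map<std::string, std::string> coreml_options{
      {"ModelFormat", "MLProgram"},
      // Persist converted CoreML models here instead of a temp directory.
      {"ModelCacheDirectory", "/tmp/ort_coreml_cache"},
  };
  so.AppendExecutionProvider("CoreML", coreml_options);
  return so;
}

If the cached model should survive moving or renaming the ONNX file, the user-provided COREML_CACHE_KEY metadata_props entry described above is the preferred way to pin the cache key.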
2 changes: 1 addition & 1 deletion include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -46,7 +46,7 @@ extern "C" {

//! @}
// SAL2 Definitions
#ifndef _WIN32
#ifndef _MSC_VER
#define _In_
#define _In_z_
#define _In_opt_
@@ -250,6 +250,17 @@ static const char* const kOrtSessionOptionsOptimizedModelExternalInitializersFil
static const char* const kOrtSessionOptionsOptimizedModelExternalInitializersMinSizeInBytes =
"session.optimized_model_external_initializers_min_size_in_bytes";

// Use this config when saving pre-packed constant initializers to an external data file.
// This allows pre-packed initializers to be memory-mapped on model load, leaving it to
// the OS to manage the amount of memory consumed by them. Otherwise, the
// pre-packed data resides on the heap.
//
// - "0": Default. Do not save pre-packed initializers to a data file.
// - "1": Save pre-packed constant initializers to an external data file.
// Sample usage: sess_options.add_session_config_entry(kOrtSessionOptionsSavePrePackedConstantInitializers, "1")
static const char* const kOrtSessionOptionsSavePrePackedConstantInitializers =
"session.save_external_prepacked_constant_initializers";

// Enable EP context feature to dump the partitioned graph which includes the EP context into Onnx file.
// The dumped Onnx model with EP context can be used for future inference to avoid the EP graph partitioning/compile overhead.
// "0": disable. (default)
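A hedged sketch of opting into the new behavior from the C++ API, using only the key strings defined above; the optimized-model file name is illustrative. In practice the external initializers file name must also be configured via its own session config entry (its declaration appears, truncated, in the hunk header above) for initializers to be written to a separate data file.

// Sketch: save an optimized model with externalized initializers and also
// persist pre-packed constant initializers so they can be memory-mapped on load.
#include <onnxruntime_cxx_api.h>

Ort::SessionOptions MakePrepackSavingOptions() {
  Ort::SessionOptions so;
  so.SetOptimizedModelFilePath(ORT_TSTR("optimized_model.onnx"));
  // Externalize initializers of at least 1KB (key defined earlier in this header).
  so.AddConfigEntry("session.optimized_model_external_initializers_min_size_in_bytes", "1024");
  // New in this change: also write pre-packed constant initializers to the
  // external data file instead of keeping the pre-packed data on the heap.
  so.AddConfigEntry("session.save_external_prepacked_constant_initializers", "1");
  return so;
}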
